Error in finding similarity using NMF and TF-IDF for a data set for tag “unknown”
$begingroup$
# Find the rows most similar to the 'unknown' category using TF-IDF + NMF features.

# Perform the necessary imports
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler, Normalizer

df = pd.read_csv('india-news-headlines.csv')
df.head()

# Work on the first 1000 rows only.
Labels = df['headline_category'][:1000]
News = df['headline_text'][:1000]
hf = pd.DataFrame({'Category': Labels, 'Headlines': News})

# NOTE(review): the TF-IDF matrix is built from the category strings, not the
# headline text -- confirm this is what is intended.
tfidf = TfidfVectorizer()
features = tfidf.fit_transform(hf['Category']).toarray()
features.shape

# Create a MaxAbsScaler: scaler
scaler = MaxAbsScaler()
# Create an NMF model: nmf
nmf = NMF(n_components=10)
# Create a Normalizer: normalizer
normalizer = Normalizer()
# Create a pipeline: pipeline
pipeline = make_pipeline(scaler, nmf, normalizer)
# Apply fit_transform to the TF-IDF features: norm_features
norm_features = pipeline.fit_transform(features)

# Create a DataFrame of the normalized NMF features indexed by category: nf
nf = pd.DataFrame(norm_features, index=Labels)

# Select the feature vector for 'unknown': artist
# The index holds category labels, so 'unknown' can match many rows and
# nf.loc['unknown'] would then be a DataFrame. Selecting with a list and taking
# the first match always yields a single row as a Series.
artist = nf.loc[['unknown']].iloc[0]

# Compute cosine similarities: similarities
# Normalizer scales each row to unit length, so the dot product with a row is
# its cosine similarity. The result is a Series, one value per row of nf.
similarities = nf.dot(artist)

# Display those with highest cosine similarity (Series.nlargest defaults to 5)
print(similarities.nlargest())
recommender-system
New contributor
$endgroup$
add a comment |
$begingroup$
# Find the rows most similar to the 'unknown' category using TF-IDF + NMF features.

# Perform the necessary imports
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler, Normalizer

df = pd.read_csv('india-news-headlines.csv')
df.head()

# Work on the first 1000 rows only.
Labels = df['headline_category'][:1000]
News = df['headline_text'][:1000]
hf = pd.DataFrame({'Category': Labels, 'Headlines': News})

# NOTE(review): the TF-IDF matrix is built from the category strings, not the
# headline text -- confirm this is what is intended.
tfidf = TfidfVectorizer()
features = tfidf.fit_transform(hf['Category']).toarray()
features.shape

# Create a MaxAbsScaler: scaler
scaler = MaxAbsScaler()
# Create an NMF model: nmf
nmf = NMF(n_components=10)
# Create a Normalizer: normalizer
normalizer = Normalizer()
# Create a pipeline: pipeline
pipeline = make_pipeline(scaler, nmf, normalizer)
# Apply fit_transform to the TF-IDF features: norm_features
norm_features = pipeline.fit_transform(features)

# Create a DataFrame of the normalized NMF features indexed by category: nf
nf = pd.DataFrame(norm_features, index=Labels)

# Select the feature vector for 'unknown': artist
# The index holds category labels, so 'unknown' can match many rows and
# nf.loc['unknown'] would then be a DataFrame. Selecting with a list and taking
# the first match always yields a single row as a Series.
artist = nf.loc[['unknown']].iloc[0]

# Compute cosine similarities: similarities
# Normalizer scales each row to unit length, so the dot product with a row is
# its cosine similarity. The result is a Series, one value per row of nf.
similarities = nf.dot(artist)

# Display those with highest cosine similarity (Series.nlargest defaults to 5)
print(similarities.nlargest())
recommender-system
New contributor
$endgroup$
add a comment |
$begingroup$
# Find the rows most similar to the 'unknown' category using TF-IDF + NMF features.

# Perform the necessary imports
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler, Normalizer

df = pd.read_csv('india-news-headlines.csv')
df.head()

# Work on the first 1000 rows only.
Labels = df['headline_category'][:1000]
News = df['headline_text'][:1000]
hf = pd.DataFrame({'Category': Labels, 'Headlines': News})

# NOTE(review): the TF-IDF matrix is built from the category strings, not the
# headline text -- confirm this is what is intended.
tfidf = TfidfVectorizer()
features = tfidf.fit_transform(hf['Category']).toarray()
features.shape

# Create a MaxAbsScaler: scaler
scaler = MaxAbsScaler()
# Create an NMF model: nmf
nmf = NMF(n_components=10)
# Create a Normalizer: normalizer
normalizer = Normalizer()
# Create a pipeline: pipeline
pipeline = make_pipeline(scaler, nmf, normalizer)
# Apply fit_transform to the TF-IDF features: norm_features
norm_features = pipeline.fit_transform(features)

# Create a DataFrame of the normalized NMF features indexed by category: nf
nf = pd.DataFrame(norm_features, index=Labels)

# Select the feature vector for 'unknown': artist
# The index holds category labels, so 'unknown' can match many rows and
# nf.loc['unknown'] would then be a DataFrame. Selecting with a list and taking
# the first match always yields a single row as a Series.
artist = nf.loc[['unknown']].iloc[0]

# Compute cosine similarities: similarities
# Normalizer scales each row to unit length, so the dot product with a row is
# its cosine similarity. The result is a Series, one value per row of nf.
similarities = nf.dot(artist)

# Display those with highest cosine similarity (Series.nlargest defaults to 5)
print(similarities.nlargest())
recommender-system
New contributor
$endgroup$
# Find the rows most similar to the 'unknown' category using TF-IDF + NMF features.

# Perform the necessary imports
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler, Normalizer

df = pd.read_csv('india-news-headlines.csv')
df.head()

# Work on the first 1000 rows only.
Labels = df['headline_category'][:1000]
News = df['headline_text'][:1000]
hf = pd.DataFrame({'Category': Labels, 'Headlines': News})

# NOTE(review): the TF-IDF matrix is built from the category strings, not the
# headline text -- confirm this is what is intended.
tfidf = TfidfVectorizer()
features = tfidf.fit_transform(hf['Category']).toarray()
features.shape

# Create a MaxAbsScaler: scaler
scaler = MaxAbsScaler()
# Create an NMF model: nmf
nmf = NMF(n_components=10)
# Create a Normalizer: normalizer
normalizer = Normalizer()
# Create a pipeline: pipeline
pipeline = make_pipeline(scaler, nmf, normalizer)
# Apply fit_transform to the TF-IDF features: norm_features
norm_features = pipeline.fit_transform(features)

# Create a DataFrame of the normalized NMF features indexed by category: nf
nf = pd.DataFrame(norm_features, index=Labels)

# Select the feature vector for 'unknown': artist
# The index holds category labels, so 'unknown' can match many rows and
# nf.loc['unknown'] would then be a DataFrame. Selecting with a list and taking
# the first match always yields a single row as a Series.
artist = nf.loc[['unknown']].iloc[0]

# Compute cosine similarities: similarities
# Normalizer scales each row to unit length, so the dot product with a row is
# its cosine similarity. The result is a Series, one value per row of nf.
similarities = nf.dot(artist)

# Display those with highest cosine similarity (Series.nlargest defaults to 5)
print(similarities.nlargest())
recommender-system
recommender-system
New contributor
New contributor
New contributor
asked 7 mins ago
manoj kumarmanoj kumar
1
1
New contributor
New contributor
add a comment |
add a comment |
0
active
oldest
votes
Your Answer
// When the "editor" feature is in use, load "mathjaxEditing" and register an
// editor-creation callback that prepares each new editor for MathJax, using
// `$...$` and `\(...\)` as the math delimiters.
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["$", "$"], ["\\(","\\)"]]);
});
});
}, "mathjax-editing");
// Once StackExchange reports ready, initialise the tag renderer and create the
// answer editor (after the "snippets" module has loaded, when snippets are enabled).
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "557"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Configure the answer editor via StackExchange.prepareEditor.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
                // HTML fragments: angle brackets are written as \u003c / \u003e and
                // inner double quotes are escaped so the literals stay valid.
                brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
        });
    }
});
manoj kumar is a new contributor. Be nice, and check out our Code of Conduct.
Sign up or log in
// Once StackExchange reports ready, call helpers.onClickDraftSave with the
// '#login-link' selector (presumably saves the draft when that link is
// clicked -- confirm against the StackExchange helpers).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
// Once StackExchange reports ready, call openid.initPostLogin for the
// '.new-post-login' selector, passing a URL-encoded link to this question's
// #new-answer anchor (presumably the post-login return URL -- confirm) and
// the 'question_page' context string.
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fdatascience.stackexchange.com%2fquestions%2f44959%2ferror-in-finding-similarity-using-nfm-and-tfidf-for-a-data-set-for-tag-unknown%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
0
active
oldest
votes
0
active
oldest
votes
active
oldest
votes
active
oldest
votes
manoj kumar is a new contributor. Be nice, and check out our Code of Conduct.
manoj kumar is a new contributor. Be nice, and check out our Code of Conduct.
manoj kumar is a new contributor. Be nice, and check out our Code of Conduct.
manoj kumar is a new contributor. Be nice, and check out our Code of Conduct.
Thanks for contributing an answer to Data Science Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Sign up or log in
// Once StackExchange reports ready, call helpers.onClickDraftSave with the
// '#login-link' selector (presumably saves the draft when that link is
// clicked -- confirm against the StackExchange helpers).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
// Once StackExchange reports ready, call openid.initPostLogin for the
// '.new-post-login' selector, passing a URL-encoded link to this question's
// #new-answer anchor (presumably the post-login return URL -- confirm) and
// the 'question_page' context string.
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fdatascience.stackexchange.com%2fquestions%2f44959%2ferror-in-finding-similarity-using-nfm-and-tfidf-for-a-data-set-for-tag-unknown%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
// Once StackExchange reports ready, call helpers.onClickDraftSave with the
// '#login-link' selector (presumably saves the draft when that link is
// clicked -- confirm against the StackExchange helpers).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
// Once StackExchange reports ready, call helpers.onClickDraftSave with the
// '#login-link' selector (presumably saves the draft when that link is
// clicked -- confirm against the StackExchange helpers).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
// Once StackExchange reports ready, call helpers.onClickDraftSave with the
// '#login-link' selector (presumably saves the draft when that link is
// clicked -- confirm against the StackExchange helpers).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown