Wednesday, 2 July 2014

Explanation for Extending Stemming using NLTK

Notebook
In [2]:
import nltk.stem
from sklearn.feature_extraction.text import CountVectorizer

# Obtain the tokenising callable from a CountVectorizer built with default
# settings; it turns a raw string into a list of word tokens.
cv = CountVectorizer()
analyzer = cv.build_analyzer()

# Snowball stemmer configured for English; .stem(word) returns the word's stem.
english_stemmer = nltk.stem.SnowballStemmer('english')

# Tokenise the sample sentence, stem every token, and print the resulting list.
doc = "A cook cooks delicious meals"
print(list(map(english_stemmer.stem, analyzer(doc))))
[u'cook', u'cook', u'delici', u'meal']

In []:
 

No comments:

Post a Comment