12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879 |
- .. Copyright (C) 2001-2019 NLTK Project
- .. For license information, see LICENSE.TXT
- ==========
- Stemmers
- ==========
- Overview
- ~~~~~~~~
- Stemmers remove morphological affixes from words, leaving only the
- word stem.
- >>> from __future__ import print_function
- >>> from nltk.stem import *
- Unit tests for the Porter stemmer
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- >>> from nltk.stem.porter import *
- Create a new Porter stemmer.
- >>> stemmer = PorterStemmer()
- Test the stemmer on a variety of inflected and derived words (plurals, past tenses, gerunds and derivations).
- >>> plurals = ['caresses', 'flies', 'dies', 'mules', 'denied',
- ... 'died', 'agreed', 'owned', 'humbled', 'sized',
- ... 'meeting', 'stating', 'siezing', 'itemization',
- ... 'sensational', 'traditional', 'reference', 'colonizer',
- ... 'plotted']
- >>> singles = [stemmer.stem(plural) for plural in plurals]
- >>> print(' '.join(singles)) # doctest: +NORMALIZE_WHITESPACE
- caress fli die mule deni die agre own humbl size meet
- state siez item sensat tradit refer colon plot
- Unit tests for Snowball stemmer
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- >>> from nltk.stem.snowball import SnowballStemmer
- See which languages are supported.
- >>> print(" ".join(SnowballStemmer.languages))
- arabic danish dutch english finnish french german hungarian italian
- norwegian porter portuguese romanian russian spanish swedish
- Create a new instance of a language-specific subclass.
- >>> stemmer = SnowballStemmer("english")
- Stem a word.
- >>> print(stemmer.stem("running"))
- run
- Optionally leave stopwords unstemmed by passing ``ignore_stopwords=True``.
- >>> stemmer2 = SnowballStemmer("english", ignore_stopwords=True)
- >>> print(stemmer.stem("having"))
- have
- >>> print(stemmer2.stem("having"))
- having
- The 'english' stemmer improves on the original 'porter' stemmer; for example, it stems "generously" less aggressively.
- >>> print(SnowballStemmer("english").stem("generously"))
- generous
- >>> print(SnowballStemmer("porter").stem("generously"))
- gener
- .. note::
- Extra stemmer tests can be found in `nltk.test.unit.test_stem`.
|