Class: Porter::Stemmer
- Inherits:
-
Object
- Object
- Porter::Stemmer
- Defined in:
- lib/porter-stemmer.rb
Constant Summary collapse
- STEP_2_SUFFIX_MAPPING =
{ 'ational' => 'ate', 'tional' => 'tion', 'enci' => 'ence', 'anci' => 'ance', 'izer' => 'ize', 'bli' => 'ble', 'alli' => 'al', 'entli' => 'ent', 'eli' => 'e', 'ousli' => 'ous', 'ization' => 'ize', 'ation' => 'ate', 'ator' => 'ate', 'alism' => 'al', 'iveness' => 'ive', 'fulness' => 'ful', 'ousness' => 'ous', 'aliti' => 'al', 'iviti' => 'ive', 'biliti' => 'ble', 'logi' => 'log' }
- STEP_2_SUFFIX_REGEXP =
/( ational | tional | enci | anci | izer | bli | alli | entli | eli | ousli | ization | ation | ator | alism | iveness | fulness | ousness | aliti | iviti | biliti | logi)$/x
- STEP_3_SUFFIX_MAPPING =
{ 'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic', 'ical' => 'ic', 'ful' => '', 'ness' => '' }
- STEP_3_SUFFIX_REGEXP =
/(icate|ative|alize|iciti|ical|ful|ness)$/
- STEP_4_SUFFIX_REGEXP =
/( al | ance | ence | er | ic | able | ible | ant | ement | ment | ent | ou | ism | ate | iti | ous | ive | ize)$/x
- CONSONANT =
consonant
"[^aeiou]"
- VOWEL =
vowel
"[aeiouy]"
- CONSONANT_SEQUENCE =
consonant sequence
"#{CONSONANT}(?>[^aeiouy]*)"
- VOWEL_SEQUENCE =
vowel sequence
"#{VOWEL}(?>[aeiou]*)"
- MGR0 =
Number of consonant sequences: [cc]vvcc… is m>0
/^(#{CONSONANT_SEQUENCE})?#{VOWEL_SEQUENCE}#{CONSONANT_SEQUENCE}/o
- MEQ1 =
[cc]vvcc[vv] is m=1
/^(#{CONSONANT_SEQUENCE})?#{VOWEL_SEQUENCE}#{CONSONANT_SEQUENCE}(#{VOWEL_SEQUENCE})?$/o
- MGR1 =
[cc]vvccvvcc… is m>1
/^(#{CONSONANT_SEQUENCE})?#{VOWEL_SEQUENCE}#{CONSONANT_SEQUENCE}#{VOWEL_SEQUENCE}#{CONSONANT_SEQUENCE}/o
- VOWEL_IN_STEM =
vowel in stem
/^(#{CONSONANT_SEQUENCE})?#{VOWEL}/o
Instance Method Summary collapse
Instance Method Details
#stem(word) ⇒ Object
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/porter-stemmer.rb', line 94

# Stems +word+ by running it through step1..step5 in order.
#
# Words shorter than three characters are returned unchanged (the very same
# object). For longer words the work is done on a private copy, so the
# caller's string is never modified and frozen strings are accepted.
#
# @param word [String] the word to stem
# @return [String] the stemmed word
def stem(word)
  return word if word.length < 3

  # Work on a copy: the in-place edits below must not leak into the
  # caller's string, and would raise FrozenError on a frozen argument.
  word = word.dup

  # Map initial y to Y so that the patterns never treat it as vowel
  word[0] = 'Y' if word[0] == 'y'

  word = step1(word)
  word = step2(word)
  word = step3(word)
  word = step4(word)
  word = step5(word)

  # Turn initial Y back to y
  word[0] = 'y' if word[0] == 'Y'

  word
end