Class: Porter::Stemmer

Inherits:
Object
  • Object
show all
Defined in:
lib/porter-stemmer.rb

Constant Summary collapse

# Step-2 suffix table: each key is a word ending and each value is the text
# paired with it (presumably the replacement applied by step2, which is not
# visible here). The keys are exactly the alternatives of STEP_2_SUFFIX_REGEXP.
# Frozen so the shared table cannot be modified at runtime.
STEP_2_SUFFIX_MAPPING =
{
  'ational' => 'ate',
  'tional'  => 'tion',
  'enci'    => 'ence',
  'anci'    => 'ance',
  'izer'    => 'ize',
  'bli'     => 'ble',
  'alli'    => 'al',
  'entli'   => 'ent',
  'eli'     => 'e',
  'ousli'   => 'ous',
  'ization' => 'ize',
  'ation'   => 'ate',
  'ator'    => 'ate',
  'alism'   => 'al',
  'iveness' => 'ive',
  'fulness' => 'ful',
  'ousness' => 'ous',
  'aliti'   => 'al',
  'iviti'   => 'ive',
  'biliti'  => 'ble',
  'logi'    => 'log'
}.freeze
# Matches one of the step-2 suffixes at the end of a line and captures it in
# group 1. The alternatives are the same strings as the keys of
# STEP_2_SUFFIX_MAPPING. The /x flag makes the spaces and line breaks inside
# the pattern insignificant.
STEP_2_SUFFIX_REGEXP =
/(
ational |
tional |
enci |
anci |
izer |
bli |
alli |
entli |
eli |
ousli |
ization |
ation |
ator |
alism |
iveness |
fulness |
ousness |
aliti |
iviti |
biliti |
logi)$/x
# Step-3 suffix table: each key is a word ending and each value is the text
# paired with it (presumably the replacement applied by step3, which is not
# visible here); an empty string means the ending maps to nothing. The keys
# are exactly the alternatives of STEP_3_SUFFIX_REGEXP.
# Frozen so the shared table cannot be modified at runtime.
STEP_3_SUFFIX_MAPPING =
{
  'icate' => 'ic',
  'ative' => '',
  'alize' => 'al',
  'iciti' => 'ic',
  'ical'  => 'ic',
  'ful'   => '',
  'ness'  => ''
}.freeze
# Matches one of the step-3 suffixes at the end of a line and captures it in
# group 1. The alternatives are the same strings as the keys of
# STEP_3_SUFFIX_MAPPING.
STEP_3_SUFFIX_REGEXP =
/(icate|ative|alize|iciti|ical|ful|ness)$/
# Matches one of the listed suffixes at the end of a line and captures it in
# group 1. The /x flag makes the spaces and line breaks inside the pattern
# insignificant.
# NOTE(review): presumably used by step4 to find the suffix to strip; step4 is
# not visible here, so confirm against its implementation.
STEP_4_SUFFIX_REGEXP =
/(
al |
ance |
ence |
er |
ic |
able |
ible |
ant |
ement |
ment |
ent |
ou |
ism |
ate |
iti |
ous |
ive |
ize)$/x
CONSONANT =

consonant

"[^aeiou]"
VOWEL =

vowel

"[aeiouy]"
CONSONANT_SEQUENCE =

consonant sequence

"#{CONSONANT}(?>[^aeiouy]*)"
VOWEL_SEQUENCE =

vowel sequence

"#{VOWEL}(?>[aeiou]*)"
MGR0 =

Number of consonant sequences: [cc]vvcc… is m>0

/^(#{CONSONANT_SEQUENCE})?#{VOWEL_SEQUENCE}#{CONSONANT_SEQUENCE}/o
MEQ1 =
[cc]vvcc[vv] is m=1

/^(#{CONSONANT_SEQUENCE})?#{VOWEL_SEQUENCE}#{CONSONANT_SEQUENCE}(#{VOWEL_SEQUENCE})?$/o
MGR1 =

[cc]vvccvvcc… is m>1

/^(#{CONSONANT_SEQUENCE})?#{VOWEL_SEQUENCE}#{CONSONANT_SEQUENCE}#{VOWEL_SEQUENCE}#{CONSONANT_SEQUENCE}/o
VOWEL_IN_STEM =

vowel in stem

/^(#{CONSONANT_SEQUENCE})?#{VOWEL}/o

Instance Method Summary collapse

Instance Method Details

#stem(word) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/porter-stemmer.rb', line 94

# Stems +word+ by passing it through step1..step5 in order.
#
# Words shorter than three characters are returned as-is. For longer words the
# work is done on a copy, so the caller's string is never modified (and frozen
# strings are accepted). A leading 'y' is temporarily written as 'Y' while the
# steps run; afterwards any leading 'Y' is written back as 'y', which also
# lowercases a word that already started with 'Y'.
#
# @param word [String] the word to stem; left untouched
# @return [String] the stemmed word; the +word+ object itself when it is
#   shorter than three characters
def stem(word)
  return word if word.length < 3

  # Copy first: the 'Y' marker below must not leak into the caller's string,
  # and in-place assignment would raise FrozenError on a frozen argument.
  word = word.dup

  # Map initial y to Y so that the patterns never treat it as vowel
  word[0] = 'Y' if word[0] == 'y'

  word = step1(word)
  word = step2(word)
  word = step3(word)
  word = step4(word)
  word = step5(word)

  # Turn initial Y back to y
  word[0] = 'y' if word[0] == 'Y'

  word
end