Class: Lingua::Stemmer

Inherits:
Object
  • Object
show all
Defined in:
lib/lingua/stemmer.rb,
lib/lingua/version.rb,
ext/lingua/stemmer.c
more...

Constant Summary collapse

VERSION =
'3.0.0'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Stemmer

Creates a new Stemmer. Pass :language and :encoding as options to change the language or the encoding; otherwise English with UTF_8 is used.

require 'lingua/stemmer'
s = Lingua::Stemmer.new language: 'fr'
[View source]

43
44
45
46
47
48
49
50
# File 'lib/lingua/stemmer.rb', line 43

# Builds a stemmer for the requested language and encoding.
#
# @param options [Hash] optional settings
# @option options [#to_s] :language language handed to the native
#   stemmer; 'en' is used when the key is absent or nil
# @option options [#to_s] :encoding encoding name, underscores may be
#   used in place of dashes; 'UTF_8' is used when the key is absent or nil
# @raise [ArgumentError] when Encoding.find does not know the encoding name
def initialize(options = {})
  @language = (options[:language] || 'en').to_s

  # Encoding.find expects dashed names, so 'UTF_8' is looked up as 'UTF-8'.
  encoding_name = (options[:encoding] || 'UTF_8').to_s.tr('_', '-')
  @encoding = Encoding.find(encoding_name)

  native_init(@language, native_encoding(@encoding))
end

Instance Attribute Details

#encoding ⇒ Object (readonly)

Returns the value of attribute encoding.


34
35
36
# File 'lib/lingua/stemmer.rb', line 34

# Read-only accessor for the stemmer's encoding.
#
# @return [Object] the value held in @encoding; #initialize stores the
#   result of Encoding.find there
def encoding
  @encoding
end

#language ⇒ Object (readonly)

Returns the value of attribute language.


33
34
35
# File 'lib/lingua/stemmer.rb', line 33

# Read-only accessor for the stemmer's language.
#
# @return [Object] the value held in @language; #initialize stores the
#   :language option there after to_s ('en' when no option is given)
def language
  @language
end

Instance Method Details

#stem ⇒ Object

Stems a word

require 'lingua/stemmer'
s = Lingua::Stemmer.new
s.stem "installation" # ==> install
[View source]

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'ext/lingua/stemmer.c', line 75

/*
 * Stems a word using the Snowball stemmer wrapped by +self+.
 *
 * self - the Lingua::Stemmer instance holding the struct sb_stemmer.
 * word - the value to stem; it is converted with rb_String() first.
 *
 * Returns a new Ruby String built from the stemmed bytes and tagged with
 * the encoding stored in the receiver's @encoding instance variable.
 *
 * Raises RuntimeError when the receiver carries no native stemmer and
 * NoMemoryError when libstemmer cannot allocate the result.
 */
static VALUE
rb_stemmer_stem(VALUE self, VALUE word) {
  struct sb_stemmer * stemmer;

  Data_Get_Struct(self, struct sb_stemmer, stemmer);
  if(!stemmer) rb_raise(rb_eRuntimeError, "Stemmer is not initialized");

  VALUE s_word = rb_String(word);
  /* sb_stemmer_stem() takes an int size, so make the narrowing explicit. */
  const sb_symbol * stemmed = sb_stemmer_stem(stemmer,
      (sb_symbol *)RSTRING_PTR(s_word),
      (int)RSTRING_LEN(s_word)
  );

  /* libstemmer reports out-of-memory by returning NULL; building a Ruby
   * string from that pointer would dereference NULL. */
  if(!stemmed) rb_raise(rb_eNoMemError, "Out of memory while stemming");

  VALUE rb_enc = rb_iv_get(self, "@encoding");
  return ENCODED_STR_NEW2((char *)stemmed, rb_enc);
}