Class: Lingua::Stemmer

Inherits:
Object
  • Object
show all
Defined in:
lib/lingua/stemmer.rb,
lib/lingua/version.rb,
ext/lingua/stemmer.c

Constant Summary collapse

VERSION =
"2.0.1"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Stemmer

Creates a new Stemmer. Pass :language and/or :encoding to change the language or the encoding; otherwise English with UTF_8 is used.

require 'lingua/stemmer'
s = Lingua::Stemmer.new :language => 'fr'


41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/lingua/stemmer.rb', line 41

# Creates a new Stemmer.
#
# options - Hash of settings (default: {}):
#           :language - language to stem in; converted with to_s
#                       (default: 'en').
#           :encoding - encoding name or Encoding object; converted with
#                       to_s (default: 'UTF_8').
#
# On Ruby >= 1.9 the encoding is resolved with Encoding.find, so @encoding
# holds an Encoding object and an unknown name raises ArgumentError. On older
# Rubies @encoding holds the upcased name with dashes turned into underscores.
#
#   require 'lingua/stemmer'
#   s = Lingua::Stemmer.new :language => 'fr'
def initialize(options={})
  @language = (options[:language] || 'en').to_s
  encoding_name = (options[:encoding] || 'UTF_8').to_s

  # encoding_name is always a String here (to_s above), so it is normalised
  # unconditionally; an is_a?(Encoding) guard could never be false.
  @encoding =
    if RUBY_VERSION >= "1.9"
      # Ruby spells encoding names with dashes ("UTF-8"); accept "UTF_8" too.
      Encoding.find(encoding_name.tr("_", "-"))
    else
      encoding_name.upcase.tr("-", "_")
    end

  # native_init and native_encoding are defined outside this method.
  native_init(@language, native_encoding(@encoding))
end

Instance Attribute Details

#encoding ⇒ Object (readonly)

Returns the value of attribute encoding.



32
33
34
# File 'lib/lingua/stemmer.rb', line 32

# Reader for the @encoding instance variable.
def encoding; @encoding; end

#language ⇒ Object (readonly)

Returns the value of attribute language.



31
32
33
# File 'lib/lingua/stemmer.rb', line 31

# Reader for the @language instance variable.
def language; @language; end

Instance Method Details

#stem ⇒ Object

Stems a word

require 'lingua/stemmer'
s = Lingua::Stemmer.new
s.stem "installation" # ==> install


75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'ext/lingua/stemmer.c', line 75

/*
 * Stems a word with the libstemmer handle wrapped by the receiver.
 *
 * self - object wrapping a struct sb_stemmer pointer.
 * word - value to stem; coerced to a String with rb_String().
 *
 * Returns the value produced by ENCODED_STR_NEW2 from the stemmed bytes and
 * the receiver's @encoding instance variable (macro defined elsewhere;
 * presumably a Ruby String tagged with that encoding - confirm).
 * Raises RuntimeError when the wrapped stemmer pointer is NULL and
 * NoMemoryError when libstemmer cannot allocate the result.
 */
static VALUE
rb_stemmer_stem(VALUE self, VALUE word) {
  struct sb_stemmer * stemmer;

  Data_Get_Struct(self, struct sb_stemmer, stemmer);
  if(!stemmer) rb_raise(rb_eRuntimeError, "Stemmer is not initialized");

  VALUE s_word = rb_String(word);
  const sb_symbol * stemmed = sb_stemmer_stem(stemmer,
      (const sb_symbol *)RSTRING_PTR(s_word),
      (int)RSTRING_LEN(s_word)
  );

  /* sb_stemmer_stem() returns NULL on out-of-memory; raise instead of
   * handing a NULL pointer to ENCODED_STR_NEW2. */
  if(!stemmed) rb_memerror();

  VALUE rb_enc = rb_iv_get(self, "@encoding");
  return ENCODED_STR_NEW2((char *)stemmed, rb_enc);
}