Class: Lingua::Stemmer
- Inherits:
-
Object
- Object
- Lingua::Stemmer
- Defined in:
- lib/lingua/stemmer.rb,
lib/lingua/version.rb,
ext/lingua/stemmer.c more...
Constant Summary collapse
- VERSION =
'3.0.0'
Instance Attribute Summary collapse
-
#encoding ⇒ Object
readonly
Returns the value of attribute encoding.
-
#language ⇒ Object
readonly
Returns the value of attribute language.
Instance Method Summary collapse
-
#initialize(options = {}) ⇒ Stemmer
constructor
Creates a new Stemmer. Pass
:language
and :encoding
as options to change the language or encoding; otherwise English with UTF_8 will be used. -
#stem ⇒ Object
Stems a word.
Constructor Details
permalink #initialize(options = {}) ⇒ Stemmer
43 44 45 46 47 48 49 50 |
# File 'lib/lingua/stemmer.rb', line 43 def initialize(options = {}) @language = (options[:language] || 'en').to_s @encoding = (options[:encoding] || 'UTF_8').to_s @encoding = Encoding.find(@encoding.tr('_', '-')) native_init(@language, native_encoding(@encoding)) end |
Instance Attribute Details
permalink #encoding ⇒ Object (readonly)
Returns the value of attribute encoding.
34 35 36 |
# File 'lib/lingua/stemmer.rb', line 34 def encoding @encoding end |
permalink #language ⇒ Object (readonly)
Returns the value of attribute language.
33 34 35 |
# File 'lib/lingua/stemmer.rb', line 33 def language @language end |
Instance Method Details
permalink #stem ⇒ Object
[View source]
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'ext/lingua/stemmer.c', line 75
/*
 * Stems a single word with the native stemmer attached to `self`.
 *
 * `word` is coerced with rb_String(); its byte pointer and length are handed
 * to sb_stemmer_stem(). The stemmed bytes are then wrapped in a Ruby string
 * through ENCODED_STR_NEW2 together with the receiver's @encoding instance
 * variable.
 *
 * Raises RuntimeError when no native stemmer is attached to `self`, or when
 * sb_stemmer_stem() returns NULL (libstemmer documents NULL as its
 * out-of-memory result).
 */
static VALUE
rb_stemmer_stem(VALUE self, VALUE word) {
    struct sb_stemmer *stemmer;
    Data_Get_Struct(self, struct sb_stemmer, stemmer);
    if (!stemmer) {
        rb_raise(rb_eRuntimeError, "Stemmer is not initialized");
    }

    VALUE s_word = rb_String(word);
    /* NOTE(review): RSTRING_LEN is a long while libstemmer's size parameter is
     * presumably an int -- confirm very long inputs cannot reach this call. */
    const sb_symbol *stemmed = sb_stemmer_stem(stemmer,
        (sb_symbol *)RSTRING_PTR(s_word),
        RSTRING_LEN(s_word)
    );
    /* Fail with an explicit error instead of passing NULL to the string
     * constructor below. */
    if (!stemmed) {
        rb_raise(rb_eRuntimeError, "Stemmer failed to stem the word (out of memory)");
    }

    VALUE rb_enc = rb_iv_get(self, "@encoding");
    return ENCODED_STR_NEW2((char *)stemmed, rb_enc);
}
|