Class: Lingua::Stemmer
- Inherits:
-
Object
- Object
- Lingua::Stemmer
- Defined in:
- lib/lingua/stemmer.rb,
lib/lingua/version.rb,
ext/lingua/stemmer.c
Constant Summary collapse
- VERSION =
"2.0.1"
Instance Attribute Summary collapse
-
#encoding ⇒ Object
readonly
Returns the value of attribute encoding.
-
#language ⇒ Object
readonly
Returns the value of attribute language.
Instance Method Summary collapse
-
#initialize(options = {}) ⇒ Stemmer
constructor
Creates a new Stemmer. Pass :language and :encoding as options to change the language or encoding; otherwise English ('en') with UTF_8 is used.
-
#stem ⇒ Object
Stems a word.
Constructor Details
#initialize(options = {}) ⇒ Stemmer
Creates a new Stemmer. Pass :language and :encoding as options to change the language or encoding; otherwise English ('en') with UTF_8 is used.
require 'lingua/stemmer'
s = Lingua::Stemmer.new :language => 'fr'
41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/lingua/stemmer.rb', line 41
def initialize(options={})
  @language = (options[:language] || 'en').to_s
  @encoding = (options[:encoding] || 'UTF_8').to_s

  if RUBY_VERSION >= "1.9"
    if not @encoding.is_a?(Encoding)
      @encoding = Encoding.find(@encoding.gsub("_", "-"))
    end
  else
    @encoding = @encoding.upcase.gsub("-", "_")
  end

  native_init(@language, native_encoding(@encoding))
end
Instance Attribute Details
#encoding ⇒ Object (readonly)
Returns the value of attribute encoding.
32 33 34 |
# File 'lib/lingua/stemmer.rb', line 32 def encoding @encoding end |
#language ⇒ Object (readonly)
Returns the value of attribute language.
31 32 33 |
# File 'lib/lingua/stemmer.rb', line 31 def language @language end |
Instance Method Details
#stem ⇒ Object
Stems a word
require 'lingua/stemmer'
s = Lingua::Stemmer.new
s.stem "installation" # ==> install
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'ext/lingua/stemmer.c', line 75
static VALUE
rb_stemmer_stem(VALUE self, VALUE word) {
  struct sb_stemmer * stemmer;

  Data_Get_Struct(self, struct sb_stemmer, stemmer);
  if(!stemmer) rb_raise(rb_eRuntimeError, "Stemmer is not initialized");

  VALUE s_word = rb_String(word);
  const sb_symbol * stemmed = sb_stemmer_stem(stemmer,
                                (sb_symbol *)RSTRING_PTR(s_word),
                                RSTRING_LEN(s_word)
                              );

  VALUE rb_enc = rb_iv_get(self, "@encoding");
  return ENCODED_STR_NEW2((char *)stemmed, rb_enc);
}