Class: Opener::PolarityTagger::Internal
- Inherits:
-
Object
- Object
- Opener::PolarityTagger::Internal
- Defined in:
- lib/opener/polarity_tagger/internal.rb
Constant Summary collapse
- DESC =
'VUA polarity tagger multilanguage'
- LAST_EDITED =
'21may2014'
- VERSION =
'1.2'
- N_WORDS =
5
- CACHE =
LexiconsCache.new
Instance Method Summary collapse
- #get_lexicon(opts, kaf, index) ⇒ Object
- #identify_lexicon(terms, lexicons) ⇒ Object
-
#initialize(ignore_pos: false, **params) ⇒ Internal
constructor
A new instance of Internal.
- #run(input, params = {}) ⇒ Object
Constructor Details
#initialize(ignore_pos: false, **params) ⇒ Internal
Returns a new instance of Internal.
16 17 18 |
# File 'lib/opener/polarity_tagger/internal.rb', line 16

# Builds a tagger instance.
#
# @param ignore_pos [Boolean] stored in +@ignore_pos+; when truthy, #run
#   passes +nil+ instead of the term's part of speech to the polarity lookup
# @param params [Hash] any further keyword options; accepted but not used here
def initialize(ignore_pos: false, **params)
  @ignore_pos = ignore_pos
end
Instance Method Details
#get_lexicon(opts, kaf, index) ⇒ Object
85 86 87 88 89 90 91 92 93 |
# File 'lib/opener/polarity_tagger/internal.rb', line 85

# Picks the lexicon entry to use for the term at +index+.
#
# A multi-word entry from +opts.multi+ is tried first against the window of
# up to N_WORDS terms starting at +index+. When one matches, the returned
# index is advanced by the number of space-separated words in its lemma.
# Otherwise +opts.single+ is returned (it may be nil) with +index+ unchanged.
#
# @param opts  object responding to #multi and #single
# @param kaf   document responding to #terms
# @param index [Integer] position of the current term in +kaf.terms+
# @return [Array] two elements: the chosen lexicon (or nil) and the index
def get_lexicon(opts, kaf, index)
  window  = kaf.terms[index, N_WORDS]
  matched = identify_lexicon(window, opts.multi)
  return [opts.single, index] unless matched

  word_count = matched.lemma.strip.split(' ').size
  [matched, index + word_count]
end
#identify_lexicon(terms, lexicons) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/opener/polarity_tagger/internal.rb', line 95

# Finds the first lexicon whose lemma is a prefix of the given terms.
#
# The terms' lemmas and texts are each downcased and joined with single
# spaces. A lexicon matches when its lemma sits at the very start of either
# joined string and is followed by whitespace or the end of the string. For
# each lexicon the joined lemmas are checked before the joined texts.
#
# @param terms    [Array] objects responding to #lemma and #text (either may
#   return nil)
# @param lexicons collection responding to #size and #each whose items
#   respond to #lemma
# @return the first matching lexicon, or nil when none matches or +lexicons+
#   is empty
def identify_lexicon(terms, lexicons)
  return unless lexicons.size > 0

  lemma = terms.map { |t| t.lemma&.downcase }.join(' ')
  text  = terms.map { |t| t.text&.downcase }.join(' ')

  lexicons.each do |lexicon|
    # \A and \z anchor to the whole string. ^ and $ anchor to lines, so a
    # newline inside a term would let an entry match in the middle of the
    # window instead of at its first term.
    prefix = /\A#{Regexp.escape(lexicon.lemma)}(\z|\s)/
    return lexicon if lemma =~ prefix
    return lexicon if text =~ prefix
  end

  nil
end
#run(input, params = {}) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/opener/polarity_tagger/internal.rb', line 20

# Tags the terms of a KAF document with polarity and sentiment-modifier
# attributes, then returns the document serialized with +to_xml+.
#
# @param input  passed to KAF::Document.from_xml
# @param params [Hash] options; +params[:cache_keys]+ is created when absent
#   and is updated in place with the document language before being used
#   (minus +:property_type+) to select the lexicons from CACHE
# @return the result of +kaf.to_xml+
# @raise [Opener::Core::UnsupportedLanguageError] when the lexicon map
#   obtained from CACHE is blank
def run(input, params = {})
  kaf = KAF::Document.from_xml(input)

  @cache_keys = (params[:cache_keys] ||= {})
  @cache_keys.merge!(lang: kaf.language)
  # make sure the :contract_ids key is always present, even if only as nil
  @cache_keys[:contract_ids] ||= nil
  @cache_keys = @cache_keys.except(:property_type)
  @map = kaf.map = CACHE[**@cache_keys].lexicons

  if @map.blank?
    raise Opener::Core::UnsupportedLanguageError, kaf.language
  end

  next_index = 0
  kaf.terms.each_with_index do |t, index|
    # skip terms already covered by a previously matched multi-word expression
    next unless index >= next_index

    lemma = t.lemma&.downcase
    text  = t.text.to_s.downcase
    pos   = @ignore_pos ? nil : t.pos
    attrs = Hashie::Mash.new

    lexicon      = nil
    polarity_pos = nil
    modifier     = nil

    # negators take precedence over intensifiers; the text is tried before the lemma
    if (opts = @map.by_negator(text) || @map.by_negator(lemma))
      modifier = 'shifter'
    elsif (opts = @map.by_intensifier(text) || @map.by_intensifier(lemma))
      modifier = 'intensifier'
    end

    if modifier
      lexicon, next_index = get_lexicon(opts, kaf, index)
      attrs.sentiment_modifier = modifier if lexicon
    end

    unless lexicon
      # text matching have priority as sometimes
      # the lemma provided by Stanza is a different word
      [text, lemma].each do |word|
        opts, polarity_pos = @map.by_polarity(word, pos)
        next unless opts[:multi].size > 0 || opts[:single]

        lexicon, next_index = get_lexicon(opts, kaf, index)
        next unless lexicon

        attrs.polarity = lexicon.polarity
        break
      end
    end

    # nothing matched for this term
    next unless attrs.size > 0

    attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
    attrs.resource = lexicon.resource if lexicon&.resource

    t.setPolarity(attrs, polarity_pos)
    # apply the same attributes to every term spanned by the match
    # (the range is empty unless next_index moved past index)
    (index...next_index).each do |i|
      kaf.terms[i].setPolarity(attrs, polarity_pos)
    end
  end

  kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'

  kaf.to_xml
end