Class: Opener::PolarityTagger::Internal

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/polarity_tagger/internal.rb

Constant Summary collapse

# Processor name that #run records via add_linguistic_processor.
DESC = 'VUA polarity tagger multilanguage'
# Last-edited stamp; #run combines it with VERSION as "<LAST_EDITED>_<VERSION>".
LAST_EDITED = '21may2014'
# Version string of this tagger.
VERSION = '1.2'
# Number of consecutive terms #get_lexicon inspects when looking for a
# multi-word lexicon entry.
N_WORDS = 5
# Lexicon cache instance; #run indexes it with the cache keys and reads
# +lexicons+ from the result.
CACHE = LexiconsCache.new

Instance Method Summary collapse

Constructor Details

#initialize(ignore_pos: false, **params) ⇒ Internal

Returns a new instance of Internal.



16
17
18
# File 'lib/opener/polarity_tagger/internal.rb', line 16

# Builds a tagger instance.
#
# @param ignore_pos [Boolean] stored as-is in @ignore_pos; when truthy, #run
#   passes nil instead of each term's part of speech to the polarity lookup
# @param params [Hash] any further keyword options; accepted but not used here
def initialize(ignore_pos: false, **params)
  @ignore_pos = ignore_pos
end

Instance Method Details

#get_lexicon(opts, kaf, index) ⇒ Object



85
86
87
88
89
90
91
92
93
# File 'lib/opener/polarity_tagger/internal.rb', line 85

# Resolves the lexicon entry to apply at +index+ together with the index the
# caller should continue from.
#
# A window of up to N_WORDS terms starting at +index+ is checked against the
# multi-word candidates in +opts.multi+. On a hit, the returned index is
# +index+ plus the number of whitespace-separated words in the matched
# lexicon's lemma. Otherwise +opts.single+ is returned (whatever it holds)
# with +index+ unchanged.
#
# @param opts [#multi, #single] candidate lexicons for the current term
# @param kaf [#terms] document whose terms are being tagged
# @param index [Integer] position of the current term in +kaf.terms+
# @return [Array] two elements: the chosen lexicon and the follow-up index
def get_lexicon(opts, kaf, index)
  window  = kaf.terms[index, N_WORDS]
  matched = identify_lexicon(window, opts.multi)
  return [opts.single, index] unless matched

  word_count = matched.lemma.strip.split(' ').size
  [matched, index + word_count]
end

#identify_lexicon(terms, lexicons) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/opener/polarity_tagger/internal.rb', line 95

# Finds the first multi-word lexicon entry whose lemma begins the given window
# of terms.
#
# The window is rendered twice, once from the terms' lemmas and once from
# their surface texts, each downcased and joined with single spaces (a nil
# lemma/text contributes an empty string). Every lexicon is tried against the
# lemma rendering and then the text rendering before moving to the next one.
# A lexicon matches only when its lemma sits at the very start of the
# rendering and is followed by whitespace or the end of the string, so a
# partial-word prefix ("no" vs. "not") does not count.
#
# @param terms [Array<#lemma, #text>] window of consecutive terms
# @param lexicons [#size, #each] candidates, each responding to +lemma+
# @return [Object, nil] the first matching lexicon; nil when +lexicons+ is
#   empty or nothing matches
def identify_lexicon(terms, lexicons)
  return unless lexicons.size > 0

  lemma = terms.map { |t| t.lemma&.downcase }.join(' ')
  text  = terms.map { |t| t.text&.downcase }.join(' ')

  lexicons.each do |lexicon|
    # \A rather than ^: the match must start at the first term of the window.
    # With ^ a newline embedded in a term would let the lemma match later in
    # the window, while the caller still counts words from the first term.
    pattern = /\A#{Regexp.escape(lexicon.lemma)}(?:\z|\s)/
    return lexicon if pattern.match?(lemma) || pattern.match?(text)
  end
  nil
end

#run(input, params = {}) ⇒ Object

Raises:

  • (Opener::Core::UnsupportedLanguageError)


20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/opener/polarity_tagger/internal.rb', line 20

# Tags the terms of a KAF document with polarity / sentiment-modifier
# attributes and returns the document serialized with +to_xml+.
#
# @param input KAF document source handed to KAF::Document.from_xml
#   (presumably an XML string; confirm against callers)
# @param params [Hash] options; only +:cache_keys+ is read here. Note that
#   +params[:cache_keys]+ is created when absent, and the hash stored there
#   is modified in place (+:lang+ is merged in, +:contract_ids+ is set to nil
#   when falsy).
# @return [Object] the result of +kaf.to_xml+
# @raise [Opener::Core::UnsupportedLanguageError] when the lexicons obtained
#   from CACHE for these cache keys are blank
def run input, params = {}
  kaf         = KAF::Document.from_xml input

  # Reuse the caller's cache-key hash (creating it inside params if missing)
  # and complete it with the document language.
  @cache_keys = params[:cache_keys] ||= {}
  @cache_keys.merge! lang: kaf.language
  # Make sure the :contract_ids key is always present, nil when not given.
  @cache_keys[:contract_ids] = nil unless @cache_keys[:contract_ids]
  # :property_type is not part of the cache lookup below.
  @cache_keys = @cache_keys.except :property_type
  # The same lexicons object is kept on the instance and attached to the document.
  @map = kaf.map = CACHE[**@cache_keys].lexicons

  raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?

  # Follow-up index returned by the latest get_lexicon call; terms before it
  # were already covered by a multi-word match.
  next_index = 0
  kaf.terms.each_with_index do |t, index|
    # skip terms when a multi_word_expression is found
    next if next_index > index
    lemma = t.lemma&.downcase
    text  = t.text.to_s.downcase
    # With ignore_pos set, the polarity lookup receives nil as part of speech.
    pos   = if @ignore_pos then nil else t.pos end
    attrs = Hashie::Mash.new


    # Only the polarity lookup below assigns this; it stays nil for modifiers.
    polarity_pos = nil

    # Sentiment modifiers are checked first: negators, then intensifiers,
    # each looked up by surface text and then by lemma.
    if opts = @map.by_negator(text) || @map.by_negator(lemma)
      lexicon, next_index = get_lexicon(opts, kaf, index)
      attrs.sentiment_modifier = 'shifter' if lexicon
    elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
      lexicon, next_index = get_lexicon(opts, kaf, index)
      attrs.sentiment_modifier = 'intensifier' if lexicon
    end

    # lexicon is nil here when neither branch above ran or when get_lexicon
    # returned no entry; in that case fall back to a polarity lookup.
    unless lexicon
      # Text matching has priority, as the lemma provided by Stanza is
      # sometimes a different word.
      [text, lemma].each do |word|
        opts, polarity_pos = @map.by_polarity word, pos

        if opts[:multi].size > 0 or opts[:single]
          lexicon, next_index = get_lexicon opts, kaf, index
          if lexicon
            attrs.polarity = lexicon.polarity
            # Stop at the first word that yields a lexicon entry.
            break
          end
        end
      end
    end

    # attrs is non-empty only when a modifier or a polarity was set above.
    if attrs.size > 0
      attrs['lexicon-id'] = lexicon.id.to_s  if lexicon&.id
      attrs.resource      = lexicon.resource if lexicon&.resource
      t.setPolarity attrs, polarity_pos
      # Apply the same attributes to every term from index up to (but not
      # including) next_index; the loop body does not run when next_index
      # equals index.
      # NOTE(review): the loop starts at index, so for a multi-word match the
      # current term appears to receive setPolarity a second time. Confirm
      # that setPolarity is safe to call twice on the same term.
      i = index
      while i < next_index do
        term = kaf.terms[i]
        term.setPolarity attrs, polarity_pos
        i += 1
      end
    end
  end

  kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'

  kaf.to_xml
end