38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
# File 'lib/opener/stanza/processor.rb', line 38
# Sends the raw text of a KAF document to the Stanza server and adds the
# returned words to the document as word forms and terms.
#
# @param input [String] KAF XML, parsed with KAF::Document.from_xml
# @param params [#cache_keys] request parameters; only
#   `cache_keys.environment` and `cache_keys.merged` are read here
# @return the result of `kaf.to_xml` after word forms, terms and the
#   linguistic-processor entry have been added
# @raise [RuntimeError] when ENV['STANZA_SERVER'] is blank, when the server
#   answers with a status >= 400 (the message is the response body), when a
#   word has no usable character span ('Missing misc'), or when a UPOS tag
#   has no entry in POS
# @raise [Core::UnsupportedLanguageError] when the document language is not
#   enabled for the requested environment, or the server answers 406
def run(input, params)
  raise 'missing Stanza server' if ENV['STANZA_SERVER'].blank?

  kaf  = KAF::Document.from_xml input
  lang = LANGUAGES_CACHE.get[kaf.language]
  env  = params.cache_keys.environment

  # The language must list the requested environment; requests flagged as
  # `merged` are also accepted when the language lists 'production'.
  enabled = lang&.environments&.include?(env) ||
            (params.cache_keys.merged && lang&.environments&.include?('production'))
  raise Core::UnsupportedLanguageError.new kaf.language unless enabled

  # `lang` is non-nil from here on: a nil entry fails the check above.
  if env == 'production' && !lang.supported_by_opener
    raise Core::UnsupportedLanguageError.new kaf.language
  end

  input = kaf.raw
  # Replaces each comma together with the single non-space character that
  # follows it by ', ', so the string length stays the same.
  # NOTE(review): this overwrites the character after the comma instead of
  # inserting a space before it — confirm that this is intended.
  input = input.gsub(/\,[^\ ]/, ', ')

  response = ChainedDaemon.http.post BASE_URL, {lang: kaf.language, input: input}.to_query
  raise Core::UnsupportedLanguageError, kaf.language if response.status == 406
  raise response.body if response.status >= 400

  sentences = JSON.parse response.body
  sentences.each { |s| s.map! { |t| Hashie::Mash.new t } }

  # Entries whose id is an Array (presumably multi-word tokens — confirm
  # against the server output) lend their character span to every id in the
  # range they cover, keyed by sentence index.
  miscs = {}
  sentences.each_with_index do |s, i|
    miscs[i] = {}
    s.each do |word|
      next unless word.id.is_a?(Array)

      (word.id.min..word.id.max).each { |id| miscs[i][id] = word.slice(:start_char, :end_char) }
    end
  end

  # Only the order of words inside each sentence is reversed; sentence
  # indexes, and therefore the keys of `miscs`, are unaffected.
  sentences.each(&:reverse!) if RTL_LANGUAGES.include? kaf.language

  w_index = 0
  sentences.each_with_index do |s, s_index|
    s.each do |word|
      # The counter also advances for Array-id entries that are skipped
      # below, so emitted wid/tid values may contain gaps.
      w_index += 1
      next if word.id.is_a? Array

      misc = word.slice(:start_char, :end_char).presence || miscs[s_index][word.id]
      # `misc` is nil when the word has no span of its own and no Array-id
      # entry covers it; safe navigation routes that case to the scoped
      # 'Missing misc' error instead of a NoMethodError on nil.
      unless misc&.start_char && misc&.end_char
        Rollbar.scoped({ input: input, params: params, sentences: sentences, word: word }) do
          raise 'Missing misc'
        end
      end

      offset = misc.start_char
      length = misc.end_char - offset

      u_pos = word.upos
      pos   = POS[u_pos]
      raise "Didn't find a map for #{u_pos}" if pos.nil?

      type = POS_OPEN.include?(pos) ? 'open' : 'close'

      # Kept in its own local so the `params` argument stays intact for the
      # Rollbar scope on later iterations.
      attrs = Hashie::Mash.new(
        wid:        w_index,
        sid:        s_index + 1,
        tid:        w_index,
        para:       1,
        offset:     offset,
        length:     length,
        text:       word.text,
        lemma:      word.lemma,
        morphofeat: u_pos,
        pos:        pos,
        type:       type,
        head:       word.head,
        xpos:       word.xpos.to_s
      )

      kaf.add_word_form attrs
      kaf.add_term attrs
    end
  end

  kaf.add_linguistic_processor DESC, "#{VERSION}", 'text', timestamp: true
  kaf.to_xml
end
|