Class: EntityExtractor
- Inherits:
-
Object
- Object
- EntityExtractor
- Defined in:
- lib/entity_extractor.rb
Instance Method Summary
-
#extractSetTerms(to_extract, extract_term_fields, case_sensitive) ⇒ Object
Extracts set terms.
-
#getAllOutput ⇒ Object
Gets all results in output.
-
#getOnlyMatching ⇒ Object
Gets only the results for which terms were found/extracted.
-
#getTermList ⇒ Object
Gets a list of the extracted terms by how often they occur.
-
#initialize(input, extract_from, save_field) ⇒ EntityExtractor
constructor
A new instance of EntityExtractor.
Constructor Details
#initialize(input, extract_from, save_field) ⇒ EntityExtractor
Returns a new instance of EntityExtractor.
5 6 7 8 9 10 11 |
# File 'lib/entity_extractor.rb', line 5

# Sets up the extractor state from a JSON document.
#
# @param input [String] JSON text; it is parsed with JSON.parse and the
#   result is kept as the collection that extractSetTerms iterates over
#   (presumably a top-level JSON array of items -- confirm with callers)
# @param extract_from [Object] stored unchanged and later passed through to
#   ExtractSetTerms
# @param save_field [Object] stored unchanged; later used as the key under
#   which extracted terms are looked up in each output item
# @raise [JSON::ParserError] if +input+ is not valid JSON
def initialize(input, extract_from, save_field)
  @input = JSON.parse(input)
  @extract_from = extract_from
  @save_field = save_field
  # Results accumulate here as extraction methods are called.
  @output = []
end
Instance Method Details
#extractSetTerms(to_extract, extract_term_fields, case_sensitive) ⇒ Object
Extracts set terms
14 15 16 17 18 19 |
# File 'lib/entity_extractor.rb', line 14

# Runs set-term extraction over every input item.
#
# For each item in @input an ExtractSetTerms object is built and the value
# returned by its extractTerms call is appended to @output. Repeated calls
# keep appending to the same @output.
#
# @param to_extract [Object] passed through to ExtractSetTerms
# @param extract_term_fields [Object] passed through to ExtractSetTerms
# @param case_sensitive [Object] passed through to ExtractSetTerms
#   (presumably a boolean flag -- confirm against ExtractSetTerms)
# @return [Object] the @input collection (the result of +each+)
def extractSetTerms(to_extract, extract_term_fields, case_sensitive)
  @input.each do |record|
    extractor = ExtractSetTerms.new(
      record, @extract_from, to_extract,
      extract_term_fields, case_sensitive, @save_field
    )
    @output << extractor.extractTerms
  end
end
#getAllOutput ⇒ Object
Gets all results in output
22 23 24 |
# File 'lib/entity_extractor.rb', line 22

# Serializes everything collected so far.
#
# @return [String] @output rendered by JSON.pretty_generate
def getAllOutput
  everything = @output
  JSON.pretty_generate(everything)
end
#getOnlyMatching ⇒ Object
Gets only the results for which terms were found/extracted
27 28 29 30 |
# File 'lib/entity_extractor.rb', line 27

# Serializes only the output items that have extracted terms.
#
# An item is kept when the value stored under @save_field is not empty.
#
# @return [String] the kept items rendered by JSON.pretty_generate
def getOnlyMatching
  with_terms = @output.reject { |entry| entry[@save_field].empty? }
  JSON.pretty_generate(with_terms)
end
#getTermList ⇒ Object
Gets a list of the extracted terms by how often they occur
33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/entity_extractor.rb', line 33

# Counts how often each extracted term occurs across all output items.
#
# Every element found under @save_field in each @output item contributes one
# occurrence to its term's count.
#
# @return [Hash] term => occurrence count, ordered from least to most
#   frequent (the relative order of equally frequent terms is unspecified)
def getTermList
  # A plain default value of 0 lets `+= 1` work for terms not seen before.
  counts = Hash.new(0)

  @output.each do |item|
    item[@save_field].each { |term| counts[term] += 1 }
  end

  # Ascending by count; to_h rebuilds a hash that keeps the sorted order.
  counts.sort_by { |_term, count| count }.to_h
end