Class: EntityExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/entity_extractor.rb

Instance Method Summary collapse

Constructor Details

#initialize(input, extract_from, save_field) ⇒ EntityExtractor

Returns a new instance of EntityExtractor.



5
6
7
8
9
10
11
# File 'lib/entity_extractor.rb', line 5

# Sets up an extractor over a JSON document.
#
# @param input [String] JSON text; it is parsed immediately and the parsed
#   value becomes the collection that extractSetTerms iterates over
# @param extract_from stored unchanged and handed to ExtractSetTerms for
#   every item
# @param save_field stored unchanged; handed to ExtractSetTerms and later used
#   as the key under which extracted terms are looked up in each result
def initialize(input, extract_from, save_field)
  # Parse first so malformed JSON raises before any other state is set.
  @input = JSON.parse(input)
  @extract_from, @save_field = extract_from, save_field

  # Results accumulate here, one entry per processed item.
  @output = []
end

Instance Method Details

#extractSetTerms(to_extract, extract_term_fields, case_sensitive) ⇒ Object

Extracts set terms from every input item and appends each item's result to the output.



14
15
16
17
18
19
# File 'lib/entity_extractor.rb', line 14

# Runs set-term extraction over every parsed input item.
#
# For each item an ExtractSetTerms is built from the item, the configured
# extract_from value, the three arguments below and the configured save field.
# Whatever its #extractTerms returns is appended to the accumulated output.
#
# @param to_extract forwarded to ExtractSetTerms (presumably the terms to
#   look for -- confirm against that class)
# @param extract_term_fields forwarded to ExtractSetTerms unchanged
# @param case_sensitive forwarded to ExtractSetTerms unchanged
# @return the parsed input collection (the result of iterating it with each)
def extractSetTerms(to_extract, extract_term_fields, case_sensitive)
  @input.each do |record|
    extractor = ExtractSetTerms.new(
      record, @extract_from, to_extract, extract_term_fields, case_sensitive, @save_field
    )
    @output << extractor.extractTerms
  end
end

#getAllOutput ⇒ Object

Gets all results in output



22
23
24
# File 'lib/entity_extractor.rb', line 22

# Serialises everything accumulated in the output so far.
#
# @return [String] pretty-printed JSON for the whole output array
def getAllOutput
  everything = @output
  JSON.pretty_generate(everything)
end

#getOnlyMatching ⇒ Object

Gets only the results for which terms were found/extracted



27
28
29
30
# File 'lib/entity_extractor.rb', line 27

# Serialises only those results whose save field holds at least one term.
#
# Each result is looked up by the configured save field and dropped when that
# value reports itself as empty; the stored output itself is left untouched.
#
# @return [String] pretty-printed JSON for the remaining results
def getOnlyMatching
  with_terms = @output.reject { |record| record[@save_field].empty? }
  JSON.pretty_generate(with_terms)
end

#getTermList ⇒ Object

Gets a hash mapping each extracted term to how often it occurs, sorted by ascending count



33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/entity_extractor.rb', line 33

# Tallies how often each extracted term appears across all results.
#
# Every result's save field is iterated and each term found there bumps that
# term's count by one.
#
# @return [Hash] term => occurrence count, ordered from least to most frequent
def getTermList
  tallies = @output.each_with_object(Hash.new(0)) do |record, counts|
    record[@save_field].each { |term| counts[term] += 1 }
  end

  # Rebuild as a plain hash so the result carries no default value.
  tallies.sort_by { |_term, count| count }.to_h
end