Class: Ariel::ExampleDocumentLoader

Inherits:
Object
  • Object
show all
Defined in:
lib/ariel/example_document_loader.rb

Overview

Provides methods that read an example document, using a StructureNode tree to populate a tree of Nodes with each labeled example. TODO: Fix the UTF issues this implementation is bound to create.

Class Method Summary collapse

Class Method Details

.load_directory(dir, structure) ⇒ Object

[View source]

45
46
47
48
49
50
51
52
53
54
55
# File 'lib/ariel/example_document_loader.rb', line 45

# Loads every labeled example document found directly inside +dir+ and then
# runs supervise_learning on the collected examples.
#
# Every path matched by "#{dir}/*" is opened and handed to
# load_labeled_example, except:
# * paths ending in "structure.rb" (presumably the structure definition
#   that lives next to the examples -- TODO confirm), and
# * anything that is not a regular file (e.g. a subdirectory), because it
#   cannot be read as a document.
#
# dir::       path of the directory containing the example documents
# structure:: root of the structure tree; forwarded unchanged to
#             load_labeled_example and supervise_learning
#
# Returns +structure+.
def self.load_directory(dir, structure)
  # Each key gets its own fresh array the first time it is accessed.
  loaded_example_hash = Hash.new { |hash, key| hash[key] = [] }
  Dir.glob("#{dir}/*") do |doc|
    next if doc =~ /structure\.rb\z/
    # Dir.glob also yields directories; calling #read on an opened directory
    # raises (e.g. Errno::EISDIR), which would abort the whole load.
    next unless File.file?(doc)
    File.open(doc) do |file|
      self.load_labeled_example(file, structure, loaded_example_hash)
    end
  end
  self.supervise_learning(structure, loaded_example_hash)
  structure
end

.load_labeled_example(file, structure, loaded_example_hash) ⇒ Object

Assumes that the structure passed in is the root of the structure tree (i.e. it has no parent); an ArgumentError is raised otherwise.

Raises:

  • (ArgumentError)
[View source]

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/ariel/example_document_loader.rb', line 9

# Reads one labeled example and files each extracted node under the
# structure it belongs to.
#
# file::                either an object responding to +read+ (its contents
#                       are used) or the example text itself
# structure::           root of the structure tree; must not have a parent
# loaded_example_hash:: hash whose values are appended to with <<, keyed by
#                       each node's meta.structure
#
# Raises ArgumentError when +structure+ has a parent.
# Returns +loaded_example_hash+.
def self.load_labeled_example(file, structure, loaded_example_hash)
  if structure.parent
    raise ArgumentError, "Passed structure is not root parent"
  end

  text = file
  text = file.read if file.respond_to?(:read)

  stream = TokenStream.new
  stream.tokenize(text, true) # second argument presumably flags labeled input -- TODO confirm
  root_node = ExtractedNode.new(:root, stream, structure)
  structure.apply_extraction_tree_on(root_node, true)

  # each_descendant(true) appears to yield the root as well, hence the
  # parent check: only nodes that have a parent are recorded as examples.
  root_node.each_descendant(true) do |node|
    loaded_example_hash[node.meta.structure] << node if node.parent
    node.tokenstream.remove_label_tags
  end

  loaded_example_hash
end

.supervise_learning(structure, loaded_example_hash) ⇒ Object

[View source]

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/ariel/example_document_loader.rb', line 25

# Learns a start rule and an end rule for every structure node that has
# loaded examples, and stores them on that node as a RuleSet.
#
# structure::           not used by this method
# loaded_example_hash:: maps a structure node to the example nodes collected
#                       for it
#
# Returns +loaded_example_hash+ (the result of iterating over it).
def self.supervise_learning(structure, loaded_example_hash)
  loaded_example_hash.each do |structure_node, example_nodes|
    forward_streams = []
    backward_streams = []

    example_nodes.each do |example|
      # Rules extract a node from its parent, so the parent's stream is the
      # training example and the node's first token marks the start label.
      # NOTE(review): the label is set on the parent's own tokenstream object,
      # not on a copy -- confirm this is safe when siblings share a parent.
      parent_stream = example.parent.tokenstream
      parent_stream.set_label_at(example.tokenstream.tokens.first.start_loc)
      forward_streams.push(parent_stream)

      # The end rule is learnt on the reversed parent stream, labeled at the
      # node's last token.
      reversed_stream = example.parent.tokenstream.reverse
      reversed_stream.set_label_at(example.tokenstream.tokens.last.start_loc)
      backward_streams.push(reversed_stream)
    end

    start_rules = Learner.new(*forward_streams).learn_rule(:forward)
    end_rules = Learner.new(*backward_streams).learn_rule(:back)
    structure_node.ruleset = RuleSet.new(start_rules, end_rules)
  end
end