Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/object-scraper/scraper.rb

Defined Under Namespace

Classes: DuplicateDefinitionError

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name, options = {}, &block) ⇒ Scraper

:nodoc:



29
30
31
32
33
34
35
36
# File 'lib/object-scraper/scraper.rb', line 29

# Builds a scraper from a definition.
#
# +name+ is part of the signature (Scraper.define passes it through) but is
# not stored by this method. +options+ is checked by assert_valid_options
# and then read for :class (resolved through class_for), :source and :node.
# The block is kept and invoked later, once per matched node, by #parse.
def initialize(name, options = {}, &block) #:nodoc:
  assert_valid_options(options)

  # What to build and where to read it from.
  @class          = class_for(options[:class])
  @scraper_source = options[:source]
  @scraper_node   = options[:node]

  # How to populate each built object, and where the results pile up.
  @block   = block
  @objects = []
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(symbol, *args, &block) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
# File 'lib/object-scraper/scraper.rb', line 67

# Routes attribute-style calls made on the scraper to the object currently
# being built, by turning +symbol+ into a "symbol=" setter call.
#
# With a block, the block is yielded the current node and its result is
# assigned. If the block raises a StandardError, a warning is printed to
# standard output and nil is assigned instead, so a single bad node does not
# abort the run. Without a block, the first positional argument is assigned.
#
# @param symbol [Symbol] attribute name, without the trailing "="
# @param args [Array] positional arguments; only the first one is used
# @return [Object] the return value of the setter call
def method_missing(symbol, *args, &block)
  value =
    if block_given?
      begin
        yield(@current_node)
      rescue
        puts "Warning, parsing failed at #{@current_node.inspect}"
        nil # best effort: a failed extraction leaves the attribute nil
      end
    else
      args.first
    end

  @current_object.send("#{symbol}=", value)
end

# Keeps respond_to? consistent with method_missing: a name is reported as
# handled when the object currently being built exposes the matching setter.
# When no object is being built, the default answer is used.
#
# @param symbol [Symbol] method name being queried
# @param include_private [Boolean] forwarded to the default implementation
# @return [Boolean]
def respond_to_missing?(symbol, include_private = false)
  (!@current_object.nil? && @current_object.respond_to?("#{symbol}=")) || super
end

Class Attribute Details

.definition_file_paths ⇒ Object

Returns the value of attribute definition_file_paths.



10
11
12
# File 'lib/object-scraper/scraper.rb', line 10

# Class-level reader for the locations searched for scraper definitions.
#
# find_definitions iterates this value with #each and treats every entry
# both as a file stem ("<entry>.rb") and as a directory of "*.rb" files.
#
# @return [Object] the current value of @definition_file_paths
def definition_file_paths
  @definition_file_paths
end

.scrape_source_with ⇒ Object

Returns the value of attribute scrape_source_with.



9
10
11
# File 'lib/object-scraper/scraper.rb', line 9

# Class-level reader for the callable that turns an opened source into a
# document.
#
# #parse invokes it with #call, passing the opened source, and then calls
# #search on whatever it returns.
#
# @return [Object] the current value of @scrape_source_with
def scrape_source_with
  @scrape_source_with
end

.scrapers ⇒ Object

Returns the value of attribute scrapers.



8
9
10
# File 'lib/object-scraper/scraper.rb', line 8

# Class-level reader for the registry of defined scrapers.
#
# The class methods index it by scraper name and walk it with #each_value,
# so it is presumably a Hash of name => Scraper.
#
# @return [Object] the current value of @scrapers
def scrapers
  @scrapers
end

Instance Attribute Details

#scraper_node ⇒ Object (readonly)

Returns the value of attribute scraper_node.



17
18
19
# File 'lib/object-scraper/scraper.rb', line 17

# Reader for the node selector of this scraper.
#
# The value comes from options[:node] in the constructor and is handed to
# the document's #search in #parse.
#
# @return [Object] the current value of @scraper_node
def scraper_node
  @scraper_node
end

#scraper_source ⇒ Object (readonly)

Returns the value of attribute scraper_source.



17
18
19
# File 'lib/object-scraper/scraper.rb', line 17

# Reader for the source this scraper reads from.
#
# The value comes from options[:source] in the constructor and is passed to
# open in #parse.
#
# @return [Object] the current value of @scraper_source
def scraper_source
  @scraper_source
end

Class Method Details

.define(name, options = {}, &block) ⇒ Object



19
20
21
22
23
24
25
26
27
# File 'lib/object-scraper/scraper.rb', line 19

# Registers a new scraper under +name+.
#
# The registry key is normalised with to_sym (when the name supports it)
# because scraper_by_name looks scrapers up by name.to_sym; storing the raw
# name would make a scraper defined with a String name impossible to find
# through get/parse. The duplicate check runs before the instance is built
# so a clashing name never constructs a throw-away scraper.
#
# @param name [Symbol, String] name to register the scraper under
# @param options [Hash] forwarded unchanged to Scraper.new
# @param block forwarded unchanged to Scraper.new
# @return [Scraper] the newly registered scraper
# @raise [DuplicateDefinitionError] if a scraper with that name already exists
def self.define(name, options = {}, &block)
  key = name.respond_to?(:to_sym) ? name.to_sym : name

  if scrapers[key]
    raise DuplicateDefinitionError, "Scraper already defined: #{name}"
  end

  scrapers[key] = Scraper.new(name, options, &block)
end

.find_definitions ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
# File 'lib/object-scraper/scraper.rb', line 79

# Loads scraper definition files from every entry in definition_file_paths.
#
# For each entry, "<entry>.rb" is required when that file exists, and when
# the entry is a directory every "*.rb" file directly inside it is required.
# Directory contents are required in sorted order so the load order does not
# depend on the filesystem.
#
# @return [Object] the value of definition_file_paths (the result of #each)
def self.find_definitions
  definition_file_paths.each do |path|
    file = "#{path}.rb"
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    require(file) if File.exist?(file)

    next unless File.directory?(path)

    Dir[File.join(path, '*.rb')].sort.each { |definition| require definition }
  end
end

.get(name) ⇒ Object



38
39
40
# File 'lib/object-scraper/scraper.rb', line 38

# Returns the scraper registered under +name+.
#
# Thin public wrapper around scraper_by_name, which raises ArgumentError
# when no scraper is registered under that name.
#
# @param name [Symbol, String] scraper name
# @return [Scraper] the registered scraper
def self.get(name)
  scraper_by_name(name)
end

.parse(name) ⇒ Object



42
43
44
# File 'lib/object-scraper/scraper.rb', line 42

# Runs the scraper registered under +name+.
#
# The lookup goes through scraper_by_name, which raises ArgumentError when
# no scraper is registered under that name.
#
# @param name [Symbol, String] scraper name
# @return [Object] whatever the scraper's #parse returns
def self.parse(name)
  scraper_by_name(name).parse
end

.parse_all ⇒ Object



46
47
48
49
50
# File 'lib/object-scraper/scraper.rb', line 46

# Runs every registered scraper and returns all of their results as a single
# array. The per-scraper results are collected in registry order and then
# flattened recursively.
#
# @return [Array] the combined, flattened results
def self.parse_all
  scrapers.each_value.map { |scraper| scraper.parse }.flatten
end

.scraper_by_name(name) ⇒ Object



63
64
65
# File 'lib/object-scraper/scraper.rb', line 63

# Looks a scraper up in the registry by its symbolised name.
#
# @param name [Symbol, String] scraper name; converted with to_sym
# @return [Scraper] the registered scraper
# @raise [ArgumentError] if nothing is registered under that name
def self.scraper_by_name(name)
  found = scrapers[name.to_sym]
  return found if found

  raise ArgumentError, "No such scraper: #{name}"
end

Instance Method Details

#parse ⇒ Object



52
53
54
55
56
57
58
59
60
61
# File 'lib/object-scraper/scraper.rb', line 52

# Opens the source, converts it to a document with the callable in
# Scraper.scrape_source_with, and builds one object per node matched by
# @scraper_node.
#
# For every matched node a fresh instance of @class is created, recorded,
# and the definition block is called with this scraper so that attribute
# calls made on it are routed to that instance (see method_missing).
#
# The result list is reset on every call; previously it was only created in
# the constructor, so calling parse more than once returned the earlier
# objects again alongside the new ones.
#
# NOTE(review): the source is handed to Kernel#open as-is, so it should come
# from a trusted definition; URL sources presumably rely on open-uri being
# loaded elsewhere - confirm for the Ruby version in use.
#
# @return [Array] the objects built during this call
def parse
  doc = open(@scraper_source) { |f| Scraper.scrape_source_with.call(f) }

  @objects = []
  doc.search(@scraper_node).each do |node|
    @current_node   = node
    @current_object = @class.new
    # Record the object before running the block, as the original did.
    @objects << @current_object
    @block.call(self)
  end
  @objects
end