Class: Scraptacular::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/scraptacular/scraper.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name, &block) ⇒ Scraper

Returns a new instance of Scraper.



7
8
9
10
# File 'lib/scraptacular/scraper.rb', line 7

# Stores the scraper's name and the block holding its scraping logic.
# The block is only saved here; it is not executed until #run evaluates it.
#
# @param name [Object] name for this scraper, exposed through #name
# @param block [Proc, nil] scraping logic, kept in @block for later evaluation
def initialize(name, &block)
  @name = name
  @block = block
end

Instance Attribute Details

#name ⇒ Object (readonly)

Returns the value of attribute name.



5
6
7
# File 'lib/scraptacular/scraper.rb', line 5

# Reader for the name given to the constructor.
#
# @return [Object] the value stored in @name
def name
  @name
end

#page ⇒ Object (readonly)

Returns the value of attribute page.



5
6
7
# File 'lib/scraptacular/scraper.rb', line 5

# Reader for the page most recently handed to #run.
#
# @return [Object, nil] the value stored in @page; nil until #run has assigned one
def page
  @page
end

Instance Method Details

#result(&block) ⇒ Object



12
13
14
15
16
17
18
# File 'lib/scraptacular/scraper.rb', line 12

# Builds one result record for the current page and appends it to the
# result list of the run in progress.
#
# A Scraptacular::Result is created from @page and the given block is
# evaluated with that object as +self+, so instance variables assigned in the
# block land on the result. The result's own @page variable is then removed
# before the object is stored (presumably so the page is not carried around
# in the collected results -- confirm against Scraptacular::Result).
#
# Assumes @results has already been set up, which #run does before
# evaluating the scraper block.
#
# @yield block evaluated in the context of the new result
# @return [Array] the @results array, after the new result has been appended
def result(&block)
  record = Scraptacular::Result.new(@page)
  record.instance_eval(&block)
  record.send(:remove_instance_variable, :@page)

  @results.push(record)
end

#run(page) ⇒ Object



20
21
22
23
24
25
26
27
# File 'lib/scraptacular/scraper.rb', line 20

# Runs this scraper's stored block against +page+ and returns what it collected.
#
# Each call replaces @page and starts a fresh, empty @results array, so
# nothing from an earlier run on this instance is kept. The stored block is
# evaluated with this scraper as +self+, which is what lets it call helpers
# such as #result directly.
#
# @param page [Object] the page to scrape; exposed through #page while the block runs
# @return [Array] the contents of @results once the block has finished
def run(page)
  @page = page
  @results = []
  instance_eval(&@block)
  @results
end


#scrape_links(selector, options = {}) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/scraptacular/scraper.rb', line 29

# Follows the links matched by +selector+ on the current page, runs another
# scraper against each linked page, and returns the combined results.
#
# The scraper to run is looked up in +Scraptacular.world.scrapers+ under the
# key given as +options[:with]+. Each matched element's href is fetched with
# +Scraptacular.agent.get+ and the fetched page is passed to that scraper's
# +run+.
#
# Matched elements that carry no href attribute are skipped rather than
# raising NoMethodError on a nil attribute.
#
# @param selector [Object] passed unchanged to +page.search+
# @param options [Hash] must contain +:with+, the key of a registered scraper
# @return [Array] results from every followed link, in the order the links were matched
# @raise [ArgumentError] if +:with+ is missing or does not name a known scraper
def scrape_links(selector, options = {})
  scraper_key = options[:with]
  raise ArgumentError, "You must supply a scraper using the :with option" unless scraper_key

  scraper = Scraptacular.world.scrapers[scraper_key]
  raise ArgumentError, "scraper #{scraper_key} does not exist" unless scraper

  page.search(selector).each_with_object([]) do |link, collected|
    href = link.attributes["href"]
    next unless href # matched element has no href, so there is nothing to follow

    subpage = Scraptacular.agent.get(href.value)
    # [*x] normalises nil / single value / array into an array before appending.
    collected.concat([*scraper.run(subpage)])
  end
end