Class: Dynamised::Scraper

Inherits:
Object
  • Object
show all
Extended by:
DSL
Includes:
Curb_DSL, After_Scrape, Before_Scrape, Helpers, Writers
Defined in:
lib/dynamised/helpers.rb,
lib/dynamised/scraper.rb,
lib/dynamised/writers.rb,
lib/dynamised/scraper_dsl.rb,
lib/dynamised/after_scrape_methods.rb,
lib/dynamised/before_scrape_methods.rb

Defined Under Namespace

Modules: After_Scrape, Before_Scrape, DSL, Helpers, Writers

Constant Summary collapse

XPATH_Anchor =
".%s"

Constants included from Curb_DSL

Curb_DSL::Regex

Class Method Summary collapse

Instance Method Summary collapse

Methods included from DSL

crawl, pag_if, pag_inc, pag_item, pag_next, re_useable, scrape_here_if, select_crawl, set_base_url, set_field, set_meta_tag, set_pag_increment, set_up_tree, use, writer, xpath_prefix

Methods included from Writers

#write_csv

Methods included from After_Scrape

#escape_html, #page_url, #scrub_tags, #unescape_html

Methods included from Helpers

#crawl, #field_keys, #get_mpc, #mpc, #pbcopy, #pbpaste, #to_doc

Methods included from Curb_DSL

#body, #decode_html, #encode_html, #form_field, #header, #ignore_error, included, #make_request_of, #post_body, #query_params, #response_code, #response_cookies, #status_code

Constructor Details

#initialize(args = [], &block) ⇒ Scraper

Returns a new instance of Scraper.



39
40
41
42
43
44
45
46
47
48
49
# File 'lib/dynamised/scraper.rb', line 39

# Builds a scraper instance and seeds it from the class-level configuration.
#
# Stores +args+, resets the per-instance flags, opens a DBM_Wrapper named
# after the class, then copies the class-level instance variables
# (@inc, @uri, @tree, @tree_pointer, @base_url, @writer) onto the instance.
# The values are assigned as-is (no dup), so the instance and the class
# refer to the same objects.
#
# @param args [Array] kept untouched in @args
# @param block passed on to the next initialize in the ancestor chain
def initialize(args=[],&block)
  @args = args
  @tree_pointer = []
  @use_store = false
  store_label = "%s_scraped_data" % get_class_name(self.class.to_s)
  @scraped_data = DBM_Wrapper.new(store_label)
  # :tree_pointer is in this list, so the empty array assigned above is
  # replaced by whatever the class holds in @tree_pointer.
  %i[inc uri tree tree_pointer base_url writer].each do |attr|
    ivar = "@#{attr}"
    instance_variable_set(ivar, self.class.instance_variable_get(ivar))
  end
  super(&block)
end

Class Method Details

.each(&block) ⇒ Object



21
22
23
24
# File 'lib/dynamised/scraper.rb', line 21

# Iterates over the scraper registry held in @scrapers, creating an empty
# registry first if none exists yet.
#
# @yield [name, scraper] each registry entry
# @return [Object] whatever Hash#each returns for the given block
def each(&block)
  (@scrapers ||= {}).each(&block)
end

.fetch(*args, &block) ⇒ Object



26
27
28
29
# File 'lib/dynamised/scraper.rb', line 26

# Looks up a registered scraper by name, ignoring case.
#
# Registry keys are downcased Strings, so the requested name is converted
# with #to_s before downcasing. This lets a Symbol such as :Foo find the
# entry stored under "foo" instead of never matching.
#
# @param args [Array] only the first element (the scraper name) is used;
#   a missing name is looked up as the empty string
# @param block accepted for interface compatibility; not used
# @return [Object] the registry entry stored under the normalised name
# @raise [RuntimeError] when no entry exists for the name
def fetch(*args,&block)
  @scrapers ||= {}
  wanted = args.first.to_s.downcase
  @scrapers.fetch(wanted) { |name| raise "No scraper called %s was found" % name }
end

.inherited(base) ⇒ Object



7
8
9
10
11
12
13
# File 'lib/dynamised/scraper.rb', line 7

# Hook that records +base+ in the @scrapers registry and then runs
# set_up_tree in the context of +base+.
#
# The registry key is the last "::"-separated segment of base.to_s,
# downcased.
#
# @param base [Class] the class being registered
# @return [Object] the result of set_up_tree
def inherited(base)
  registry = (@scrapers ||= {})
  short_name = base.to_s.split('::').last
  registry[short_name.downcase] = base
  # instance_exec so that set_up_tree is resolved with base as self
  base.instance_exec { set_up_tree }
end

.list ⇒ Object



15
16
17
18
# File 'lib/dynamised/scraper.rb', line 15

# Returns the keys of the @scrapers registry, creating an empty registry
# first if none exists yet.
#
# @return [Array] the registry keys in insertion order
def list
  (@scrapers ||= {}).keys
end

Instance Method Details

#pull_and_check ⇒ Object



58
59
60
61
62
63
64
65
66
67
# File 'lib/dynamised/scraper.rb', line 58

# Pulls records from the document returned by pull_initial using @tree and
# prints each one with ap for inspection.
#
# A divider line is printed once before the pull starts and again after
# every yielded record; each record is followed by a 0.5 second pause.
#
# @return [Object] whatever pull returns
def pull_and_check
  divider = "}#{'-' * 40}{"
  root_doc = pull_initial
  ap divider
  pull(root_doc, @tree) do |record|
    ap record
    ap divider
    sleep 0.5
  end
end

#pull_and_store(&spinner) ⇒ Object



51
52
53
54
55
56
# File 'lib/dynamised/scraper.rb', line 51

# Runs scrape_data and then write_data, passing the same block to both.
# Sets @use_store to true before scraping.
#
# @param spinner block forwarded to scrape_data and write_data
# @return [Object] the result of write_data
# @raise [RuntimeError] "No writer detected" when @writer is nil or false
def pull_and_store(&spinner)
  if @writer
    @use_store = true
    scrape_data(&spinner)
    write_data(&spinner)
  else
    raise "No writer detected"
  end
end