Class: Scrapers::ManningBooks::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/scrapers/manning_books.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Scraper

Returns a new instance of Scraper.



15
16
17
18
19
20
21
22
# File 'lib/scrapers/manning_books.rb', line 15

# Builds a scraper from a hash of string-keyed settings.
#
# Recognised keys are "user", "pw", "delay", "destination" and "dry_run".
# Missing credentials come from the netrc entry named by NETRC_MANNING_ENTRY;
# the other settings fall back to DELAY_TIME, the current directory (".") and
# false respectively.
#
# @param options [Hash] string-keyed overrides; keys that are absent use the
#   fallbacks described above
def initialize(options={})
  credentials = ::Scrapers::NetrcReader.new(NETRC_MANNING_ENTRY)
  fallback_user = credentials.user
  fallback_pw   = credentials.pw

  @user        = options.fetch("user", fallback_user)
  @pw          = options.fetch("pw", fallback_pw)
  @delay_time  = options.fetch("delay", DELAY_TIME)
  @destination = options.fetch("destination", ".")
  @dry_run     = options.fetch("dry_run", false)
end

Instance Attribute Details

#delay_time ⇒ Object

Returns the value of attribute delay_time.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Reader for @delay_time, which #initialize takes from the "delay" option
# (falling back to DELAY_TIME). #download_books passes it to #wait_a_bit
# after each book.
def delay_time
  @delay_time
end

#destination ⇒ Object

Returns the value of attribute destination.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Reader for @destination, which #initialize takes from the "destination"
# option (falling back to "."). #scrape changes into this directory while
# downloading.
def destination
  @destination
end

#dry_run ⇒ Object

Returns the value of attribute dry_run.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Reader for @dry_run, which #initialize takes from the "dry_run" option
# (falling back to false). When truthy, #download_books neither fetches nor
# saves anything.
def dry_run
  @dry_run
end

#pw ⇒ Object

Returns the value of attribute pw.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Reader for @pw, which #initialize takes from the "pw" option (falling back
# to the netrc reader's pw). #login enters it in the password field.
def pw
  @pw
end

#user ⇒ Object

Returns the value of attribute user.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Reader for @user, which #initialize takes from the "user" option (falling
# back to the netrc reader's user). #login enters it in the email field.
def user
  @user
end

Instance Method Details

#download_books(agent, books) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/scrapers/manning_books.rb', line 64

# Walks the given download links, announcing each book and, unless this is a
# dry run, fetching it through +agent+ and saving the fetched page. Waits
# +delay_time+ after every book, dry run or not.
#
# @param agent [#get, #current_page] browsing agent used for the downloads
# @param books [Enumerable] links responding to #node and #href; the book
#   title is read from the <h1> under the link node's fourth ancestor
# @return [Array<Array>] one [filename, uri string] pair per link, read from
#   the agent's current page after that link was handled. In a dry run nothing
#   is fetched, so the pair describes whatever page the agent already had.
def download_books(agent, books)
  books.each_with_object([]) do |link, pairs|
    container = 4.times.inject(link.node) { |node, _| node.parent }
    title = container.at_css('h1').text
    puts "Downloading #{title} from #{link.href}"

    if dry_run
      warn "dry run, not saving"
    else
      agent.get(link.href)
      puts "Saving #{agent.current_page.filename}"
      agent.current_page.save! # bang variant: overwrites an existing file
    end

    wait_a_bit(delay_time)
    pairs << [agent.current_page.filename, agent.current_page.uri.to_s]
  end
end

#login(agent) {|agent| ... } ⇒ Object

Yields:

  • (agent)


37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/scrapers/manning_books.rb', line 37

# Makes sure +agent+ is on the dashboard, logging in if necessary, and then
# yields the agent to the caller's block.
#
# The agent first requests DASHBOARD_URL. If it ends up on any other page,
# that page's form is filled with +user+ and +pw+ and submitted; afterwards
# the agent must be on the dashboard or the login is treated as failed.
#
# URIs are compared in string form on both checks. A URI object never equals
# a String, so comparing the raw URI would send an already-authenticated
# session to the login-form branch.
#
# @param agent [#get, #current_page] browsing agent to log in with
# @yield [agent] runs once the agent is on the dashboard
# @return [Object] whatever the block returns
# @raise [RuntimeError] when no block is given, or when the agent is not on
#   the dashboard after submitting the login form
def login(agent, &block)
  raise "Must provide a block to execute after logged in to site" unless block_given?

  dashboard = DASHBOARD_URL.to_s
  agent.get DASHBOARD_URL
  unless agent.current_page.uri.to_s == dashboard
    # Not on the dashboard, so this page is taken to be the login form.
    form = agent.current_page.form
    form.field_with(:type => 'email').value = user
    form.field_with(:type => 'password').value = pw
    form.submit
    sleep 2
    raise "could not log in" unless agent.current_page.uri.to_s == dashboard
  end
  yield agent
end

#scrape ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/scrapers/manning_books.rb', line 24

# Runs a complete scrape: starts a Mechanize session, logs in, collects every
# link on the resulting page whose href matches /account/bookProduct/download,
# and downloads them from inside the +destination+ directory.
#
# The pairs returned by #download_books are also kept in @results.
#
# @return [Hash] the [filename, uri] pairs from #download_books as a hash
def scrape
  Mechanize.start do |m|
    login(m) do |agent|
      book_downloads = agent.current_page.links_with(:href => %r{/account/bookProduct/download})
      # Block form of chdir restores the previous working directory afterwards.
      Dir.chdir(destination) do
        @results = download_books(agent, book_downloads)
      end
    end
  end

  Hash[@results]
end

#wait_a_bit(delay) ⇒ Object



52
53
54
55
56
57
58
59
60
61
# File 'lib/scrapers/manning_books.rb', line 52

# Pauses in one-second steps while animating a one-character spinner
# (- * | +) at the start of the current terminal line.
#
# The remaining time is checked before each sleep, so a delay below 1
# (for example 0) returns immediately instead of blocking for a second.
# For delays of 1 or more the number of one-second sleeps is unchanged.
#
# @param delay [Numeric] seconds to wait; any fractional remainder below one
#   second is not slept
# @return [nil]
def wait_a_bit(delay)
  puts "delaying for #{delay} second(s)"
  spinner = %w[- * | +].cycle
  remaining = delay
  until remaining < 1
    print "\r#{spinner.next}"
    sleep 1
    remaining -= 1
  end
  print "\r" # park the cursor back at column 0, as the spinner frames do
end