Class: Scrapers::ManningBooks::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/scrapers/manning_books.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Scraper

Returns a new instance of Scraper.



15
16
17
18
19
20
21
22
# File 'lib/scrapers/manning_books.rb', line 15

# Builds a scraper from an options object that is queried with string keys.
#
# Keys read, with the fallback used when a key is absent:
#   "user"        - user reported by the netrc reader
#   "pw"          - pw reported by the netrc reader
#   "delay"       - DELAY_TIME
#   "destination" - "."
#   "dry_run"     - false
#
# The ::Scrapers::NetrcReader for NETRC_MANNING_ENTRY is always created and
# asked for both credentials, even when the caller supplies them.
#
# @param options [#fetch] settings keyed by the strings listed above
def initialize(options = {})
  credentials = ::Scrapers::NetrcReader.new(NETRC_MANNING_ENTRY)

  @user        = options.fetch("user", credentials.user)
  @pw          = options.fetch("pw", credentials.pw)
  @delay_time  = options.fetch("delay", DELAY_TIME)
  @destination = options.fetch("destination", ".")
  @dry_run     = options.fetch("dry_run", false)
end

Instance Attribute Details

#delay_time ⇒ Object

Returns the value of attribute delay_time.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Delay passed to wait_a_bit after each book in download_books.
# Set in initialize from options["delay"], falling back to DELAY_TIME.
def delay_time
  @delay_time
end

#destination ⇒ Object

Returns the value of attribute destination.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Directory that scrape switches into (via Dir.chdir) while it runs.
# Set in initialize from options["destination"], falling back to ".".
def destination
  @destination
end

#dry_run ⇒ Object

Returns the value of attribute dry_run.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# When truthy, download_books neither fetches nor saves the download links.
# Set in initialize from options["dry_run"], falling back to false.
def dry_run
  @dry_run
end

#pw ⇒ Object

Returns the value of attribute pw.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Password written into the login form's password field.
# Set in initialize from options["pw"], falling back to the pw reported by
# the ::Scrapers::NetrcReader built for NETRC_MANNING_ENTRY.
def pw
  @pw
end

#user ⇒ Object

Returns the value of attribute user.



13
14
15
# File 'lib/scrapers/manning_books.rb', line 13

# Account name written into the login form's email field.
# Set in initialize from options["user"], falling back to the user reported
# by the ::Scrapers::NetrcReader built for NETRC_MANNING_ENTRY.
def user
  @user
end

Instance Method Details

#build_book_list(page) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/scrapers/manning_books.rb', line 54

# Turns the listing page into one hash per '.book' element.
#
# The title is the text of the first child of the '[data-type=title]' node.
# Download links are the 'a' elements under '.book_downloads' whose
# lower-cased label contains "download"; the last word of that label becomes
# the key and the link's href the value. When two links share a key the
# later one wins.
#
# @param page [#search] parsed page to read books from
# @return [Array<Hash>] entries of the form
#   { title: String, downloads: { Symbol => href } }
def build_book_list(page)
  page.search('.book').map do |entry|
    heading = entry.at('[data-type=title]').children.first.text
    anchors = entry.at('.book_downloads').search('a')

    formats = anchors.each_with_object({}) do |anchor, found|
      label = anchor.children.first.text.downcase
      next unless label.include?("download")

      found[label.split(" ").last.to_sym] = anchor.attr(:href)
    end

    { title: heading, downloads: formats }
  end
end

#download_books(agent, books) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/scrapers/manning_books.rb', line 68

# Downloads the pdf, epub and kindle links of every book through the agent.
#
# For each book the title is printed, every supported link is fetched with
# agent.get and written with save! on the resulting page, and wait_a_bit is
# then called with delay_time before the next book.
#
# When dry_run is truthy nothing is fetched or saved. Each link is reported
# as skipped and recorded under its format name ("pdf", "epub", "kindle"),
# because the real file name is only known once the file has been fetched.
# Reading the file name from the agent's current page in that mode would
# describe an unrelated page and make every link of the run share one key.
#
# @param agent [#get, #current_page] agent used to fetch the links
# @param books [Array<Hash>] entries shaped like
#   { title: ..., downloads: { type => href } }
# @return [Hash] title => { saved file name (format name in a dry run) => href }
def download_books(agent, books)
  wanted = %i[pdf epub kindle]

  books.map do |book|
    puts "Retrieving #{book[:title]}"
    downloads = book[:downloads].map do |type, href|
      next unless wanted.include?(type)
      print "  downloading #{type} ..."

      if dry_run
        puts "skipped (dry run)"
        next [type.to_s, href]
      end

      agent.get href
      agent.current_page.save!
      puts "saved #{agent.current_page.filename}"
      [agent.current_page.filename, href]
    end.compact.to_h
    wait_a_bit delay_time
    [book[:title], downloads]
  end.to_h
end

#login(agent) {|agent| ... } ⇒ Object

Yields:

  • (agent)


39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/scrapers/manning_books.rb', line 39

# Makes sure the agent is on the dashboard, logging in if needed, then
# yields it.
#
# Requests DASHBOARD_URL. If the agent does not end up on that address, the
# email and password fields of the current page's form are filled with user
# and pw, the form is submitted, and after a two-second pause the address is
# checked again.
#
# Both address checks compare string forms. Comparing the page's uri object
# directly in one check and its string form in the other lets the two checks
# disagree about the same page.
#
# @param agent [#get, #current_page] browsing agent to authenticate
# @yield [agent] the agent once it is on the dashboard
# @return [Object] whatever the block returns
# @raise [RuntimeError] when no block is given, or when the agent is still
#   not on the dashboard after the form was submitted
def login(agent, &block)
  raise "Must provide a block to execute after logged in to site" unless block_given?

  dashboard = DASHBOARD_URL.to_s
  agent.get DASHBOARD_URL
  unless agent.current_page.uri.to_s == dashboard
    # Not on the dashboard yet, so fill in and submit the login form.
    form = agent.current_page.form
    form.field_with(:type => 'email').value = user
    form.field_with(:type => 'password').value = pw
    form.submit
    sleep 2
    raise "could not log in" unless agent.current_page.uri.to_s == dashboard
  end
  yield agent
end

#scrape ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/scrapers/manning_books.rb', line 24

# Runs the whole scrape from inside the destination directory.
#
# Changes into destination for the duration of the run, starts a Mechanize
# session, logs in through login, builds the book list from the page the
# agent is on after login, and hands that list to download_books.
#
# @return [Object] the value returned by download_books, which is also kept
#   in @results
def scrape
  @results = nil
  Dir.chdir(destination) do
    Mechanize.start do |agent|
      login(agent) do |session|
        books = build_book_list(session.current_page)
        @results = download_books(session, books)
      end
    end
  end
  @results
end

#wait_a_bit(delay) ⇒ Object



84
85
86
87
88
89
90
91
92
93
# File 'lib/scrapers/manning_books.rb', line 84

# Pauses for about +delay+ seconds while drawing a one-character spinner.
#
# The pause is made of one-second sleeps, one per whole second of +delay+,
# with the spinner character redrawn before each sleep. A delay below one
# second (including zero and negative values) returns without sleeping.
#
# @param delay [Numeric] number of seconds to wait
# @return [nil]
def wait_a_bit(delay)
  puts "delaying for #{delay} second(s)"
  spinner = %w[- * | +].cycle
  # floor keeps the count of one-second steps equal to the whole seconds in
  # delay; Integer#times runs zero times for zero or negative counts.
  delay.floor.times do
    print "\r#{spinner.next}"
    sleep 1
  end
  print "\r"
end