Class: Scrapers::ManningBooks::Scraper
- Inherits:
-
Object
- Object
- Scrapers::ManningBooks::Scraper
- Defined in:
- lib/scrapers/manning_books.rb
Instance Attribute Summary collapse
-
#delay_time ⇒ Object
Returns the delay, in seconds, passed to wait_a_bit after each book; taken from the "delay" option, defaulting to DELAY_TIME.
-
#destination ⇒ Object
Returns the directory that scrape changes into before downloading; taken from the "destination" option, defaulting to ".".
-
#dry_run ⇒ Object
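Returns the dry-run flag; when it is set, download_books neither fetches nor saves anything. Taken from the "dry_run" option, defaulting to false.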
-
#pw ⇒ Object
Returns the password entered into the login form; taken from the "pw" option, defaulting to the netrc entry's password.
-
#user ⇒ Object
Returns the user name entered into the login form's email field; taken from the "user" option, defaulting to the netrc entry's user.
Instance Method Summary collapse
- #download_books(agent, books) ⇒ Object
-
#initialize(options = {}) ⇒ Scraper
constructor
A new instance of Scraper.
- #login(agent) {|agent| ... } ⇒ Object
- #scrape ⇒ Object
- #wait_a_bit(delay) ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ Scraper
Returns a new instance of Scraper.
15 16 17 18 19 20 21 22 |
# File 'lib/scrapers/manning_books.rb', line 15
#
# Builds a scraper from a string-keyed options hash. Any key that is missing
# falls back to the default shown below; credentials fall back to the netrc
# entry named by NETRC_MANNING_ENTRY.
#
# @param options [Hash] recognised keys: "user", "pw", "delay",
#   "destination" and "dry_run"
# @return [Scraper] a new instance of Scraper
def initialize(options = {})
  # The netrc entry is read up front so it can serve as the credential default.
  netrc_reader = ::Scrapers::NetrcReader.new(NETRC_MANNING_ENTRY)
  @user = options.fetch("user", netrc_reader.user)
  @pw = options.fetch("pw", netrc_reader.pw)
  @delay_time = options.fetch("delay", DELAY_TIME)
  @destination = options.fetch("destination", ".")
  @dry_run = options.fetch("dry_run", false)
end
Instance Attribute Details
#delay_time ⇒ Object
Returns the value of attribute delay_time.
13 14 15 |
# File 'lib/scrapers/manning_books.rb', line 13
#
# Reader for the delay handed to wait_a_bit after each book.
#
# @return [Object] the value of the @delay_time instance variable
def delay_time
  @delay_time
end
#destination ⇒ Object
Returns the value of attribute destination.
13 14 15 |
# File 'lib/scrapers/manning_books.rb', line 13
#
# Reader for the directory that scrape changes into before downloading.
#
# @return [Object] the value of the @destination instance variable
def destination
  @destination
end
#dry_run ⇒ Object
Returns the value of attribute dry_run.
13 14 15 |
# File 'lib/scrapers/manning_books.rb', line 13
#
# Reader for the flag that makes download_books skip fetching and saving.
#
# @return [Object] the value of the @dry_run instance variable
def dry_run
  @dry_run
end
#pw ⇒ Object
Returns the value of attribute pw.
13 14 15 |
# File 'lib/scrapers/manning_books.rb', line 13
#
# Reader for the password that login types into the password field.
#
# @return [Object] the value of the @pw instance variable
def pw
  @pw
end
#user ⇒ Object
Returns the value of attribute user.
13 14 15 |
# File 'lib/scrapers/manning_books.rb', line 13
#
# Reader for the user name that login types into the email field.
#
# @return [Object] the value of the @user instance variable
def user
  @user
end
Instance Method Details
#download_books(agent, books) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/scrapers/manning_books.rb', line 64
#
# Visits every download link in turn. Unless dry_run is set, each link is
# fetched through the agent and the resulting page is saved (overwriting any
# existing file). wait_a_bit(delay_time) is called after every link.
#
# @param agent [Object] responds to get and current_page (scrape passes the
#   agent yielded by Mechanize.start)
# @param books [Array] links responding to node and href
# @return [Array<Array>] one two-element pair per link: the saved page's
#   filename and URI string, or, in a dry run, the book title and link href
def download_books(agent, books)
  books.map do |book|
    # The title is read from the <h1> under the link node's fourth ancestor.
    bookname = book.node.parent.parent.parent.parent.at_css('h1').text
    puts "Downloading #{bookname} from #{book.href}"

    entry =
      if dry_run
        warn "dry run, not saving"
        # Nothing was fetched, so agent.current_page is still whatever page
        # was loaded before; report the link itself instead of that stale page.
        [bookname, book.href]
      else
        agent.get book.href
        page = agent.current_page
        puts "Saving #{page.filename}"
        page.save! # overwrite!
        [page.filename, page.uri.to_s]
      end

    wait_a_bit delay_time
    entry
  end
end
#login(agent) {|agent| ... } ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/scrapers/manning_books.rb', line 37
#
# Opens DASHBOARD_URL and, if the agent did not end up there, fills in the
# login form with user and pw and submits it. Once on the dashboard the agent
# is yielded to the caller's block.
#
# @param agent [Object] responds to get and current_page
# @yield [agent] runs once the agent is on the dashboard
# @return [Object] whatever the block returns
# @raise [RuntimeError] when no block is given, or when the agent is still not
#   on DASHBOARD_URL after submitting the form
def login(agent, &block)
  raise "Must provide a block to execute after logged in to site" unless block_given?

  agent.get DASHBOARD_URL
  # Compare as strings in both checks: the page's uri is an object, and the
  # post-submit check below already relied on DASHBOARD_URL matching uri.to_s.
  unless agent.current_page.uri.to_s == DASHBOARD_URL
    # log in
    form = agent.current_page.form
    form.field_with(:type => 'email').value = user
    form.field_with(:type => 'password').value = pw
    form.submit
    sleep 2
    raise "could not log in" unless agent.current_page.uri.to_s == DASHBOARD_URL
  end
  yield agent
end
#scrape ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/scrapers/manning_books.rb', line 24
#
# Runs one complete session: starts a Mechanize agent, logs in, collects the
# links on the current page whose href matches /account/bookProduct/download,
# and downloads them from inside the destination directory.
#
# @return [Hash] Hash built from the pairs returned by download_books
def scrape
  Mechanize.start do |agent|
    login(agent) do |session|
      book_downloads = session.current_page.links_with(:href => %r{/account/bookProduct/download})
      # The working directory is switched only for the duration of the downloads.
      Dir.chdir(destination) do
        @results = download_books(session, book_downloads)
      end
    end
  end
  Hash[@results]
end
#wait_a_bit(delay) ⇒ Object
52 53 54 55 56 57 58 59 60 61 |
# File 'lib/scrapers/manning_books.rb', line 52
#
# Pauses for delay seconds while printing a one-character spinner that is
# redrawn at the start of the line before each sleep. Sleeps happen in steps
# of at most one second, so a delay of zero or less returns without sleeping
# and a fractional delay is not rounded up to a whole second.
#
# @param delay [Numeric] number of seconds to pause
# @return [nil]
def wait_a_bit(delay)
  puts "delaying for #{delay} second(s)"
  spinner = %w[- * | +].cycle
  remaining = delay
  while remaining > 0
    print "\r#{spinner.next}"
    pause = [remaining, 1].min
    sleep pause
    remaining -= pause
  end
  # Return the cursor to the start of the line once the wait is over.
  print "\r"
end