Class: Scrape456::Download
- Inherits: Object
- Object
- Scrape456::Download
- Includes:
- Capybara::RSpecMatchers, RSpec::Matchers
- Defined in:
- lib/scrape456/download.rb
Overview
Given a username and password, retrieves all event attendee data from 123Signup.
Usage:
csv_result = Scrape456::Download.new(
username: ENV['LOGIN_USER_1'],
password: ENV['LOGIN_PASSWORD_1']
).call
puts csv_result
Constant Summary collapse
- LOGIN_URL =
'https://redirect.123signup.com/login'
- ONLOAD_REDIRECT_XPATH =
"//body[contains(@onload,'pageLoadRedirect')]"
- REPORT_TAB_LINK_CSS_SELECTOR =
Machine-generated: copied from the Firefox developer tools. It works, whereas my handcrafted XPath did not.
'body > form:nth-child(1) > table:nth-child(3) > ' + 'tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2) > table:nth-child(1) > ' + 'tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(7) > a:nth-child(1)'
- THIRTY_SECONDS =
30
Instance Method Summary collapse
- #call ⇒ Object
- #extract_csv_from_full_response(full_response) ⇒ Object
- #extract_redirect_url ⇒ Object
- #fetch_csv_response_body ⇒ Object
-
#initialize(username:, password:) ⇒ Download
constructor
A new instance of Download.
- #log_in ⇒ Object
- #session ⇒ Object
- #set_up_capybara ⇒ Object
Constructor Details
#initialize(username:, password:) ⇒ Download
Returns a new instance of Download.
33 34 35 36 37 |
# File 'lib/scrape456/download.rb', line 33
#
# Remembers the credentials that #log_in later types into the login form.
#
# @param username [Object] value entered into the 'username' field
# @param password [Object] value entered into the 'password' field
def initialize(username:, password:)
  @username, @password = username, password
end
Instance Method Details
#call ⇒ Object
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/scrape456/download.rb', line 91
#
# Runs the whole scrape: logs in, walks the nested frames to the
# "Event Attendee Data" report, triggers the download and returns the CSV.
# Progress messages are written to STDERR.
#
# @return [String] whatever #extract_csv_from_full_response produces from
#   the body returned by #fetch_csv_response_body
def call
  csv_result = nil
  # NOTE(review): the rendered listing lost several identifiers. The listed
  # line range has room for exactly one more statement than is visible, and
  # nothing else invokes the driver setup, so it is presumably this call.
  # Confirm against lib/scrape456/download.rb.
  set_up_capybara
  session = Capybara.current_session
  STDERR.puts 'Logging in to 123Signup...'
  log_in
  session.within_frame('FrameApplication') do
    session.within_frame('top_menu') do
      STDERR.puts 'Navigating to reports tab...'
      session.find(REPORT_TAB_LINK_CSS_SELECTOR).click
    end
    session.within_frame('contents') do
      session.within_frame('MenuList') do
        STDERR.puts 'Navigating to event reports...'
        session.click_link 'Event Reports'
      end
      session.within_frame('Results') do
        session.click_link('Event Attendee Data')
        # NOTE(review): the action name was missing from the listing;
        # click_button is assumed for 'BottomDownloadReport' -- TODO confirm.
        session.click_button 'BottomDownloadReport'
        STDERR.puts 'Waiting on report download...'
        csv_result = extract_csv_from_full_response(fetch_csv_response_body)
      end
    end
  end
  csv_result
end
#extract_csv_from_full_response(full_response) ⇒ Object
86 87 88 89 |
# File 'lib/scrape456/download.rb', line 86
#
# Removes the HTML wrapper (<html> ... <pre ...> ... </pre></body></html>)
# that surrounds the CSV when it is read from a rendered page. Input that
# does not match the wrapper pattern (e.g. raw CSV) is returned unchanged.
#
# @param full_response [String] page body or raw CSV text
# @return [String] the text starting at "SignupMemberID," when the wrapper
#   is present, otherwise a copy of the input
def extract_csv_from_full_response(full_response)
  # Whitespace after </html> is tolerated; a strict \z would fail to match
  # and hand the whole HTML document back to the caller.
  # NOTE(review): HTML entities inside <pre> are not unescaped here --
  # confirm whether the rendered page can contain them.
  wrapper_pattern = %r{\A<html.*<pre[^>]*>(SignupMemberID,.*)</pre></body></html>\s*\z}m
  full_response.sub(wrapper_pattern, '\1')
end
#extract_redirect_url ⇒ Object
61 62 63 64 65 66 |
# File 'lib/scrape456/download.rb', line 61
#
# Builds the URL the current page redirects to on load. The body's onload
# attribute is split on ';', the first statement mentioning
# pageLoadRedirect is picked, and the path inside its last pair of double
# quotes is appended to session.current_host.
#
# @return [String] session.current_host followed by the quoted path
# @raise [RuntimeError] if the onload attribute is missing or holds no
#   pageLoadRedirect statement with a double-quoted path
def extract_redirect_url
  onload_value = session.find('body')['onload'].to_s
  redirect_statement = onload_value.split(';').grep(/pageLoadRedirect/).first
  # Extract the path instead of substituting it, so text outside the quotes
  # can never leak into the URL.
  redirect_path = redirect_statement && redirect_statement[/"([^"]*)"[^"]*\z/, 1]
  unless redirect_path
    raise "No pageLoadRedirect target found in body onload: #{onload_value.inspect}"
  end

  session.current_host + redirect_path
end
#fetch_csv_response_body ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/scrape456/download.rb', line 68
#
# Obtains the report body once the page is ready. The page must either
# show text starting with "SignupMemberID," or carry the onload redirect
# matched by ONLOAD_REDIRECT_XPATH. In the redirect case the target URL is
# visited and then downloaded directly; otherwise the current page body is
# used.
#
# @return [String] the body, reinterpreted as ISO-8859-1 and transcoded
#   to UTF-8
def fetch_csv_response_body
  session.assert_selector('body')
  expect(session.find('body')).to(
    have_text(/^SignupMemberID,/).or(match_xpath(ONLOAD_REDIRECT_XPATH))
  )

  if session.find('body').matches_xpath?(ONLOAD_REDIRECT_XPATH)
    csv_url = extract_redirect_url
    session.visit(csv_url)
    STDERR.puts "Report generated by 123Signup. Fetching CSV..."
    # URI#open (open-uri) with a block closes the handle and never treats
    # the string as a file path or "|command". Kernel#open stopped opening
    # URLs in Ruby 3.0. open-uri is assumed to be loaded already, since the
    # previous Kernel#open call needed it too.
    csv_response_body = URI.parse(csv_url).open(&:read)
  else
    csv_response_body = session.body
  end

  csv_response_body.force_encoding(Encoding::ISO_8859_1).encode(Encoding::UTF_8)
end
#log_in ⇒ Object
50 51 52 53 54 55 |
# File 'lib/scrape456/download.rb', line 50
#
# Opens LOGIN_URL, fills the 'username' and 'password' fields with the
# stored credentials, then clicks the 'SignInButton' link.
def log_in
  session.visit(LOGIN_URL)
  { 'username' => @username, 'password' => @password }.each do |field, value|
    session.fill_in(field, with: value)
  end
  session.click_link('SignInButton')
end
#session ⇒ Object
57 58 59 |
# File 'lib/scrape456/download.rb', line 57
#
# Shorthand for the session the other methods of this class drive.
# The receiver was missing from the rendered listing; Capybara is restored
# here because the class configures and uses Capybara throughout.
#
# @return [Object] the value of Capybara.current_session
def session
  Capybara.current_session
end
#set_up_capybara ⇒ Object
39 40 41 42 43 44 45 46 47 48 |
# File 'lib/scrape456/download.rb', line 39
#
# Registers a :poltergeist driver (PhantomJS started with
# --ssl-protocol=any), makes it both the current and the JavaScript
# driver, disables Capybara's own server, and sets the maximum wait time
# to THIRTY_SECONDS.
#
# NOTE(review): the rendered listing dropped the method name, the Capybara
# receivers and the options variable name; they are restored here from the
# method summary and the remaining call shapes -- confirm against
# lib/scrape456/download.rb.
def set_up_capybara
  options = { phantomjs_options: ['--ssl-protocol=any'] }
  Capybara.register_driver :poltergeist do |app|
    Capybara::Poltergeist::Driver.new(app, options)
  end
  Capybara.current_driver = :poltergeist
  Capybara.javascript_driver = :poltergeist
  Capybara.run_server = false
  Capybara.default_max_wait_time = THIRTY_SECONDS
end