Class: Scrape456::Download

Inherits:
Object
  • Object
show all
Includes:
Capybara::RSpecMatchers, RSpec::Matchers
Defined in:
lib/scrape456/download.rb

Overview

Given a username and password, retrieves all event attendee data from 123Signup.

Usage:

csv_result = Scrape456::Download.new(
  username: ENV['LOGIN_USER_1'],
  password: ENV['LOGIN_PASSWORD_1']
).call

puts csv_result

Constant Summary collapse

LOGIN_URL =
'https://redirect.123signup.com/login'
ONLOAD_REDIRECT_XPATH =
"//body[contains(@onload,'pageLoadRedirect')]"
REPORT_TAB_LINK_CSS_SELECTOR =
'body > form:nth-child(1) > table:nth-child(3) > ' +
'tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2) > table:nth-child(1) > ' +
'tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(7) > a:nth-child(1)'
THIRTY_SECONDS =
30

Instance Method Summary collapse

Constructor Details

#initialize(username:, password:) ⇒ Download

Returns a new instance of Download.



33
34
35
36
37
# File 'lib/scrape456/download.rb', line 33

# Prepares Capybara for scraping and remembers the credentials that are
# later typed into the login form.
#
# @param username [String] value entered into the 'username' field
# @param password [String] value entered into the 'password' field
def initialize(username:, password:)
  # Driver configuration does not read the credentials, so the order of
  # these steps is interchangeable.
  set_up_capybara
  @username = username
  @password = password
end

Instance Method Details

#call ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/scrape456/download.rb', line 91

# Logs in, walks the nested frames to the "Event Attendee Data" report,
# requests the download and returns the report as CSV text.
#
# Progress messages are written to STDERR.
#
# @return [String] the CSV text produced by extract_csv_from_full_response
def call
  csv_result = nil

  STDERR.puts 'Logging in to 123Signup...'
  # The login step must run before any frame is looked up; without it the
  # session never leaves its initial blank page.
  log_in

  session.within_frame('FrameApplication') do
    session.within_frame('top_menu') do
      STDERR.puts 'Navigating to reports tab...'
      session.find(REPORT_TAB_LINK_CSS_SELECTOR).click
    end
    session.within_frame('contents') do
      session.within_frame('MenuList') do
        STDERR.puts 'Navigating to event reports...'
        session.click_link 'Event Reports'
      end
      session.within_frame('Results') do
        session.click_link('Event Attendee Data')
        session.click_button 'BottomDownloadReport'
        STDERR.puts 'Waiting on report download...'
        # Assigned to the outer local so the value survives the nested blocks.
        csv_result = extract_csv_from_full_response(fetch_csv_response_body)
      end
    end
  end
  csv_result
end

#extract_csv_from_full_response(full_response) ⇒ Object



86
87
88
89
# File 'lib/scrape456/download.rb', line 86

# Strips an HTML wrapper from around the CSV text.
#
# If the entire input has the shape
# <html ...<pre ...>SignupMemberID,...</pre></body></html>, only the text
# inside the <pre> element is returned. Any other input is returned
# unchanged (as a new string).
#
# @param full_response [String] response body, possibly HTML-wrapped
# @return [String] the bare CSV text, or a copy of the input if no wrapper matched
def extract_csv_from_full_response(full_response)
  html_wrapper = /\A<html.*<pre[^>]*>(SignupMemberID,.*)<\/pre><\/body><\/html>\z/m
  full_response.sub(html_wrapper) { Regexp.last_match(1) }
end

#extract_redirect_url ⇒ Object



61
62
63
64
65
66
# File 'lib/scrape456/download.rb', line 61

# Builds the URL that the current page's onload handler redirects to.
#
# The <body> onload attribute is split on ';', the first piece mentioning
# pageLoadRedirect is selected, and the last double-quoted segment on that
# line is taken as the path. The path is appended to the session's current
# host.
#
# @return [String] current host followed by the quoted redirect path
# @raise [NoMethodError] if no piece of the onload value mentions pageLoadRedirect
def extract_redirect_url
  statements = session.find('body')['onload'].split(';')
  redirect_call = statements.find { |statement| statement =~ /pageLoadRedirect/ }
  quoted_path = redirect_call.sub(/^.*"([^"]*)".*$/, '\1')
  session.current_host + quoted_path
end

#fetch_csv_response_body ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/scrape456/download.rb', line 68

# Waits for the report page and returns its CSV payload re-encoded as UTF-8.
#
# The page is expected to show either the CSV itself (text starting a line
# with "SignupMemberID,") or a <body> whose onload handler redirects to the
# generated file. In the redirect case the target URL is visited and then
# downloaded directly; otherwise the session's current body is used.
#
# The bytes are reinterpreted as ISO-8859-1 before being transcoded to UTF-8.
#
# @return [String] UTF-8 response body (possibly still HTML-wrapped)
def fetch_csv_response_body
  session.assert_selector('body')

  # Build the compound matcher first so it is passed to `to` as one argument
  # without depending on the space in `to (...)`.
  csv_or_redirect = have_text(/^SignupMemberID,/).or(match_xpath(ONLOAD_REDIRECT_XPATH))
  expect(session.find('body')).to csv_or_redirect

  csv_response_body =
    if session.find('body').matches_xpath?(ONLOAD_REDIRECT_XPATH)
      csv_url = extract_redirect_url
      session.visit(csv_url)
      STDERR.puts "Report generated by 123Signup. Fetching CSV..."
      # URI.open (from open-uri, which the original URL-taking `open` call
      # also needed) replaces Kernel#open: the latter no longer accepts URLs
      # on Ruby 3.0+ and can spawn a process for strings starting with "|".
      # The block form closes the handle after reading.
      URI.open(csv_url, &:read)
    else
      session.body
    end

  csv_response_body.force_encoding(Encoding::ISO_8859_1).encode(Encoding::UTF_8)
end

#log_in ⇒ Object



50
51
52
53
54
55
# File 'lib/scrape456/download.rb', line 50

# Opens the login page, fills in the stored credentials and submits them by
# clicking the 'SignInButton' link.
def log_in
  session.visit LOGIN_URL
  session.fill_in 'username', with: @username
  session.fill_in 'password', with: @password
  session.click_link 'SignInButton'
end

#session ⇒ Object



57
58
59
# File 'lib/scrape456/download.rb', line 57

# Shorthand for Capybara's current session.
#
# @return [Object] whatever Capybara.current_session returns
def session
  Capybara.current_session
end

#set_up_capybara ⇒ Object



39
40
41
42
43
44
45
46
47
48
# File 'lib/scrape456/download.rb', line 39

# Registers a Poltergeist driver and makes it Capybara's current and
# JavaScript driver. Also turns off Capybara's own server and sets the
# default maximum wait time to THIRTY_SECONDS.
def set_up_capybara
  # Passed through to the driver as its phantomjs_options.
  driver_settings = { phantomjs_options: ['--ssl-protocol=any'] }

  Capybara.register_driver(:poltergeist) do |app|
    Capybara::Poltergeist::Driver.new(app, driver_settings)
  end

  Capybara.run_server = false
  Capybara.javascript_driver = :poltergeist
  Capybara.current_driver = :poltergeist
  Capybara.default_max_wait_time = THIRTY_SECONDS
end