Module: Scraypa

Includes:
Capybara::DSL
Defined in:
lib/scraypa.rb,
lib/scraypa/version.rb,
lib/scraypa/throttle.rb,
lib/scraypa/configuration.rb,
lib/scraypa/driver_resetter.rb,
lib/scraypa/visit/visit_factory.rb,
lib/scraypa/visit/visit_interface.rb,
lib/scraypa/visit/visit_rest_client.rb,
lib/scraypa/user_agent/user_agent_random.rb,
lib/scraypa/user_agent/user_agent_factory.rb,
lib/scraypa/user_agent/user_agent_abstract.rb,
lib/scraypa/user_agent/user_agent_iterator.rb,
lib/scraypa/visit/visit_capabara_poltergeist.rb,
lib/scraypa/visit/visit_capabara_headless_chromium.rb,
lib/scraypa/user_agent/user_agent_common_aliases_lists.rb

Defined Under Namespace

Classes: CapybaraDriverUnsupported, Configuration, DriverResetter, HeadlessChromiumMissingConfig, Throttle, TorNotSupportedByAgent, UnrecognisedUserAgentsMethod, UserAgentAbstract, UserAgentFactory, UserAgentIterator, UserAgentRandom, VisitCapybaraHeadlessChromium, VisitCapybaraPoltergeist, VisitFactory, VisitInterface, VisitRestClient

Constant Summary

# Version string for Scraypa (listed as defined in lib/scraypa/version.rb).
VERSION =
"0.1.1"
# Lookup table of desktop browser User-Agent header strings.
# Keys are short human-readable aliases ("<platform> <browser>"); values are
# the full User-Agent string for that alias.
# NOTE(review): the hash is not frozen, so callers can mutate it in place —
# confirm whether that is relied upon before adding `.freeze`.
USER_AGENT_LIST =
{
    'Linux Firefox' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:43.0) Gecko/20100101 Firefox/43.0',
    'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
    'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
    'Mac Firefox' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:43.0) Gecko/20100101 Firefox/43.0',
    'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
    'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
    'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9',
    'Windows Chrome' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.125 Safari/537.36',
    'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
    'Windows IE 10' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)',
    'Windows IE 11' => 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Windows Edge' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586',
    'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
    'Windows Firefox' => 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
}
# Lookup table of User-Agent header strings for handheld devices
# (iPhone, iPad, Android). Keys are device aliases; values are the full
# User-Agent string for that alias.
USER_AGENT_MOBILE_LIST =
{
    'iPhone' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B5110e Safari/601.1',
    'iPad' => 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1',
    'Android' => 'Mozilla/5.0 (Linux; Android 5.1.1; Nexus 7 Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.76 Safari/537.36'
}

Class Attribute Summary

Class Method Summary

Class Attribute Details

.agent ⇒ Object

Returns the value of attribute agent.



24
25
26
# File 'lib/scraypa.rb', line 24

# Class-level reader for @agent.
#
# @return [Object, nil] whatever is currently stored in @agent; nil if it has
#   never been assigned
def agent
  @agent
end

.driver_resetter ⇒ Object

Returns the value of attribute driver_resetter.



24
25
26
# File 'lib/scraypa.rb', line 24

# Class-level reader for @driver_resetter.
#
# @return [Object, nil] whatever is currently stored in @driver_resetter; nil
#   if it has never been assigned
def driver_resetter
  @driver_resetter
end

.throttle ⇒ Object

Returns the value of attribute throttle.



24
25
26
# File 'lib/scraypa.rb', line 24

# Class-level reader for @throttle.
#
# @return [Object, nil] whatever is currently stored in @throttle; nil if it
#   has never been assigned
def throttle
  @throttle
end

.tor_ip_control ⇒ Object

Returns the value of attribute tor_ip_control.



24
25
26
# File 'lib/scraypa.rb', line 24

# Class-level reader for @tor_ip_control.
#
# @return [Object, nil] whatever is currently stored in @tor_ip_control; nil
#   if it has never been assigned
def tor_ip_control
  @tor_ip_control
end

.tor_process ⇒ Object

Returns the value of attribute tor_process.



24
25
26
# File 'lib/scraypa.rb', line 24

# Class-level reader for @tor_process.
#
# @return [Object, nil] whatever is currently stored in @tor_process; nil if
#   it has never been assigned
def tor_process
  @tor_process
end

.tor_proxy ⇒ Object

Returns the value of attribute tor_proxy.



24
25
26
# File 'lib/scraypa.rb', line 24

# Class-level reader for @tor_proxy.
#
# @return [Object, nil] whatever is currently stored in @tor_proxy; nil if it
#   has never been assigned
def tor_proxy
  @tor_proxy
end

.user_agent_retriever ⇒ Object

Returns the value of attribute user_agent_retriever.



24
25
26
# File 'lib/scraypa.rb', line 24

# Class-level reader for @user_agent_retriever.
#
# @return [Object, nil] whatever is currently stored in @user_agent_retriever;
#   nil if it has never been assigned
def user_agent_retriever
  @user_agent_retriever
end

Class Method Details

.change_tor_ip_address ⇒ Object



54
55
56
# File 'lib/scraypa.rb', line 54

# Asks the Tor IP controller for a new IP, but only when Tor is in use.
#
# @return [Object, nil] the result of +@tor_ip_control.get_new_ip+, or nil
#   when +using_tor?+ is falsy (in which case nothing is called)
def change_tor_ip_address
  # Nothing to rotate unless traffic is actually going through Tor.
  return unless using_tor?

  @tor_ip_control.get_new_ip
end

.configuration ⇒ Object



27
28
29
# File 'lib/scraypa.rb', line 27

# Returns the stored configuration, building one on first use.
#
# @return [Object] the value of @configuration; a new +Configuration+ is
#   created and stored first when @configuration is nil or false
def configuration
  @configuration = Configuration.new unless @configuration
  @configuration
end

.configuration=(config) ⇒ Object



31
32
33
# File 'lib/scraypa.rb', line 31

# Class-level writer that replaces the stored configuration.
#
# @param config [Object] stored in @configuration as-is; no validation
#   happens in this method
# @return [Object] the assigned value
def configuration=(config)
  @configuration = config
end

.configure ⇒ Object



42
43
44
45
46
47
# File 'lib/scraypa.rb', line 42

# Yields the current configuration to the caller's block, then validates it
# and (re)runs setup.
#
# @yieldparam config [Object] the object returned by +configuration+
# @return [Object] whatever the block returned
# @raise [LocalJumpError] when called without a block
def configure
  block_result = yield(configuration)

  # Validation and setup run only after the block has finished.
  validate_configuration
  setup_scraypa

  block_result
end

.reset ⇒ Object



35
36
37
38
39
40
# File 'lib/scraypa.rb', line 35

# Replaces the stored configuration with a fresh +Configuration.new+ and then
# re-runs the throttle reset and setup helpers.
#
# The new configuration is assigned before the helpers are called.
# NOTE(review): presumably reset_throttle / setup_scraypa read the new
# configuration — confirm before reordering.
#
# @return [Object] the value of @configuration after both helpers have run
def reset
  @configuration = Configuration.new
  reset_throttle
  setup_scraypa
  @configuration
end

.user_agent ⇒ Object



58
59
60
61
# File 'lib/scraypa.rb', line 58

# Reports the user agent currently selected by the user-agent retriever.
#
# @return [Object, nil] +@user_agent_retriever.current_user_agent+, or nil
#   when no retriever is set
def user_agent
  return nil unless @user_agent_retriever

  @user_agent_retriever.current_user_agent
end

.visit(params = {}) ⇒ Object



49
50
51
52
# File 'lib/scraypa.rb', line 49

# Performs a throttled visit, running setup first if no agent exists yet.
#
# @param params [Hash] passed through unchanged to +visit_with_throttle+
# @return [Object] whatever +visit_with_throttle+ returns
def visit(params = {})
  # Set up lazily: only when @agent is still nil/false.
  unless @agent
    setup_scraypa
  end

  visit_with_throttle(params)
end