Class: Proxy::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/proxy/base.rb

Direct Known Subclasses

BrightData, CrawlBase, ScraperApi

Instance Method Summary collapse

Constructor Details

#initialize(params = {}) ⇒ Base

Returns a new instance of Base.



12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/proxy/base.rb', line 12

# Sets up the proxy client state from an options hash.
#
# @param params [Hash] optional settings, read by symbol key: :country,
#   :headers, :query_params, :cookies, :timeout, :tls_verify, :retry_attr,
#   :enable_js and :enable_image_cache
def initialize(params = {})
  @country = params[:country]

  # These options fall back to their default whenever the supplied value is
  # falsy (nil or false), not only when the key is missing.
  {
    headers: {},
    query_params: {},
    cookies: [],
    timeout: 60,
    enable_image_cache: false
  }.each do |option, fallback|
    instance_variable_set(:"@#{option}", params[option] || fallback)
  end
  @retry = params[:retry_attr] || {}

  # These options fall back only when the key is absent, so an explicit nil
  # or false passed by the caller is stored as given.
  { tls_verify: true, enable_js: false }.each do |option, fallback|
    instance_variable_set(:"@#{option}", params.fetch(option, fallback))
  end

  @logger = Logger.new($stdout)
end

Instance Method Details

#format_response(response) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/proxy/base.rb', line 38

# Wraps a raw HTTP response in a Response, decoding a compressed body.
#
# The body is only extracted when success_response? accepts the response;
# otherwise Response is built with a nil body. A body whose Content-Encoding
# is gzip (or its alias x-gzip) or br is decompressed; any other encoding is
# passed through untouched.
#
# @param response [Net::HTTPResponse] object responding to #[], #body, #code,
#   #each_header and #get_fields
# @return [Response] built with code:, body:, headers: and cookies:
# @raise [Zlib::Error] when a body labelled gzip is not valid gzip data
def format_response(response)
  page_content = nil

  if success_response?(response)
    raw_body = response.body
    # Read the header straight from the response (Net::HTTPResponse#header is
    # obsolete and just returns self). Content codings are case-insensitive,
    # so normalise the value before comparing.
    page_content =
      case response['Content-Encoding'].to_s.strip.downcase
      when 'gzip', 'x-gzip'
        # The block form closes the reader once the body has been read.
        Zlib::GzipReader.wrap(StringIO.new(raw_body), &:read)
      when 'br'
        # Brotli-compressed payload.
        Brotli.inflate(raw_body)
      else
        raw_body
      end
  end

  Response.new(
    code: response.code.to_i,
    body: page_content,
    headers: get_headers(response),
    # get_fields returns nil when the response carries no Set-Cookie header.
    cookies: response.get_fields('set-cookie')
  )
end

#get_headers(response) ⇒ Object



60
61
62
63
64
65
66
# File 'lib/proxy/base.rb', line 60

# Copies the response headers into a plain Hash.
#
# @param response [#each_header] object yielding name/value pairs
# @return [Hash] header names mapped to the values yielded by each_header;
#   a later pair with the same name overwrites an earlier one
def get_headers(response)
  {}.tap do |collected|
    response.each_header { |name, content| collected[name] = content }
  end
end

#prepare_request(uri, proxy_uri = nil) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/proxy/base.rb', line 68

# Builds a configured Net::HTTP client for the target URI, optionally routed
# through an explicit proxy.
#
# For https targets TLS is enabled and certificate verification follows the
# tls_verify option stored in @tls_verify by the constructor: a truthy value
# verifies the peer, a falsy value skips verification. Both the open and read
# timeouts are taken from @timeout.
#
# @param uri [URI] target; #host, #port and #scheme are read
# @param proxy_uri [URI, nil] proxy; #host, #port, #user and #password are
#   read when given
# @return [Net::HTTP] the configured client
def prepare_request(uri, proxy_uri = nil)
  http = if proxy_uri.nil?
           Net::HTTP.new(uri.host, uri.port)
         else
           Net::HTTP.new(uri.host, uri.port, proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password)
         end

  if uri.scheme == 'https'
    http.use_ssl = true
    # Verification used to be switched off unconditionally, which ignored the
    # tls_verify option and left every HTTPS request open to interception.
    http.verify_mode = @tls_verify ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
  end

  http.read_timeout = @timeout
  http.open_timeout = @timeout
  http
end

#success_response?(response) ⇒ Boolean

Returns:

  • (Boolean)


84
85
86
# File 'lib/proxy/base.rb', line 84

# Tells whether a response counts as successful.
#
# @param response [#code] response object; a Net::HTTPSuccess instance is
#   accepted outright, anything else must report a code equal to 200
# @return [Boolean]
def success_response?(response)
  return true if response.is_a?(Net::HTTPSuccess)

  response.code.to_i == 200
end

#with_retry ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/proxy/base.rb', line 25

# Runs the given block, re-running it after a pause when it raises.
#
# The block is retried up to @retry[:count] times (nil counts as 0), so it
# runs at most count + 1 times. Between runs the method sleeps for
# @retry[:wait_time] seconds, or 5 when that is not set. Only StandardError
# and its subclasses trigger a retry.
#
# @yield the work to attempt
# @return [Object] the value of the first block run that does not raise
# @raise [StandardError] the last error once the retries are used up
def with_retry
  # An endless counter is used rather than Kernel#loop, which would swallow
  # a StopIteration raised by the block instead of propagating it.
  1.step do |attempt|
    begin
      return yield
    rescue StandardError => failure
      raise failure if attempt > @retry[:count].to_i

      sleep(@retry[:wait_time] || 5)
    end
  end
end