Class: ScraperRb::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/scraper_rb.rb

Constant Summary collapse

# Names of the optional request parameters the constructor will accept from
# the caller-supplied params hash; anything else is ignored. Frozen so the
# whitelist cannot be mutated at runtime.
VALID_PARAMS =
  %w[auth_password auth_username cookie country referer selector].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, params, timeout) ⇒ Scraper

Returns a new instance of Scraper.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/scraper_rb.rb', line 33

# Assembles the Faraday connection options for a scrape request and resets
# the stored response to an empty hash.
#
# @param url [String] page to scrape; sent as the +url+ query parameter
# @param params [Hash, nil] optional request parameters. Only keys named in
#   VALID_PARAMS are kept; Symbol and String keys are both accepted and are
#   stored as Symbols. +nil+ is treated as "no extra parameters".
# @param timeout [Object] stored as the Faraday request timeout
#   (presumably seconds — confirm against Faraday's request options)
def initialize(url, params, timeout)
  @options = {
    url: ENV['PROMPTAPI_TEST_ENDPOINT'] || 'https://api.promptapi.com/scraper',
    params: {url: url},
    request: {timeout: timeout},
    headers: {'Accept' => 'application/json', 'apikey' => ENV['PROMPTAPI_TOKEN']},
  }

  # VALID_PARAMS holds strings, so compare on the string form of the key;
  # this accepts {country: 'EE'} and {'country' => 'EE'} alike.
  (params || {}).each do |key, value|
    name = key.to_s
    @options[:params][name.to_sym] = value if VALID_PARAMS.include?(name)
  end

  @response = {}
end

Instance Attribute Details

#options ⇒ Object

Returns the value of attribute options.



31
32
33
# File 'lib/scraper_rb.rb', line 31

# Reader for the request options hash stored in @options.
def options; @options; end

#response ⇒ Object

Returns the value of attribute response.



31
32
33
# File 'lib/scraper_rb.rb', line 31

# Reader for the most recent result (or error hash) stored in @response.
def response; @response; end

Instance Method Details

#get ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/scraper_rb.rb', line 56

# Performs the scrape request and stores the outcome in @response.
#
# On success @response holds the decoded JSON body; when that body is a hash
# with a :"data-selector" key, its value is also copied to :data. Every
# failure is reported as {error: ...} in @response instead of being raised:
# missing API token, connection failure, timeout, HTTP client error (the
# decoded error body when one is available) and HTTP server error.
#
# @return [Object, nil] incidental; callers should read #response
def get
  unless @options[:headers]['apikey']
    @response = {error: "You need to set PROMPTAPI_TOKEN environment variable"}
    return
  end

  conn = Faraday.new(@options) do |c|
    c.use Faraday::Response::RaiseError
    c.use CustomURLMiddleware if ENV['RUBY_DEVELOPMENT']
  end

  begin
    response = conn.get
    @response = parse(response.body)
    # Valid JSON is not necessarily an object (it may be an array or scalar),
    # so only look for the selector key when a hash was decoded.
    if @response.is_a?(Hash) && @response.key?(:"data-selector")
      @response[:data] = @response[:"data-selector"]
    end
  rescue Faraday::ConnectionFailed
    @response = {error: "Connection error"}
  rescue Faraday::TimeoutError => e
    @response = {error: e.message.capitalize}
  rescue Faraday::ClientError => e
    # A client error may carry no response (or no body); fall back to the
    # exception message rather than failing on nil.
    body = e.response && e.response[:body]
    @response = {error: body ? parse(body) : e.message.capitalize}
  rescue Faraday::ServerError => e
    @response = {error: e.message.capitalize}
  end
end

#parse(body) ⇒ Object



48
49
50
51
52
53
54
# File 'lib/scraper_rb.rb', line 48

# Decodes a JSON document, symbolizing hash keys.
#
# @param body [String, nil] raw JSON text
# @return [Object] the decoded value, or {error: "JSON decoding error"} when
#   +body+ is not valid JSON or is not a string at all
def parse(body)
  JSON.parse(body, symbolize_names: true)
rescue JSON::ParserError, TypeError
  # TypeError is what JSON.parse raises for a nil / non-String body
  # (e.g. a response with no content); report it like any other bad payload.
  {error: "JSON decoding error"}
end

#save(filename) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/scraper_rb.rb', line 82

# Writes the :data entry of the stored response to disk.
#
# String data is saved with an ".html" extension; Array data is serialized
# with JSON.generate and saved with ".json". The extension is appended to
# +filename+ unless it already ends with it.
#
# @param filename [String] target path, with or without the extension
# @return [Hash] {file: path, size: bytes} on success, or {error: message}
#   when no data is available or the file cannot be written
def save(filename)
  data = @response[:data]
  return {error: 'Data is not available'} unless data

  extension = '.html'
  if data.is_a?(Array)
    extension = '.json'
    data = JSON.generate(data)
  end

  # File.join avoids a doubled separator when the directory is "/".
  target = File.join(File.dirname(filename), "#{File.basename(filename, extension)}#{extension}")

  begin
    File.write(target, data)
    {file: target, size: File.size(target)}
  rescue SystemCallError => e
    # Covers every Errno::* failure (missing directory, permission denied,
    # path component is not a directory, ...), not just ENOENT.
    {error: e.message}
  end
end