Class: ScraperRb::Scraper
- Inherits: Object (Object → ScraperRb::Scraper)
- Defined in: lib/scraper_rb.rb
Constant Summary
- VALID_PARAMS = ['auth_password', 'auth_username', 'cookie', 'country', 'referer', 'selector']
Instance Attribute Summary
- #options ⇒ Object: Returns the value of attribute options.
- #response ⇒ Object: Returns the value of attribute response.
Instance Method Summary
- #get ⇒ Object
- #initialize(url, params, timeout) ⇒ Scraper (constructor): A new instance of Scraper.
- #parse(body) ⇒ Object
- #save(filename) ⇒ Object
Constructor Details
#initialize(url, params, timeout) ⇒ Scraper
Returns a new instance of Scraper.
33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/scraper_rb.rb', line 33

# Builds the request configuration for one scrape call and resets the stored
# response to an empty hash.
#
# The endpoint is taken from ENV['PROMPTAPI_TEST_ENDPOINT'] when set, otherwise
# the public PromptAPI scraper URL is used. The 'apikey' header is read from
# ENV['PROMPTAPI_TOKEN'] and may therefore be nil.
#
# @param url value sent as the :url request parameter
# @param params [Hash, nil] optional extra request parameters; only entries
#   whose key is the symbol form of a VALID_PARAMS name are copied, all other
#   entries are silently ignored
# @param timeout value stored as the :timeout request option
# @return [Scraper] a new instance of Scraper
def initialize(url, params, timeout)
  params = {} if params.nil?

  @options = {
    url: ENV['PROMPTAPI_TEST_ENDPOINT'] || 'https://api.promptapi.com/scraper',
    params: {url: url},
    request: {timeout: timeout},
    headers: {'Accept' => 'application/json', 'apikey' => ENV['PROMPTAPI_TOKEN']},
  }

  # Symbolize the whitelist once rather than on every iteration.
  allowed_keys = VALID_PARAMS.map(&:to_sym)
  params.each do |key, value|
    @options[:params][key] = value if allowed_keys.include?(key)
  end

  @response = {}
end
Instance Attribute Details
#options ⇒ Object
Returns the value of attribute options.
31 32 33 |
# File 'lib/scraper_rb.rb', line 31

# Returns the value of attribute options.
#
# @return [Object] whatever is currently stored in @options
def options
  @options
end
#response ⇒ Object
Returns the value of attribute response.
31 32 33 |
# File 'lib/scraper_rb.rb', line 31

# Returns the value of attribute response.
#
# @return [Object] whatever is currently stored in @response
def response
  @response
end
Instance Method Details
#get ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/scraper_rb.rb', line 56

# Performs the GET request described by @options and stores the outcome in
# @response.
#
# When the 'apikey' header is missing, no request is made and @response is set
# to an error hash. Otherwise the response body is run through #parse; if the
# parsed result has a :"data-selector" key, its value is also stored under
# :data. Faraday connection, timeout, client and server errors are rescued and
# turned into an {error: ...} hash instead of propagating.
#
# Read the outcome from @response rather than from this method's return value:
# the early exit returns nil and the other paths return whatever the last
# evaluated expression yields.
#
# @return [Object, nil]
def get
  unless @options[:headers]['apikey']
    @response = {error: "You need to set PROMPTAPI_TOKEN environment variable"}
    return
  end

  conn = Faraday.new(@options) do |c|
    # RaiseError turns HTTP error statuses into the exceptions rescued below.
    c.use Faraday::Response::RaiseError
    c.use CustomURLMiddleware if ENV['RUBY_DEVELOPMENT']
  end

  begin
    response = conn.get
    @response = parse(response.body)
    @response[:data] = @response[:"data-selector"] if @response.key?(:"data-selector")
  rescue Faraday::ConnectionFailed
    @response = {error: "Connection error"}
  rescue Faraday::TimeoutError => e
    @response = {error: e.message.capitalize}
  rescue Faraday::ClientError => e
    # Client errors carry a response body, which is parsed like a normal reply.
    @response = {error: parse(e.response[:body])}
  rescue Faraday::ServerError => e
    @response = {error: e.message.capitalize}
  end
end
#parse(body) ⇒ Object
48 49 50 51 52 53 54 |
# File 'lib/scraper_rb.rb', line 48

# Decodes a JSON document, converting object keys to symbols.
#
# A body that cannot be decoded yields an error hash instead of raising. This
# covers malformed JSON and bodies that are not strings at all, such as nil,
# for which JSON.parse raises TypeError rather than JSON::ParserError.
#
# @param body [String, nil] raw JSON text
# @return [Object] the decoded value, or {error: "JSON decoding error"} when
#   decoding fails
def parse(body)
  JSON.parse(body, symbolize_names: true)
rescue JSON::ParserError, TypeError
  {error: "JSON decoding error"}
end
#save(filename) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/scraper_rb.rb', line 82

# Writes the :data entry of @response to disk.
#
# Array data is serialized with JSON.generate and saved with a ".json"
# extension; any other data is written as-is with an ".html" extension. The
# extension is appended to the given name unless the name already ends with
# it, so "page" and "page.html" both produce "page.html" for non-Array data.
#
# @param filename [String] destination path, with or without the extension
# @return [Hash] {file: path, size: bytes} on success,
#   {error: 'Data is not available'} when @response has no :data, or
#   {error: message} when the file cannot be opened because part of the path
#   does not exist (Errno::ENOENT)
def save(filename)
  payload = @response[:data]
  return {error: 'Data is not available'} unless payload

  suffix, contents =
    if payload.instance_of?(Array)
      ['.json', JSON.generate(payload)]
    else
      ['.html', payload]
    end

  # basename drops the suffix when already present so it is never doubled.
  target = "#{File.dirname(filename)}/#{File.basename(filename, suffix)}#{suffix}"

  File.open(target, 'w') { |io| io.write(contents) }
  {file: target, size: File.size(target)}
rescue Errno::ENOENT => e
  {error: e.to_s}
end