Class: Maxwell::Base
- Inherits:
-
Object
- Object
- Maxwell::Base
- Defined in:
- lib/maxwell.rb
Class Method Summary collapse
- .attr_accessor(*attrs) ⇒ Object
- .attrs ⇒ Object
- .concurrency(value) ⇒ Object
- .execute(urls) ⇒ Object
- .javascript(value) ⇒ Object
Instance Method Summary collapse
- #handler(result) ⇒ Object
- #parser(html) ⇒ Object
- #result ⇒ Object
Class Method Details
.attr_accessor(*attrs) ⇒ Object
27 28 29 30 31 |
# File 'lib/maxwell.rb', line 27
# Remembers every declared attribute name in the class-level @attrs list
# (creating the list on first use) and then hands the same arguments to the
# inherited attr_accessor via +super+.
#
# @param attrs [Array] attribute names being declared
# @return [Object] whatever the inherited attr_accessor returns
def attr_accessor(*attrs)
  (@attrs ||= []).concat(attrs)
  super
end
.attrs ⇒ Object
33 34 35 |
# File 'lib/maxwell.rb', line 33
# Returns the attribute names recorded in @attrs for this class.
#
# When this class has no @attrs of its own, the superclass chain is searched
# and the first @attrs list found is returned, so classes more than one level
# below the declaring class still see it. An empty array is returned when no
# class in the chain has an @attrs list, so callers can iterate without a
# nil check.
#
# @return [Array] the recorded attribute names, or [] when there are none
def attrs
  klass = self
  while klass
    # instance_variable_get avoids evaluating a string just to read an ivar.
    declared = klass.instance_variable_get(:@attrs)
    return declared if declared

    klass = klass.superclass
  end
  []
end
.concurrency(value) ⇒ Object
41 42 43 |
# File 'lib/maxwell.rb', line 41
# Stores +value+ in the class-level @concurrency variable, which .execute
# passes to Parallel.map_with_index as the in_threads option (falling back
# to 1 when unset).
#
# @param value [Object] presumably an Integer thread count (confirm with callers)
# @return [Object] the value that was stored
def concurrency(value)
  @concurrency = value
end
.execute(urls) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/maxwell.rb', line 13
# Scrapes every URL in +urls+ through Parallel.map_with_index, using
# @concurrency threads (1 when unset).
#
# For each URL a fresh scraper instance is created, the URL is converted with
# Maxwell::Converter.call (which also receives the @use_poltergeist flag), the
# converted HTML is passed to #parser, and #handler receives a hash holding
# the 1-based page id merged with the scraper's #result.
#
# @param urls [Enumerable] the URLs to process
# @return [Object] whatever Parallel.map_with_index returns (presumably the
#   per-URL #handler return values; confirm against the Parallel gem)
def execute(urls)
  Parallel.map_with_index(urls, in_threads: @concurrency || 1) do |url, id|
    number = id + 1 # ids reported to the handler are 1-based
    p "scraping: #{number}"

    scraper = new
    html = Maxwell::Converter.call(url, @use_poltergeist)
    scraper.parser(html)

    payload = { id: number }.merge(scraper.result)
    scraper.handler(payload)
  end
end
.javascript(value) ⇒ Object
37 38 39 |
# File 'lib/maxwell.rb', line 37
# Stores +value+ in the class-level @use_poltergeist variable, which .execute
# forwards as the second argument to Maxwell::Converter.call.
#
# @param value [Object] presumably a boolean enabling JavaScript rendering
#   (confirm against Maxwell::Converter)
# @return [Object] the value that was stored
def javascript(value)
  @use_poltergeist = value
end
Instance Method Details
#handler(result) ⇒ Object
50 51 52 |
# File 'lib/maxwell.rb', line 50
# Default handler: prints the inspected +result+ to standard output using
# Kernel#p.
#
# @param result [Object] the value .execute passes in (a hash of the page id
#   merged with #result)
# @return [Object] the same +result+, as returned by Kernel#p
def handler(result)
  p(result)
end
#parser(html) ⇒ Object
46 47 48 |
# File 'lib/maxwell.rb', line 46
# Default parser: always raises, signalling that the scraper class has not
# supplied its own #parser implementation.
#
# @param html [Object] the converted page passed in by .execute (unused here)
# @raise [NoParserDefinedErr] always
def parser(html)
  # The comma matters: without it Ruby parses `NoParserDefinedErr "..."` as a
  # call to a method named NoParserDefinedErr and raises NoMethodError instead.
  # The class name is interpolated so the message reads "Foo#parser" rather
  # than embedding the instance's inspect string.
  raise NoParserDefinedErr, "You need to define #{self.class}#parser"
end
#result ⇒ Object
54 55 56 |
# File 'lib/maxwell.rb', line 54
# Builds a hash of the scraper's attributes: every name listed by the class's
# .attrs becomes a key, and its value is obtained by calling the method of
# that name on this instance.
#
# @return [Hash] attribute name => current value
def result
  self.class.attrs.each_with_object({}) do |name, collected|
    collected[name] = send(name)
  end
end