Method: MechWarrior::Crawler#initialize

Defined in:
lib/mech_warrior/crawler.rb

#initialize(override_opts = {}) ⇒ Crawler

Returns a new instance of Crawler.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/mech_warrior/crawler.rb', line 5

def initialize(override_opts={})
  @opts  = DEFAULTS.merge(override_opts)
  @default_host     = opts[:default_host]
  @default_protocol = opts[:default_protocol]
  opts[:allowed_domains] << default_host
  @output_file = opts[:output_file] || File.open(opts[:log_file_name], 'a')
  @logger = opts[:logger_class].new(output_file)
  @agent_pool = MechCell.pool(size: opts[:pool_size], args: [logger])
  @pages = {}
  start_url = opts[:start_url] || "#{default_protocol}#{default_host}/"
  pages[normalize_url(start_url)] = {}
  index_url(start_url) unless opts[:no_index]
  self
ensure
  output_file.close if output_file.respond_to?(:close)
end