Class: Whatsa::Scraper

Inherits:
Object
  • Object
show all
Includes:
Format
Defined in:
lib/whatsa/scraper.rb

Constant Summary collapse

WIKISEARCH =
'https://en.wikipedia.org/w/index.php?search='

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Format

#bulletize_lines, #heading_to_title, #remove_citation_markers, #url_friendly, #word_wrap

Constructor Details

#initialize(term) ⇒ Scraper

Returns a new instance of Scraper.



12
13
14
15
16
17
18
19
# File 'lib/whatsa/scraper.rb', line 12

# Builds a scraper for +term+ by fetching and parsing the Wikipedia search
# page for it.
#
# @param term the search term; it is normalised by url_friendly before being
#   appended to WIKISEARCH
# @return [Scraper] a new instance with @query and @page populated
def initialize(term)
  # only keep word chars and parens, turn everything between each 'word'
  # to a single '+' and remove '+'s at the beginning and end if they're there
  @query = url_friendly(term)

  # store the page in an instance variable so we don't keep polling the site.
  # URI.open is used rather than Kernel#open, which no longer fetches URLs on
  # Ruby 3.0+; the block form closes the underlying IO/Tempfile once Nokogiri
  # has finished parsing it.
  @page = URI.open(WIKISEARCH + query) { |io| Nokogiri::HTML(io) }
end

Instance Attribute Details

#page ⇒ Object (readonly)

Returns the value of attribute page.



10
11
12
# File 'lib/whatsa/scraper.rb', line 10

# Reader for the page held by this scraper.
#
# @return [Object] the value of @page, which #initialize assigns from
#   Nokogiri::HTML so the site is only fetched once per instance
def page
  @page
end

#query ⇒ Object (readonly)

Returns the value of attribute query.



10
11
12
# File 'lib/whatsa/scraper.rb', line 10

# Reader for the normalised search query.
#
# @return [Object] the value of @query, which #initialize assigns from
#   url_friendly(term)
def query
  @query
end

Instance Method Details

#article? ⇒ Boolean

Returns:

  • (Boolean)


29
30
31
# File 'lib/whatsa/scraper.rb', line 29

# Whether the fetched page should be treated as a regular article.
#
# @return [Boolean] true when the page contains a '#ca-nstab-main' element
#   and is not a disambiguation page
def article?
  # Without the '#ca-nstab-main' element there is no need to ask disambig?.
  return false if page.css('#ca-nstab-main').empty?

  !disambig?
end

#disambig? ⇒ Boolean

Returns:

  • (Boolean)


33
34
35
# File 'lib/whatsa/scraper.rb', line 33

# Whether the fetched page is a disambiguation page.
#
# @return [Boolean] true when the page contains a '#disambigbox' element
def disambig?
  disambig_boxes = page.css('#disambigbox')
  !disambig_boxes.empty?
end

#make_article ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/whatsa/scraper.rb', line 37

# Resolves the fetched page to an article, following one hop at a time
# through search results or a disambiguation page when necessary.
#
# - On an article page, wraps the page in Whatsa::Article.
# - On a search-results page that is not a "none found" page, re-scrapes
#   using the text of the first '.mw-search-results li a' link.
# - On a disambiguation page, re-scrapes using the first entry of
#   make_disambig.choices.
#
# @return [Whatsa::Article, nil] the article, or nil when the page is none
#   of the above or when there is no first result/choice to follow
def make_article
  if article?
    Whatsa::Article.new(page)
  elsif results_page? && !not_found?
    first_link = page.css('.mw-search-results li a').first
    # A results page may contain no matching links; return nil rather than
    # calling #text on nil.
    first_link && self.class.new(first_link.text).make_article
  elsif disambig?
    first_choice = make_disambig.choices.first
    # No choices means nothing to follow; avoid building a scraper from nil.
    first_choice && self.class.new(first_choice).make_article
  end
end

#make_disambig ⇒ Object



50
51
52
# File 'lib/whatsa/scraper.rb', line 50

# Wraps the fetched page in a Whatsa::Disambig when it is a disambiguation
# page.
#
# @return [Whatsa::Disambig, nil] the wrapper, or nil when disambig? is false
def make_disambig
  Whatsa::Disambig.new(page) if disambig?
end

#not_found? ⇒ Boolean

Returns:

  • (Boolean)


25
26
27
# File 'lib/whatsa/scraper.rb', line 25

# Whether the fetched page reports that the search found nothing.
#
# @return [Boolean] true when the page contains a '.mw-search-nonefound'
#   element
def not_found?
  none_found_markers = page.css('.mw-search-nonefound')
  !none_found_markers.empty?
end

#results_page? ⇒ Boolean

Returns:

  • (Boolean)


21
22
23
# File 'lib/whatsa/scraper.rb', line 21

# Whether the fetched page is a search-results page.
#
# @return [Boolean] true when the page contains a '.searchresults' element
def results_page?
  results_containers = page.css('.searchresults')
  !results_containers.empty?
end