Class: Whatsa::Scraper
- Inherits:
-
Object
show all
- Includes:
- Format
- Defined in:
- lib/whatsa/scraper.rb
Constant Summary
collapse
- WIKISEARCH =
'https://en.wikipedia.org/w/index.php?search='
Instance Attribute Summary collapse
Instance Method Summary
collapse
Methods included from Format
#bulletize_lines, #heading_to_title, #remove_citation_markers, #url_friendly, #word_wrap
Constructor Details
#initialize(term) ⇒ Scraper
Returns a new instance of Scraper.
12
13
14
15
16
17
18
19
|
# File 'lib/whatsa/scraper.rb', line 12
# Runs a Wikipedia search for +term+ and keeps the parsed response.
#
# The term is passed through +url_friendly+ and stored in @query; the
# result is appended to WIKISEARCH to form the request URL. The response is
# parsed with Nokogiri and stored in @page.
#
# The request is made with +URI.open+ (open-uri) rather than +Kernel#open+:
# opening URLs through +Kernel#open+ was deprecated in Ruby 2.7 and removed
# in Ruby 3.0, and +Kernel#open+ spawns a subprocess when handed a string
# that starts with "|". The block form closes the response IO once parsing
# has finished.
#
# @param term [Object] the search term; whatever +url_friendly+ accepts
#   (presumably a String -- confirm against Format#url_friendly)
def initialize(term)
  @query = url_friendly(term)
  @page = URI.open(WIKISEARCH + query) { |response| Nokogiri::HTML(response) }
end
|
Instance Attribute Details
#page ⇒ Object
Returns the value of attribute page.
10
11
12
|
# File 'lib/whatsa/scraper.rb', line 10
# Reader for the search response parsed in the constructor.
#
# @return [Object] the value of @page, i.e. whatever Nokogiri::HTML
#   returned for the search request made in #initialize
def page
@page
end
|
#query ⇒ Object
Returns the value of attribute query.
10
11
12
|
# File 'lib/whatsa/scraper.rb', line 10
# Reader for the search term as it was sent to Wikipedia.
#
# @return [Object] the value of @query, i.e. the result of calling
#   url_friendly on the term given to #initialize
def query
@query
end
|
Instance Method Details
#article? ⇒ Boolean
29
30
31
|
# File 'lib/whatsa/scraper.rb', line 29
# Tells whether the fetched page is a regular article.
#
# A page counts as an article when it contains an element matching
# '#ca-nstab-main' and is not a disambiguation page.
#
# @return [Boolean]
def article?
  return false if page.css('#ca-nstab-main').empty?

  !disambig?
end
|
#disambig? ⇒ Boolean
33
34
35
|
# File 'lib/whatsa/scraper.rb', line 33
# Tells whether the fetched page is a disambiguation page, i.e. whether it
# contains at least one element matching '#disambigbox'.
#
# @return [Boolean]
def disambig?
  disambig_boxes = page.css('#disambigbox')
  !disambig_boxes.empty?
end
|
#make_article ⇒ Object
37
38
39
40
41
42
43
44
45
46
47
48
|
# File 'lib/whatsa/scraper.rb', line 37
# Builds an article object for the current search, following one level of
# indirection at a time until an article page is reached.
#
# * If the page is an article, it is wrapped in a Whatsa::Article.
# * If it is a search-results page that is not flagged as "not found", a new
#   scraper is created for the text of the first link matching
#   '.mw-search-results li a' and asked for its article.
# * If it is a disambiguation page, a new scraper is created for the first
#   entry of make_disambig.choices and asked for its article.
#
# @return [Whatsa::Article, nil] nil when none of the cases above applies,
#   or when a results page contains no result links
def make_article
  if article?
    Whatsa::Article.new(page)
  elsif results_page? && !not_found?
    first_link = page.css('.mw-search-results li a').first
    # A results page is not guaranteed to contain a link under this
    # selector; return nil instead of calling #text on nil.
    self.class.new(first_link.text).make_article if first_link
  elsif disambig?
    self.class.new(make_disambig.choices.first).make_article
  end
end
|
#make_disambig ⇒ Object
50
51
52
|
# File 'lib/whatsa/scraper.rb', line 50
# Wraps the fetched page in a Whatsa::Disambig when it is a disambiguation
# page.
#
# @return [Whatsa::Disambig, nil] nil when the page is not a disambiguation
#   page
def make_disambig
  return nil unless disambig?

  Whatsa::Disambig.new(page)
end
|
#not_found? ⇒ Boolean
25
26
27
|
# File 'lib/whatsa/scraper.rb', line 25
# Tells whether the fetched page contains at least one element matching
# '.mw-search-nonefound'.
#
# @return [Boolean]
def not_found?
  none_found_notices = page.css('.mw-search-nonefound')
  !none_found_notices.empty?
end
|
#results_page? ⇒ Boolean
21
22
23
|
# File 'lib/whatsa/scraper.rb', line 21
# Tells whether the fetched page is a search-results listing, i.e. whether
# it contains at least one element matching '.searchresults'.
#
# @return [Boolean]
def results_page?
  result_containers = page.css('.searchresults')
  !result_containers.empty?
end
|