Class: WhatToWatch::Scraper
- Inherits:
-
Object
- Object
- WhatToWatch::Scraper
- Defined in:
- lib/what_to_watch/scraper.rb
Class Method Summary
-
.get_item_page(object) ⇒ Object
Scrape Search Page on imdb.com to determine the Item Page URL.
- .scrape_imdb(object) ⇒ Object
- .scrape_vulture ⇒ Object
Class Method Details
.get_item_page(object) ⇒ Object
Scrape Search Page on imdb.com to determine the Item Page URL.
21 22 23 24 25 26 27 |
# File 'lib/what_to_watch/scraper.rb', line 21
# Scrape Search Page on imdb.com to determine the Item Page URL.
#
# The search query is the object's title with every "Season " substring
# removed, URL-escaped with CGI.escape.
#
# @param object [#title] anything whose +title+ returns a String
# @return [String] "https://www.imdb.com" followed by the href that the
#   "td a" selection yields on the search results page
# @raise [NoMethodError] when the "td a" selection yields no href attribute
#   (the attribute lookup returns nil and +value+ is then called on it)
def self.get_item_page(object)
  query = CGI.escape(object.title.gsub("Season ", ""))
  # URI.open (from open-uri) instead of Kernel#open: Kernel#open no longer
  # fetches URLs on Ruby 3.0 and later.
  search_results_page = Nokogiri::HTML(URI.open("https://www.imdb.com/find?s=tt&q=#{query}"))
  href = search_results_page.css("td a").attribute("href")
  "https://www.imdb.com#{href.value}"
end
.scrape_imdb(object) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/what_to_watch/scraper.rb', line 30
# Fill in an object's IMDb details by scraping its item page.
#
# Sets, in order:
# * +url+         - the value returned by get_item_page(object)
# * +description+ - stripped text of "div.summary_text"
# * +genre_year+  - stripped texts of every "div.subtext a", with en dashes
#   (U+2013) replaced by "-", joined with " | "
# * +cast+        - a Hash with one entry per "div.credit_summary_item":
#   key is the stripped "h4" text, value is the stripped link texts joined
#   with ", " and with the ", See full cast & crew" suffix removed
#
# @param object [#url=, #description=, #genre_year=, #cast=, #cast, #url]
#   the item to populate; it is also passed to get_item_page
# @return [Object] whatever +each+ returns for the credit-item selection
def self.scrape_imdb(object)
  object.url = get_item_page(object)
  # URI.open (from open-uri) instead of Kernel#open: Kernel#open no longer
  # fetches URLs on Ruby 3.0 and later.
  item_page = Nokogiri::HTML(URI.open(object.url))

  object.description = item_page.css("div.summary_text").text.strip
  object.genre_year = item_page.css("div.subtext a")
                               .map { |tag| tag.text.strip.gsub("\u2013", "-") }
                               .join(" | ")

  object.cast = {}
  item_page.css("div.credit_summary_item").each do |category|
    names = category.css("a").map { |tag| tag.text.strip }.join(", ")
    object.cast[category.css("h4").text.strip] = names.gsub(", See full cast & crew", "")
  end
end
.scrape_vulture ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
# File 'lib/what_to_watch/scraper.rb', line 4
# Scrape http://vulture.com/streaming and instantiate a WhatToWatch::Show
# for every listed item.
#
# Page structure, as walked by the selectors below:
# * each "div[data-editable='main']" is a section whose "h2.clay-subheader"
#   text (stripped, downcased) becomes the third Show argument;
# * each "div.column-item" inside it is a row whose first "section" carries
#   an "h3" heading (stripped, capitalized) used as the second argument;
# * every remaining "section" in the row is an item; the second line of its
#   "div[itemprop='caption']" text (stripped) is the first argument.
#
# The created Show objects are not collected here.
#
# @return [Object] whatever +each+ returns for the top-level selection
# @raise [NoMethodError] when an item's caption text has fewer than two
#   lines (the second line is nil and +strip+ is then called on it)
def self.scrape_vulture
  # URI.open (from open-uri) instead of Kernel#open: Kernel#open no longer
  # fetches URLs on Ruby 3.0 and later.
  doc = Nokogiri::HTML(URI.open("http://vulture.com/streaming"))

  doc.css("div[data-editable='main']").each do |section|
    section_label = section.css("h2.clay-subheader").text.strip.downcase

    section.css("div.column-item").each do |row|
      heading, *items = row.css("section").to_a
      # Rows with no item sections create nothing, so skip them before
      # touching the heading (which is nil for a completely empty row).
      next if items.empty?

      row_label = heading.css("h3").text.strip.capitalize

      items.each do |item|
        caption = item.css("div[itemprop='caption']").text
        # NOTE(review): the listing this was taken from shows a substitution
        # of " " with " ", which would be a no-op; presumably the first
        # argument was a non-breaking space (U+00A0) that did not survive
        # extraction - confirm against the repository source.
        title = caption.split("\n")[1].strip.tr("\u00A0", " ")
        WhatToWatch::Show.new(title, row_label, section_label)
      end
    end
  end
end