Class: Scraper
- Inherits:
-
Object
- Object
- Scraper
- Defined in:
- lib/apod_cli/scraper.rb
Class Method Summary collapse
Instance Method Summary collapse
- #data ⇒ Object
-
#initialize ⇒ Scraper
constructor
A new instance of Scraper.
- #pic_data(url) ⇒ Object
Constructor Details
#initialize ⇒ Scraper
Returns a new instance of Scraper.
9 10 11 |
# File 'lib/apod_cli/scraper.rb', line 9
# Creates a Scraper and (re)builds the archive index.
#
# The index is obtained from the class-level +index_data+ method and stored
# in the class variable +@@data+, so it is replaced every time a new
# instance is constructed.
def initialize
  @@data = self.class.index_data
end
Class Method Details
.get_page ⇒ Object
17 18 19 |
# File 'lib/apod_cli/scraper.rb', line 17
# Downloads the APOD archive index page and parses it with Nokogiri.
#
# Uses URI.open (provided by open-uri) rather than Kernel#open: since
# Ruby 3.0 Kernel#open no longer accepts URLs, so the bare +open(url)+ form
# fails there. open-uri must be required by the file, as it already had to
# be for the previous +open(url)+ call to work.
#
# @return [Object] the document returned by Nokogiri::HTML for the archive page
def self.get_page
  Nokogiri::HTML(URI.open("http://apod.nasa.gov/apod/archivepix.html"))
end
.index_data ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/apod_cli/scraper.rb', line 32
# Builds the picture index from the archive page returned by +get_page+.
#
# Every non-empty text line inside "body b" (except the last one, which is
# dropped) is expected to look like "2007 July 15: Some Title". Titles are
# mapped to their hrefs through the link text of the <a> elements found in
# the same nodes.
#
# @return [Array<Hash>] one hash per entry with the keys
#   :date ("YYYY-MM-DD"), :name (title) and :link (absolute URL)
# @raise [KeyError] if a parsed title has no link with the same text
def self.index_data
  months = %w[January February March April May June July
              August September October November December]
  base_url = "http://apod.nasa.gov/apod/"

  content = self.get_page.css("body b")

  # reject, not reject!: the bang version returns nil when nothing was
  # removed, which would make the following pop blow up on nil.
  date_titles = content.text.split("\n").reject { |item| item == "" }
  date_titles.pop

  # Link text => relative href.
  # NOTE(review): keyed by title, so two entries sharing a title would share
  # one href (the last one seen) -- confirm this is acceptable.
  links_hash = {}
  content.css("a").each do |link|
    links_hash[link.text.strip] = link.attribute("href").value
  end

  array = []
  last_idx = date_titles.length - 1
  skip_continuation = false

  date_titles.each_with_index do |dt, idx|
    # One title on the page contains a stray "\n", so it shows up as two
    # lines. It is detected by the *next* line not starting with a month
    # name; both halves are skipped and the entry is re-added below.
    if idx != last_idx && months.index(date_titles[idx + 1].match(/[a-zA-Z]{1,}/).to_s).nil?
      skip_continuation = true
      next
    end
    if skip_continuation
      skip_continuation = false
      next
    end

    month_num = months.index(dt.match(/[a-zA-Z]{1,}/).to_s) + 1
    month_str = format("%02d", month_num)
    day_str = dt.match(/[^0-9][0-9]{2}[^0-9]/).to_s.gsub(/[: ]/, "")

    # Everything after the first ":" is the title.
    name = dt[/:(.+)/, 1].to_s.strip

    array << {
      date: "#{dt.match(/[0-9]{4}/)}-#{month_str}-#{day_str}",
      name: name,
      link: base_url + links_hash.fetch(name)
    }
  end

  # The entry with the broken title is hard-coded and placed by a fixed
  # offset counted from the end of the list.
  array.insert(-4411, { date: "2007-07-16",
                        name: "The Lagoon Nebula in Gas, Dust, and Stars",
                        link: "http://apod.nasa.gov/apod/ap070716.html" })
  array
end
Instance Method Details
#data ⇒ Object
13 14 15 |
# File 'lib/apod_cli/scraper.rb', line 13
# Returns the index stored in the class variable +@@data+
# (assigned in +initialize+).
#
# @return [Object] whatever +@@data+ currently holds
def data
  @@data
end
#pic_data(url) ⇒ Object
21 22 23 24 25 26 27 28 29 30 |
# File 'lib/apod_cli/scraper.rb', line 21
# Fetches a single picture page and returns its title, explanation text and
# a link to the picture.
#
# The page is downloaded and parsed once and the index entry is looked up
# once; the result is the same as re-fetching for every field.
#
# @param url [String] URL of the page; it must contain the :link of one of
#   the entries returned by +data+
# @return [Hash] with :name (title taken from the index), :expl (the
#   "Explanation: ..." text with whitespace collapsed) and :link (absolute
#   URL of the first "p a img" source, or the entry's page link when the
#   page has no such image)
# @raise [NoMethodError] if no entry in +data+ matches +url+
def pic_data(url)
  # URI.open (open-uri) instead of Kernel#open: Kernel#open no longer
  # accepts URLs on Ruby 3.0+, and it would run a command for a string
  # that starts with "|".
  doc = Nokogiri::HTML(URI.open(url))

  # Take everything from "Explanation:" up to the first run of three
  # newline groups, then flatten it to a single line.
  explanation = doc.css("body").text
                   .match(/Explanation:[\s\S]+?(\n(\s*)){3}/).to_s
                   .gsub(/\n/, " ").gsub(/\s{2,}/, " ").strip

  entry = data.find { |item| url.include?(item[:link]) }

  images = doc.css("p a img")
  link = if images.empty?
           entry[:link]
         else
           "http://apod.nasa.gov/apod/#{images.attribute("src").to_s}"
         end

  { name: entry[:name], expl: explanation, link: link }
end