Class: EndangeredSpecies::Scraper
- Inherits:
-
Object
- Object
- EndangeredSpecies::Scraper
- Defined in:
- lib/endangered_species/scraper.rb
Overview
Fetches the data and zips it up, then instantiates Species or Articles based on user input from the CLI controller.
Instance Method Summary collapse
- #get_articles_index ⇒ Object
- #get_species_index ⇒ Object
- #make_articles ⇒ Object
- #make_species ⇒ Object
- #scrape_articles_index ⇒ Object
- #scrape_species_index ⇒ Object
Instance Method Details
#get_articles_index ⇒ Object
32 33 34 |
# File 'lib/endangered_species/scraper.rb', line 32
#
# Downloads the WWF stories index page and parses it with Nokogiri.
#
# Uses URI.open (provided by open-uri) rather than Kernel#open: since
# Ruby 3.0 open-uri no longer patches Kernel#open, so a bare open(url)
# would treat the URL as a local file path and raise Errno::ENOENT.
#
# @return [Object] whatever Nokogiri::HTML returns for the fetched page
def get_articles_index
  Nokogiri::HTML(URI.open("https://www.worldwildlife.org/stories/"))
end
#get_species_index ⇒ Object
6 7 8 |
# File 'lib/endangered_species/scraper.rb', line 6
#
# Downloads the WWF species directory page and parses it with Nokogiri.
#
# Uses URI.open (provided by open-uri) rather than Kernel#open: since
# Ruby 3.0 open-uri no longer patches Kernel#open, so a bare open(url)
# would treat the URL as a local file path and raise Errno::ENOENT.
#
# @return [Object] whatever Nokogiri::HTML returns for the fetched page
def get_species_index
  Nokogiri::HTML(URI.open("https://www.worldwildlife.org/species/directory"))
end
#make_articles ⇒ Object
40 41 42 43 44 45 46 47 48 49 |
# File 'lib/endangered_species/scraper.rb', line 40
#
# Builds and saves one EndangeredSpecies::Articles object for every node
# returned by #scrape_articles_index.
#
# For each node the title comes from "h2 a", the date from "em" (with the
# "WWF Magazine:" label removed), the summary from "div" (newlines removed)
# and the URL from the href of the first "a", prefixed with the site root.
#
# @return [Object] the collection returned by #scrape_articles_index
def make_articles
  scrape_articles_index.each do |entry|
    article = EndangeredSpecies::Articles.new
    article.title = entry.css("h2 a").text
    article.date = entry.css("em").text.gsub("WWF Magazine:", "")
    article.summary = entry.css("div").text.delete("\n")

    # The scraped href is site-relative, so prepend the host.
    href = entry.css("a").attr("href").text
    article.url = "https://www.worldwildlife.org#{href}"

    article.save
  end
end
#make_species ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/endangered_species/scraper.rb', line 14
#
# Builds EndangeredSpecies::Species objects from the rows returned by
# #scrape_species_index, then fetches each species' own page to fill in
# the summary and habitat before saving.
#
# The detail page is fetched with URI.open (provided by open-uri) instead
# of Kernel#open: since Ruby 3.0 open-uri no longer patches Kernel#open,
# so open(url) would look for a local file and raise Errno::ENOENT. It
# also keeps a scraped URL from ever reaching Kernel#open.
#
# @return [Object] the collection returned by #scrape_species_index
def make_species
  scrape_species_index.each do |content|
    species = EndangeredSpecies::Species.new
    species.name = content.css("td.keep a").first.text
    species.scientific = content.css("td em").text
    species.status = content.css("td").last.text
    # The scraped href is site-relative, so prepend the host.
    species.url = "https://www.worldwildlife.org#{content.css("a").attr("href").text}"

    # NOTE(review): @doc is left as an instance variable because code outside
    # this method may read it; it could become a local if nothing does.
    @doc = Nokogiri::HTML(URI.open(species.url))

    # The species is saved once per matching section, so a page without
    # a "div.wrapper.section-pop" element produces no saved record.
    @doc.search("div.wrapper.section-pop").each do |more_info|
      species.summary = more_info.css("p").text
      # Collapse the list text to "Places: ...\nHabitats: ...".
      species.habitat = more_info.css("ul.list-data.list-spaced > li").text
                                 .gsub("\n\n", " ")
                                 .gsub("\n", "")
                                 .gsub("Places", "Places:")
                                 .gsub("Habitats", "\nHabitats:")
      species.save
    end
  end
end
#scrape_articles_index ⇒ Object
36 37 38 |
# File 'lib/endangered_species/scraper.rb', line 36
#
# Selects the article nodes from the parsed stories index.
#
# @return [Object] the result of querying #get_articles_index with the
#   CSS selector "div.span9.gutter-horiz-in"
def scrape_articles_index
  index_page = get_articles_index
  index_page.css("div.span9.gutter-horiz-in")
end
#scrape_species_index ⇒ Object
10 11 12 |
# File 'lib/endangered_species/scraper.rb', line 10
#
# Selects the species table rows from the parsed species directory.
#
# @return [Object] the result of querying #get_species_index with the
#   CSS selector "table.lead.gutter-bottom-2.table-to-list tbody tr"
def scrape_species_index
  directory_page = get_species_index
  directory_page.css("table.lead.gutter-bottom-2.table-to-list tbody tr")
end