Class: EndangeredSpecies::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/endangered_species/scraper.rb

Overview

Gets data from the website and zips it up, instantiating Species or Articles objects based on user input from the CLI controller.

Instance Method Summary collapse

Instance Method Details

#get_articles_index ⇒ Object



32
33
34
# File 'lib/endangered_species/scraper.rb', line 32

# Downloads the WWF stories index page and parses it with Nokogiri.
#
# The page is fetched with URI.open (from open-uri, which the original
# URL-based `open` call already depended on). Kernel#open stopped fetching
# URLs in Ruby 3.0, so the bare `open(url)` form raises Errno::ENOENT there.
# The block form also closes the downloaded IO once parsing is done.
#
# @return [Object] the document returned by Nokogiri::HTML
def get_articles_index
  URI.open("https://www.worldwildlife.org/stories/") { |page| Nokogiri::HTML(page) }
end

#get_species_index ⇒ Object



6
7
8
# File 'lib/endangered_species/scraper.rb', line 6

# Downloads the WWF species directory page and parses it with Nokogiri.
#
# The page is fetched with URI.open (from open-uri, which the original
# URL-based `open` call already depended on). Kernel#open stopped fetching
# URLs in Ruby 3.0, so the bare `open(url)` form raises Errno::ENOENT there.
# The block form also closes the downloaded IO once parsing is done.
#
# @return [Object] the document returned by Nokogiri::HTML
def get_species_index
  URI.open("https://www.worldwildlife.org/species/directory") { |page| Nokogiri::HTML(page) }
end

#make_articles ⇒ Object



40
41
42
43
44
45
46
47
48
49
# File 'lib/endangered_species/scraper.rb', line 40

# Creates and saves one EndangeredSpecies::Articles object for every element
# returned by scrape_articles_index.
#
# For each element it fills in:
# * title   - text of "h2 a"
# * date    - text of "em" with the "WWF Magazine:" label removed
# * summary - text of "div" with newlines removed
# * url     - site root plus the href of the first matched "a"
#
# @return [Object] the result of calling `each` on the scraped elements
def make_articles
  scrape_articles_index.each do |entry|
    story = EndangeredSpecies::Articles.new
    story.title = entry.css("h2 a").text
    story.date = entry.css("em").text.gsub("WWF Magazine:", "")
    story.summary = entry.css("div").text.delete("\n")
    link = entry.css("a").attr("href").text
    story.url = "https://www.worldwildlife.org#{link}"
    story.save
  end
end

#make_species ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/endangered_species/scraper.rb', line 14

# Creates and saves an EndangeredSpecies::Species object for every row
# returned by scrape_species_index.
#
# Name, scientific name, status and URL come from the directory row. The
# species' own page is then downloaded to fill in summary and habitat.
#
# The detail page is fetched with URI.open (from open-uri, which the original
# URL-based `open` call already depended on). Kernel#open stopped fetching
# URLs in Ruby 3.0, so the bare `open(url)` form fails there. The block form
# also closes the downloaded IO once parsing is done.
#
# `save` runs once per "div.wrapper.section-pop" element on the detail page,
# so a species whose page has no such element is never saved.
#
# @return [Object] the result of calling `each` on the scraped rows
def make_species
  scrape_species_index.each do |content|
    species = EndangeredSpecies::Species.new

    species.name = content.css("td.keep a").first.text
    species.scientific = content.css("td em").text
    species.status = content.css("td").last.text
    species.url = "https://www.worldwildlife.org#{content.css("a").attr("href").text}"

    # Still stored in @doc, as before, in case other code reads it.
    @doc = URI.open(species.url) { |page| Nokogiri::HTML(page) }
    @doc.search("div.wrapper.section-pop").each do |more_info|
      species.summary = more_info.css("p").text
      # Collapse the list's line breaks, then turn the "Places" and "Habitats"
      # headings into labels, with "Habitats:" starting its own line.
      species.habitat = more_info.css("ul.list-data.list-spaced > li").text
                                 .gsub("\n\n", " ")
                                 .gsub("\n", "")
                                 .gsub("Places", "Places:")
                                 .gsub("Habitats", "\nHabitats:")
      species.save
    end
  end
end

#scrape_articles_index ⇒ Object



36
37
38
# File 'lib/endangered_species/scraper.rb', line 36

# Runs the CSS selector "div.span9.gutter-horiz-in" against the document
# returned by get_articles_index.
#
# @return [Object] whatever the document's `css` call returns for that selector
def scrape_articles_index
  index_page = get_articles_index
  index_page.css("div.span9.gutter-horiz-in")
end

#scrape_species_index ⇒ Object



10
11
12
# File 'lib/endangered_species/scraper.rb', line 10

# Runs the CSS selector "table.lead.gutter-bottom-2.table-to-list tbody tr"
# against the document returned by get_species_index.
#
# @return [Object] whatever the document's `css` call returns for that selector
def scrape_species_index
  directory_page = get_species_index
  directory_page.css("table.lead.gutter-bottom-2.table-to-list tbody tr")
end