24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
# File 'lib/wikihow/scraper.rb', line 24
# Scrapes a wikiHow topic page: stores the page's intro text on +topic+ and
# returns the page's sections with the text of their steps.
#
# Each returned section is a Hash with two keys:
# * +:section_title+ - text of the matching table-of-contents entry
# * +:section_steps+ - Array of step descriptions (see {scrape_step}); when a
#   section has no steps (or no ".steps" container at all) it holds a single
#   placeholder entry instead.
#
# Sections are paired with ".steps" containers purely by position: the n-th
# table-of-contents entry is assumed to describe the n-th ".steps" element.
#
# Requires open-uri to be loaded (the original code already depended on it to
# open URLs).
#
# @param topic [#url, #intro=] object whose +url+ is fetched and whose
#   +intro+ is set to the text of the last "#intro p" element (nil when the
#   page has no such element)
# @return [Array<Hash>] one Hash per table-of-contents entry, in page order
def self.scrape_topic(topic)
  # URI.open only ever opens URLs. Kernel#open would run a string starting
  # with "|" as a subprocess, and it stopped opening URLs in Ruby 3.0.
  doc = Nokogiri::HTML(URI.open(topic.url))
  topic.intro = doc.search("#intro p").last&.text

  # Looked up once; the result does not change between sections.
  step_lists = doc.search(".steps")

  doc.search("#intro #method_toc .toc_method").each_with_index.map do |toc_entry, index|
    step_list = step_lists[index]
    # A table-of-contents entry without a matching ".steps" container is
    # treated like a section without steps instead of raising on nil.
    steps = step_list ? step_list.search(".step").map { |step_li| scrape_step(step_li) } : []
    steps << ["There is no text description for this section"] if steps.empty?

    { :section_title => toc_entry.text, :section_steps => steps }
  end
end

# Builds the description of a single ".step" element.
#
# The result is an Array whose first element is the step's headline (".whb")
# and own text joined by a space, followed by one Array per bullet point:
# +[bullet_text]+, or +[bullet_text, [sub_bullet_text, ...]]+ when the bullet
# has nested list items.
#
# @api private
# @param step_li [#search] the step's list-item node
# @return [Array] step description as described above
def self.scrape_step(step_li)
  headline = step_li.search(".whb").text.strip
  own_text = step_li.search("> text()").text.strip
  description = ["#{headline} #{own_text}"]

  step_li.search("> ul > li").each do |bullet_li|
    bullet = [bullet_li.search("> text(), a").text.strip]
    sub_bullets = bullet_li.search("> ul > li").map { |sub_li| sub_li.search("> text()").text.strip }
    bullet << sub_bullets unless sub_bullets.empty?
    description << bullet
  end

  description
end
|