Method: Apollo::Crawler::BaseCrawler#process_url
- Defined in:
- lib/apollo_crawler/crawler/base_crawler.rb
#process_url(url) ⇒ Object
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/apollo_crawler/crawler/base_crawler.rb', line 147

# Runs one fetch/extract pass for a single URL.
#
# The document is obtained via +fetch_document+; when that yields +nil+ the
# pass is abandoned and neither extraction hook is invoked. Otherwise
# +extract_data+ and then +extract_links+ are called with the document, and
# their results are bundled together with the name of the crawler class.
#
# The hooks are called with an implicit receiver so that they may be
# declared private by the crawler that implements them.
#
# @param url [Object] forwarded unchanged to +fetch_document+
# @return [Hash, nil] +nil+ when +fetch_document+ returned +nil+; otherwise a
#   Hash with the keys +:crawler+ (the value of +self.class.name+), +:data+
#   (result of +extract_data+) and +:links+ (result of +extract_links+)
def process_url(url)
  doc = fetch_document(url)
  # Only a nil document aborts the pass; any other value is handed on.
  return nil if doc.nil?

  # Try to extract data from the document
  data = extract_data(doc)

  # Try to extract links to other documents
  links = extract_links(doc)

  # TODO: Make configurable if links extracted from doc should be printed
  # puts links.inspect

  # Format ETL result
  {
    crawler: self.class.name,
    data: data,
    links: links
  }
end