Method: Apollo::Crawler::BaseCrawler#process_url

Defined in:
lib/apollo_crawler/crawler/base_crawler.rb

#process_url(url) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/apollo_crawler/crawler/base_crawler.rb', line 147

# Runs a single fetch-and-extract pass for one URL.
#
# The document is obtained through #fetch_document. When that call yields
# nil, this method returns nil immediately and neither extractor is invoked.
# Otherwise the same document is handed first to #extract_data and then to
# #extract_links, and both results are bundled into one hash.
#
# @param url value passed straight through to #fetch_document
#   (presumably a URL string -- confirm against callers)
# @return [Hash, nil] a hash with the keys :crawler (the name of the
#   receiver's class), :data and :links; nil when no document was fetched
def process_url(url)
	document = fetch_document(url)
	return nil if document.nil?

	# Pull the payload first, then the outgoing links, from the same document.
	extracted = extract_data(document)
	outgoing = extract_links(document)

	# TODO: Make configurable if links extracted from doc should be printed
	# puts outgoing.inspect

	# ETL result; this hash literal is the method's return value.
	{
		:crawler => self.class.name,
		:data => extracted,
		:links => outgoing
	}
end