Module: TrueURL::Fetch
Instance Method Summary
- #execute(context) ⇒ Object
- #find_canonical_header(headers) ⇒ Object
- #find_canonical_url(html) ⇒ Object
Instance Method Details
#execute(context) ⇒ Object
8 9 10 11 12 13 14 15 16 |
# File 'lib/true_url/fetch.rb', line 8
#
# Fetches the context's current working URL and replaces it with the best
# canonical candidate found in the response.
#
# Candidates are tried in order, and the first non-nil/non-false one wins:
#   1. the value returned by #find_canonical_header for the response headers,
#   2. the value returned by #find_canonical_url for the response body,
#   3. the response's own URI (presumably the final URL after redirects,
#      since the request goes through `HTTP.follow` -- confirm against the
#      HTTP client in use).
#
# @param context [Object] must respond to `working_url` and
#   `set_working_url(new_url, previous_url)`
# @return [Object] whatever `context.set_working_url` returns
def execute(context)
  origin = context.working_url
  page = HTTP.follow.get(origin)

  # Each fallback is evaluated only when the previous step produced nothing.
  resolved = find_canonical_header(page.headers)
  resolved ||= find_canonical_url(page.to_s)
  resolved ||= page.uri

  context.set_working_url(resolved, origin)
end
#find_canonical_header(headers) ⇒ Object
18 19 20 21 22 23 24 |
# File 'lib/true_url/fetch.rb', line 18
#
# Extracts the target of the first `rel="canonical"` entry from the `Link`
# response header(s).
#
# Handles the shapes a Link header can take:
#   * `headers['Link']` may be nil, a single String, or an Array of Strings;
#   * one header value may hold several comma-separated link-values;
#   * `rel` may be quoted or unquoted, in any letter case, followed by other
#     parameters, and may list several space-separated relation types.
#
# @param headers [#[]] header collection queried with the key 'Link'
# @return [String, nil] the canonical link target with surrounding whitespace
#   removed, or nil when no canonical link is present
def find_canonical_header(headers)
  Array(headers['Link']).each do |header_value|
    # Each match is one link-value: "<target>" followed by its parameters,
    # which run up to the next comma that is not inside a quoted string.
    header_value.to_s.scan(/<([^>]*)>((?:[^,"]|"[^"]*")*)/) do |target, params|
      # Only the first rel parameter of a link-value is considered.
      rel = params[/;\s*rel\s*=\s*"?([^";]*)/i, 1]
      next if rel.nil?
      next unless rel.split.any? { |relation| relation.downcase == 'canonical' }

      url = target.strip
      return url unless url.empty?
    end
  end

  nil
end
#find_canonical_url(html) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/true_url/fetch.rb', line 26
#
# Looks for a canonical URL declared inside an HTML document.
#
# Two sources are consulted, in priority order:
#   1. the `href` of the first `<link rel="canonical">` element,
#   2. the `content` of the first `<meta property="og:url">` element.
#
# A missing element, a missing attribute, or a blank attribute value is
# treated as "not declared", so the next source is tried instead of
# returning an empty string. The second lookup only runs when the first
# yields nothing.
#
# @param html [String] HTML markup handed to `Nokogiri::HTML`
# @return [String, nil] the declared URL with surrounding whitespace removed,
#   or nil when the document declares none
def find_canonical_url(html)
  doc = Nokogiri::HTML(html)

  sources = [
    ['link[rel="canonical"]', 'href'],
    ['meta[property="og:url"]', 'content']
  ]

  sources.each do |selector, attribute|
    elem = doc.at(selector)
    next if elem.nil?

    # to_s covers a matched element that lacks the attribute (nil value).
    value = elem[attribute].to_s.strip
    return value unless value.empty?
  end

  nil
end