Module: TrueURL::Fetch

Extended by:
Fetch
Included in:
Fetch
Defined in:
lib/true_url/fetch.rb

Instance Method Summary

Instance Method Details

#execute(context) ⇒ Object



8
9
10
11
12
13
14
15
16
# File 'lib/true_url/fetch.rb', line 8

# Fetches the context's current working URL and stores the canonical URL
# found for it back on the context.
#
# Candidates are tried in order and the first truthy one wins:
# 1. the result of +find_canonical_header+ on the response headers,
# 2. the result of +find_canonical_url+ on the response body,
# 3. the response's own +uri+.
#
# @param context [Object] must respond to +working_url+ and
#   +set_working_url(new_url, previous_url)+
# @return [Object] whatever +context.set_working_url+ returns
def execute(context)
  origin = context.working_url

  # NOTE(review): HTTP.follow presumably enables redirect following (http.rb
  # gem), which would make reply.uri the final URL -- confirm.
  reply = HTTP.follow.get(origin)

  # The body is only read (reply.to_s) when the headers yield nothing.
  resolved = find_canonical_header(reply.headers)
  resolved ||= find_canonical_url(reply.to_s)
  resolved ||= reply.uri

  context.set_working_url(resolved, origin)
end

#find_canonical_header(headers) ⇒ Object



18
19
20
21
22
23
24
# File 'lib/true_url/fetch.rb', line 18

# Extracts the target of the first link with a +canonical+ relation from the
# HTTP +Link+ header(s).
#
# Handles a single header value (String) or several (Array), multiple
# comma-separated links inside one value, +rel+ appearing anywhere in the
# parameter list, quoted or unquoted +rel+ values, and +rel+ values that list
# several space-separated relation types. Matching is case-insensitive.
#
# @param headers [#[]] header collection; +headers['Link']+ may be +nil+,
#   a String, or an Array of Strings
# @return [String, nil] the URL between the angle brackets of the canonical
#   link, or +nil+ when no canonical link is present
def find_canonical_header(headers)
  # Array() turns nil into [] and a lone String into a one-element list.
  Array(headers['Link']).each do |header_value|
    # Each match is one "<target>; params" pair. Quoted parameter values are
    # consumed whole so a comma inside them does not end the link early.
    header_value.to_s.scan(/<([^>]*)>((?:[^,"]|"[^"]*")*)/) do |target, params|
      rel = /;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,"]+))/i.match(params)
      next if rel.nil?

      relation_types = (rel[1] || rel[2]).downcase.split
      return target if relation_types.include?('canonical')
    end
  end

  nil
end

#find_canonical_url(html) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
# File 'lib/true_url/fetch.rb', line 26

# Looks for a canonical URL declared inside an HTML document.
#
# The first +link[rel="canonical"]+ element's +href+ is preferred; the first
# +meta[property="og:url"]+ element's +content+ is the fallback. A missing,
# empty, or whitespace-only attribute is treated as absent so that a blank
# canonical link does not mask a usable og:url (an empty String is truthy in
# Ruby and would otherwise be returned as the result).
#
# @param html [String] HTML source handed to +Nokogiri::HTML+
# @return [String, nil] the URL with surrounding whitespace stripped, or
#   +nil+ when neither element supplies a non-blank value
def find_canonical_url(html)
  doc = Nokogiri::HTML(html)

  candidates = [
    ['link[rel="canonical"]', 'href'],
    ['meta[property="og:url"]', 'content']
  ]

  candidates.each do |selector, attribute|
    elem = doc.at(selector)
    next if elem.nil?

    # to_s covers an element that lacks the attribute entirely (nil).
    value = elem[attribute].to_s.strip
    return value unless value.empty?
  end

  nil
end