Class: HatebloMixedContentsFinder::MixedContentsFinder

Inherits:
Object
  • Object
show all
Defined in:
lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb

Constant Summary collapse

# [tag, attribute] pairs to check. #validate_entry iterates this list and
# builds one ElementValidator per pair, passing the tag name and the
# attribute name as the first two constructor arguments.
# Frozen so the shared list cannot be mutated at runtime.
VALIDATE_CONDITIONS = [
  %w(img src),
  %w(img srcset),
  %w(source src),
  %w(source srcset),
  %w(script src),
  %w(video src),
  %w(audio src),
  %w(iframe src),
  %w(embed src),
  %w(form action),
  %w(object data),
  %w(link href),
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(entire_page: false) ⇒ MixedContentsFinder

Returns a new instance of MixedContentsFinder.



7
8
9
# File 'lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb', line 7

# Builds a finder and remembers the +entire_page+ flag.
#
# @param entire_page [Object] stored as-is in @entire_page (defaults to
#   false). #validate_entry reads it back through #entire_page: a truthy
#   value selects an empty root selector, a falsy one '.entry-content'.
def initialize(entire_page: false)
  @entire_page = entire_page
end

Instance Attribute Details

#entire_page ⇒ Object (readonly)

Returns the value of attribute entire_page.



5
6
7
# File 'lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb', line 5

# Reader for the flag stored in @entire_page by the constructor.
#
# @return [Object] the value given as +entire_page:+ to #initialize
def entire_page
  @entire_page
end

Instance Method Details

#validate_all(site_url, limit: 3, sleep_sec: 1) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb', line 11

# Walks the site's archive listing page by page and validates every entry
# linked from it, collecting whatever #validate_entry returns.
#
# The listing starts at "<site_url>/archive"; subsequent pages are taken from
# the href of the first '.pager-next a' element on the current page.
#
# @param site_url [String] base URL of the site; 'archive' is joined onto it
# @param limit [Integer, nil] maximum number of entries to validate;
#   nil (or false) means no limit
# @param sleep_sec [Numeric] seconds to sleep after each validated entry
# @return [Array] concatenation of the arrays returned by #validate_entry
def validate_all(site_url, limit: 3, sleep_sec: 1)
  puts "Validate #{site_url} / entire_page: #{entire_page}, limit: #{limit || 'none'}"
  invalid_contents = []
  agent = Mechanize.new
  list_url = File.join(site_url, 'archive')
  checked = 0

  # Stop before requesting another archive page once the limit is reached,
  # so no listing page is fetched just to be discarded.
  while list_url && (!limit || checked < limit)
    puts "Validating #{list_url}"
    page = agent.get(list_url)

    page.search('.entry-title-link').each do |link|
      break if limit && checked >= limit

      checked += 1
      invalid_contents.concat(validate_entry(link['href']))
      sleep sleep_sec
    end

    # search returns a (possibly empty) collection, so first is nil when
    # there is no further page and the loop ends.
    next_page_link = page.search('.pager-next a').first
    list_url = next_page_link && next_page_link['href']
  end

  invalid_contents
end

#validate_entry(url) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
# File 'lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb', line 53

# Fetches a single entry page and runs every VALIDATE_CONDITIONS pair
# against it.
#
# A timestamped progress line is printed first. The root selector handed to
# each ElementValidator is '' when #entire_page is truthy and
# '.entry-content' otherwise.
#
# @param url [String] URL of the entry to fetch
# @return [Array] the results of every ElementValidator#validate call,
#   flattened one level into a single array
def validate_entry(url)
  timestamp = Time.now.strftime("%H:%M:%S")
  puts "[#{timestamp}] Validate #{url}"

  fetched_page = Mechanize.new.get(url)
  scope_selector =
    if entire_page
      ''
    else
      '.entry-content'
    end

  VALIDATE_CONDITIONS.flat_map do |tag_name, attr_name|
    ElementValidator.new(tag_name, attr_name, scope_selector).validate(fetched_page)
  end
end