Class: HatebloMixedContentsFinder::MixedContentsFinder
- Inherits: Object
- Inheritance chain:
- Object
- HatebloMixedContentsFinder::MixedContentsFinder
- Defined in:
- lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb
Constant Summary collapse
- VALIDATE_CONDITIONS =
[ %w(img src), %w(img srcset), %w(source src), %w(source srcset), %w(script src), %w(video src), %w(audio src), %w(iframe src), %w(embed src), %w(form action), %w(object data), %w(link href), ]
Instance Attribute Summary collapse
-
#entire_page ⇒ Object
readonly
Returns the value of attribute entire_page.
Instance Method Summary collapse
-
#initialize(entire_page: false) ⇒ MixedContentsFinder
constructor
A new instance of MixedContentsFinder.
- #validate_all(site_url, limit: 3, sleep_sec: 1) ⇒ Object
- #validate_entry(url) ⇒ Object
Constructor Details
#initialize(entire_page: false) ⇒ MixedContentsFinder
Returns a new instance of MixedContentsFinder.
7 8 9 |
# File 'lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb', line 7

# Returns a new instance of MixedContentsFinder.
#
# @param entire_page [Boolean] stored as-is in @entire_page. #validate_entry
#   reads it to choose the root selector handed to ElementValidator: an empty
#   string when truthy, '.entry-content' otherwise.
def initialize(entire_page: false)
  @entire_page = entire_page
end
Instance Attribute Details
#entire_page ⇒ Object (readonly)
Returns the value of attribute entire_page.
5 6 7 |
# File 'lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb', line 5

# Returns the value of attribute entire_page.
#
# @return [Object] the value given as +entire_page:+ to the constructor
#   (+false+ when the keyword was omitted)
def entire_page
  @entire_page
end
Instance Method Details
#validate_all(site_url, limit: 3, sleep_sec: 1) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb', line 11

# Walks the archive listing under +site_url+ page by page and runs
# #validate_entry on every '.entry-title-link' it finds, following the
# '.pager-next a' link until there is none or +limit+ entries were validated.
#
# @param site_url [String] base URL; 'archive' is appended with File.join
# @param limit [Integer, nil] maximum number of entries to validate;
#   nil (or false) disables the limit
# @param sleep_sec [Numeric] seconds passed to +sleep+ after each entry
# @return [Array] the concatenated results of every #validate_entry call
def validate_all(site_url, limit: 3, sleep_sec: 1)
  puts "Validate #{site_url} / entire_page: #{entire_page}, limit: #{limit || 'none'}"

  invalid_contents = []
  agent = Mechanize.new
  list_url = File.join(site_url, 'archive')
  checked = 0
  limit_reached = -> { limit && checked >= limit }

  # Stop before requesting another archive page once the limit is reached,
  # so no listing is downloaded whose entries would all be skipped.
  until list_url.nil? || limit_reached.call
    puts "Validating #{list_url}"
    page = agent.get(list_url)

    page.search('.entry-title-link').each do |link|
      break if limit_reached.call

      invalid_contents.concat(validate_entry(link['href']))
      checked += 1
      sleep sleep_sec
    end

    # `search` yields a (possibly empty) collection, so `first` alone is enough.
    next_page_link = page.search('.pager-next a').first
    list_url = next_page_link && next_page_link['href']
  end

  invalid_contents
end
#validate_entry(url) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb', line 53

# Fetches +url+ with a fresh Mechanize agent and runs one ElementValidator per
# [tag, attribute] pair in VALIDATE_CONDITIONS against the fetched page.
#
# @param url [String] address of the entry page to fetch
# @return [Array] results of every ElementValidator#validate call, flattened
#   one level (presumably the offending elements -- confirm in ElementValidator)
def validate_entry(url)
  puts "[#{Time.now.strftime('%H:%M:%S')}] Validate #{url}"

  page = Mechanize.new.get(url)
  # An empty root selector is used for the whole page; otherwise only
  # '.entry-content' is passed as the root.
  root = entire_page ? '' : '.entry-content'

  VALIDATE_CONDITIONS.flat_map do |tag, attr|
    ElementValidator.new(tag, attr, root).validate(page)
  end
end