Module: Emails
Overview
Parses email addresses out of an HTML string.
Instance Method Summary
Instance Method Details
#get_processed_emails(email_set) ⇒ Object
21 22 23 24 25 26 27 28 29 |
# File 'lib/parsers/emails.rb', line 21
#
# Filters candidate strings down to those that contain a plausible e-mail
# address.
#
# Every candidate is first passed through +unescape_html+ (defined outside
# this method) and is kept, in its unescaped form, when the pattern below
# matches anywhere inside it. The pattern is not anchored, so the whole
# candidate string is returned, not just the matched address.
#
# A candidate is rejected when the domain starts with a placeholder label
# (example., mail., email., domain., company., yourdomain., yourcompany.,
# youremail., address., emailaddress., emailadress., yyy., test.) or when the
# final label starts with a static-asset extension (png, jpg/jpeg, tif, svg,
# css, js, ico, gif), e.g. "logo@2x.png".
#
# @param email_set [Array<String>, nil] candidate strings
# @return [Array<String>] unescaped candidates that matched; an empty array
#   when +email_set+ is nil or empty
def get_processed_emails(email_set)
  return [] if email_set.nil? || email_set.empty?

  email_match_regex = /[\w._%-]+@(?!(?:example|e?mail|domain|company|your(?:domain|company|email)|address|emailad?dress|yyy|test)\.)[\w._%-]+\.(?!png|jpe?g|tif|svg|css|js|ico|gif)[A-Z]{2,3}/im

  # Regexp#match? answers the yes/no question without building MatchData and
  # simply returns false for a nil candidate.
  email_set
    .map { |email| unescape_html(email) }
    .select { |data| email_match_regex.match?(data) }
end
#grep_emails(response) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/parsers/emails.rb', line 9
#
# Extracts e-mail addresses from a response body (typically HTML).
#
# Two passes are made over +response+:
#
# 1. Everything following a "mailto:" prefix, up to the first ?, quote, comma,
#    backslash, angle bracket or whitespace character.
# 2. Bare addresses in the text. Placeholder domains (example., test.,
#    yourdomain., ...) and asset-like endings (png, jpg, css, js, ...) are
#    skipped. The address must be followed by a quote, whitespace, an angle
#    bracket, or the end of the input.
#
# The end-of-input alternative (\z) matters: without it an address that is
# the very last thing in +response+ was never found by the second pass.
#
# Both result sets are passed through +get_processed_emails+, merged,
# lower-cased and de-duplicated.
#
# @param response [String, nil] text to scan
# @return [Array<String>, nil] unique lower-cased matches, or nil when
#   +response+ is nil or empty
def grep_emails(response)
  return if response.nil? || response.empty?

  mailto_regex = /(?im)mailto:\s*([^\?"',\\<>\s]+)/
  inline_regex = %r{(?im)["'\s><\/]*([\w._%-]+@(?!(?:example|e?mail|domain|company|your(?:domain|company|email)|address|emailad?dress|yyy|test)\.)[\w._%-]+\.(?!png|jpe?g|tif|svg|css|js|ico|gif)[A-Z]{2,3})(?:["'\s><]|\z)}

  mailto_hits = get_processed_emails(response.scan(mailto_regex).flatten.compact)
  inline_hits = get_processed_emails(response.scan(inline_regex).flatten.compact)

  # Lower-casing can create new duplicates, hence the final uniq after the union.
  (mailto_hits | inline_hits).compact.map(&:downcase).uniq
end