Class: Hatemile::Util::Html::NokogiriLib::NokogiriHTMLDOMParser

Inherits:
HTMLDOMParser
  • Object
show all
Defined in:
lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb

Overview

The class NokogiriHTMLDOMParser is the official implementation of the HTMLDOMParser interface for the Nokogiri library.

Instance Method Summary collapse

Constructor Details

#initialize(code_or_parser, encoding = 'UTF-8') ⇒ NokogiriHTMLDOMParser

Initializes a new object that encapsulates the parser of Nokogiri.

Parameters:

  • code_or_parser (String, Nokogiri::HTML::Document)

    The HTML code or the parser of Nokogiri.

  • encoding (String) (defaults to: 'UTF-8')

    The encoding of code.



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 46

# Initializes a new object that encapsulates the Nokogiri parser.
#
# @param code_or_parser [String, Nokogiri::HTML::Document] The HTML code or
#   an already parsed Nokogiri document.
# @param encoding [String] The encoding handed to the parser when HTML code
#   is given.
def initialize(code_or_parser, encoding = 'UTF-8')
  Hatemile::Helper.require_not_nil(code_or_parser, encoding)
  Hatemile::Helper.require_valid_type(
    code_or_parser,
    String,
    Nokogiri::HTML::Document
  )
  Hatemile::Helper.require_valid_type(encoding, String)

  # is_a? instead of an exact class comparison: an instance of a String
  # subclass is still HTML code and must be parsed, not stored as a document.
  @document = if code_or_parser.is_a?(String)
                Nokogiri::HTML::Document.parse(code_or_parser, nil, encoding)
              else
                code_or_parser
              end
  # No search has been performed yet.
  @results = nil
end

Instance Method Details

#clear_parser ⇒ Object



200
201
202
203
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 200

# Drops the references to the parsed document and to the last search
# results held by this object.
#
# @return [nil]
def clear_parser
  @document = @results = nil
end

#create_element(tag) ⇒ Object



182
183
184
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 182

# Asks the underlying document to create an element and wraps the result.
#
# @param tag The tag passed to the document's +create_element+.
# @return [NokogiriHTMLDOMElement] The wrapper around the created element.
def create_element(tag)
  native_element = @document.create_element(tag)
  NokogiriHTMLDOMElement.new(native_element)
end

#find(selector) ⇒ Object

See Also:



69
70
71
72
73
74
75
76
77
78
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 69

# Starts a new search and stores its outcome as the current results.
#
# @param selector [String, NokogiriHTMLDOMElement, Array] A wrapped element
#   or an array of wrapped elements (their native data become the results);
#   anything else is passed to the document's +css+.
# @return [NokogiriHTMLDOMParser] self, so that calls can be chained.
def find(selector)
  @results = case selector
             when NokogiriHTMLDOMElement then [selector.get_data]
             when Array then selector.map { |element| element.get_data }
             else @document.css(selector)
             end
  self
end

#find_ancestors(selector) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 133

# Replaces the current results with ancestors of the current results.
#
# With a wrapped element (or an array of them), each given element is kept
# when it is an ancestor of at least one current result. With any other
# selector, the ancestors of every current result that match the selector
# are collected, in result order.
#
# @param selector [String, NokogiriHTMLDOMElement, Array] The selector or
#   the candidate element(s).
# @return [NokogiriHTMLDOMParser] self, so that calls can be chained.
def find_ancestors(selector)
  # A missing result set (no previous search) is treated as "no results",
  # as first_result and last_result do, instead of raising NoMethodError.
  results = @results || []
  selector = [selector] if selector.is_a?(NokogiriHTMLDOMElement)
  @results =
    if selector.is_a?(Array)
      selector.map(&:get_data).select do |native_element|
        results.any? { |result| result.ancestors.include?(native_element) }
      end
    else
      results.flat_map { |result| result.ancestors(selector).to_a }
    end
  self
end

#find_children(selector) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 82

# Replaces the current results with direct children of the current results.
#
# With a wrapped element (or an array of them), each given element is kept
# when it is among the children of at least one current result. With any
# other selector, the elements found by +css+ inside each current result
# are kept only when their parent is that result.
#
# @param selector [String, NokogiriHTMLDOMElement, Array] The selector or
#   the candidate element(s).
# @return [NokogiriHTMLDOMParser] self, so that calls can be chained.
def find_children(selector)
  # A missing result set (no previous search) is treated as "no results",
  # as first_result and last_result do, instead of raising NoMethodError.
  results = @results || []
  selector = [selector] if selector.is_a?(NokogiriHTMLDOMElement)
  @results =
    if selector.is_a?(Array)
      selector.map(&:get_data).select do |native_element|
        results.any? { |result| result.children.include?(native_element) }
      end
    else
      results.flat_map do |result|
        # css also returns deeper descendants; keep direct children only.
        result.css(selector).select { |found| found.parent == result }
      end
    end
  self
end

#find_descendants(selector) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 108

# Replaces the current results with descendants of the current results.
#
# With a wrapped element (or an array of them), each given element is kept
# when at least one current result is among its ancestors. With any other
# selector, the elements found by +css+ inside every current result are
# collected, in result order.
#
# @param selector [String, NokogiriHTMLDOMElement, Array] The selector or
#   the candidate element(s).
# @return [NokogiriHTMLDOMParser] self, so that calls can be chained.
def find_descendants(selector)
  # A missing result set (no previous search) is treated as "no results",
  # as first_result and last_result do, instead of raising NoMethodError.
  results = @results || []
  selector = [selector] if selector.is_a?(NokogiriHTMLDOMElement)
  @results =
    if selector.is_a?(Array)
      selector.map(&:get_data).select do |native_element|
        parents = native_element.ancestors
        results.any? { |result| parents.include?(result) }
      end
    else
      results.flat_map { |result| result.css(selector).to_a }
    end
  self
end

#first_result ⇒ Object



158
159
160
161
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 158

# Returns the first of the current results, wrapped.
#
# @return [NokogiriHTMLDOMElement, nil] The wrapped first result, or nil
#   when there are no results (no search done, or nothing found).
def first_result
  no_results = @results.nil? || @results.empty?
  no_results ? nil : NokogiriHTMLDOMElement.new(@results.first)
end

#get_html ⇒ Object



188
189
190
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 188

# Wraps the whole document in a NokogiriHTMLDOMElement and returns what its
# +get_outer_html+ produces.
#
# @return [Object] The value returned by the wrapper's +get_outer_html+.
def get_html
  document_element = NokogiriHTMLDOMElement.new(@document)
  document_element.get_outer_html
end

#get_parser ⇒ Object



194
195
196
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 194

# Gives access to the native document held by this parser.
#
# @return [Object, nil] The stored document; nil after +clear_parser+.
def get_parser
  @document
end

#last_result ⇒ Object



165
166
167
168
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 165

# Returns the last of the current results, wrapped.
#
# @return [NokogiriHTMLDOMElement, nil] The wrapped last result, or nil
#   when there are no results (no search done, or nothing found).
def last_result
  no_results = @results.nil? || @results.empty?
  no_results ? nil : NokogiriHTMLDOMElement.new(@results.last)
end

#list_results ⇒ Object



172
173
174
175
176
177
178
# File 'lib/hatemile/util/html/nokogiri/nokogiri_html_dom_parser.rb', line 172

# Returns every current result, each wrapped in a NokogiriHTMLDOMElement.
#
# @return [Array<NokogiriHTMLDOMElement>] The wrapped results, in order;
#   empty when there are no results.
def list_results
  # A missing result set (no previous search) yields an empty list, in line
  # with first_result and last_result, instead of raising NoMethodError.
  (@results || []).map { |result| NokogiriHTMLDOMElement.new(result) }
end