Module: Graboid::Entity::ClassMethods
- Defined in:
- lib/graboid/entity.rb
Instance Method Summary
- #all(opts = {}) ⇒ Object
- #all_fragments ⇒ Object
- #attribute_map ⇒ Object
- #collection ⇒ Object
- #collection=(col) ⇒ Object
- #current_page ⇒ Object
- #current_page=(num) ⇒ Object
- #doc ⇒ Object
- #extract_instance(fragment) ⇒ Object
- #hash_map(fragment) ⇒ Object
- #inferred_selector ⇒ Object
- #max_pages ⇒ Object
- #max_pages=(num) ⇒ Object
- #mode ⇒ Object
- #mode=(m) ⇒ Object
- #next_page? ⇒ Boolean
- #page_fragments ⇒ Object
- #pager(&block) ⇒ Object
- #paginate ⇒ Object
- #read_source ⇒ Object
- #reset_context ⇒ Object
- #root_selector ⇒ Object
- #selector(selector) ⇒ Object (also: #root)
- #set(name, opts = {}, &block) ⇒ Object (also: #field)
- #source ⇒ Object
- #source=(src) ⇒ Object
Instance Method Details
#all(opts = {}) ⇒ Object
107 108 109 110 111 |
# File 'lib/graboid/entity.rb', line 107
# Resets the scraping context, applies an optional page limit and builds
# one instance per collected fragment.
#
# @param opts [Hash] options; only +:max_pages+ is read here
# @return [Array] the results of +extract_instance+ for each fragment
def all(opts = {})
  reset_context
  page_limit = opts[:max_pages]
  self.max_pages = page_limit if page_limit.present?
  all_fragments.map { |fragment| extract_instance(fragment) }
end
#all_fragments ⇒ Object
76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/graboid/entity.rb', line 76
# Collects the fragments of every page reachable through the pager.
#
# Without a pager block only the fragments of the current source are
# returned. With a pager, pages are walked while +next_page?+ holds, and
# the paginate callbacks run around each +paginate+ call.
#
# The original +source+ is restored in an +ensure+ clause so that an
# exception raised while paging (by the pager, a callback or the fetch)
# does not leave the entity pointing at an intermediate page.
#
# NOTE(review): when +max_pages+ is zero the loop stops as soon as the
# pager returns nil, i.e. before the fragments of that final page are
# collected - confirm whether that is intended.
#
# @return [Object] +page_fragments+ when no pager is set, otherwise the
#   accumulated +collection+
def all_fragments
  return page_fragments if @pager.nil?

  original_source = source
  begin
    while next_page?
      self.collection = collection + page_fragments
      run_before_paginate_callbacks
      paginate
      run_after_paginate_callbacks
    end
  ensure
    self.source = original_source
  end
  collection
end
#attribute_map ⇒ Object
58 59 60 |
# File 'lib/graboid/entity.rb', line 58
# Returns the inheritable attribute stored under +:attribute_map+.
#
# @return [Object] whatever +read_inheritable_attribute+ yields for that key
def attribute_map
  read_inheritable_attribute(:attribute_map)
end
#collection ⇒ Object
50 51 52 |
# File 'lib/graboid/entity.rb', line 50
# Lazily initialised accumulator; starts out as an empty array.
#
# @return [Object] the current collection
def collection
  @collection = [] unless @collection
  @collection
end
#collection=(col) ⇒ Object
54 55 56 |
# File 'lib/graboid/entity.rb', line 54
# Replaces the accumulated collection.
#
# @param col [Object] the new collection value
# @return [Object] the assigned value
def collection=(col)
  @collection = col
end
#current_page ⇒ Object
149 150 151 |
# File 'lib/graboid/entity.rb', line 149
# Page counter, defaulting to 0 until it has been set.
#
# @return [Object] the current page value
def current_page
  @current_page = 0 unless @current_page
  @current_page
end
#current_page=(num) ⇒ Object
153 154 155 |
# File 'lib/graboid/entity.rb', line 153
# Sets the page counter.
#
# @param num [Object] the new page value
# @return [Object] the assigned value
def current_page=(num)
  @current_page = num
end
#doc ⇒ Object
46 47 48 |
# File 'lib/graboid/entity.rb', line 46
# Parses the current source with the Nokogiri parser function named after
# the upper-cased +mode+ (Nokogiri::HTML(...) or Nokogiri::XML(...)).
#
# The parser is selected with a direct method dispatch instead of building
# a code string and passing it to +eval+.
#
# @return [Object] the value returned by the selected Nokogiri function
def doc
  Nokogiri.public_send(mode.to_s.upcase, read_source)
end
#extract_instance(fragment) ⇒ Object
62 63 64 |
# File 'lib/graboid/entity.rb', line 62
# Builds a new instance from the attributes extracted out of +fragment+.
#
# @param fragment [Object] the fragment handed to +hash_map+
# @return [Object] the result of +new+ called with the extracted hash
def extract_instance(fragment)
  attributes = hash_map(fragment)
  new(attributes)
end
#hash_map(fragment) ⇒ Object
66 67 68 69 70 71 72 73 74 |
# File 'lib/graboid/entity.rb', line 66
# Extracts one value per entry of +attribute_map+ from +fragment+.
#
# For each attribute the configured selector is looked up with +css+ in
# :html mode and with +xpath+ otherwise, and the first matching node is
# used. With a processor the node (possibly nil) is passed to it; without
# one the node's +inner_html+ is stored.
#
# When the selector matches nothing and no processor is configured, nil is
# stored instead of raising NoMethodError on nil.
#
# @param fragment [Object] an object responding to +css+ / +xpath+
# @return [Hash] attribute name => extracted value
def hash_map(fragment)
  attribute_map.each_with_object({}) do |(name, options), extracted|
    selector  = options[:selector]
    processor = options[:processor]
    nodes = mode == :html ? fragment.css(selector) : fragment.xpath(selector)
    node  = nodes.first

    extracted[name] =
      if processor.nil?
        # No match: store nil rather than calling inner_html on nil.
        node && node.inner_html
      else
        processor.call(node)
      end
  end
end
#inferred_selector ⇒ Object
42 43 44 |
# File 'lib/graboid/entity.rb', line 42
# Default selector: a dot followed by the underscored string form of the
# receiver. The value is memoized after the first call.
#
# @return [String] the memoized selector
def inferred_selector
  @inferred_selector ||= "." + to_s.underscore
end
#max_pages ⇒ Object
141 142 143 |
# File 'lib/graboid/entity.rb', line 141
# Page limit, defaulting to 0 until it has been set.
#
# @return [Object] the current page limit
def max_pages
  @max_pages = 0 unless @max_pages
  @max_pages
end
#max_pages=(num) ⇒ Object
145 146 147 |
# File 'lib/graboid/entity.rb', line 145
# Sets the page limit.
#
# @param num [Object] the new limit
# @return [Object] the assigned value
def max_pages=(num)
  @max_pages = num
end
#mode ⇒ Object
132 133 134 |
# File 'lib/graboid/entity.rb', line 132
# Parsing mode, defaulting to :html until it has been set.
#
# @return [Object] the current mode
def mode
  @mode = :html unless @mode
  @mode
end
#mode=(m) ⇒ Object
136 137 138 139 |
# File 'lib/graboid/entity.rb', line 136
# Sets the parsing mode.
#
# @param m [Symbol] either :html or :xml
# @return [Symbol] the assigned mode
# @raise [ArgumentError] if +m+ is neither :html nor :xml; the message
#   names the rejected value so the failure is diagnosable
def mode=(m)
  unless [:html, :xml].include?(m)
    raise ArgumentError, "mode must be :html or :xml, got #{m.inspect}"
  end

  @mode = m
end
#next_page? ⇒ Boolean
95 96 97 98 99 100 101 |
# File 'lib/graboid/entity.rb', line 95
# Tells whether another page should be processed.
#
# With no page limit (+max_pages+ is zero) the pager block is called with
# the current document and a non-nil result means there is a next page.
# With a limit, paging continues while +current_page+ is below it.
#
# Always returns a real boolean; the pager branch previously returned nil
# instead of false.
#
# @return [Boolean]
def next_page?
  if max_pages.zero?
    !@pager.call(doc).nil?
  else
    current_page < max_pages
  end
end
#page_fragments ⇒ Object
103 104 105 |
# File 'lib/graboid/entity.rb', line 103
# Selects the fragments of the current document matching +root_selector+.
#
# @return [Object] the result of calling +css+ on the parsed document
def page_fragments
  document = doc
  document.css(root_selector)
end
#pager(&block) ⇒ Object
128 129 130 |
# File 'lib/graboid/entity.rb', line 128
# Stores the given block as the pager.
#
# @yield the block kept in +@pager+ for later calls
# @return [Proc, nil] the stored block
def pager(&block)
  @pager = block
end
#paginate ⇒ Object
89 90 91 92 93 |
# File 'lib/graboid/entity.rb', line 89
# Advances to the next page: asks the pager for a value using the current
# document, makes that value the new source and increments the page
# counter. Any StandardError raised while asking the pager is swallowed
# and the source becomes nil.
#
# @return [Object] the incremented page counter
def paginate
  upcoming_url =
    begin
      @pager.call(doc)
    rescue StandardError
      nil
    end
  self.source = upcoming_url
  self.current_page = current_page + 1
end
#read_source ⇒ Object
119 120 121 122 123 124 125 126 |
# File 'lib/graboid/entity.rb', line 119
# Returns the raw content to parse.
#
# A source string that starts with http:// or https:// is fetched with
# the Graboid user agent; any other String is returned as is; everything
# else yields nil.
#
# The URL test is anchored with \A (start of string), not ^ (start of any
# line), so a multi-line markup string that merely contains a URL on some
# line is not mistaken for a URL. The fetch goes through URI#open (from
# open-uri) instead of Kernel#open, which treats a leading "|" as a
# command and no longer opens URLs on Ruby 3.
#
# @return [Object, nil] the opened remote resource, the source string
#   itself, or nil
def read_source
  case source
  when %r{\Ahttps?://}
    URI.parse(source).open("User-Agent" => Graboid.user_agent)
  when String
    source
  end
end
#reset_context ⇒ Object
113 114 115 116 117 |
# File 'lib/graboid/entity.rb', line 113
# Clears per-run state: page counter back to 0, collection emptied and
# page limit back to 0.
#
# @return [Integer] 0, the value of the final assignment
def reset_context
  self.current_page = 0
  self.collection = []
  self.max_pages = 0
end
#root_selector ⇒ Object
38 39 40 |
# File 'lib/graboid/entity.rb', line 38
# Selector used to find fragments: the explicitly configured one if set,
# otherwise +inferred_selector+.
#
# @return [Object] the selector
def root_selector
  return @root_selector if @root_selector

  inferred_selector
end
#selector(selector) ⇒ Object Also known as: root
32 33 34 |
# File 'lib/graboid/entity.rb', line 32
# Configures the root selector.
#
# @param selector [Object] the selector to store
# @return [Object] the stored selector
def selector(selector)
  @root_selector = selector
end
#set(name, opts = {}, &block) ⇒ Object Also known as: field
23 24 25 26 27 28 |
# File 'lib/graboid/entity.rb', line 23
# Registers an attribute in +attribute_map+.
#
# The selector defaults to ".<name>" when +opts+ carries no present
# +:selector+, and a given block is stored under +:processor+.
#
# The options are copied before the defaults are filled in, so the hash
# passed by the caller is not mutated (and can be reused for other
# attributes without leaking a selector or processor between them).
#
# @param name [Object] attribute name, used as the key in +attribute_map+
# @param opts [Hash] options; +:selector+ is read, +:processor+ is set
#   when a block is given
# @yield optional processor block
# @return [Hash] the options stored for the attribute
def set(name, opts = {}, &block)
  settings = opts.dup
  settings[:selector] = ".#{name}" unless settings[:selector].present?
  settings[:processor] = block if block_given?
  attribute_map[name] = settings
end
#source ⇒ Object
15 16 17 |
# File 'lib/graboid/entity.rb', line 15
# Reader for the configured source.
#
# @return [Object, nil] the value last assigned through +source=+
def source
  @source
end
#source=(src) ⇒ Object
19 20 21 |
# File 'lib/graboid/entity.rb', line 19
# Writer for the source.
#
# @param src [Object] the new source value
# @return [Object] the assigned value
def source=(src)
  @source = src
end