Module: Graboid::Entity::ClassMethods
- Defined in:
- lib/graboid/entity.rb
Instance Method Summary
- #all(opts = {}) ⇒ Object
- #all_fragments ⇒ Object
- #attribute_map ⇒ Object
- #collection ⇒ Object
- #collection=(col) ⇒ Object
- #current_page ⇒ Object
- #current_page=(num) ⇒ Object
- #doc ⇒ Object
- #extract_instance(fragment) ⇒ Object
- #hash_map(fragment) ⇒ Object
- #inferred_selector ⇒ Object
- #inherited(subclass) ⇒ Object
- #inherited_attributes(*args) ⇒ Object
- #max_pages ⇒ Object
- #max_pages=(num) ⇒ Object
- #mode ⇒ Object
- #mode=(m) ⇒ Object
- #next_page? ⇒ Boolean
- #page_fragments ⇒ Object
- #pager(&block) ⇒ Object
- #paginate ⇒ Object
- #read_source ⇒ Object
- #reset_context ⇒ Object
- #root_selector ⇒ Object
- #selector(selector) ⇒ Object (also: #root)
- #set(name, opts = {}, &block) ⇒ Object (also: #field)
- #source ⇒ Object
- #source=(src) ⇒ Object
Instance Method Details
#all(opts = {}) ⇒ Object
126 127 128 129 130 |
# File 'lib/graboid/entity.rb', line 126
# Resets the pagination state, optionally applies a page limit, and turns
# every fragment returned by +all_fragments+ into an instance.
#
# @param opts [Hash] options; only +:max_pages+ is read here
# @return [Object] the mapped results of +extract_instance+, one per fragment
def all(opts = {})
  reset_context
  page_limit = opts[:max_pages]
  # reset_context has just zeroed max_pages; only override when a limit is given.
  self.max_pages = page_limit unless page_limit.nil?
  all_fragments.map { |fragment| extract_instance(fragment) }
end
#all_fragments ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/graboid/entity.rb', line 95
# Gathers fragments from the current source, following pagination when a
# pager block has been registered.
#
# Without a pager, the fragments of the current page are returned directly.
# With a pager, fragments are accumulated into +collection+ for as long as
# +next_page?+ is true, running the before/after paginate callbacks around
# each call to +paginate+.
#
# The original +source+ is always restored afterwards, even when fetching a
# page or running a callback raises, so a failed crawl does not leave the
# source pointing at an intermediate page.
#
# NOTE(review): a page is only collected while +next_page?+ is true, so the
# page on which it turns false contributes no fragments - confirm intended.
#
# @return [Object] +page_fragments+ when no pager is set, otherwise +collection+
def all_fragments
  return page_fragments if @pager.nil?

  old_source = source
  begin
    while next_page?
      self.collection = collection + page_fragments
      run_before_paginate_callbacks
      paginate
      run_after_paginate_callbacks
    end
  ensure
    self.source = old_source
  end
  collection
end
#attribute_map ⇒ Object
77 78 79 |
# File 'lib/graboid/entity.rb', line 77
# Looks up the +:attribute_map+ inheritable attribute.
#
# @return [Object] whatever +read_inheritable_attribute+ returns for
#   +:attribute_map+ (used elsewhere in this module like a Hash keyed by
#   attribute name)
def attribute_map
  read_inheritable_attribute(:attribute_map)
end
#collection ⇒ Object
69 70 71 |
# File 'lib/graboid/entity.rb', line 69
# Returns the accumulated fragment collection, creating an empty Array the
# first time it is requested (or whenever the stored value is nil/false).
#
# @return [Object] the stored collection
def collection
  @collection = [] unless @collection
  @collection
end
#collection=(col) ⇒ Object
73 74 75 |
# File 'lib/graboid/entity.rb', line 73
# Replaces the accumulated fragment collection.
#
# @param col [Object] the new collection
# @return [Object] +col+
def collection=(col)
  @collection = col
end
#current_page ⇒ Object
168 169 170 |
# File 'lib/graboid/entity.rb', line 168
# Returns the current page counter, defaulting it to 0 when unset.
#
# @return [Object] the stored page counter
def current_page
  @current_page || (@current_page = 0)
end
#current_page=(num) ⇒ Object
172 173 174 |
# File 'lib/graboid/entity.rb', line 172
# Sets the current page counter.
#
# @param num [Object] the new counter value
# @return [Object] +num+
def current_page=(num)
  @current_page = num
end
#doc ⇒ Object
65 66 67 |
# File 'lib/graboid/entity.rb', line 65
# Parses the current source with the Nokogiri parser matching +mode+.
#
# The parser entry point is selected by upcasing the mode (+:html+ ->
# +Nokogiri.HTML+, +:xml+ -> +Nokogiri.XML+) and is invoked through
# +public_send+ rather than building and eval-ing a code string.
#
# Nothing is cached here: every call invokes +read_source+ again.
#
# @return [Object] the document returned by the selected Nokogiri parser
def doc
  Nokogiri.public_send(mode.to_s.upcase, read_source)
end
#extract_instance(fragment) ⇒ Object
81 82 83 |
# File 'lib/graboid/entity.rb', line 81
# Builds a new instance from the attributes extracted out of +fragment+.
#
# @param fragment [Object] a fragment accepted by +hash_map+
# @return [Object] the result of +new+ called with the extracted hash
def extract_instance(fragment)
  extracted_attributes = hash_map(fragment)
  new(extracted_attributes)
end
#hash_map(fragment) ⇒ Object
85 86 87 88 89 90 91 92 93 |
# File 'lib/graboid/entity.rb', line 85
# Extracts one value per entry of +attribute_map+ from +fragment+.
#
# For each attribute the configured +:selector+ is evaluated against the
# fragment - with +css+ in +:html+ mode, with +xpath+ otherwise - and only
# the first matching node is used. Without a +:processor+ the node's
# +inner_html+ is stored; with one, the processor is called with the node.
#
# When nothing matches and no processor is configured, calling +inner_html+
# on nil raises NoMethodError; a processor receives nil instead.
#
# @param fragment [Object] an object responding to +css+ / +xpath+
# @return [Hash] attribute name => extracted value
def hash_map(fragment)
  attribute_map.each_with_object({}) do |entry, extracted|
    name = entry.first
    settings = entry.last
    query = settings[:selector]
    processor = settings[:processor]

    matches = mode == :html ? fragment.css(query) : fragment.xpath(query)
    first_node = matches.first

    extracted[name] = processor.nil? ? first_node.inner_html : processor.call(first_node)
  end
end
#inferred_selector ⇒ Object
61 62 63 |
# File 'lib/graboid/entity.rb', line 61
# Derives a default selector from the receiver's name: a dot followed by
# the underscored form of +to_s+. The result is memoized.
#
# Relies on +String#underscore+, which is not core Ruby (presumably provided
# by ActiveSupport - confirm against the file's requires).
#
# @return [String] the memoized selector
def inferred_selector
  return @inferred_selector if @inferred_selector

  @inferred_selector = ".#{to_s.underscore}"
end
#inherited(subclass) ⇒ Object
27 28 29 30 31 32 |
# File 'lib/graboid/entity.rb', line 27
# Subclassing hook: copies every registered inheritable attribute from the
# receiver onto +subclass+ as an instance variable of the same name.
#
# The values are passed by reference (the same objects are shared with the
# subclass, not duplicated).
#
# Safe to trigger before +inherited_attributes+ has ever been called: with
# nothing registered there is simply nothing to copy. Any +inherited+ hook
# further up the ancestor chain is still invoked.
#
# @param subclass [Object] the newly created subclass
# @return [Object] the list of attribute names that were copied
def inherited(subclass)
  super if defined?(super)

  (@inherited_attributes || []).each do |inheritable_attribute|
    instance_var = "@#{inheritable_attribute}"
    subclass.instance_variable_set(instance_var, instance_variable_get(instance_var))
  end
end
#inherited_attributes(*args) ⇒ Object
16 17 18 19 20 21 22 23 24 25 |
# File 'lib/graboid/entity.rb', line 16
# Registers attribute names to be propagated to subclasses and defines a
# reader/writer pair for each one on the receiver's singleton class.
#
# Called without arguments it acts as a reader for the registered list.
# The list always starts with +:inherited_attributes+ itself.
#
# Accessors are defined with +attr_accessor+ on the singleton class directly
# instead of eval-ing an interpolated code string, and names that are
# already registered are not appended a second time.
#
# The list is rebuilt into a new Array rather than mutated in place, so a
# list object shared with another class via +inherited+ is left untouched.
#
# @param args [Array<Symbol, String>] attribute names to register
# @return [Array] the full list of registered attribute names
def inherited_attributes(*args)
  @inherited_attributes ||= [:inherited_attributes]
  @inherited_attributes |= args

  metaclass = class << self; self; end
  args.each { |arg| metaclass.send(:attr_accessor, arg) }

  @inherited_attributes
end
#max_pages ⇒ Object
160 161 162 |
# File 'lib/graboid/entity.rb', line 160
# Returns the page limit, defaulting it to 0 when unset. +next_page?+
# treats 0 as "no fixed limit" and consults the pager instead.
#
# @return [Object] the stored page limit
def max_pages
  @max_pages || (@max_pages = 0)
end
#max_pages=(num) ⇒ Object
164 165 166 |
# File 'lib/graboid/entity.rb', line 164
# Sets the page limit.
#
# @param num [Object] the new limit
# @return [Object] +num+
def max_pages=(num)
  @max_pages = num
end
#mode ⇒ Object
151 152 153 |
# File 'lib/graboid/entity.rb', line 151
# Returns the parsing mode, defaulting it to +:html+ when unset.
#
# @return [Object] the stored mode
def mode
  @mode = :html unless @mode
  @mode
end
#mode=(m) ⇒ Object
155 156 157 158 |
# File 'lib/graboid/entity.rb', line 155
# Sets the parsing mode.
#
# @param m [Symbol] either +:html+ or +:xml+ (Strings are not accepted)
# @return [Symbol] +m+
# @raise [ArgumentError] when +m+ is neither +:html+ nor +:xml+; the
#   previously stored mode is left unchanged
def mode=(m)
  unless [:html, :xml].include?(m)
    raise ArgumentError, "mode must be :html or :xml, got #{m.inspect}"
  end

  @mode = m
end
#next_page? ⇒ Boolean
114 115 116 117 118 119 120 |
# File 'lib/graboid/entity.rb', line 114
# Tells whether pagination should continue.
#
# With +max_pages+ at 0 the pager block decides: there is a next page as
# long as calling it with the current document yields something other than
# nil. With a positive limit only the page counter is compared against the
# limit and the pager is not consulted.
#
# Always returns a strict boolean (false, not nil, when the pager has
# nothing more to offer).
#
# @return [Boolean]
def next_page?
  return !@pager.call(doc).nil? if max_pages.zero?

  current_page <= max_pages - 1
end
#page_fragments ⇒ Object
122 123 124 |
# File 'lib/graboid/entity.rb', line 122
# Selects the fragments of the current page by running +root_selector+ as a
# CSS query against the parsed document (CSS is used regardless of +mode+).
#
# @return [Object] whatever the document's +css+ method returns
def page_fragments
  document = doc
  document.css(root_selector)
end
#pager(&block) ⇒ Object
147 148 149 |
# File 'lib/graboid/entity.rb', line 147
# Registers the block used for pagination. Elsewhere in this module it is
# called with the current document, and its result becomes the next source.
#
# Calling this without a block stores nil, which clears the pager.
#
# @yield [Object] presumably the parsed document - see the call sites
# @return [Proc, nil] the stored block
def pager(&block)
  @pager = block
end
#paginate ⇒ Object
108 109 110 111 112 |
# File 'lib/graboid/entity.rb', line 108
# Advances to the next page: asks the pager for the next source, stores it,
# and increments the page counter.
#
# Any StandardError raised while building the document or calling the pager
# is swallowed, in which case the source becomes nil.
#
# @return [Object] the incremented page counter
def paginate
  upcoming_source =
    begin
      @pager.call(doc)
    rescue StandardError
      nil
    end

  self.source = upcoming_source
  self.current_page = current_page + 1
end
#read_source ⇒ Object
138 139 140 141 142 143 144 145 |
# File 'lib/graboid/entity.rb', line 138
# Resolves +source+ into something parseable.
#
# * A String that *begins* with +http://+ or +https://+ is fetched through
#   open-uri, sending +Graboid.user_agent+ as the User-Agent header.
# * Any other String is treated as literal markup and returned as is.
# * Anything else (including nil) yields nil.
#
# The URL check is anchored to the start of the whole string (+\A+), so
# markup that merely contains a URL on one of its lines is never handed to
# +open+. +URI.open+ is used because +Kernel#open+ no longer opens URLs on
# Ruby 3.0+ and would also interpret a leading "|" as a command to run.
#
# @return [Object, nil] the opened remote resource, the literal String, or nil
def read_source
  location = source

  case location
  when %r{\Ahttps?://}
    URI.open(location, "User-Agent" => Graboid.user_agent)
  when String
    location
  end
end
#reset_context ⇒ Object
132 133 134 135 136 |
# File 'lib/graboid/entity.rb', line 132
# Returns the pagination state to its initial values: an empty collection,
# page counter 0 and page limit 0.
#
# @return [Integer] 0 (the value of the last assignment)
def reset_context
  self.collection   = []
  self.current_page = 0
  self.max_pages    = 0
end
#root_selector ⇒ Object
57 58 59 |
# File 'lib/graboid/entity.rb', line 57
# Returns the explicitly configured root selector, falling back to
# +inferred_selector+ when none has been set.
#
# @return [Object] the selector used to pick fragments out of a page
def root_selector
  return @root_selector if @root_selector

  inferred_selector
end
#selector(selector) ⇒ Object Also known as: root
51 52 53 |
# File 'lib/graboid/entity.rb', line 51
# Sets the root selector that +root_selector+ returns.
#
# @param selector [Object] the selector to store
# @return [Object] +selector+
def selector(selector)
  @root_selector = selector
end
#set(name, opts = {}, &block) ⇒ Object Also known as: field
42 43 44 45 46 47 |
# File 'lib/graboid/entity.rb', line 42
# Declares an attribute to extract and stores its settings in
# +attribute_map+ under +name+.
#
# When no +:selector+ is given it defaults to ".<name>". A block, when
# supplied, is stored as the +:processor+.
#
# The caller's +opts+ hash is copied before the defaults are filled in, so
# it is never modified (and may be frozen or reused across several calls).
#
# @param name [Symbol, String] attribute name, also used for the default selector
# @param opts [Hash] settings; +:selector+ and +:processor+ are the keys used here
# @yield optional processor, stored under +:processor+
# @return [Hash] the settings stored for +name+
def set(name, opts = {}, &block)
  settings = opts.dup
  settings[:selector] = ".#{name}" if settings[:selector].nil?
  settings[:processor] = block if block
  attribute_map[name] = settings
end
#source ⇒ Object
34 35 36 |
# File 'lib/graboid/entity.rb', line 34
# Returns the current source; +read_source+ treats it as either a URL or
# literal markup.
#
# @return [Object, nil] the stored source, nil when none has been set
def source
  @source
end
#source=(src) ⇒ Object
38 39 40 |
# File 'lib/graboid/entity.rb', line 38
# Sets the current source.
#
# @param src [Object] the new source
# @return [Object] +src+
def source=(src)
  @source = src
end