Class: Wiki::Api::Page
- Inherits:
-
Object
- Object
- Wiki::Api::Page
- Defined in:
- lib/wiki/api/page.rb
Overview
MediaWiki page: a collection of all HTML information plus its page title
Instance Attribute Summary collapse
-
#name ⇒ Object
Returns the value of attribute name.
-
#parent ⇒ Object
Returns the value of attribute parent.
-
#parsed_page ⇒ Object
Returns the value of attribute parsed_page.
-
#uri ⇒ Object
Returns the value of attribute uri.
Instance Method Summary collapse
-
#collect_elements(element) ⇒ Object
collect elements within headlines (not nested properties, but next elements).
- #connect ⇒ Object
- #filter_headline(xs, headline_name) ⇒ Object
-
#first_part ⇒ Object
harvest first part of the page (missing heading and class=“mw-headline”).
-
#initialize(options = {}) ⇒ Page
constructor
A new instance of Page.
- #load_page! ⇒ Object
-
#parse_blocks(headline_name = nil) ⇒ Object
parse blocks.
- #reset! ⇒ Object
-
#root_headline ⇒ Object
collect all headlines, keep original page formatting.
- #to_html ⇒ Object
Constructor Details
Instance Attribute Details
#name ⇒ Object
Returns the value of attribute name.
7 8 9 |
# File 'lib/wiki/api/page.rb', line 7
# Returns the value of attribute name.
#
# @return [Object] the current value of the @name instance variable
def name
  @name
end
#parent ⇒ Object
Returns the value of attribute parent.
7 8 9 |
# File 'lib/wiki/api/page.rb', line 7
# Returns the value of attribute parent.
#
# @return [Object] the current value of the @parent instance variable
def parent
  @parent
end
#parsed_page ⇒ Object
Returns the value of attribute parsed_page.
7 8 9 |
# File 'lib/wiki/api/page.rb', line 7
# Returns the value of attribute parsed_page.
#
# @return [Object, nil] the current value of the @parsed_page instance variable
def parsed_page
  @parsed_page
end
#uri ⇒ Object
Returns the value of attribute uri.
7 8 9 |
# File 'lib/wiki/api/page.rb', line 7
# Returns the value of attribute uri.
#
# @return [Object] the current value of the @uri instance variable
def uri
  @uri
end
Instance Method Details
#collect_elements(element) ⇒ Object
collect elements within headlines (not nested properties, but next elements)
85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/wiki/api/page.rb', line 85
# Collect the elements that belong to a headline: the given node followed by
# its successors (via +next+), not nested properties.
#
# The given node is always included without being inspected. Collection stops
# before the first following node whose HTML contains class="mw-headline",
# or when there is no next node.
#
# @param element [#next, #to_html, nil] node to start collecting from
# @return [Array] the collected nodes; empty when +element+ is nil
def collect_elements(element)
  elements = []
  # nothing to walk (e.g. a headline without any following node)
  return elements if element.nil?

  # iterate until the next headline or the end of the sibling chain
  loop do
    elements << element
    element = element.next
    break if element.nil? || element.to_html.include?('class="mw-headline"')
  end
  elements
end
#connect ⇒ Object
15 16 17 |
# File 'lib/wiki/api/page.rb', line 15
# Returns the connection object stored in @connect.
#
# @return [Object] the current value of the @connect instance variable
def connect
  @connect
end
#filter_headline(xs, headline_name) ⇒ Object
97 98 99 100 101 102 103 104 |
# File 'lib/wiki/api/page.rb', line 97
# Keep only the headline nodes whose id matches the given headline name.
#
# The name is transformed to a wiki id (downcased, spaces replaced with
# underscores) and compared as a prefix against each node's downcased id.
# Nodes without an id attribute are skipped.
#
# @param xs [Enumerable] nodes responding to +attributes+ (a hash-like
#   lookup whose "id" entry responds to +value+)
# @param headline_name [String] headline to look for (case-insensitive)
# @return [Array] the nodes whose id starts with the wiki id
def filter_headline(xs, headline_name)
  # transform name to a wiki_id (downcase and space replace with underscore)
  wiki_id = headline_name.downcase.tr(" ", "_")
  # keep matching id's only
  xs.select do |node|
    id_attribute = node.attributes["id"]
    id_attribute && id_attribute.value.downcase.start_with?(wiki_id)
  end
end
#first_part ⇒ Object
harvest first part of the page (missing heading and class=“mw-headline”)
79 80 81 82 |
# File 'lib/wiki/api/page.rb', line 79
# Harvest the first part of the page (missing heading and class="mw-headline").
#
# Loads the page through @connect on first use (memoized in parsed_page), then
# returns the first child of the first "p" element found.
#
# @return [Object, nil] first child of the first paragraph; nil when the page
#   contains no paragraph
def first_part
  self.parsed_page ||= @connect.page(name)
  first_paragraph = parsed_page.search("p").first
  # a page without any paragraph has no first part
  first_paragraph && first_paragraph.children.first
end
#load_page! ⇒ Object
43 44 45 |
# File 'lib/wiki/api/page.rb', line 43
# Load the page through @connect unless it has been loaded already.
#
# The result of @connect.page(name) is memoized in parsed_page, so repeated
# calls do not fetch again while parsed_page is set.
#
# @return [Object] the (possibly previously) loaded parsed page
def load_page!
  self.parsed_page ||= @connect.page(name)
end
#parse_blocks(headline_name = nil) ⇒ Object
parse blocks
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/wiki/api/page.rb', line 49
# Parse the page into blocks keyed by headline and wrap them in a root
# PageHeadline (level 0).
#
# Without a headline name every headline is collected, preceded by the first
# part of the page stored under the page name. With a headline name only the
# headlines matching it (see #filter_headline) are collected; the first part is
# included when the headline name starts with the page name (ignoring case).
#
# @param headline_name [String, nil] headline to restrict the result to
# @return [PageHeadline, nil] root headline named after the first collected
#   block; nil when no block matched
def parse_blocks(headline_name = nil)
  load_page!
  result = {}

  # get headline nodes by span class
  headline_spans = parsed_page.xpath("//span[@class='mw-headline']")

  # filter single headline by name (ignore case)
  headline_spans = filter_headline(headline_spans, headline_name) unless headline_name.nil?

  # NOTE: first_part has no id attribute and thus cannot be filtered or processed within xpath
  # both sides are downcased so a mixed-case headline_name still matches the page name
  if headline_name.nil? || headline_name.downcase.start_with?(name.downcase)
    intro = first_part
    (result[name] ||= []) << collect_elements(intro.parent) if intro
  end

  # append all blocks
  headline_spans.each do |span|
    headline = span.attributes["id"].value
    (result[headline] ||= []) << collect_elements(span.parent.next)
  end

  # nothing matched: there is no block to name the root object after
  return nil if result.empty?

  # create root object
  PageHeadline.new(parent: self, name: result.keys.first, headlines: result, level: 0)
end
#reset! ⇒ Object
39 40 41 |
# File 'lib/wiki/api/page.rb', line 39
# Forget the loaded page so that the next load fetches it again.
#
# Clears parsed_page, which #load_page! uses as its memoization slot.
#
# @return [nil]
def reset!
  # the attribute is parsed_page; there is no parse_page writer
  self.parsed_page = nil
end
#root_headline ⇒ Object
collect all headlines, keep original page formatting
21 22 23 |
# File 'lib/wiki/api/page.rb', line 21
# Collect all headlines, keeping the original page formatting.
#
# Delegates to #parse_blocks without a headline filter.
#
# @return [Object] whatever #parse_blocks returns for the whole page
def root_headline
  parse_blocks
end
#to_html ⇒ Object
34 35 36 37 |
# File 'lib/wiki/api/page.rb', line 34
# Render the page as XHTML.
#
# Loads the page first (see #load_page!), then serializes parsed_page with
# +to_xhtml+ using an indent of 3 and " " as indent text.
#
# @return [Object] the result of parsed_page.to_xhtml
def to_html
  load_page!
  parsed_page.to_xhtml(indent: 3, indent_text: " ")
end