Class: Wiki::Api::Page
- Inherits:
-
Object
- Object
- Wiki::Api::Page
- Defined in:
- lib/wiki/api/page.rb
Overview
MediaWiki page: a collection of all HTML information plus its page title
Instance Attribute Summary collapse
-
#connect ⇒ Object
readonly
Returns the value of attribute connect.
-
#name ⇒ Object
Returns the value of attribute name.
-
#parent ⇒ Object
Returns the value of attribute parent.
-
#parsed_page ⇒ Object
Returns the value of attribute parsed_page.
-
#uri ⇒ Object
Returns the value of attribute uri.
Instance Method Summary collapse
-
#collect_elements(element) ⇒ Object
collect elements within headlines (not nested properties, but next elements).
- #filter_headline(xs, headline_name) ⇒ Object
-
#first_part ⇒ Object
harvest first part of the page (missing heading and class=“mw-headline”).
-
#initialize(options = {}) ⇒ Page
constructor
A new instance of Page.
- #load_page! ⇒ Object
-
#parse_blocks(headline_name = nil) ⇒ Object
parse blocks.
- #reset! ⇒ Object
-
#root_headline ⇒ Object
collect all headlines, keep original page formatting.
-
#to_html ⇒ Object
# collect headlines by given name, this will flatten the nested headlines def flat_headlines_by_name headline_name raise “not yet implemented!” # TODO: implement flattening of headlines within the root headline # ALT: breadth-first search option in the root of the first headline self.parse_blocks(headline_name) end.
Constructor Details
Instance Attribute Details
#connect ⇒ Object (readonly)
Returns the value of attribute connect.
15 16 17 |
# File 'lib/wiki/api/page.rb', line 15 def connect @connect end |
#name ⇒ Object
Returns the value of attribute name.
7 8 9 |
# File 'lib/wiki/api/page.rb', line 7 def name @name end |
#parent ⇒ Object
Returns the value of attribute parent.
7 8 9 |
# File 'lib/wiki/api/page.rb', line 7 def parent @parent end |
#parsed_page ⇒ Object
Returns the value of attribute parsed_page.
7 8 9 |
# File 'lib/wiki/api/page.rb', line 7 def parsed_page @parsed_page end |
#uri ⇒ Object
Returns the value of attribute uri.
7 8 9 |
# File 'lib/wiki/api/page.rb', line 7 def uri @uri end |
Instance Method Details
#collect_elements(element) ⇒ Object
collect elements within headlines (not nested properties, but next elements)
80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/wiki/api/page.rb', line 80 def collect_elements(element) # capture first element name elements = [] # iterate text until next headline loop do elements << element element = element.next break if element.nil? || element.to_html.include?('class="mw-headline"') end elements end |
#filter_headline(xs, headline_name) ⇒ Object
92 93 94 95 96 97 98 99 |
# File 'lib/wiki/api/page.rb', line 92 def filter_headline(xs, headline_name) # transform name to a wiki_id (downcase and space replace with underscore) headline_name = headline_name.downcase.gsub(' ', '_') # reject not matching id's xs.select do |t| t.attributes['id'].value.downcase.start_with?(headline_name) end end |
#first_part ⇒ Object
harvest first part of the page (missing heading and class=“mw-headline”)
74 75 76 77 |
# File 'lib/wiki/api/page.rb', line 74 def first_part self.parsed_page ||= @connect.page(name) self.parsed_page.search('p').first.children.first end |
#load_page! ⇒ Object
39 40 41 |
# File 'lib/wiki/api/page.rb', line 39 def load_page! self.parsed_page ||= @connect.page(name) end |
#parse_blocks(headline_name = nil) ⇒ Object
parse blocks
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/wiki/api/page.rb', line 44 def parse_blocks(headline_name = nil) load_page! result = {} # get headline nodes by span class headlines = self.parsed_page.xpath("//span[@class='mw-headline']") # filter single headline by name (ignore case) headlines = filter_headline(headlines, headline_name) unless headline_name.nil? # NOTE: first_part has no id attribute and thus cannot be filtered or processed within xpath (xs) if headline_name.nil? || headline_name.start_with?(name.downcase) x = first_part result[name] ||= [] result[name] << (collect_elements(x.parent)) end # append all blocks headlines.each do |headline| headline_value = headline.attributes['id'].value elements = collect_elements(headline.parent.next) result[headline_value] ||= [] result[headline_value] << elements end # create root object PageHeadline.new(parent: self, name: result.first[0], headlines: result, level: 0) end |
#reset! ⇒ Object
35 36 37 |
# File 'lib/wiki/api/page.rb', line 35 def reset! self.parse_page = nil end |
#root_headline ⇒ Object
collect all headlines, keep original page formatting
18 19 20 |
# File 'lib/wiki/api/page.rb', line 18 def root_headline parse_blocks end |
#to_html ⇒ Object
# collect headlines by given name, this will flatten the nested headlines def flat_headlines_by_name headline_name
raise "not yet implemented!"
# TODO: implement flattening of headlines within the root headline
# ALT: breadth-first search option in the root of the first headline
self.parse_blocks(headline_name)
end
30 31 32 33 |
# File 'lib/wiki/api/page.rb', line 30 def to_html load_page! parsed_page.to_xhtml(indent: 3, indent_text: ' ') end |