Class: Wiki::Api::Page

Inherits:

Object

Object
Wiki::Api::Page

show all

Defined in: lib/wiki/api/page.rb

Overview

MediaWiki page: the collection of all HTML information for a page, plus its page title.

Instance Attribute Summary collapse

#name ⇒ Object

Returns the value of attribute name.
#parent ⇒ Object

Returns the value of attribute parent.
#parsed_page ⇒ Object

Returns the value of attribute parsed_page.
#uri ⇒ Object

Returns the value of attribute uri.

Instance Method Summary collapse

#collect_elements(element) ⇒ Object

collect elements within headlines (not nested properties, but next elements).
#connect ⇒ Object
#filter_headline(xs, headline_name) ⇒ Object
#first_part ⇒ Object

harvest first part of the page (missing heading and class=“mw-headline”).
#initialize(options = {}) ⇒ Page constructor

A new instance of Page.
#load_page! ⇒ Object
#parse_blocks(headline_name = nil) ⇒ Object

parse blocks.
#reset! ⇒ Object
#root_headline ⇒ Object

collect all headlines, keep original page formatting.
#to_html ⇒ Object

Constructor Details

#initialize(options = {}) ⇒ `Page`

Returns a new instance of Page.

# File 'lib/wiki/api/page.rb', line 9

# Builds a page and the connection object it uses to fetch its content.
#
# @param options [Hash] only the :name and :uri keys are read; each one is
#   assigned through its writer only when the key is present
def initialize(options = {})
  self.name = options[:name] if options.key?(:name)
  self.uri = options[:uri] if options.key?(:uri)
  # the connection receives whatever the uri reader returns at this point
  @connect = Wiki::Api::Connect.new(uri: uri)
end

Instance Attribute Details

#name ⇒ `Object`

Returns the value of attribute name.



7
8
9

# File 'lib/wiki/api/page.rb', line 7

# Reader for the name attribute.
#
# @return [Object] the current value of @name
def name
  @name
end

#parent ⇒ `Object`

Returns the value of attribute parent.



7
8
9

# File 'lib/wiki/api/page.rb', line 7

# Reader for the parent attribute.
#
# @return [Object] the current value of @parent
def parent
  @parent
end

#parsed_page ⇒ `Object`

Returns the value of attribute parsed_page.



7
8
9

# File 'lib/wiki/api/page.rb', line 7

# Reader for the parsed_page attribute.
#
# @return [Object] the current value of @parsed_page
#   (presumably the document cached by load_page! -- the writer is not shown here)
def parsed_page
  @parsed_page
end

#uri ⇒ `Object`

Returns the value of attribute uri.



7
8
9

# File 'lib/wiki/api/page.rb', line 7

# Reader for the uri attribute.
#
# @return [Object] the current value of @uri
def uri
  @uri
end

Instance Method Details

#collect_elements(element) ⇒ `Object`

collect elements within headlines (not nested properties, but next elements)

# File 'lib/wiki/api/page.rb', line 85

# Collects an element and the siblings that follow it, up to (but not
# including) the next sibling whose HTML contains a headline span.
# Siblings are walked with #next; nested children are not descended into.
#
# @param element [#next, #to_html, nil] the node to start from
# @return [Array] the starting element followed by its siblings up to the
#   next headline; empty when element is nil
def collect_elements(element)
  elements = []
  # nothing to walk, e.g. a headline that is the last node of the page
  return elements if element.nil?

  loop do
    # the starting element is always kept; only later siblings are tested
    elements << element
    element = element.next
    break if element.nil? || element.to_html.include?("class=\"mw-headline\"")
  end
  elements
end

#connect ⇒ `Object`



15
16
17

# File 'lib/wiki/api/page.rb', line 15

# Reader for the connection created in the constructor.
#
# @return [Object] the current value of @connect
def connect
  @connect
end

#filter_headline(xs, headline_name) ⇒ `Object`

# File 'lib/wiki/api/page.rb', line 97

# Keeps only the headline nodes whose id attribute begins with the
# requested headline name, compared case-insensitively.
#
# @param xs [Enumerable] nodes responding to attributes["id"].value
# @param headline_name [String] headline to look for; spaces are matched
#   against underscores in the id
# @return [Array] the nodes whose id starts with the normalised name
def filter_headline(xs, headline_name)
  # normalise to the wiki id form: lower case, underscores instead of spaces
  wanted_prefix = headline_name.downcase.tr(" ", "_")
  xs.select do |node|
    node.attributes["id"].value.downcase.start_with?(wanted_prefix)
  end
end

#first_part ⇒ `Object`

harvest first part of the page (missing heading and class=“mw-headline”)

# File 'lib/wiki/api/page.rb', line 79

# Harvests the first part of the page: the content that comes before any
# headline and therefore has no heading and no class="mw-headline" span.
#
# @return [Object, nil] the first child of the first <p> element of the
#   parsed page; nil when the page has no <p> element or that element has
#   no children
def first_part
  # reuse the shared lazy loader rather than duplicating the fetch here
  self.load_page!
  paragraph = self.parsed_page.search("p").first
  # a page without any paragraph has no first part to harvest
  return nil if paragraph.nil?

  paragraph.children.first
end

#load_page! ⇒ `Object`



43
44
45

# File 'lib/wiki/api/page.rb', line 43

# Fetches the page through the connection the first time it is needed and
# caches it in parsed_page; later calls return the cached value.
#
# @return [Object] the cached or freshly fetched page
def load_page!
  cached = self.parsed_page
  return cached if cached

  self.parsed_page = @connect.page(self.name)
end

#parse_blocks(headline_name = nil) ⇒ `Object`

parse blocks

# File 'lib/wiki/api/page.rb', line 49

# Parses the page into blocks keyed by headline and wraps them in a root
# PageHeadline.
#
# The first part of the page (before any headline) is stored under the page
# name; every headline span is stored under its id attribute. Each entry is
# a list of element lists as returned by collect_elements.
#
# @param headline_name [String, nil] when given, only headlines whose id
#   starts with this name are kept (case-insensitive); the first part is
#   kept only when the name starts with the page name (case-insensitive)
# @return [PageHeadline] root headline (level 0) whose parent is this page
def parse_blocks(headline_name = nil)
  self.load_page!
  result = {}

  # get headline nodes by span class
  xs = self.parsed_page.xpath("//span[@class='mw-headline']")

  # filter single headline by name (ignore case)
  xs = self.filter_headline(xs, headline_name) unless headline_name.nil?

  # NOTE: first_part has no id attribute and thus cannot be filtered or processed within xpath (xs)
  # compare lower-cased on both sides so this matches the case-insensitive headline filter
  if headline_name.nil? || headline_name.downcase.start_with?(self.name.downcase)
    lead = self.first_part
    # first_part may be nil (e.g. an empty first paragraph); then there is nothing to store
    unless lead.nil?
      result[self.name] ||= []
      result[self.name] << self.collect_elements(lead.parent)
    end
  end

  # append all blocks
  xs.each do |x|
    headline = x.attributes["id"].value
    following = x.parent.next
    # a headline that closes the page has no following sibling to collect
    elements = following.nil? ? [] : self.collect_elements(following)
    result[headline] ||= []
    result[headline] << elements
  end

  # create root object; fall back to the page name when nothing matched
  # NOTE(review): confirm PageHeadline copes with an empty headlines hash
  root_name = result.empty? ? self.name : result.first[0]
  PageHeadline.new parent: self, name: root_name, headlines: result, level: 0
end

#reset! ⇒ `Object`



39
40
41

# File 'lib/wiki/api/page.rb', line 39

# Drops the cached parsed page so that the next load_page! fetches it again.
#
# @return [nil]
def reset!
  # the attribute is parsed_page; there is no parse_page writer
  self.parsed_page = nil
end

#root_headline ⇒ `Object`

collect all headlines, keep original page formatting



21
22
23

# File 'lib/wiki/api/page.rb', line 21

# Collects every headline of the page, keeping the original page formatting,
# by running parse_blocks without a headline filter.
#
# @return [Object] whatever parse_blocks returns for the unfiltered page
def root_headline
  parse_blocks
end

#to_html ⇒ `Object`

# File 'lib/wiki/api/page.rb', line 34

# Renders the parsed page as XHTML, loading the page first when needed.
#
# @return [Object] the result of to_xhtml on the parsed page, requested
#   with an indent of 3 and a single space as indent text
def to_html
  load_page!
  parsed_page.to_xhtml(indent: 3, indent_text: " ")
end

Class: Wiki::Api::Page

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Page

Instance Attribute Details

#name ⇒ Object

#parent ⇒ Object

#parsed_page ⇒ Object

#uri ⇒ Object

Instance Method Details

#collect_elements(element) ⇒ Object

#connect ⇒ Object

#filter_headline(xs, headline_name) ⇒ Object

#first_part ⇒ Object

#load_page! ⇒ Object

#parse_blocks(headline_name = nil) ⇒ Object

#reset! ⇒ Object

#root_headline ⇒ Object

#to_html ⇒ Object