Class: Wiki::Api::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/wiki/api/page.rb

Overview

MediaWiki page: a collection of all HTML information plus its page title.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Page

Returns a new instance of Page.



9
10
11
12
13
# File 'lib/wiki/api/page.rb', line 9

# Build a page and the connection object it uses to fetch content.
#
# @param options [Hash] optional settings
# @option options [Object] :name assigned through +name=+ when the key is present
# @option options [Object] :uri assigned through +uri=+ when the key is present
def initialize(options = {})
  # presence of the key decides, not the value: an explicit nil is still assigned
  self.name = options[:name] if options.include?(:name)
  self.uri = options[:uri] if options.include?(:uri)
  # uri is read back through its reader, so the connection receives whatever was stored above
  @connect = Wiki::Api::Connect.new(uri: uri)
end

Instance Attribute Details

#name ⇒ Object

Returns the value of attribute name.



7
8
9
# File 'lib/wiki/api/page.rb', line 7

# Reader for the +name+ attribute.
#
# @return [Object] the value currently held in @name
def name
  @name
end

#parent ⇒ Object

Returns the value of attribute parent.



7
8
9
# File 'lib/wiki/api/page.rb', line 7

# Reader for the +parent+ attribute.
#
# @return [Object] the value currently held in @parent
def parent
  @parent
end

#parsed_page ⇒ Object

Returns the value of attribute parsed_page.



7
8
9
# File 'lib/wiki/api/page.rb', line 7

# Reader for the +parsed_page+ attribute.
#
# @return [Object] the value currently held in @parsed_page
def parsed_page
  @parsed_page
end

#uri ⇒ Object

Returns the value of attribute uri.



7
8
9
# File 'lib/wiki/api/page.rb', line 7

# Reader for the +uri+ attribute.
#
# @return [Object] the value currently held in @uri
def uri
  @uri
end

Instance Method Details

#collect_elements(element) ⇒ Object

Collect the elements that belong to a headline: the sibling elements that follow it, not its nested children.



85
86
87
88
89
90
91
92
93
94
95
# File 'lib/wiki/api/page.rb', line 85

# Collect an element together with the siblings that follow it, stopping
# before the next headline. Siblings are reached through +next+; children of
# an element are not descended into.
#
# @param element [#next, #to_html, nil] element to start from
# @return [Array] the start element followed by its siblings up to, but not
#   including, the first one whose HTML contains class="mw-headline";
#   an empty array when +element+ is nil
def collect_elements(element)
  elements = []
  # parse_blocks hands over `x.parent.next`, which is nil when nothing follows
  # the headline; there is nothing to collect in that case
  return elements if element.nil?

  loop do
    # the start element is always kept, even if it contains a headline itself
    elements << element
    element = element.next
    break if element.nil? || element.to_html.include?('class="mw-headline"')
  end
  elements
end

#connect ⇒ Object



15
16
17
# File 'lib/wiki/api/page.rb', line 15

# Reader for the connection object stored in @connect.
#
# @return [Object] the value currently held in @connect
def connect
  @connect
end

#filter_headline(xs, headline_name) ⇒ Object



97
98
99
100
101
102
103
104
# File 'lib/wiki/api/page.rb', line 97

# Keep only the headline nodes whose id starts with the requested name,
# ignoring case.
#
# @param xs [Enumerable] nodes responding to +attributes+, each carrying an "id" attribute
# @param headline_name [String] headline to look for
# @return [Array] the nodes of +xs+ whose downcased id starts with the
#   downcased, underscore-joined +headline_name+
def filter_headline(xs, headline_name)
  # turn the name into a wiki id: lower case, spaces replaced by underscores
  wiki_id = headline_name.downcase.tr(" ", "_")
  xs.select { |node| node.attributes["id"].value.downcase.start_with?(wiki_id) }
end

#first_part ⇒ Object

Harvest the first part of the page (the part that has no heading and no class="mw-headline" span).



79
80
81
82
# File 'lib/wiki/api/page.rb', line 79

# Fetch the first node of the page's opening paragraph.
#
# The page is requested from @connect by name only when no parsed page is
# cached yet; the result is stored through +parsed_page=+.
#
# @return [Object] first child of the first "p" element found in the parsed page
def first_part
  self.parsed_page = @connect.page(name) unless parsed_page

  opening_paragraph = parsed_page.search("p").first
  opening_paragraph.children.first
end

#load_page! ⇒ Object



43
44
45
# File 'lib/wiki/api/page.rb', line 43

# Make sure a parsed page is available, fetching it at most once.
#
# @return [Object] the cached parsed page, or the freshly fetched one after
#   it has been stored through +parsed_page=+
def load_page!
  cached = parsed_page
  return cached if cached

  # nothing cached yet: ask the connection for the page with this name
  self.parsed_page = @connect.page(name)
end

#parse_blocks(headline_name = nil) ⇒ Object

parse blocks



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/wiki/api/page.rb', line 49

# Split the page into blocks keyed by headline and wrap them in a root
# PageHeadline.
#
# The result hash maps the page name to the elements of the page's first
# part, and each headline id to the elements that follow that headline. Every
# value is an array of element lists as returned by +collect_elements+.
#
# @param headline_name [String, nil] restrict the result to headlines whose id
#   starts with this name (case-insensitive); nil selects the whole page
# @return [PageHeadline] root object built with level 0, named after the first
#   key of the result hash
def parse_blocks(headline_name = nil)
  load_page!
  result = {}

  # headline nodes are the spans carrying class "mw-headline"
  headline_nodes = parsed_page.xpath("//span[@class='mw-headline']")

  # narrow down to the requested headline (filter_headline ignores case)
  headline_nodes = filter_headline(headline_nodes, headline_name) unless headline_name.nil?

  # NOTE: the first part has no id attribute, so it cannot be selected through
  # the xpath above. Both sides are downcased here so that the page title
  # matches regardless of case, like the headline filter does.
  if headline_name.nil? || headline_name.downcase.start_with?(name.downcase)
    lead = first_part
    (result[name] ||= []) << collect_elements(lead.parent)
  end

  # append one block per headline: everything after the heading element
  headline_nodes.each do |node|
    headline = node.attributes["id"].value
    (result[headline] ||= []) << collect_elements(node.parent.next)
  end

  # NOTE(review): when headline_name matches neither the page name nor any
  # headline id, result is empty and `result.first[0]` raises NoMethodError.
  # Left as is; decide what callers should get in that case.
  PageHeadline.new(parent: self, name: result.first[0], headlines: result, level: 0)
end

#reset! ⇒ Object



39
40
41
# File 'lib/wiki/api/page.rb', line 39

# Drop the cached parsed page so that the next load fetches it again.
#
# @return [nil]
def reset!
  # the attribute is parsed_page; assigning to `parse_page` raised NoMethodError
  self.parsed_page = nil
end

#root_headline ⇒ Object

collect all headlines, keep original page formatting



21
22
23
# File 'lib/wiki/api/page.rb', line 21

# Root headline of the whole page: +parse_blocks+ called without a headline
# filter.
#
# @return [Object] whatever +parse_blocks+ returns
def root_headline
  parse_blocks
end

#to_html ⇒ Object



34
35
36
37
# File 'lib/wiki/api/page.rb', line 34

# Serialise the page through the parsed page's +to_xhtml+.
#
# The page is loaded first if necessary. Output is requested with an indent
# of 3 and a single space as indent text.
#
# @return [Object] the result of +to_xhtml+ (presumably a String; confirm against the parser)
def to_html
  load_page!
  document = parsed_page
  document.to_xhtml(indent: 3, indent_text: " ")
end