Class: Wiki::Api::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/wiki/api/page.rb

Overview

MediaWiki page: the collection of all its HTML information plus its page title.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Page

Returns a new instance of Page.


9
10
11
12
13
# File 'lib/wiki/api/page.rb', line 9

# Builds a page and the connection used to fetch it.
#
# @param options [Hash] optional settings; only the keys below are read
# @option options :name page name, assigned through the name writer when the key is present
# @option options :uri  wiki uri, assigned through the uri writer when the key is present
# @return [Page] a new instance of Page
def initialize(options = {})
  # Only touch the writers for keys that were actually supplied.
  self.name = options[:name] if options.key?(:name)
  self.uri = options[:uri] if options.key?(:uri)
  # The connection receives whatever uri currently returns (nil when no :uri was given).
  @connect = Wiki::Api::Connect.new(uri: uri)
end

Instance Attribute Details

#connectObject (readonly)

Returns the value of attribute connect.


15
16
17
# File 'lib/wiki/api/page.rb', line 15

# Read-only accessor for the connection object created in the constructor.
#
# @return the value of @connect
def connect = @connect

#nameObject

Returns the value of attribute name.


7
8
9
# File 'lib/wiki/api/page.rb', line 7

# Accessor for the page name.
#
# @return the value of @name
def name = @name

#parentObject

Returns the value of attribute parent.


7
8
9
# File 'lib/wiki/api/page.rb', line 7

# Accessor for the parent attribute.
#
# @return the value of @parent
def parent = @parent

#parsed_pageObject

Returns the value of attribute parsed_page.


7
8
9
# File 'lib/wiki/api/page.rb', line 7

# Accessor for the cached, parsed page.
#
# @return the value of @parsed_page (nil until a page has been stored)
def parsed_page = @parsed_page

#uriObject

Returns the value of attribute uri.


7
8
9
# File 'lib/wiki/api/page.rb', line 7

# Accessor for the wiki uri.
#
# @return the value of @uri
def uri = @uri

Instance Method Details

#collect_elements(element) ⇒ Object

collect elements within headlines (not nested properties, but next elements)


80
81
82
83
84
85
86
87
88
89
90
# File 'lib/wiki/api/page.rb', line 80

# Collects the nodes that belong to one headline: the given node plus every
# following node (as reported by #next) up to, but excluding, the first node
# whose HTML contains class="mw-headline". Nested content is not descended
# into; only the chain of #next nodes is walked.
#
# @param element the node to start from; must respond to #next and #to_html,
#   or be nil
# @return [Array] the collected nodes in document order; empty when element
#   is nil
def collect_elements(element)
  elements = []
  # A nil start node (for example a headline with nothing after it) has no
  # content to collect; bail out instead of calling #next on nil.
  return elements if element.nil?

  # The start node is always kept; the headline check only applies to the
  # nodes that follow it.
  loop do
    elements << element
    element = element.next
    break if element.nil? || element.to_html.include?('class="mw-headline"')
  end
  elements
end

#filter_headline(xs, headline_name) ⇒ Object


92
93
94
95
96
97
98
99
# File 'lib/wiki/api/page.rb', line 92

# Keeps only the headline nodes whose id attribute starts with the given
# headline name, compared case-insensitively.
#
# @param xs enumerable of nodes; each must expose attributes['id'].value
# @param headline_name [String] human-readable headline name
# @return [Array] the nodes whose id matches
def filter_headline(xs, headline_name)
  # Wiki ids are the headline text with spaces replaced by underscores.
  wiki_id = headline_name.downcase.tr(' ', '_')
  xs.select { |node| node.attributes['id'].value.downcase.start_with?(wiki_id) }
end

#first_partObject

harvest first part of the page (missing heading and class=“mw-headline”)


74
75
76
77
# File 'lib/wiki/api/page.rb', line 74

# Harvests the first part of the page: the content that has no heading of
# its own and therefore no class="mw-headline" marker.
#
# The page is fetched through the connection on first use and cached in
# parsed_page.
#
# @return the first child of the first <p> element, or nil when the page has
#   no <p> element or that paragraph has no children
def first_part
  self.parsed_page ||= @connect.page(name)
  # Safe navigation: a page without any paragraph yields nil rather than
  # raising NoMethodError on nil.
  parsed_page.search('p').first&.children&.first
end

#load_page!Object


39
40
41
# File 'lib/wiki/api/page.rb', line 39

# Ensures the page has been fetched, reusing the cached copy when present.
#
# @return the cached parsed_page, or the freshly fetched page after storing it
def load_page!
  return parsed_page if parsed_page

  # Nothing cached yet: ask the connection for the page with our name.
  self.parsed_page = @connect.page(name)
end

#parse_blocks(headline_name = nil) ⇒ Object

parse blocks


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/wiki/api/page.rb', line 44

# Parses the page into blocks keyed by headline and wraps them in a root
# PageHeadline.
#
# The result hash maps a key (the page name for the leading, heading-less
# part; the headline span's id for every other block) to a list of element
# lists as returned by collect_elements.
#
# @param headline_name [String, nil] when given, only headlines whose id
#   matches (see filter_headline) are parsed; the leading part is included
#   only if headline_name starts with the page name, ignoring case
# @return [PageHeadline] root object built with level 0 and this page as parent
def parse_blocks(headline_name = nil)
  load_page!
  result = {}

  # get headline nodes by span class
  headlines = parsed_page.xpath("//span[@class='mw-headline']")

  # filter single headline by name (ignore case)
  headlines = filter_headline(headlines, headline_name) unless headline_name.nil?

  # NOTE: first_part has no id attribute and thus cannot be filtered or processed within xpath (xs)
  # Both sides are downcased so this check ignores case, like the headline filter above.
  if headline_name.nil? || headline_name.downcase.start_with?(name.downcase)
    intro = first_part
    (result[name] ||= []) << collect_elements(intro.parent)
  end

  # append all blocks: the content of a headline starts at the node following
  # the element that wraps the headline span
  headlines.each do |headline|
    headline_id = headline.attributes['id'].value
    (result[headline_id] ||= []) << collect_elements(headline.parent.next)
  end

  # create root object, named after the first collected block
  # NOTE(review): when nothing matched headline_name, result is empty and
  # result.first is nil, so this line raises NoMethodError — confirm the
  # desired behavior for "no match" before changing it.
  PageHeadline.new(parent: self, name: result.first[0], headlines: result, level: 0)
end

#reset!Object


35
36
37
# File 'lib/wiki/api/page.rb', line 35

# Drops the cached page so that the next load fetches it again.
#
# @return [nil]
def reset!
  # The cache lives in parsed_page; the previous code assigned a non-existent
  # parse_page attribute and therefore raised NoMethodError.
  self.parsed_page = nil
end

#root_headlineObject

collect all headlines, keep original page formatting


18
19
20
# File 'lib/wiki/api/page.rb', line 18

# Collects all headlines of the page, keeping the original page formatting.
#
# @return the result of parse_blocks called without a headline filter
def root_headline = parse_blocks

#to_htmlObject

# collect headlines by given name, this will flatten the nested headlines
# def flat_headlines_by_name headline_name

raise "not yet implemented!"
# TODO: implement flattening of headlines within the root headline
# ALT:  breadth-first search option in the root of the first headline
self.parse_blocks(headline_name)

end


30
31
32
33
# File 'lib/wiki/api/page.rb', line 30

# Serializes the page through the parsed page's to_xhtml.
#
# The page is loaded first if it has not been fetched yet.
#
# @return the value returned by parsed_page.to_xhtml
def to_html
  load_page!
  # Indent by three units of a single space character.
  xhtml_options = { indent: 3, indent_text: ' ' }
  parsed_page.to_xhtml(**xhtml_options)
end