Class: Wikiscript::TableReader

Inherits:
Object
  • Object
show all
Defined in:
lib/wikiscript/table_reader.rb

Class Method Summary collapse

Class Method Details

.parse(txt) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/wikiscript/table_reader.rb', line 27

## Parse text in wiki table markup and return all tables found.
##
##  txt - text with zero or more tables in wiki table markup
##         see https://en.wikipedia.org/wiki/Help:Basic_table_markup
##
##  Returns an array of tables; every table is an array of rows and
##    every row is an array of column (cell) strings
##    (with leading attributes and emphases stripped).
##
##  note: lines starting with # and empty lines get skipped;
##          a line reading __END__ stops the parsing
##  note: any other line outside (before or after) a table
##          prints an error and exits (with status 1)
##  note: a table never closed with |} is NOT added to the result
##  note: caption lines (|+) get no special handling, that is,
##          get read like table data
def self.parse( txt )
  tables = []     ## todo/check: allow multiple tables? why? why not?

  rows   = nil    ## rows of current table  ## note: assume first row is the headers row!!
  row    = nil    ## current row  ## note: same as rows[-1]

  inside_table = false

  txt.each_line do |line|
    line = line.strip

    break if line == '__END__'

    ## note:  allow/add comments
    ##   note: CANNOT allow inline (end-of-line) comments
    ##     would strip/break css colors eg.  bgcolor=#ffff44
    next if line.start_with?( '#' )   ## skip comments too
    next if line.empty?               ## skip empty lines for now

    if line.start_with?( '{|' )     ## start table
      inside_table = true
      rows = []
      row  = nil    ## note: never carry over the row of a table (not closed) before
    elsif inside_table && line.start_with?( '|}' )  ## end table
      tables << rows
      rows   = nil
      row    = nil
      inside_table = false
    elsif inside_table && line.start_with?( '|-' )  ## row divider
      row = []
      rows << row
    elsif inside_table && line.start_with?( '!', '|' )   ## header column (!) or table data (|)
      separator = line.start_with?( '!' ) ? '!!' : '||'

      ## cut-off leading marker (! or |)
      ##   note: limit -1 keeps trailing empty columns e.g. | a || b ||
      values = line[1..-1].strip.split( separator, -1 )

      ## note: |-  row divider is optional before the first row
      ##          (header columns or table data)
      if rows.empty?
        row = []
        rows << row
      end

      if values.empty?
        ## note: support continuing column text in next line
        ##   note: String.new (without args) is ASCII-8BIT encoded;
        ##           use a copy of the (utf-8) literal instead
        row << ''.dup
      else
        ## add each value one-by-one for now (to keep (same) row reference)
        ##   note: also strip leading (optional) attributes
        values.each do |value|
          row << strip_emphases( strip_attributes( value.strip ))
        end
      end
    elsif inside_table
      ## note: support continuing column text in next line
      ##  todo/check: for now doesn't support multi-line just simple continuing line - fix later if needed!!!
      ##  note: expects the current row to have a column already;
      ##          raises NoMethodError otherwise
      row[-1] << line
    else
      puts "!! ERROR !! unknown line type outside (before or after) table:"
      puts line
      exit 1
    end
  end
  tables
end

.parse_table(txt) ⇒ Object

Parses the text and returns its single table; exits with an error if the text contains no table or more than one table.



13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/wikiscript/table_reader.rb', line 13

## Parse text expected to hold exactly one table.
##
##  txt - text in wiki table markup (passed along to parse)
##
##  Returns the one and only table found.
##
##  note: prints an error and exits (with status 1)
##          if no table or more than one table gets found
def self.parse_table( txt )
  found = parse( txt )
  count = found.size

  if count.zero?
    puts "** !!! ERROR !!! no table found in text"
    exit 1
  end

  if count > 1
    puts "** !!! ERROR !!! too many tables (#{count}) found in text; only one expected/allowed; sorry"
    exit 1
  end

  found.first    ## everything ok; pass-along the first (and only) table
end

.read(path) ⇒ Object

Reads the file at path (as UTF-8 text) and parses it; todo/check: rename to read_file or from_file etc. - why? why not?



7
8
9
10
# File 'lib/wikiscript/table_reader.rb', line 7

## Read the file at path (as utf-8 text) and parse all tables.
##
##  path - path of the file to read
##
##  Returns the result of parse for the text read.
##
##  todo/check: rename to read_file or from_file etc. - why? why not?
def self.read( path )
  ## note: use the block form of open - closes the file right after reading
  ##         (without a block the file handle stays open until garbage collected)
  txt = File.open( path, 'r:utf-8' ) { |f| f.read }
  parse( txt )
end

.strip_attributes(value) ⇒ Object

helper



104
105
106
107
108
109
110
# File 'lib/wikiscript/table_reader.rb', line 104

## helper: strip leading (optional) attributes from a column (cell) value
##
##  value - column text e.g. align=left | Text
##
##  Returns the value with everything up to (and including) the first
##    pipe (|) plus any spaces following the pipe removed
##    if (a line of) the value starts with attribute=,
##    that is, lowercase letters followed by an equal sign;
##    otherwise returns the value as-is.
def self.strip_attributes( value )
  ## no leading 'attribute=' - return as-is (pass-through)
  return value unless value =~ /^[a-z]+=/

  ## strip everything incl. pipe (|) and trailing spaces
  value.sub( /[^|]+\|[ ]*/ , '' )
end

.strip_emphases(value) ⇒ Object

strip bold or emphasis; note: emphases plural of emphasis



112
113
114
115
# File 'lib/wikiscript/table_reader.rb', line 112

## helper: strip bold or emphasis markers; note: emphases plural of emphasis
##
##  value - column text e.g. '''Text''' or ''Text''
##
##  Returns a copy of the value with all runs of two or more
##    single quotes removed and leading and trailing whitespace stripped.
def self.strip_emphases( value )
  ## remove two or more quotes e.g. '' or ''' etc.
  without_quotes = value.gsub( /'{2,}/, '' )
  without_quotes.strip
end