Class: Feedtxt::Parser

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/feedtxt/parser.rb

Constant Summary collapse

FEED_BEGIN_RX =
%r{^\|>>>$}
FEED_END_RX =
%r{^<<<\|$}
FEED_NEXT_RX =

pass 1: split/break up blocks

%r{^</>$}
FEED_META_RX =

pass 2: break up item into metadata and content block

%r{^---$}

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ Parser

Note: let's keep/use the same API as RSS::Parser for now.



17
18
19
# File 'lib/feedtxt/parser.rb', line 17

# Remembers the raw feed text for a later call to #parse.
# Nothing is parsed or validated at construction time.
def initialize(text)
  @text = text
end

Class Method Details

.parse(text, opts = {}) ⇒ Object

convenience class/factory method



12
13
14
# File 'lib/feedtxt/parser.rb', line 12

# Convenience factory: builds a parser for +text+ and returns the result
# of calling #parse on it.
# +opts+ is accepted but is not read by this method.
def self.parse(text, opts = {})
  parser = new(text)
  parser.parse
end

Instance Method Details

#parse ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/feedtxt/parser.rb', line 29

# Parses the feed text handed to the constructor.
#
# The text between the begin marker (FEED_BEGIN_RX, e.g. |>>>) and the end
# marker (FEED_END_RX, e.g. <<<|) is split into blocks on FEED_NEXT_RX
# (e.g. </>). The first block is loaded as YAML feed metadata. Every
# following block is an item: it is split at its FIRST FEED_META_RX line
# (e.g. ---) into YAML metadata and free-form content.
#
# Returns [] (after printing a warning) when the begin or end marker is
# missing; otherwise returns [feed_metadata, feed_items], where feed_items
# is an array of [item_metadata, item_content] pairs.
#
# An empty metadata block yields {} and an item without a --- separator is
# treated as metadata only, with '' as its content.
#
# NOTE(review): what YAML.load may instantiate depends on the installed
#   Psych version - confirm it is acceptable for untrusted feeds.
def parse

  ## find start marker e.g. |>>>
  ##    todo: use regex - allow three or more >>>>>> or <<<<<<
  ##    todo: allow spaces before and after

  ## note: use the match offsets instead of hard-coded marker widths,
  ##   so the marker regexes may change length later
  begin_match = @text.match( FEED_BEGIN_RX )
  if begin_match.nil?
    ## nothing found return empty array for now; return nil - why? why not?
    puts "warn !!! no begin marker found e.g. |>>>"
    return []
  end

  end_match = @text.match( FEED_END_RX, begin_match.begin(0) )
  if end_match.nil?
    ## nothing found return empty array for now; return nil - why? why not?
    puts "warn !!! no end marker found e.g. <<<|"
    return []
  end

  ## cutoff - get text between begin and end marker
  buf = @text[ begin_match.end(0)...end_match.begin(0) ].strip

  ####
  ## pass 1: split blocks by </>
  ###    todo: allow   <<<</>>>>
  blocks = buf.split( FEED_NEXT_RX )

  ## 1st block is feed meta data
  ##   note: blocks is empty if there is nothing between the markers;
  ##         to_s guards against calling strip on nil
  block1st = blocks.shift.to_s     ## get/remove 1st block from blocks
  ## YAML.load returns false for an empty document; fall back to {}
  feed_metadata = YAML.load( block1st.strip ) || {}

  ####
  ## pass 2: break up every item into metadata and content block
  feed_items = blocks.map do |block|
    ###   note: do NOT use split e.g.--- is used by markdown
    ##      only search for first --- to split (all others get ignored)
    meta_match = block.match( FEED_META_RX )

    if meta_match
      item_meta_text = meta_match.pre_match
      item_content   = meta_match.post_match.strip
    else
      ## no --- found: the whole block is metadata; no content
      item_meta_text = block
      item_content   = ''
    end

    item_metadata = YAML.load( item_meta_text.strip ) || {}

    [item_metadata, item_content]
  end

  [ feed_metadata, feed_items ]
end