Class: SimpleRss

Inherits:
Object
  • Object
show all
Defined in:
lib/simple_rss.rb

Constant Summary collapse

VERSION =
"1.2.3"
# Names of the feed-level (channel) tags that #parse searches for.
# #parse iterates this list; every tag it finds is stored in an instance
# variable and exposed through a reader named after the tag with ':'
# replaced by '_' (see #clean_tag). The list can be replaced at runtime
# through the class-level feed_tags= writer.
@@feed_tags =
[
	:id,
	:title, :subtitle, :link,
	:description, 
	:author, :webMaster, :managingEditor, :contributor,
	:pubDate, :lastBuildDate, :updated, :'dc:date',
	:generator, :language, :docs, :cloud,
	:ttl, :skipHours, :skipDays,
	:image, :logo, :icon, :rating,
	:rights, :copyright,
	:textInput, :'feedburner:browserFriendly',
	:'itunes:author', :'itunes:category'
]
# Names of the per-item (item/entry) tags that #parse searches for.
# Two naming conventions are interpreted by #parse:
#   'tag+rel'    - the element <tag> whose attributes include rel="rel"
#   'tag#attrib' - the value of attribute +attrib+ on the element <tag>
# Any other name is looked up as a plain element. The list can be replaced
# at runtime through the class-level item_tags= writer.
@@item_tags =
[
	:id,
	:title, :link, :'link+alternate', :'link+self', :'link+edit', :'link+replies',
	:author, :contributor,
	:description, :summary, :content, :'content:encoded', :comments,
	:pubDate, :published, :updated, :expirationDate, :modified, :'dc:date',
	:category, :guid,
	:'trackback:ping', :'trackback:about',
	:'dc:creator', :'dc:title', :'dc:subject', :'dc:rights', :'dc:publisher',
	:'feedburner:origLink',
	:'media:content#url', :'media:content#type', :'media:content#height', :'media:content#width',
	:'media:title', :'media:thumbnail#url', :'media:thumbnail#height', :'media:thumbnail#width',
	:'media:credit', :'media:credit#role',
	:'media:category', :'media:category#scheme'
]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, options = {}) ⇒ SimpleRss

Returns a new instance of SimpleRss.



43
44
45
46
47
48
49
# File 'lib/simple_rss.rb', line 43

# Builds a parser for +source+ and parses it straight away.
#
# source  - an object responding to #read (the result of #read is used), or
#           any other object, which is converted with #to_s.
# options - Hash of options; its entries are copied into a fresh Hash kept
#           in @options.
def initialize(source, options={})
  @source =
    if source.respond_to?(:read)
      source.read
    else
      source.to_s
    end
  @items = []
  @options = {}.update(options)

  parse
end

Instance Attribute Details

#items ⇒ Object (readonly) Also known as: entries

Returns the value of attribute items.



10
11
12
# File 'lib/simple_rss.rb', line 10

# Reader for @items, the Array that #parse fills with one Hash per item/entry.
def items() @items end

#source ⇒ Object (readonly)

Returns the value of attribute source.



10
11
12
# File 'lib/simple_rss.rb', line 10

# Reader for @source, the raw feed text stored by #initialize.
def source() @source end

Class Method Details

.feed_tags ⇒ Object



55
56
57
# File 'lib/simple_rss.rb', line 55

# Class-level reader: returns the current list of feed-level tag names
# held in @@feed_tags.
def feed_tags() @@feed_tags end

.feed_tags=(ft) ⇒ Object



58
59
60
# File 'lib/simple_rss.rb', line 58

# Class-level writer: replaces the list of feed-level tag names held in
# @@feed_tags with +ft+.
def feed_tags=(ft) @@feed_tags = ft end

.item_tags ⇒ Object



62
63
64
# File 'lib/simple_rss.rb', line 62

# Class-level reader: returns the current list of per-item tag names held
# in @@item_tags.
def item_tags() @@item_tags end

.item_tags=(it) ⇒ Object



65
66
67
# File 'lib/simple_rss.rb', line 65

# Class-level writer: replaces the list of per-item tag names held in
# @@item_tags with +it+.
def item_tags=(it) @@item_tags = it end

.parse(source, options = {}) ⇒ Object

The strict attribute is for compatibility with Ruby’s standard RSS parser



70
71
72
# File 'lib/simple_rss.rb', line 70

# Convenience constructor: builds a new instance from +source+ and
# +options+ and returns it. Both arguments are passed to .new unchanged.
def parse(source, options={})
  new(source, options)
end

Instance Method Details

#channel ⇒ Object Also known as: feed



51
# File 'lib/simple_rss.rb', line 51

# Returns the receiver itself.
def channel
  self
end

#clean_content(tag, attrs, content) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
# File 'lib/simple_rss.rb', line 140

# Converts the raw text captured for +tag+ into its final value.
#
# tag     - the tag name being processed; selects the conversion applied.
# attrs   - the attribute text captured for the element (may be nil).
# content - the text captured between the tags (nil is treated as "").
#
# Date-like tags are run through Time.parse, falling back to the unescaped
# text when that raises. Author-like and skip* tags have anything that
# looks like markup removed before unescaping. For every other tag, empty
# content with an href attribute yields the href value; otherwise the
# content is unescaped.
def clean_content(tag, attrs, content)
  text = content.to_s
  date_tags = [:pubDate, :lastBuildDate, :published, :updated, :expirationDate, :modified, :'dc:date']
  plain_text_tags = [:author, :contributor, :skipHours, :skipDays]

  if date_tags.include?(tag)
    begin
      Time.parse(text)
    rescue StandardError
      unescape(text)
    end
  elsif plain_text_tags.include?(tag)
    unescape(text.gsub(/<.*?>/,''))
  elsif text.empty? && (href = /href=['"]?([^'"]*)['" ]/mi.match("#{attrs} "))
    # A trailing space is appended so an unquoted href at the very end of
    # the attribute text still has a terminator to match against.
    href[1].strip
  else
    unescape(text)
  end
end

#clean_tag(tag) ⇒ Object



152
153
154
# File 'lib/simple_rss.rb', line 152

# Turns a tag name into a Symbol with every ':' replaced by '_'
# (for example :'dc:date' becomes :dc_date).
def clean_tag(tag)
  tag.to_s.tr(':', '_').to_sym
end

#parse ⇒ Object

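Raises:

  • (SimpleRSSError) — "Poorly formatted feed", when the source does not contain a <channel>…</channel> or <feed>…</feed> element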



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/simple_rss.rb', line 76

# Parses @source with regular expressions: feed-level tags become instance
# variables with readers, and every item/entry becomes a Hash appended to
# @items.
#
# Raises SimpleRSSError ("Poorly formatted feed") unless @source contains an
# opening and a closing <channel> or <feed> tag.
def parse
  raise SimpleRSSError, "Poorly formatted feed" unless @source =~ %r{<(channel|feed).*?>.*?</(channel|feed)>}mi
  
	# Feed-level content: everything that precedes the first item/entry
	# element. Stays nil when the source has no item/entry at all.
	feed_content = $1 if @source =~ %r{(.*?)<(rss:|atom:)?(item|entry).*?>.*?</(rss:|atom:)?(item|entry)>}mi
	
	@@feed_tags.each do |tag|
		# The branches evaluate to nil on purpose: the matches are run only for
		# their side effect of setting $2 (attribute text) and $3 (inner text).
		# Order of preference: paired tag, then self-closing tag, first inside
		# feed_content and then anywhere in @source.
		if feed_content && feed_content =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
			nil
		elsif feed_content && feed_content =~ %r{<(rss:|atom:)?#{tag}(.*?)\/\s*>}mi
			nil
		elsif @source =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
			nil
		elsif @source =~ %r{<(rss:|atom:)?#{tag}(.*?)\/\s*>}mi
			nil
		end
		
		# When the last attempted match failed, $2 and $3 are both nil and the
		# tag is skipped. Otherwise the cleaned value is stored on this instance
		# and a reader with the cleaned tag name is defined on the class.
		if $2 || $3
       tag_cleaned = clean_tag(tag)
       instance_variable_set("@#{ tag_cleaned }", clean_content(tag, $2, $3))
       self.class.send(:attr_reader, tag_cleaned)
		end
	end

	# Items/entries: match[3] is the fourth capture group, i.e. the text
	# between the opening and closing item/entry tags.
	@source.scan( %r{<(rss:|atom:)?(item|entry)([\s][^>]*)?>(.*?)</(rss:|atom:)?(item|entry)>}mi ) do |match|
		item = Hash.new
		@@item_tags.each do |tag|
		  if tag.to_s.include?("+")
		    # 'tag+rel': look for <tag> carrying rel="rel". Here $3 is the
		    # attribute text following the rel attribute and $4 the inner text.
		    tag_data = tag.to_s.split("+")
		    tag = tag_data[0]
		    rel = tag_data[1]
		    
 				if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)rel=['"]#{rel}['"](.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
           nil
 				elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)rel=['"]#{rel}['"](.*?)/\s*>}mi
 				  nil
 				end
 				item[clean_tag("#{tag}+#{rel}")] = clean_content(tag, $3, $4) if $3 || $4
 			elsif tag.to_s.include?("#")
		    # 'tag#attrib': capture the value of +attrib+ on <tag>; $3 is that
		    # attribute value. The result is stored under the key tag_attrib.
		    tag_data = tag.to_s.split("#")
		    tag = tag_data[0]
		    attrib = tag_data[1]
 				if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)#{attrib}=['"](.*?)['"](.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
           nil
 				elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)#{attrib}=['"](.*?)['"](.*?)/\s*>}mi
 				  nil
 				end
 				item[clean_tag("#{tag}_#{attrib}")] = clean_content(tag, attrib, $3) if $3
	    else
 				# Plain tag: $2 is the attribute text, $3 the inner text (nil for
 				# the self-closing form).
 				if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
 					nil
 				elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)/\s*>}mi
 					nil
 				end
 				item[clean_tag(tag)] = clean_content(tag, $2, $3) if $2 || $3
			end
		end
		# Singleton method_missing so that otherwise-undefined method calls on
		# the item are answered with self[name] (e.g. item.title -> item[:title]).
		def item.method_missing(name, *args) self[name] end
		@items << item
	end

end

#test ⇒ Object



7
8
9
# File 'lib/simple_rss.rb', line 7

# Returns the fixed string "DSC".
def test() "DSC" end

#unescape(content) ⇒ Object



156
157
158
159
160
161
162
# File 'lib/simple_rss.rb', line 156

# Strips CDATA markers and surrounding whitespace from +content+.
#
# When the text contains a '%' that directly follows a character outside
# the set listed in the pattern below, the whole string is first decoded
# with CGI.unescape; otherwise it is used as is.
def unescape(content)
  needs_decoding = content =~ /([^-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]%)/n
  text = needs_decoding ? CGI.unescape(content) : content
  text.gsub(/(<!\[CDATA\[|\]\]>)/,'').strip
end