Class: LogStash::Inputs::Multirss

Inherits:
Base
  • Object
show all
Defined in:
lib/logstash/inputs/multirss.rb

Overview

To debug the plugin, uncomment the `puts` statements in the source, then build the gem with:

ruby -S gem build logstash-input-multirss.gemspec

Then install the gem in a Logstash service or container with:

logstash-plugin install logstash-input-multirss-x.x.x.gem

Instance Method Summary collapse

Instance Method Details

#include_keywords(key) ⇒ Object



201
202
203
204
205
206
207
208
# File 'lib/logstash/inputs/multirss.rb', line 201

# Tells whether the given text mentions at least one configured keyword.
#
# @param key [String] text to search (callers pass an element's inner HTML)
# @return [Boolean] true when any entry of @keywords is a substring of +key+;
#   false when none is, or when @keywords is empty
def include_keywords(key)
  @keywords.any? { |keyword| key.include?(keyword) }
end

#link_rss_response(queue, item) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/logstash/inputs/multirss.rb', line 142

# Turns one RSS <item> node into a single Logstash event and enqueues it.
#
# Every child element of the item contributes one field (element name =>
# element content, with a CDATA wrapper removed). When @keywords is not
# empty, the item is dropped unless the content of at least one child
# element contains one of the keywords.
#
# @param queue [#<<] destination the finished event is pushed onto
# @param item [#element_children] item node; each child must respond to
#   +name+ and +inner_html+
# @return [Object, nil] the result of +queue << event+, or nil when the item
#   was filtered out by the keyword check
def link_rss_response(queue, item)
  children = item.element_children

  # Keyword filter: stop scanning at the first child that matches.
  wanted = @keywords.empty? ||
           children.any? { |child| include_keywords(child.inner_html.to_s) }
  return unless wanted

  event = LogStash::Event.new
  children.each do |child|
    field = LogStash::Event.new(child.name => unwrap_cdata(child.inner_html.to_s))
    event.append(field)
  end

  decorate(event)
  queue << event
end

# Removes a leading "<![CDATA[" marker and, only if actually present, the
# closing "]]>" marker. Text without the leading marker is returned as is.
#
# @param text [String] serialized element content
# @return [String] the content without its CDATA wrapper
def unwrap_cdata(text)
  return text unless text.start_with?("<![CDATA[")

  inner = text[9..-1] # 9 == "<![CDATA[".length
  # Only cut the terminator when it is really there, so trailing content
  # after the CDATA section is never truncated.
  inner.end_with?("]]>") ? inner[0...-3] : inner
end

#manage_tempdir ⇒ Object



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/logstash/inputs/multirss.rb', line 211

# Prepares a fresh, empty scratch directory at "<home>/.tmp" and points the
# TMPDIR environment variable at it.
#
# Any directory already present at that path is deleted first, together with
# its contents. The path is remembered in @d.
#
# @return [String] the directory path that was assigned to ENV["TMPDIR"]
def manage_tempdir
  @d = "#{Dir.home}/.tmp"

  # Start from a clean slate when a previous run left the directory behind.
  if File.directory?(@d)
    ENV.delete("TMPDIR")
    FileUtils.rm_rf(@d)
  end

  Dir.mkdir(@d)
  ENV["TMPDIR"] = @d
end

#not_include_blacklist(link) ⇒ Object



191
192
193
194
195
196
197
198
# File 'lib/logstash/inputs/multirss.rb', line 191

# Checks a link against the configured blacklist.
#
# @param link [#href] link object whose +href+ string is inspected
# @return [Boolean] false when the href contains any entry of @blacklist;
#   true otherwise (including when @blacklist is empty)
def not_include_blacklist(link)
  @blacklist.none? { |blocked| link.href.include?(blocked) }
end

#register ⇒ Object

initialize



39
40
41
42
43
# File 'lib/logstash/inputs/multirss.rb', line 39

# Plugin initialisation: creates the Mechanize agent stored in @agent
# (the one #run uses to fetch parent pages).
#
# NOTE(review): certificate verification is switched off for this agent
# (VERIFY_NONE), so HTTPS peers are not authenticated. Confirm this is
# intended before relying on the fetched content.
def register
  mechanize = Mechanize.new
  @agent = mechanize
  mechanize.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
end

#response_link(link, queue) ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/logstash/inputs/multirss.rb', line 116

# Downloads one feed, parses it as XML and hands every <item> element to
# #link_rss_response.
#
# When anything in that process raises, the link is repaired and the whole
# attempt is repeated:
#   * "//host/path" becomes "http://host/path"
#   * "/host/path"  becomes "http://host/path"
#   * "http..."     becomes "https..." (bounded by a small attempt budget)
# If no repair applies any more, the last error is logged and the method
# returns normally; it does not raise StandardError to its caller.
#
# @param link [String] feed URL, possibly scheme-less as found in a page
# @param queue [#<<] destination passed through to #link_rss_response
def response_link(link, queue)
  https_attempts = 2 # budget for the http -> https fallback
  begin
    # URI#open (open-uri) instead of Kernel#open: Kernel#open would read a
    # local file for a path-like link and spawn a process for a link that
    # starts with "|". The block form returns the body as a string.
    page = Nokogiri::XML(URI.parse(link).open(&:read))
    page.search('item').each do |item|
      link_rss_response(queue, item)
    end
  rescue => ex
    if link.start_with?("//")
      link = "http:" + link
      retry
    elsif link.start_with?("/")
      link = "http:/" + link
      retry
    end

    if link.start_with?("http") && !link.start_with?("https")
      link = link.sub('http', 'https')
      https_attempts -= 1
      retry if https_attempts > 0
    end

    # Every fallback is exhausted: report the failure instead of dropping it.
    # (The original had this in a second rescue clause that could never run.)
    @logger.error("Uknown error while parsing the feed", :exception => ex)
  end
end

#run(queue) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/logstash/inputs/multirss.rb', line 46

# Main polling loop of the input. Repeats until `stop?` becomes true:
#
#   1. recreate the scratch directory (#manage_tempdir),
#   2. for every parent page in @multi_feed, collect the links that look like
#      feeds (href ends in "xml" or contains "/rss" or "/feed") and are not
#      blacklisted, then read each distinct one with #response_link,
#   3. read each distinct URL in @one_feed with #response_link,
#   4. delete the scratch directory and unset TMPDIR,
#   5. sleep @interval seconds (interruptible through `stop?`).
#
# Failures on a single page or feed are contained so the remaining sources
# are still processed.
#
# @param queue [#<<] destination for the generated events
def run(queue)
  until stop?
    manage_tempdir

    @multi_feed.each do |rss|
      child_urls = []
      begin
        @agent.get(rss).links.each do |link|
          href = link.href
          # A link without an href used to raise here and discard every
          # remaining link of the page; skip just that one instead.
          next if href.nil?

          feed_like = href.end_with?("xml") || href.include?('/rss') || href.include?('/feed')
          child_urls << href if feed_like && not_include_blacklist(link)
        end
      rescue => ex
        # Best effort: links gathered before the failure are still read below.
        @logger.warn("Fail to get #{rss} childrens links", :exception => ex)
      end

      child_urls.uniq.each do |link|
        begin
          response_link(link, queue)
        rescue
          next
        end
      end
    end

    @one_feed.uniq.each do |link|
      begin
        response_link(link, queue)
      rescue
        next
      end
    end

    # Remove the temp files created during this cycle.
    if File.directory?(@d)
      ENV.delete("TMPDIR")
      FileUtils.rm_rf(@d)
    end

    Stud.stoppable_sleep(@interval) { stop? }
  end
end

#stop ⇒ Object



113
114
# File 'lib/logstash/inputs/multirss.rb', line 113

# Intentionally empty: this method performs no work of its own.
# The loop in #run ends by polling `stop?`, both at the top of every cycle
# and while sleeping between cycles.
# NOTE(review): presumably invoked by the framework on shutdown — confirm.
def stop
end