Class: LogStash::Inputs::GoogleAnalyticsDaily

Inherits:
Base
  • Object
show all
Defined in:
lib/logstash/inputs/google_analytics_daily.rb

Overview

Pull daily reports from Google Analytics using the v3 Core Reporting API. This plugin generates one Logstash event per date, with each event containing all the data for that date. The plugin will try to maintain a single event per date and list of metrics.

Instance Method Summary collapse

Instance Method Details

#registerObject



91
92
# File 'lib/logstash/inputs/google_analytics_daily.rb', line 91

# Standard Logstash input-plugin hook, called once at pipeline startup.
# No one-time setup is needed here; all per-iteration state (service
# client, date range) is built inside #run.
def register
end

#run(queue) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'lib/logstash/inputs/google_analytics_daily.rb', line 94

# Main plugin loop: for each date in the configured range, fetch a daily
# report from the Google Analytics v3 Core Reporting API and push one
# Logstash event per metric per result row onto the pipeline queue.
# Repeats every @interval seconds, or runs once when no interval is set.
#
# @param queue [Queue] the Logstash pipeline queue events are pushed onto
def run(queue)
  # We abort the loop as soon as stop? becomes true.
  until stop?
    plugin_start_time = Time.now

    analytics = get_service

    # Computed here, not in the config at the top, because the default
    # (yesterday) must be recomputed on each new loop iteration (new day).
    start_date = @start_date || (Date.today - 1).to_s
    end_date = @end_date || (Date.today - 1).to_s

    @dates = (Date.parse(start_date)..Date.parse(end_date))

    @dates.each do |date|
      date = date.to_s
      options = get_request_parameters(date)

      results = analytics.get_ga_data(
          options[:view_id],
          options[:start_date],
          options[:end_date],
          options[:metrics],
          dimensions: options[:dimensions],
          filters: options[:filters],
          include_empty_rows: options[:include_empty_rows],
          sampling_level: options[:sampling_level],
          segment: options[:segment],
          sort: options[:sort],
      )

      column_headers = results.column_headers.map(&:name)

      rows = []

      if results.rows && results.rows.first

        # Example with dimensions, multiple metrics:
        # rows: [[Chrome, Cape Town, 6, 8], [Chrome, Paris, 1, 5], [Safari, Paris, 1, 3]], column_headers: ['ga:browser', 'ga:city', 'ga:user', 'ga:sessions']
        # Example with dimension, single metric:
        # rows: [[Chrome, 6]], column_headers: ['ga:browser', 'ga:user']
        # Example with no dimension, single metric:
        # rows: [[6]], column_headers: ['ga:user']
        # Dimensions always appear before values
        results.rows.each do |row|
          dimensions = []
          metrics = []

          # Combine GA column headers with the values from this row and
          # split the pairs into metric entries vs. dimension entries.
          column_headers.zip(row) do |header, value|
            if is_num(value)
              float_value = Float(value)
              # Sometimes GA returns infinity. If so, the number is invalid,
              # so set it to zero.
              value = (float_value == Float::INFINITY) ? 0.0 : float_value
            end

            entry = {
                name: header,
                value: value
            }
            if @metrics.include?(header)
              metrics << entry
            else
              dimensions << entry
            end
          end

          rows << {metrics: metrics, dimensions: dimensions}
        end

        query = results.query.to_h
        profile_info = results.profile_info.to_h

        # Transform into the proper format: one event per metric.
        @metrics.each do |metric|
          # map builds a fresh array of fresh hashes, so no defensive
          # clone of `rows` is needed.
          rows_for_this_metric = rows.map do |row|
            {
              metric: row[:metrics].find { |m| m[:name] == metric },
              dimensions: row[:dimensions]
            }
          end

          rows_for_this_metric.each do |row|
            event = LogStash::Event.new
            decorate(event)
            # Populate Logstash event fields
            event.set('ga.contains_sampled_data', results.contains_sampled_data?)
            event.set('ga.query', query.to_json) if @store_query
            event.set('ga.profile_info', profile_info) if @store_profile
            event.set('ga.date', date)

            event.set("ga.metric.name", metric)
            event.set("ga.metric.value", row[:metric][:value])

            # Remap dimensions into key: value
            # Might lead to "mapping explosion", but otherwise aggregations are tough
            row[:dimensions].each do |d|
              dimension_name = d[:name].sub("ga:", '')
              event.set("ga.dimensions.#{dimension_name}", d[:value])
            end

            queue << event
          end
        end
      end
    end

    # If no interval was set, we're done after a single pass.
    if @interval.nil?
      break
    else
      # Otherwise we sleep till the next run
      time_lapsed = Time.now - plugin_start_time
      # Sleep for the remainder of the interval, or 0 if the duration ran
      # longer than the interval.
      time_to_sleep_for = [0, @interval - time_lapsed].max
      if time_to_sleep_for == 0
        @logger.warn(
            "Execution ran longer than the interval. Skipping sleep.",
            :duration => time_lapsed,
            :interval => @interval
        )
      else
        @logger.info(
            "Sleeping for #{@interval} seconds"
        )
        # Stoppable sleep so plugin shutdown is not delayed by the interval.
        Stud.stoppable_sleep(time_to_sleep_for) { stop? }
      end
    end
  end # loop
end