Class: Kennel::Models::Monitor

Inherits:
Record show all
Includes:
TagsValidation
Defined in:
lib/kennel/models/monitor.rb

Constant Summary collapse

# Service-check thresholds that .normalize fills in with 1 when they are missing from the actual options.
OPTIONAL_SERVICE_CHECK_THRESHOLDS =
[:ok, :warning].freeze
# The superclass' read-only attributes plus monitor-specific ones.
# NOTE(review): unlike the neighbouring constants this array is not frozen - confirm whether that is intentional.
READONLY_ATTRIBUTES =
superclass::READONLY_ATTRIBUTES + [
  :multi, :matching_downtimes, :overall_state_modified, :overall_state, :restricted_roles, :draft_status, :assets,
  :enable_logs_sample
]
# NOTE(review): presumably the attribute that carries the tracking id - its use is not visible here, confirm against Record.
TRACKING_FIELD =
:message
# Top-level attribute defaults handed to ignore_default in .normalize.
MONITOR_DEFAULTS =
{
  priority: nil
}.freeze
MONITOR_OPTION_DEFAULTS =

Defaults that Datadog uses when options are not sent, so they are safe to leave out if our values match their defaults.

{
  evaluation_delay: nil,
  new_host_delay: 300,
  timeout_h: 0, # also the fallback returned by #timeout_h when no-data notifications are off
  renotify_interval: 0, # #build_json sends 0 when renotify_interval is not set
  notify_audit: false,
  no_data_timeframe: nil, # this works out ok since if notify_no_data is on, it would never be nil
  group_retention_duration: nil,
  groupby_simple_monitor: false,
  variables: nil,
  on_missing_data: nil,
  notification_preset_name: nil,
  notify_by: nil
}.freeze
# Escalation message values that .normalize treats as "not set" and removes from both sides of the diff.
DEFAULT_ESCALATION_MESSAGE =
["", nil].freeze
# NOTE(review): presumably the classes accepted for `priority` - the validation is not visible here, confirm.
ALLOWED_PRIORITY_CLASSES =
[NilClass, Integer].freeze
# NOTE(review): not referenced by the methods shown here; presumably monitor types for which
# notify_no_data handling is skipped - confirm against the validation code.
SKIP_NOTIFY_NO_DATA_TYPES =
["event alert", "event-v2 alert", "log alert"].freeze
# Monitor types for which #configure_no_data reports a configured on_missing_data as invalid.
ON_MISSING_DATA_UNSUPPORTED_TYPES =
["composite", "datadog-usage alert"].freeze
# Number of minutes per time-unit suffix (minute, hour, day, week).
MINUTES_PER_UNIT =
{
  "m" => 1,
  "h" => 60,
  "d" => 60 * 24,
  "w" => 60 * 24 * 7
}.freeze

Constants inherited from Record

Record::ALLOWED_KENNEL_ID_CHARS, Record::ALLOWED_KENNEL_ID_FULL, Record::ALLOWED_KENNEL_ID_REGEX, Record::ALLOWED_KENNEL_ID_SEGMENT, Record::LOCK, Record::MARKER_TEXT, Record::TITLE_FIELDS, Record::TRACKING_FIELDS

Constants included from OptionalValidations

OptionalValidations::UNIGNORABLE, OptionalValidations::UNUSED_IGNORES

Constants inherited from Base

Base::SETTING_OVERRIDABLE_METHODS

Constants included from SettingsAsMethods

SettingsAsMethods::AS_PROCS, SettingsAsMethods::SETTING_OVERRIDABLE_METHODS

Instance Attribute Summary

Attributes inherited from Record

#as_json, #project

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Record

#add_tracking_id, api_resource_map, #build, #diff, #initialize, parse_any_url, parse_tracking_id, #remove_tracking_id, remove_tracking_id, #safe_tracking_id, #tracking_id

Methods included from OptionalValidations

filter_validation_errors, included, #initialize, #invalid!, valid?

Methods inherited from Base

#kennel_id, #name, #to_json

Methods included from SubclassTracking

#abstract_class?, #recursive_subclasses, #subclasses

Methods included from SettingsAsMethods

included, #initialize, #raise_with_location

Constructor Details

This class inherits a constructor from Kennel::Models::Record

Class Method Details

.api_resourceObject



222
223
224
# File 'lib/kennel/models/monitor.rb', line 222

# API resource name used for monitors.
#
# @return [String] always "monitor"
def self.api_resource
  "monitor"
end

.normalize(expected, actual) ⇒ Object



242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/kennel/models/monitor.rb', line 242

# Normalizes `expected` and `actual` in place so that diffing them only shows
# differences that matter: unmanaged options are dropped, values the api leaves
# out are filled in and values equal to the known defaults are ignored.
#
# @param expected [Hash] the attributes we generated; :options is optional
# @param actual [Hash] the attributes to compare against; must contain :options
# @return [Object, nil] the value removed from the :locked option (nil when it was absent)
def self.normalize(expected, actual)
  super

  ignore_default(expected, actual, MONITOR_DEFAULTS)

  monitor_type = actual[:type]
  actual_options = actual.fetch(:options)

  # silenced is not managed by us, so it must never show up in a diff
  actual_options.delete(:silenced)

  # these fields may be missing for service checks and event alerts: fill in the implied values
  if ["service check", "event alert"].include?(monitor_type)
    { include_tags: true, require_full_window: false }.each do |option, implied|
      actual_options[option] = implied unless actual_options.key?(option)
    end
  end

  if monitor_type == "event alert"
    # when nothing was set the api does not return thresholds at all
    actual_options[:thresholds] ||= { critical: 0 }
  elsif monitor_type == "service check"
    # thresholds created with default values via the UI are not returned
    thresholds = actual_options[:thresholds]
    OPTIONAL_SERVICE_CHECK_THRESHOLDS.each { |level| thresholds[level] ||= 1 }
  end

  # nil / "" / 0 are not returned from the api when set via the UI
  actual_options[:evaluation_delay] ||= nil

  expected_options = expected[:options] || {}
  ignore_default(expected_options, actual_options, MONITOR_OPTION_DEFAULTS)

  # an empty escalation message is the same as none: drop it on both sides
  if DEFAULT_ESCALATION_MESSAGE.include?(actual_options[:escalation_message])
    [actual_options, expected_options].each { |opts| opts.delete(:escalation_message) }
  end

  # locked is deprecated, so it is ignored when diffing
  actual_options.delete(:locked)
end

.parse_url(url) ⇒ Object



230
231
232
233
234
235
236
237
238
239
240
# File 'lib/kennel/models/monitor.rb', line 230

# Extracts the numeric monitor id from a monitor url.
#
# Understands monitor show urls (".../monitors/123"), monitor edit urls
# (".../monitors#123/edit") and slo alert urls (".../slo/edit/<slo-id>/alerts/123").
#
# @param url [String] url to inspect
# @return [Integer, nil] the monitor id, or nil when the url is not a monitor url
def self.parse_url(url)
  # datadog uses / for show and # for edit as separator in its links
  id = url[/\/monitors[\/#](\d+)/, 1]

  # slo alert url
  id ||= url[/\/slo\/edit\/[a-z\d]{10,}\/alerts\/(\d+)/, 1]

  return unless id

  # explicit base 10: without it a leading zero makes Integer() parse octal ("010" => 8)
  # or raise ArgumentError ("09")
  Integer(id, 10)
end

.url(id) ⇒ Object



226
227
228
# File 'lib/kennel/models/monitor.rb', line 226

# Builds the url of the edit page for the monitor with the given id.
#
# @param id [Integer, String] monitor id, interpolated into the path as-is
# @return [Object] whatever Utils.path_to_url returns for the edit path
def self.url(id)
  edit_path = "/monitors/#{id}/edit"
  Utils.path_to_url(edit_path)
end

Instance Method Details

#allowed_update_error(actual) ⇒ Object

Ensures the type does not change, except for metric->query, which is supported and used by importer.rb.



197
198
199
200
201
# File 'lib/kennel/models/monitor.rb', line 197

# Checks whether the existing monitor can be updated to our type.
# A type change is an error, except metric alert -> query alert.
#
# @param actual [Hash] the existing monitor, read via its :type key
# @return [String, nil] an error message when the update is not allowed, nil otherwise
def allowed_update_error(actual)
  current = actual[:type]
  desired = type

  return if current == desired
  return if current == "metric alert" && desired == "query alert"

  "cannot update type from #{current} to #{desired}"
end

#build_jsonObject



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/kennel/models/monitor.rb', line 79

# Builds the JSON payload for this monitor: the hash returned by super merged
# with the monitor attributes and an :options hash, followed by adjustments to
# :options that add or remove keys only when needed.
#
# The key order inside :options is deliberate (see the on_missing_data note
# below), so be careful when reordering entries.
#
# @return [Hash] the merged data, with the adjusted options under :options
def build_json
  # either { on_missing_data: } or { notify_no_data:, no_data_timeframe: }, see #configure_no_data
  no_data_options = configure_no_data

  data = super.merge(
    name: "#{name}#{LOCK}",
    type: type,
    query: query.strip,
    message: message.strip,
    tags: tags,
    priority: priority,
    options: {
      timeout_h: timeout_h,
      **no_data_options.except(:on_missing_data),
      notify_audit: notify_audit,
      require_full_window: require_full_window,
      new_host_delay: new_host_delay,
      new_group_delay: new_group_delay,
      include_tags: true,
      escalation_message: Utils.presence(escalation_message.strip),
      evaluation_delay: evaluation_delay,
      renotify_interval: renotify_interval || 0, # an unset interval is sent as 0
      variables: variables,
      **configure_thresholds,
      **no_data_options.slice(:on_missing_data) # moved here to avoid generated diff
    }
  )

  # same hash object as data[:options], so the changes below end up in data
  options = data[:options]

  # set without causing lots of nulls to be stored
  if (notify_by_value = notify_by)
    options[:notify_by] = notify_by_value
  end

  # setting this via the api breaks the UI with
  # "The no_data_timeframe option is not allowed for log alert monitors"
  if data.fetch(:type) == "log alert"
    options.delete :no_data_timeframe
  end

  # only added when set
  if (windows = threshold_windows)
    options[:threshold_windows] = windows
  end

  # only added when set
  if (schedule = scheduling_options)
    options[:scheduling_options] = schedule
  end

  # Datadog requires only either new_group_delay or new_host_delay, never both
  # -> keep new_group_delay when it is set, otherwise keep new_host_delay
  options.delete(options[:new_group_delay] ? :new_host_delay : :new_group_delay)

  # only added when set
  if (duration = group_retention_duration)
    options[:group_retention_duration] = duration
  end

  # Add in statuses where we would re notify on. Possible values: alert, no data, warn
  if options[:renotify_interval] != 0
    statuses = ["alert"]
    statuses << "no data" if options[:notify_no_data] || options[:on_missing_data] == "show_and_notify_no_data"
    statuses << "warn" if options.dig(:thresholds, :warning)
    options[:renotify_statuses] = statuses
  end

  # only set when needed to avoid big diff
  # (the explicit () calls the method instead of reading the local that is being assigned)
  if (notification_preset_name = notification_preset_name())
    options[:notification_preset_name] = notification_preset_name
  end

  data
end

#configure_no_dataObject

TODO: migrate everything to only use on_missing_data by only sending notify_no_data when it was set by a user and enforce that it is not set at the same time as on_missing_data



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/kennel/models/monitor.rb', line 152

# Builds the no-data related monitor options and reports invalid combinations
# of the no-data settings via invalid!.
#
# on_missing_data defaults to "default" for "event-v2 alert" monitors.
#
# TODO: migrate everything to only use on_missing_data by only sending notify_no_data
# when it was set by a user and enforce that it is not set at the same time as on_missing_data
#
# @return [Hash] a hash with only :on_missing_data when an action is in effect,
#   otherwise a hash with :notify_no_data and :no_data_timeframe
def configure_no_data
  notify = notify_no_data
  timeframe = no_data_timeframe
  action = on_missing_data
  action ||= "default" if type == "event-v2 alert"

  # legacy path: on_missing_data cannot be used with notify_no_data + no_data_timeframe
  unless action
    return {
      notify_no_data: notify,
      # the timeframe is only sent when notifying, falling back to the computed default
      no_data_timeframe: notify ? (timeframe || default_no_data_timeframe) : nil
    }
  end

  # TODO: mark setting action && !notify.nil? at all as invalid
  if ON_MISSING_DATA_UNSUPPORTED_TYPES.include?(type)
    invalid! :invalid_no_data_config, "cannot use on_missing_data with #{type} monitor"
  end
  if timeframe
    invalid! :invalid_no_data_config, "set either no_data_timeframe or on_missing_data"
  end
  if action != "default" && type == "query alert" && query.to_s.include?("default_zero(") # is allowed for log alert for example
    invalid! :invalid_no_data_config, "set on_missing_data to `default` when using default_zero"
  end
  if action == "resolve" && timeout_h.to_i != 0
    invalid! :invalid_no_data_config, "timeout_h cannot be set and non-zero when on_missing_data is `resolve`"
  end

  { on_missing_data: action }
end

#default_no_data_timeframeObject

Deprecated: this setting is no longer returned by Datadog for new monitors. The Datadog UI warns when the no-data timeframe is set to less than 2x the query window. The value is limited to 24h because `no_data_timeframe must not exceed group retention` and the max group retention is 24h.



206
207
208
209
210
211
212
213
# File 'lib/kennel/models/monitor.rb', line 206

# Default no-data timeframe in minutes: twice the query window for query
# alerts with a known window, kept between 60 minutes and 24 hours;
# 60 minutes for everything else.
#
# @return [Integer] timeframe in minutes
def default_no_data_timeframe
  fallback = 60
  return fallback unless type == "query alert"

  window = query_window_minutes
  return fallback unless window

  (window * 2).clamp(fallback, 24 * 60)
end

#resolve_linked_tracking_ids!(id_map, **args) ⇒ Object



185
186
187
188
189
190
191
192
193
194
# File 'lib/kennel/models/monitor.rb', line 185

# Replaces %{...} placeholders in the query of "composite" and "slo alert"
# monitors with whatever `resolve` returns for them; placeholders for which
# `resolve` returns nil/false stay as they are. Other monitor types are left alone.
#
# @param id_map [Object] passed through to `resolve`
# @param args [Hash] extra keyword arguments passed through to `resolve`
# @return [String, nil] the rewritten query, or nil for monitor types without linked ids
def resolve_linked_tracking_ids!(id_map, **args)
  json = as_json

  # composite monitors link to monitors, slo alerts link to slos
  linked_resource = { "composite" => :monitor, "slo alert" => :slo }[json[:type]]
  return unless linked_resource

  json[:query] = json[:query].gsub(/%{(.*?)}/) do |placeholder|
    resolve(Regexp.last_match(1), linked_resource, id_map, **args) || placeholder
  end
end

#timeout_hObject

validate that monitors that alert on no data resolve in external services by using timeout_h, so it sends a notification when the no data group is removed from the monitor, which datadog does automatically after 24h



217
218
219
220
# File 'lib/kennel/models/monitor.rb', line 217

# Hours after which the monitor resolves automatically: 24 when no-data
# notifications are sent, otherwise the :timeout_h entry of MONITOR_OPTION_DEFAULTS.
#
# No-data notifications count as sent when on_missing_data is
# "show_and_notify_no_data", or, when on_missing_data is not set, when
# notify_no_data is truthy.
#
# @return [Integer] timeout in hours
def timeout_h
  action = on_missing_data
  notifies_on_no_data = action ? action == "show_and_notify_no_data" : notify_no_data
  return 24 if notifies_on_no_data

  MONITOR_OPTION_DEFAULTS.fetch(:timeout_h)
end