Class: Evoc::RuleStore

Inherits:
Object
  • Object
show all
Includes:
Enumerable, Logging
Defined in:
lib/evoc/rule_store.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Logging

configure_logger_for, #logger, logger_for, set_level

Constructor Details

#initialize(rules = [], query: nil, aggregator: nil) ⇒ RuleStore



6
7
8
9
10
# File 'lib/evoc/rule_store.rb', line 6

# Creates a rule store around an initial collection of rules.
#
# @param rules [Array] the rules to start with (defaults to a new empty array)
# @param query [Object, nil] the query these rules belong to, if any
# @param aggregator [Object, nil] the aggregator associated with these rules, if any
def initialize(rules = [], query: nil, aggregator: nil)
  # The setters are invoked left to right: rules, then query, then aggregator.
  self.rules, self.query, self.aggregator = rules, query, aggregator
end

Instance Attribute Details

#aggregator ⇒ Object

Returns the value of attribute aggregator.



4
5
6
# File 'lib/evoc/rule_store.rb', line 4

# Reader for the aggregator attribute.
#
# @return [Object, nil] the current value of the @aggregator instance variable
def aggregator
  @aggregator
end

#query ⇒ Object

Returns the value of attribute query.



4
5
6
# File 'lib/evoc/rule_store.rb', line 4

# Reader for the query attribute.
#
# @return [Object, nil] the current value of the @query instance variable
def query
  @query
end

#rules ⇒ Object

Returns the value of attribute rules.



4
5
6
# File 'lib/evoc/rule_store.rb', line 4

# Reader for the rules attribute.
#
# @return [Object] the current value of the @rules instance variable
def rules
  @rules
end

Class Method Details

.parse_file(path_to_rules) ⇒ Object

CLASS METHODS



16
17
18
19
20
21
22
23
24
# File 'lib/evoc/rule_store.rb', line 16

# Builds a rule store from a CSV file that has a header row.
#
# Each data row becomes one Evoc::Rule: the header names are turned into
# symbol keys and every column except :lhs and :rhs is run through
# convert_values with Evoc::InterestingnessMeasures::VALUE_TYPE.
#
# @param path_to_rules [String] path of the CSV file to read
# @return [Evoc::RuleStore] a new store holding one rule per CSV row
def self.parse_file(path_to_rules)
  store = Evoc::RuleStore.new
  CSV.foreach(path_to_rules, headers: true) do |csv_row|
    attributes = csv_row.to_h.symbolize_keys
    converted = attributes.convert_values(
      except: [:lhs, :rhs],
      converter: Evoc::InterestingnessMeasures::VALUE_TYPE
    )
    store << Evoc::Rule.new(converted)
  end
  store
end

.parse_string(string) ⇒ Object



26
27
28
29
30
31
32
33
34
# File 'lib/evoc/rule_store.rb', line 26

# Builds a rule store from CSV text that has a header row.
#
# Each data row becomes one Evoc::Rule: the header names are turned into
# symbol keys and every column except :lhs and :rhs is run through
# convert_values with Evoc::InterestingnessMeasures::VALUE_TYPE.
#
# @param string [String] the CSV document to parse
# @return [Evoc::RuleStore] a new store holding one rule per CSV row
def self.parse_string(string)
  store = Evoc::RuleStore.new
  CSV.parse(string, headers: true) do |csv_row|
    attributes = csv_row.to_h.symbolize_keys
    converted = attributes.convert_values(
      except: [:lhs, :rhs],
      converter: Evoc::InterestingnessMeasures::VALUE_TYPE
    )
    store << Evoc::Rule.new(converted)
  end
  store
end

.sort_on(rules:, measures:) ⇒ Object



197
198
199
# File 'lib/evoc/rule_store.rb', line 197

# Sorts the given rules on one or more measures, strongest first.
#
# The sort key of a rule is one entry per measure: the negated measure when
# its value is present, or Float::INFINITY when the value is nil, so rules
# lacking a value for a measure are ordered after those that have one.
#
# @param rules [#sort_by] the rules to sort
# @param measures [Array] the measure names, in order of precedence
# @return [Array] the rules in sorted order
def self.sort_on(rules:, measures:)
  rules.sort_by do |rule|
    measures.map do |measure_name|
      if rule.get_measure(measure_name).value.nil?
        Float::INFINITY
      else
        -rule.get_measure(measure_name)
      end
    end
  end
end

Instance Method Details

#<<(rule) ⇒ Object



250
251
252
# File 'lib/evoc/rule_store.rb', line 250

# Appends a rule to the store's rule collection.
#
# @param rule [Object] the rule to add
# @return [Object] whatever the rules collection's own `<<` returns
def <<(rule)
  self.rules << rule
end

#==(other) ⇒ Object



329
330
331
332
333
# File 'lib/evoc/rule_store.rb', line 329

# Content-based equality between two rule stores.
#
# Two stores are equal when, after ordering their rules by name, they yield
# the same sequence of signatures, where a signature is the rule name followed
# by the values of all measures instantiated on that rule.
#
# @param other [Object] the object to compare with
# @return [Boolean] true when both stores carry the same rules and measure
#   values; false otherwise, including when +other+ exposes no +rules+
def ==(other)
  # Comparing against nil or an unrelated object should answer false rather
  # than fail with NoMethodError on `other.rules`.
  return false unless other.respond_to?(:rules)

  signatures = lambda do |rule_list|
    rule_list.sort_by { |r| r.name }.map do |r|
      "#{r.name}#{r.instantiated_measures.map { |m| r.get_measure(m).value }}"
    end
  end
  signatures.call(self.rules) == signatures.call(other.rules)
end

#[](index) ⇒ Object



51
52
53
# File 'lib/evoc/rule_store.rb', line 51

# Index access, delegated to the underlying rules collection.
#
# @param index [Object] the index handed to the collection's `[]`
# @return [Object] whatever the rules collection returns for that index
def [](index)
  self.rules[index]
end

#[]=(index, value) ⇒ Object



55
56
57
# File 'lib/evoc/rule_store.rb', line 55

# Index assignment, delegated to the underlying rules collection.
#
# @param index [Object] the index handed to the collection's `[]=`
# @param value [Object] the rule to store at that index
# @return [Object] the assigned value
def []=(index, value)
  self.rules[index] = value
end

#aggregate_by(aggregator:, measures:, &block) ⇒ Object

Aggregates the current set of rules using the given aggregator over the rule clusters specified by the given block

@param aggregator [Symbol] the name of a defined aggregator function
@param measures [Array<String>] the measures to aggregate
@param block [Proc] defines the rule clusters that should be aggregated



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/evoc/rule_store.rb', line 88

# Aggregates the current rules cluster by cluster.
#
# The block assigns each rule to a cluster (via #group_by). A cluster with
# more than one rule is collapsed into a single Evoc::HyperRule; a cluster
# holding a single rule contributes that rule unchanged.
#
# @param aggregator [Object] passed on to Evoc::HyperRule and to the new store
# @param measures [Object] passed on to Evoc::HyperRule
# @yield [rule] returns the cluster key for the rule
# @return [Evoc::RuleStore] a new store with this store's query and the given aggregator
def aggregate_by(aggregator:, measures:, &block)
  clusters = group_by(&block)
  result = Evoc::RuleStore.new(query: self.query, aggregator: aggregator)
  clusters.each_value do |members|
    # Only genuine groups are merged; singletons pass through as they are.
    result << (members.size > 1 ? Evoc::HyperRule.new(members, aggregator, measures) : members.first)
  end
  result
end

#calculate_measures(measures) ⇒ Object

Calculates the requested measures on the current rule set



70
71
72
73
74
75
76
77
78
79
80
# File 'lib/evoc/rule_store.rb', line 70

# Requests every given measure from every rule in the store.
#
# The value returned by each get_measure call is discarded here; the method
# only makes sure the call has been made for each rule/measure pair.
#
# @param measures [#each] the measures to request from each rule
# @raise [ArgumentError] when +measures+ is nil
# @return [Object] whatever #each returns
def calculate_measures(measures)
  raise ArgumentError, "Tried to calculate measures, but list of measures was 'nil'" if measures.nil?

  each do |rule|
    measures.each { |measure| rule.get_measure(measure) }
  end
end

#clear ⇒ Object



343
344
345
# File 'lib/evoc/rule_store.rb', line 343

# Empties the underlying rules collection in place.
#
# @return [Object] whatever the rules collection's own `clear` returns
def clear
  stored = self.rules
  stored.clear
end

#csv_header ⇒ Object

generate an array suitable for a csv header



367
368
369
# File 'lib/evoc/rule_store.rb', line 367

# Column names for a CSV export of this store.
#
# @return [Array] the keys of #instance_values_for_csv
def csv_header
  exported = self.instance_values_for_csv
  exported.keys
end

#each(&block) ⇒ Object

required by Enumerable



41
42
43
44
45
46
47
48
49
# File 'lib/evoc/rule_store.rb', line 41

# Iterates over the stored rules; this is the hook Enumerable builds on.
#
# Without a block an Enumerator over the rules is returned, so external
# iteration (`store.each.with_index`, `store.each.next`, ...) works instead
# of failing with LocalJumpError.
#
# @yield [rule] each stored rule in order
# @return [Object] the result of the underlying collection's +each+ when a
#   block is given
# @return [Enumerator] when no block is given
def each(&block)
  return enum_for(:each) unless block_given?

  @rules.each(&block)
end

#empty? ⇒ Boolean



339
340
341
# File 'lib/evoc/rule_store.rb', line 339

# Tells whether the store currently holds no rules.
#
# @return [Boolean] the result of `empty?` on the rules collection
def empty?
  stored = self.rules
  stored.empty?
end

#evaluate_with(evaluators:, expected_outcome:, measure_combination:, topk: nil, unique_consequents: nil) ⇒ Object

Evaluate this recommendation using the given evaluator

Note that the hyper coefficient is added as a last tie breaker for aggregation functions called with ‘aggregator_hc’. Not pretty, sorry.



157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/evoc/rule_store.rb', line 157

# Evaluates this recommendation with each of the requested evaluators.
#
# The rules are first converted with #evaluation_format. For every evaluator
# the method of that name on Evoc::Evaluate is looked up, and it is handed
# only those of the candidate arguments (+rec+, +exp+, +rules+) that appear
# in its parameter list.
#
# @param evaluators [Array<Symbol>] names of methods on Evoc::Evaluate
# @param expected_outcome [Array] the items expected to be recommended
# @param measure_combination [Array] the measures to rank the rules on
# @param topk [Integer, nil] forwarded to #evaluation_format
# @param unique_consequents [Object, nil] accepted but not used by this method
# @raise [ArgumentError] when +measure_combination+ is empty
# @raise [NoMethodError] when Evoc::Evaluate does not respond to an evaluator
# @return [Hash] evaluator => { 'value' => result, 'time' => seconds }
def evaluate_with(evaluators:, expected_outcome:, measure_combination:, topk: nil, unique_consequents: nil)
  if measure_combination.empty?
    raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on"
  end
  logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"
  recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome, topk: topk)
  potential_params = { rec: recommendation, exp: expected_outcome.size, rules: self }
  evaluators.each_with_object({}) do |evaluator, results|
    unless Evoc::Evaluate.respond_to?(evaluator)
      raise NoMethodError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
    end
    started = Time.new
    evaluation = Evoc::Evaluate.method(evaluator)
    # Method#parameters yields [kind, name] pairs; only the names matter here.
    accepted = evaluation.parameters.map { |_kind, name| name }
    params = potential_params.select { |key, _value| accepted.include?(key) }
    # The arguments are selected by parameter name, so they must be passed as
    # keywords. A positional Hash is no longer converted to keywords on Ruby 3.
    results[evaluator] = { 'value' => evaluation.call(**params) }
    finished = Time.new
    results[evaluator]['time'] = TimeDifference.between(started, finished).in_seconds.round(8)
  end
end

#evaluation_format(measures:, expected_outcome:, topk: nil) ⇒ Object

Needed by Evaluate mixin



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/evoc/rule_store.rb', line 120

# Converts the rule set into the nested 0/1 list used for evaluation.
#
# Rules are de-duplicated with #unique_by (on the first measure), ordered
# with #sort_on and cut to +topk+ entries. Each remaining rule becomes 1 when
# every item of its rhs is in +expected_outcome+ and 0 otherwise. Consecutive
# rules whose measures render to the same tag are placed in the same group.
#
# @param measures [Array] the measures to rank on
# @param expected_outcome [Array] the items counted as correct
# @param topk [Integer, nil] number of rules to keep; defaults to #size
# @return [Array<Array<Integer>>] groups of 0/1 flags in ranking order
def evaluation_format(measures:, expected_outcome:, topk: nil)
  limit = topk.nil? ? self.size : topk
  ranked = self.sort_on(measures: measures, rules: self.unique_by(measures.first)).take(limit)
  tagged = ranked.map do |rule|
    hit = (rule.rhs - expected_outcome).empty? ? 1 : 0
    # A missing measure value is rendered as "INF" in the tag.
    tag = measures.map { |m| rule.get_measure(m).value.nil? ? "INF" : rule.get_measure(m).to_s }.join('_')
    [tag, hit]
  end
  # Start a new group whenever the tag changes between neighbouring rules.
  tagged.slice_when { |left, right| left.first != right.first }
        .map { |run| run.map(&:last) }
end

#exact_match ⇒ True/False/Nil



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/evoc/rule_store.rb', line 233

# Checks whether some rule's antecedent is exactly the query.
#
# The comparison is order-insensitive: both the rule's lhs and the query are
# sorted before being compared.
#
# @return [Boolean] true when at least one rule's sorted lhs equals the sorted query
# @return [nil] when no query is set (a debug message is logged)
def exact_match
  if self.query.nil?
    logger.debug "Tried to calculate exact match, but query was nil "
    return nil
  end

  self.any? { |rule| rule.lhs.sort == self.query.sort }
end

#group_by(&block) ⇒ Object



59
60
61
62
63
64
65
# File 'lib/evoc/rule_store.rb', line 59

# Partitions the rules into clusters keyed by the block's return value.
#
# The returned hash keeps its default block, so looking up a key that is not
# present inserts and returns an empty array for it.
#
# @yield [rule] returns the cluster key for the rule
# @return [Hash{Object => Array}] cluster key => rules in iteration order
def group_by(&block)
  clusters = Hash.new { |table, missing_key| table[missing_key] = [] }
  each { |element| clusters[block.call(element)].push(element) }
  clusters
end

#hyper_rules ⇒ Object



114
115
116
# File 'lib/evoc/rule_store.rb', line 114

# Selects the rules in this store that are Evoc::HyperRule instances.
#
# @return [Array] the hyper rules, in iteration order
def hyper_rules
  self.grep(Evoc::HyperRule)
end

#instance_values_for_csv ⇒ Object



360
361
362
363
# File 'lib/evoc/rule_store.rb', line 360

# The store's instance values minus those that are not exported to CSV.
#
# Starts from +instance_values+ and removes the 'rules' entry from it.
#
# @return [Hash] instance-variable name => value, without 'rules'
def instance_values_for_csv
  excluded = %w[rules]
  self.instance_values.delete_if { |name, _value| excluded.include?(name) }
end

#largest_antecedent ⇒ Integer



227
228
229
# File 'lib/evoc/rule_store.rb', line 227

# Size of the largest antecedent (lhs) among the stored rules.
#
# @return [Integer, nil] the maximum lhs size, or nil when there are no rules
def largest_antecedent
  antecedent_sizes = self.map { |rule| rule.lhs.size }
  antecedent_sizes.max
end

#number_of_hyper_rules ⇒ Object



108
109
110
# File 'lib/evoc/rule_store.rb', line 108

# Counts the hyper rules in this store.
#
# @return [Integer] the size of the collection returned by #hyper_rules
def number_of_hyper_rules
  found = self.hyper_rules
  found.size
end

#pretty_print ⇒ Object



254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'lib/evoc/rule_store.rb', line 254

# Renders the store as CSV text with one row per rule.
#
# The header is 'rule' followed by the union of instantiated measures: taken
# over all rules when no aggregator is set, and over the hyper rules only
# when one is. Each row holds the rule name and the value of every header
# measure for that rule.
#
# @return [String] the generated CSV document
def pretty_print
  CSV.generate do |csv|
    measure_source = aggregator.nil? ? self : self.hyper_rules
    defined_measures = measure_source.map { |r| r.instantiated_measures }.array_union
    # header line
    csv << ['rule'] + defined_measures
    self.each do |rule|
      row = CSV::Row.new([], [], false)
      row << rule.name
      defined_measures.each { |m| row << rule.get_measure(m).value }
      csv << row
    end
  end
end


291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'lib/evoc/rule_store.rb', line 291

# Writes the rule set as CSV through the bare CSV { ... } helper.
#
# The header is 'lhs', 'rhs' and the measure names with a leading "m_"
# removed. When the store is non-empty, rules are emitted in #sort_on order;
# lhs/rhs items are translated through the int_2_name mapping of the first
# rule's tx_store, and a measure that is not instantiated on a rule is
# written as nil.
#
# @param measures [Array] the measures to print (defaults to Evoc::Rule.measures)
# @return [nil]
def print(measures = Evoc::Rule.measures)
  CSV { |out| out << ['lhs','rhs'] + measures.map { |m| m.to_s.gsub(/^m_/,'') } }
  return nil unless self.size > 0

  name_mapping = self.first.tx_store.int_2_name
  translate = ->(items) { items.map { |i| name_mapping[i] }.join(',') }
  self.sort_on(measures: measures).each do |rule|
    row = CSV::Row.new([], [], false)
    row << translate.call(rule.lhs)
    row << translate.call(rule.rhs)
    measures.each do |m|
      row << (rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil)
    end
    CSV { |out| out << row }
  end
  nil
end

#print_to_file(measures: Evoc::Rule.measures, file:) ⇒ Object

Print the current rule set to a CSV file



313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'lib/evoc/rule_store.rb', line 313

# Writes the rule set to a CSV file.
#
# The file is opened in "wb" mode. The header is 'lhs', 'rhs' and the given
# measures; each rule contributes its comma-joined lhs and rhs followed by
# one field per measure (nil when the measure is not instantiated on it).
#
# @param measures [Array] the measures to write (defaults to Evoc::Rule.measures)
# @param file [String] path of the file to write
# @return [Object] the value of the CSV.open block
def print_to_file(measures: Evoc::Rule.measures, file:)
  CSV.open(file, "wb") do |csv|
    # header line
    csv << ['lhs','rhs'] + measures
    self.each do |rule|
      row = CSV::Row.new([], [], false)
      [rule.lhs, rule.rhs].each { |side| row << side.join(',') }
      measures.each do |m|
        field = rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil
        row << field
      end
      csv << row
    end
  end
end

#size ⇒ Object



335
336
337
# File 'lib/evoc/rule_store.rb', line 335

# Number of rules currently held by the store.
#
# @return [Integer] the size reported by the rules collection
def size
  stored = self.rules
  stored.size
end

#sort_on(rules: self, measures:) ⇒ Object

Sort rules on one or more measures, strongest first. If a measure is undefined/nil for a rule, it is treated as -infinity for the purposes of sorting, so such rules sort last.

@param measures [Array<String>] the list of measures to sort by



193
194
195
# File 'lib/evoc/rule_store.rb', line 193

# Sorts rules on one or more measures, strongest first.
#
# The sort key of a rule is one entry per measure: the negated measure when
# its value is present, or Float::INFINITY when the value is nil, so rules
# lacking a value for a measure are ordered after those that have one.
#
# @param rules [#sort_by] the rules to sort (defaults to this store)
# @param measures [Array] the measure names, in order of precedence
# @return [Array] the rules in sorted order
def sort_on(rules: self, measures:)
  rules.sort_by do |rule|
    measures.map do |measure_name|
      if rule.get_measure(measure_name).value.nil?
        Float::INFINITY
      else
        -rule.get_measure(measure_name)
      end
    end
  end
end

#to_csv_row ⇒ Object

generate an array of the current values of <self> converts any array values to a comma separated string representation



374
375
376
# File 'lib/evoc/rule_store.rb', line 374

# The store's exportable values as a flat CSV row.
#
# Array values are joined into one comma-separated string; every other value
# is passed through unchanged.
#
# @return [Array] one entry per value of #instance_values_for_csv
def to_csv_row
  self.instance_values_for_csv.values.map do |val|
    if val.is_a?(Array)
      val.join(',')
    else
      val
    end
  end
end

#to_h ⇒ Object



347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/evoc/rule_store.rb', line 347

# Plain-data representation of the stored rules.
#
# Despite the name, a non-nil rule collection yields an Array holding one
# Hash per rule: :lhs, :rhs and one entry per instantiated measure, whose
# value is converted with +to_r+.
#
# @return [Hash] an empty hash when the rules collection is nil
# @return [Array<Hash>] one hash per rule otherwise
def to_h
  return {} if self.rules.nil?

  self.rules.map do |rule|
    entry = { lhs: rule.lhs, rhs: rule.rhs }
    rule.instantiated_measures.each do |m|
      entry[m] = rule.get_measure(m).value.to_r
    end
    entry
  end
end

#to_s ⇒ Object



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/evoc/rule_store.rb', line 275

# Renders the store as CSV text.
#
# The header is 'lhs', 'rhs' and every measure in Evoc::Rule.measures. Each
# rule contributes its lhs and rhs (comma-joined when they respond to +join+)
# and one field per measure, nil when the measure is not instantiated on it.
#
# @return [String] the generated CSV document
def to_s
  joined = ->(items) { items.respond_to?(:join) ? items.join(',') : items }
  CSV.generate do |csv|
    # header line
    csv << ['lhs','rhs'] + Evoc::Rule.measures
    self.each do |rule|
      row = CSV::Row.new([], [], false)
      row << joined.call(rule.lhs)
      row << joined.call(rule.rhs)
      Evoc::Rule.measures.each do |m|
        field = rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil
        row << field
      end
      csv << row
    end
  end
end

#unique_by(measure, rules: self) ⇒ Object

returns the set of unique consequents where each consequent is the strongest given by the input measure

@param: [String] measure the measure used to find the strongest rules



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/evoc/rule_store.rb', line 206

# Keeps, for each consequent, only the strongest rule under the given measure.
#
# Rules are keyed by the first item of their rhs. Rules whose measure value
# is nil are skipped. When several rules share a key, the one with the
# largest value wins; on a tie the rule seen first is kept.
#
# @param measure [Object] the measure used to compare rules
# @param rules [#each] the rules to filter (defaults to this store)
# @return [Array] the winning rule for each distinct consequent key
def unique_by(measure, rules: self)
  selected_rules = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
  rules.each do |rule|
    # Look the measure up once per rule rather than on every comparison.
    value = rule.get_measure(measure).value
    next if value.nil?

    key = rule.rhs.first
    best = selected_rules[key]
    next unless best.nil? || value > best[:value]

    if best.nil?
      best = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseRubyToRuby.new : Hash.new
      selected_rules[key] = best
    end
    best[:value] = value
    best[:rule] = rule
  end
  selected_rules.values.map { |entry| entry[:rule] }
end