Class: Evoc::RuleStore
- Inherits:
-
Object
- Object
- Evoc::RuleStore
- Includes:
- Enumerable, Logging
- Defined in:
- lib/evoc/rule_store.rb
Instance Attribute Summary collapse
-
#aggregator ⇒ Object
Returns the value of attribute aggregator.
-
#query ⇒ Object
Returns the value of attribute query.
-
#rules ⇒ Object
Returns the value of attribute rules.
Class Method Summary collapse
-
.parse_file(path_to_rules) ⇒ Object
CLASS METHODS.
- .parse_string(string) ⇒ Object
- .sort_on(rules:, measures:) ⇒ Object
Instance Method Summary collapse
- #<<(rule) ⇒ Object
- #==(other) ⇒ Object
- #[](index) ⇒ Object
- #[]=(index, value) ⇒ Object
-
#aggregate_by(aggregator:, measures:, &block) ⇒ Object
Aggregates the current set of rules using the given aggregator over the rule clusters specified by the given block.
-
#calculate_measures(measures) ⇒ Object
Calculates the requested measures on the current rule set.
- #clear ⇒ Object
-
#csv_header ⇒ Object
generate an array suitable for a csv header.
-
#each(&block) ⇒ Object
required by Enumerable.
- #empty? ⇒ Boolean
-
#evaluate_with(evaluators:, expected_outcome:, measure_combination:, topk: nil, unique_consequents: nil) ⇒ Object
Evaluate this recommendation using the given evaluator.
-
#evaluation_format(measures:, expected_outcome:, topk: nil) ⇒ Object
Needed by Evaluate mixin.
-
#exact_match ⇒ True/False/Nil
True if the lhs of one of the rules is equal to the query, false if none is, and nil if no query is set.
- #group_by(&block) ⇒ Object
-
#hyper_rules ⇒ Object
The hyper rules in the store.
-
#initialize(rules = [], query: nil, aggregator: nil) ⇒ RuleStore
constructor
A new instance of RuleStore.
- #instance_values_for_csv ⇒ Object
-
#largest_antecedent ⇒ Integer
The size of the largest rule, measured by antecedent size.
-
#number_of_hyper_rules ⇒ Object
The number of hyper rules in this store.
- #pretty_print ⇒ Object
- #print(measures = Evoc::Rule.measures) ⇒ Object
-
#print_to_file(measures: Evoc::Rule.measures, file:) ⇒ Object
Print the current rule set to a csv file.
- #size ⇒ Object
-
#sort_on(rules: self, measures:) ⇒ Object
Sort rules on one or more measures. If a measure is undefined/nil for a rule, we treat it as -infinity for purposes of sorting. @param measures [Array<String>] the list of measures to sort by.
-
#to_csv_row ⇒ Object
Generates an array of the current values of <self>, converting any array values to a comma-separated string representation.
- #to_h ⇒ Object
- #to_s ⇒ Object
-
#unique_by(measure, rules: self) ⇒ Object
returns the set of unique consequents where each consequent is the strongest given by the input measure.
Methods included from Logging
configure_logger_for, #logger, logger_for, set_level
Constructor Details
#initialize(rules = [], query: nil, aggregator: nil) ⇒ RuleStore
6 7 8 9 10 |
# File 'lib/evoc/rule_store.rb', line 6
#
# Creates a rule store, assigning each argument through its attribute writer.
#
# @param rules [Array] the initial rules (a new empty array when omitted)
# @param query [Object, nil] value stored in the query attribute
# @param aggregator [Object, nil] value stored in the aggregator attribute
def initialize(rules = [], query: nil, aggregator: nil)
  self.rules = rules
  self.query = query
  self.aggregator = aggregator
end
Instance Attribute Details
#aggregator ⇒ Object
Returns the value of attribute aggregator.
4 5 6 |
# File 'lib/evoc/rule_store.rb', line 4
#
# @return [Object] the current value of the aggregator attribute
def aggregator
  @aggregator
end
#query ⇒ Object
Returns the value of attribute query.
4 5 6 |
# File 'lib/evoc/rule_store.rb', line 4
#
# @return [Object] the current value of the query attribute
def query
  @query
end
#rules ⇒ Object
Returns the value of attribute rules.
4 5 6 |
# File 'lib/evoc/rule_store.rb', line 4
#
# @return [Object] the current value of the rules attribute
def rules
  @rules
end
Class Method Details
.parse_file(path_to_rules) ⇒ Object
CLASS METHODS
16 17 18 19 20 21 22 23 24 |
# File 'lib/evoc/rule_store.rb', line 16
#
# Builds a rule store from a CSV file that has a header row.
#
# Each row is turned into a symbol-keyed hash. Every column except :lhs and
# :rhs is passed through Evoc::InterestingnessMeasures::VALUE_TYPE before the
# hash is handed to Evoc::Rule.new.
#
# @param path_to_rules [String] path of the CSV file to read
# @return [Evoc::RuleStore] a new store holding one rule per CSV row
def self.parse_file(path_to_rules)
  store = Evoc::RuleStore.new
  CSV.foreach(path_to_rules, headers: true) do |row|
    attributes = row.to_h.symbolize_keys.convert_values(
      except: [:lhs, :rhs],
      converter: Evoc::InterestingnessMeasures::VALUE_TYPE
    )
    store << Evoc::Rule.new(attributes)
  end
  store
end
.parse_string(string) ⇒ Object
26 27 28 29 30 31 32 33 34 |
# File 'lib/evoc/rule_store.rb', line 26
#
# Builds a rule store from CSV text that has a header row.
#
# Each row is turned into a symbol-keyed hash. Every column except :lhs and
# :rhs is passed through Evoc::InterestingnessMeasures::VALUE_TYPE before the
# hash is handed to Evoc::Rule.new.
#
# @param string [String] CSV content, header line first
# @return [Evoc::RuleStore] a new store holding one rule per CSV row
def self.parse_string(string)
  store = Evoc::RuleStore.new
  CSV.parse(string, headers: true) do |row|
    attributes = row.to_h.symbolize_keys.convert_values(
      except: [:lhs, :rhs],
      converter: Evoc::InterestingnessMeasures::VALUE_TYPE
    )
    store << Evoc::Rule.new(attributes)
  end
  store
end
.sort_on(rules:, measures:) ⇒ Object
197 198 199 |
# File 'lib/evoc/rule_store.rb', line 197
#
# Sorts the given rules on one or more measures, strongest first.
#
# The sort key for each rule is the list of its negated measures, so an
# ascending sort yields descending measure order. A measure whose value is nil
# gets the key Float::INFINITY, which places that rule last for that measure.
#
# @param rules [#sort_by] the rules to sort
# @param measures [Array] the measures to sort by, in priority order
# @return [Array] the sorted rules
def self.sort_on(rules:, measures:)
  rules.sort_by do |rule|
    measures.map do |measure|
      if rule.get_measure(measure).value.nil?
        Float::INFINITY
      else
        -rule.get_measure(measure)
      end
    end
  end
end
Instance Method Details
#<<(rule) ⇒ Object
250 251 252 |
# File 'lib/evoc/rule_store.rb', line 250
#
# Appends a rule to the store.
#
# @param rule [Object] the rule to add
# @return [Object] whatever `rules << rule` returns (the rules collection itself when it is an Array)
def <<(rule)
  rules << rule
end
#==(other) ⇒ Object
329 330 331 332 333 |
# File 'lib/evoc/rule_store.rb', line 329
#
# Compares two stores by content, ignoring rule order.
#
# Each rule is reduced to a string made of its name followed by the values of
# its instantiated measures. The stores are equal when their name-sorted lists
# of such strings are equal.
#
# @param other [Object] the object to compare against
# @return [Boolean] false when other does not expose #rules, otherwise the
#   result of the content comparison
def ==(other)
  # An object without a rule collection can never equal a store; without this
  # guard, comparisons such as `store == nil` raised NoMethodError.
  return false unless other.respond_to?(:rules)

  signature = lambda do |store_rules|
    store_rules.sort_by { |r| r.name }.map do |r|
      "#{r.name}#{r.instantiated_measures.map { |m| r.get_measure(m).value }}"
    end
  end
  signature.call(rules) == signature.call(other.rules)
end
#[](index) ⇒ Object
51 52 53 |
# File 'lib/evoc/rule_store.rb', line 51
#
# Reads from the underlying rule collection.
#
# @param index [Object] index forwarded to @rules[]
# @return [Object] whatever @rules[index] returns
def [](index)
  @rules[index]
end
#[]=(index, value) ⇒ Object
55 56 57 |
# File 'lib/evoc/rule_store.rb', line 55
#
# Writes into the underlying rule collection.
#
# @param index [Object] index forwarded to @rules[]=
# @param value [Object] the value to store at that index
# @return [Object] the assigned value
def []=(index, value)
  @rules[index] = value
end
#aggregate_by(aggregator:, measures:, &block) ⇒ Object
Aggregates the current set of rules using the given aggregator over the rule clusters specified by the given block
@param: [Symbol] aggregator the name of a defined aggregator function @param: [Array<String>] measures the measures to aggregate @param: [block] define the rules clusters which should be aggregated
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/evoc/rule_store.rb', line 88
#
# Aggregates the rules of this store cluster by cluster.
#
# The block assigns every rule to a cluster (see #group_by). A cluster with
# more than one rule is collapsed into a single Evoc::HyperRule; a cluster with
# exactly one rule contributes that rule unchanged.
#
# @param aggregator [Object] passed to Evoc::HyperRule.new and recorded on the result
# @param measures [Object] passed to Evoc::HyperRule.new
# @yield [rule] returns the cluster key for the rule
# @return [Evoc::RuleStore] a new store with this store's query and the given aggregator
def aggregate_by(aggregator:, measures:, &block)
  clusters = group_by(&block)
  aggregated = Evoc::RuleStore.new(query: query, aggregator: aggregator)
  clusters.each_value do |cluster|
    aggregated << (cluster.size > 1 ? Evoc::HyperRule.new(cluster, aggregator, measures) : cluster.first)
  end
  aggregated
end
#calculate_measures(measures) ⇒ Object
Calculates the requested measures on the current rule set
70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/evoc/rule_store.rb', line 70
#
# Requests every listed measure from every rule in the store by calling
# rule.get_measure; the return values of those calls are discarded.
#
# @param measures [#each] the measures to request
# @raise [ArgumentError] when measures is nil
# @return [Object] the result of iterating the store with #each
def calculate_measures(measures)
  raise ArgumentError, "Tried to calculate measures, but list of measures was 'nil'" if measures.nil?

  each do |rule|
    measures.each { |measure| rule.get_measure(measure) }
  end
end
#clear ⇒ Object
343 344 345 |
# File 'lib/evoc/rule_store.rb', line 343
#
# Removes all rules by clearing the rules collection in place.
#
# @return [Object] whatever rules.clear returns
def clear
  rules.clear
end
#csv_header ⇒ Object
generate an array suitable for a csv header
367 368 369 |
# File 'lib/evoc/rule_store.rb', line 367
#
# Generates an array suitable for a CSV header: the keys of
# #instance_values_for_csv.
#
# @return [Array] the header fields
def csv_header
  instance_values_for_csv.keys
end
#each(&block) ⇒ Object
required by Enumerable
41 42 43 44 45 46 47 48 49 |
# File 'lib/evoc/rule_store.rb', line 41
#
# Iterates over the rules in the store; required by Enumerable.
#
# The block is forwarded to the underlying collection. When no block is given
# an Enumerator over the rules is returned. The previous implementation tried
# to `yield` in that case and raised LocalJumpError for a non-empty store.
#
# @yield [rule] each rule in turn
# @return [Object] the result of @rules.each: the rules collection when a block
#   is given, an Enumerator otherwise
def each(&block)
  @rules.each(&block)
end
#empty? ⇒ Boolean
339 340 341 |
# File 'lib/evoc/rule_store.rb', line 339
#
# @return [Boolean] whether the rules collection reports itself as empty
def empty?
  rules.empty?
end
#evaluate_with(evaluators:, expected_outcome:, measure_combination:, topk: nil, unique_consequents: nil) ⇒ Object
Evaluate this recommendation using the given evaluator
Note that the hyper coefficient is added as a last tie breaker for aggregation functions called with ‘aggregator_hc’ Not pretty, sorry..
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/evoc/rule_store.rb', line 157
#
# Evaluates this recommendation with each of the given evaluators.
#
# The rules are first converted with #evaluation_format. Each evaluator is
# looked up as a method on Evoc::Evaluate and called with whichever of the
# candidate arguments (rec:, exp:, rules:) its signature names.
#
# @param evaluators [Array<Symbol>] names of methods on Evoc::Evaluate
# @param expected_outcome [Array] the expected items; its size is passed as exp:
# @param measure_combination [Array] the measures to rank on (must not be empty)
# @param topk [Integer, nil] forwarded to #evaluation_format
# @param unique_consequents [Object, nil] accepted for compatibility; not used here
# @raise [ArgumentError] when measure_combination is empty
# @raise [NoMethodError] when an evaluator is not implemented in Evoc::Evaluate
# @return [Hash] evaluator => { 'value' => result, 'time' => elapsed seconds }
def evaluate_with(evaluators:, expected_outcome:, measure_combination:, topk: nil, unique_consequents: nil)
  if measure_combination.empty?
    raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on"
  end
  logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"

  recommendation = evaluation_format(measures: measure_combination,
                                     expected_outcome: expected_outcome,
                                     topk: topk)
  potential_params = { rec: recommendation, exp: expected_outcome.size, rules: self }

  evaluators.each_with_object({}) do |evaluator, results|
    started = Time.new
    unless Evoc::Evaluate.respond_to?(evaluator)
      raise NoMethodError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
    end

    evaluation = Evoc::Evaluate.method(evaluator)
    # Method#parameters yields [kind, name] pairs; keep only the names.
    accepted = evaluation.parameters.map { |_kind, name| name }
    params = potential_params.select { |name, _| accepted.include?(name) }
    # Splat as keywords: a positional Hash is not converted to keyword
    # arguments on Ruby 3 and would raise ArgumentError.
    results[evaluator] = { 'value' => evaluation.call(**params) }
    results[evaluator]['time'] = TimeDifference.between(started, Time.new).in_seconds.round(8)
  end
end
#evaluation_format(measures:, expected_outcome:, topk: nil) ⇒ Object
Needed by Evaluate mixin
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/evoc/rule_store.rb', line 120
#
# Converts the store into the grouped 0/1 list consumed by the evaluators.
#
# Rules are deduplicated with #unique_by on the first measure, sorted with
# #sort_on, and cut to the first topk (all rules when topk is nil). Each rule
# becomes 1 when every item of its rhs is in expected_outcome, otherwise 0.
# Consecutive rules with identical measure values ("INF" standing in for a nil
# value) are put in the same group.
#
# @param measures [Array] the measures to rank on
# @param expected_outcome [Array] the expected items
# @param topk [Integer, nil] maximum number of rules to keep
# @return [Array<Array<Integer>>] groups of 0/1 flags in ranked order
def evaluation_format(measures:, expected_outcome:, topk: nil)
  limit = topk.nil? ? size : topk
  ranked = sort_on(measures: measures, rules: unique_by(measures.first)).take(limit)

  tagged = ranked.map do |rule|
    hit = (rule.rhs - expected_outcome).empty? ? 1 : 0
    tag = measures.map { |m| rule.get_measure(m).value.nil? ? "INF" : rule.get_measure(m).to_s }.join('_')
    [tag, hit]
  end

  # Runs of equal tags form one group; only the 0/1 flag of each rule is kept.
  tagged.chunk_while { |(left, _), (right, _)| left == right }.map { |run| run.map(&:last) }
end
#exact_match ⇒ True/False/Nil
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 |
# File 'lib/evoc/rule_store.rb', line 233
#
# Tells whether some rule's lhs equals the query, comparing both in sorted
# order.
#
# @return [true, false, nil] true when a rule matches, false when none does,
#   nil (with a debug log entry) when the query is nil
def exact_match
  if query.nil?
    logger.debug "Tried to calculate exact match, but query was nil "
    return nil
  end

  any? { |rule| rule.lhs.sort == query.sort }
end
#group_by(&block) ⇒ Object
59 60 61 62 63 64 65 |
# File 'lib/evoc/rule_store.rb', line 59
#
# Groups the rules by the value the block returns for each of them.
#
# @yield [rule] returns the group key for the rule
# @return [Hash] group key => array of rules, in iteration order; the hash
#   creates an empty array for any key that is looked up but missing
def group_by(&block)
  clusters = Hash.new { |groups, key| groups[key] = [] }
  each { |rule| clusters[block.call(rule)].push(rule) }
  clusters
end
#hyper_rules ⇒ Object
114 115 116 |
# File 'lib/evoc/rule_store.rb', line 114
#
# @return [Array] the rules in the store that are Evoc::HyperRule instances
def hyper_rules
  select { |candidate| candidate.is_a?(Evoc::HyperRule) }
end
#instance_values_for_csv ⇒ Object
360 361 362 363 |
# File 'lib/evoc/rule_store.rb', line 360
#
# Returns this object's instance values without the 'rules' entry.
#
# @return [Hash] the result of #instance_values minus the excluded keys
def instance_values_for_csv
  excluded = ['rules']
  instance_values.reject { |name, _value| excluded.include?(name) }
end
#largest_antecedent ⇒ Integer
227 228 229 |
# File 'lib/evoc/rule_store.rb', line 227
#
# The size of the largest rule, measured by antecedent (lhs) size.
#
# @return [Integer, nil] the largest lhs size, or nil when the store has no rules
def largest_antecedent
  antecedent_sizes = map { |rule| rule.lhs.size }
  antecedent_sizes.max
end
#number_of_hyper_rules ⇒ Object
108 109 110 |
# File 'lib/evoc/rule_store.rb', line 108
#
# @return [Integer] how many rules #hyper_rules returns
def number_of_hyper_rules
  hyper_rules.size
end
#pretty_print ⇒ Object
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 |
# File 'lib/evoc/rule_store.rb', line 254
#
# Renders the store as CSV text with one row per rule.
#
# The header is 'rule' followed by the union of instantiated measures, taken
# from all rules when no aggregator is set and from the hyper rules only
# otherwise. Every rule gets a row with its name and the value of each header
# measure.
#
# @return [String] the generated CSV
def pretty_print
  CSV.generate do |csv|
    measure_source = aggregator.nil? ? self : hyper_rules
    defined_measures = measure_source.map { |r| r.instantiated_measures }.array_union
    csv << ['rule'] + defined_measures

    each do |rule|
      row = CSV::Row.new([], [], false)
      row << rule.name
      defined_measures.each { |m| row << rule.get_measure(m).value }
      csv << row
    end
  end
end
#print(measures = Evoc::Rule.measures) ⇒ Object
291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 |
# File 'lib/evoc/rule_store.rb', line 291
#
# Writes the store as CSV through the CSV() output helper.
#
# The header holds 'lhs', 'rhs' and the measure names with a leading "m_"
# removed. Rules are written in #sort_on order, with their items translated
# through the int_2_name mapping of the first rule's tx_store. A measure that
# is not instantiated on a rule is written as nil.
#
# @param measures [Array] the measures to print and sort on
# @return [nil]
def print(measures = Evoc::Rule.measures)
  CSV { |out| out << ['lhs', 'rhs'] + measures.map { |m| m.to_s.gsub(/^m_/, '') } }
  return nil unless size > 0

  name_mapping = first.tx_store.int_2_name
  item_names = ->(items) { items.map { |i| name_mapping[i] }.join(',') }

  sort_on(measures: measures).each do |rule|
    row = CSV::Row.new([], [], false)
    row << item_names.call(rule.lhs)
    row << item_names.call(rule.rhs)
    measures.each do |m|
      row << (rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil)
    end
    CSV { |out| out << row }
  end
  nil
end
#print_to_file(measures: Evoc::Rule.measures, file:) ⇒ Object
Print the current rule set to a csv file
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 |
# File 'lib/evoc/rule_store.rb', line 313
#
# Prints the current rule set to a CSV file.
#
# The header is 'lhs', 'rhs' followed by the measures as given. Each rule gets
# one row with comma-joined lhs and rhs, then one field per measure (nil when
# the measure is not instantiated on that rule).
#
# @param measures [Array] the measures to write
# @param file [String] path of the file, opened in "wb" mode
# @return [Object] the value of the CSV.open block
def print_to_file(measures: Evoc::Rule.measures, file:)
  CSV.open(file, "wb") do |csv|
    csv << ['lhs', 'rhs'] + measures
    each do |rule|
      row = CSV::Row.new([], [], false)
      row << rule.lhs.join(',')
      row << rule.rhs.join(',')
      measures.each do |m|
        value = rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil
        row << value
      end
      csv << row
    end
  end
end
#size ⇒ Object
335 336 337 |
# File 'lib/evoc/rule_store.rb', line 335
#
# @return [Integer] the size reported by the rules collection
def size
  rules.size
end
#sort_on(rules: self, measures:) ⇒ Object
Sort rules on one or more measures. If a measure is undefined/nil for a rule, we treat it as -infinity for purposes of sorting. @param measures [Array<String>] the list of measures to sort by
193 194 195 |
# File 'lib/evoc/rule_store.rb', line 193
#
# Sorts rules on one or more measures, strongest first.
#
# The sort key for each rule is the list of its negated measures, so an
# ascending sort yields descending measure order. A measure whose value is nil
# gets the key Float::INFINITY, which places that rule last for that measure.
#
# @param rules [#sort_by] the rules to sort (this store by default)
# @param measures [Array] the measures to sort by, in priority order
# @return [Array] the sorted rules
def sort_on(rules: self, measures:)
  sort_key = lambda do |rule|
    measures.map do |m|
      rule.get_measure(m).value.nil? ? Float::INFINITY : -rule.get_measure(m)
    end
  end
  rules.sort_by(&sort_key)
end
#to_csv_row ⇒ Object
Generates an array of the current values of <self>, converting any array values to a comma-separated string representation
374 375 376 |
# File 'lib/evoc/rule_store.rb', line 374
#
# Generates the CSV row matching #csv_header: the values of
# #instance_values_for_csv, with Array values joined by commas.
#
# @return [Array] the row fields
def to_csv_row
  instance_values_for_csv.values.map do |value|
    next value unless value.is_a?(Array)

    value.join(',')
  end
end
#to_h ⇒ Object
347 348 349 350 351 352 353 354 355 356 357 358 |
# File 'lib/evoc/rule_store.rb', line 347
#
# Converts every rule into a hash of its lhs, rhs and instantiated measures,
# with each measure value converted through #to_r.
#
# Despite the method name, a non-nil rules collection produces an Array with
# one Hash per rule; only a nil collection produces an (empty) Hash.
#
# @return [Array<Hash>, Hash] one hash per rule, or {} when rules is nil
def to_h
  return {} if rules.nil?

  rules.map do |rule|
    row = { lhs: rule.lhs, rhs: rule.rhs }
    rule.instantiated_measures.each { |m| row[m] = rule.get_measure(m).value.to_r }
    row
  end
end
#to_s ⇒ Object
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 |
# File 'lib/evoc/rule_store.rb', line 275
#
# Renders the store as CSV text.
#
# The header is 'lhs', 'rhs' followed by Evoc::Rule.measures. lhs and rhs are
# comma-joined when they respond to #join and written as-is otherwise. A
# measure that is not instantiated on a rule is written as nil.
#
# @return [String] the generated CSV
def to_s
  joined = ->(side) { side.respond_to?(:join) ? side.join(',') : side }

  CSV.generate do |csv|
    csv << ['lhs', 'rhs'] + Evoc::Rule.measures
    each do |rule|
      row = CSV::Row.new([], [], false)
      row << joined.call(rule.lhs)
      row << joined.call(rule.rhs)
      Evoc::Rule.measures.each do |m|
        row << (rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil)
      end
      csv << row
    end
  end
end
#unique_by(measure, rules: self) ⇒ Object
returns the set of unique consequents where each consequent is the strongest given by the input measure
@param: [String] measure the measure used to find the strongest rules
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
# File 'lib/evoc/rule_store.rb', line 206
#
# Keeps, for every consequent, only the rule that is strongest on the given
# measure.
#
# Rules are keyed on the first item of their rhs. Rules whose measure value is
# nil are skipped. On ties the rule seen first is kept, because a later rule
# only replaces the stored one when its value is strictly greater.
#
# @param measure [String] the measure used to find the strongest rules
# @param rules [#each] the rules to filter (this store by default)
# @return [Array] the selected rules, one per distinct key
def unique_by(measure, rules: self)
  strongest = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new

  rules.each do |rule|
    next if rule.get_measure(measure).value.nil?

    key = rule.rhs.first
    if strongest[key].nil?
      strongest[key] = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseRubyToRuby.new : Hash.new
      strongest[key][:value] = rule.get_measure(measure).value
      strongest[key][:rule] = rule
    end
    if rule.get_measure(measure).value > strongest[key][:value]
      strongest[key][:value] = rule.get_measure(measure).value
      strongest[key][:rule] = rule
    end
  end

  strongest.values.map { |entry| entry[:rule] }
end