Class: Evoc::Algorithm

Inherits:
Object
  • Object
show all
Extended by:
Logging
Defined in:
lib/evoc/algorithm.rb

Constant Summary collapse

@@rule_range_cache =
Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }

Class Method Summary collapse

Methods included from Logging

configure_logger_for, logger, logger_for, set_level

Class Method Details

.cached_rule_range(min, max, tx_store:, query:) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/evoc/algorithm.rb', line 39

def self.cached_rule_range(min,max,tx_store:,query:)
  tag = tx_store.hash+query.hash
  rule_store = Evoc::RuleStore.new(query: query)
  if @@rule_range_cache[tag].empty?
    @@rule_range_cache.clear
    Evoc::Algorithm.rule_range(min,max,tx_store:tx_store,query:query).group_by {|r| r.lhs.size}.each do |size,rules|
      @@rule_range_cache[tag][size] = rules 
    end
  else
    # calculate missing range
    missing_range = (min..max).to_a - @@rule_range_cache[tag].keys
    if !missing_range.empty?
      Evoc::Algorithm.rule_range(missing_range.min,missing_range.max,tx_store:tx_store,query:query).group_by {|r| r.lhs.size}.each do |size,rules|
        @@rule_range_cache[tag][size] = rules 
      end
    end
  end
  (min..max).each do |antecedent_size|
    @@rule_range_cache[tag][antecedent_size].each do |rule|
      rule_store << rule
    end
  end
  logger.debug  "Algorithm (#{min},#{max}) generated #{rule_store.size} rules for query of size #{query.size}"
  return rule_store
end

.co_change(tx_store:, query:) ⇒ Object



226
227
228
# File 'lib/evoc/algorithm.rb', line 226

def self.co_change(tx_store:, query:)
  self.cached_rule_range(1,1,tx_store: tx_store, query: query)
end

.execute(tx_store:, query:, algorithm:) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/evoc/algorithm.rb', line 6

def self.execute(tx_store:,query:,algorithm:)
  if algorithm.nil?
    raise ArgumentError.new, "Algorithm was equal to nil" 
  end
    # tarmaq2 => call tarmaq(2)
    if match = /tarmaq(?<num>\d+)/.match(algorithm)
        Evoc::Algorithm.tarmaq(match[:num].to_i,tx_store:tx_store,query:query)
    elsif match = /topk_(?<k>\d+)_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
      Evoc::TopK.topk(k: match[:k].to_i,
                      min: match[:min].to_i,
                      max: match[:max].to_i,
                      tx_store: tx_store,
                      query: query)
    elsif match = /crule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
      Evoc::Algorithm.cached_rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
    elsif match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
      Evoc::Algorithm.rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
    elsif Evoc::Algorithm.respond_to?(algorithm)
        Evoc::Algorithm.method(algorithm).call(tx_store:tx_store,query:query)
    else raise ArgumentError.new, "#{algorithm} is not an available algorithm"
    end
end

.hybrid(tx_store:, query:) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/evoc/algorithm.rb', line 141

def self.hybrid(tx_store:,query:)
  overlaps = self.largest_overlaps(tx_store: tx_store, query: query)
  if overlaps.empty?
    return Evoc::RuleStore.new(query: query)                             # no rules
  else
    if overlaps.size == 1
      if overlaps.first.size == query.size                               # execute rose
        return self.rose(tx_store: tx_store,query: query)
      else
        return self.tarmaq(0,tx_store: tx_store,query: overlaps.first)   # execute tarmaq
      end
    else
      store = Evoc::RuleStore.new(query: query)
      overlaps.each do |overlap|
        if overlap.size == 1                                             # execute co change
          part_store = Evoc::Algorithm.co_change(tx_store: tx_store, query: overlap)
          part_store.each {|r| store << r}
        else                                                             # execute tarmaq
          part_store = Evoc::Algorithm.tarmaq(0,tx_store: tx_store, query: overlap)
          part_store.each {|r| store << r}
        end
      end
      store.rules = store.select {|r| !query.include?(r.rhs.first)}
      return store
    end
  end
end

.largest_overlaps(tx_store:, query:) ⇒ Object



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/evoc/algorithm.rb', line 169

def self.largest_overlaps(tx_store:,query:)
    largest_match = []
    #initial filter, we consider all txes where something in the query changed
    query_changed_in = tx_store.transactions_of_list(query)
    # now find what subsets of the query changed in each tx
    query_changed_in.each do |tx_id|
        tx = tx_store.get_tx(id:tx_id,id_type: :index)
        largest_match_in_query = (query & tx.items)
        match_size = largest_match_in_query.size
        remainder_in_tx = tx.items - largest_match_in_query
        if remainder_in_tx.size > 0
          if match_size > largest_match.size
            largest_match = largest_match_in_query
          end
        end
    end
    if largest_match.empty? #no more matches
      return []
    else
      query_remainder = query - largest_match
      return [largest_match] + self.largest_overlaps(tx_store: tx_store,query: query_remainder)
    end
end

.largest_rules(tx_store:, query:) ⇒ Object

Find the largest rules for each unique consequent



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/evoc/algorithm.rb', line 110

def self.largest_rules(tx_store:,query:)
  #initial filter, we consider all txes where something in the query changed
  query_changed_in = tx_store.transactions_of_list(query)
  # now find what subsets of the query changed in each tx
  rules = Hash.new
  query_changed_in.each do |tx_id|
    tx = tx_store.get_tx(id:tx_id,id_type: :index)
    antecedent = (query & tx.items)
    consequents = (tx.items - antecedent)
    if consequents.size != 0
      consequents.each do |consequent|
        if rules[consequent].nil?
          rules[consequent] = Set.new([antecedent])             # new consequent
        elsif antecedent.size > rules[consequent].first.size   # larger antecedent
          rules[consequent] = Set.new([antecedent])
        elsif antecedent.size == rules[consequent].first.size  # equally large antecedent
          rules[consequent] << antecedent
        end
      end
    end
  end
  # now generate rules 
  rule_store = Evoc::RuleStore.new(query: query)
  rules.each do |consequent,antecedents|
    antecedents.each do |antecedent|
      rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
    end
  end
  return rule_store
end

.rose(tx_store:, query:) ⇒ Object

rose



221
222
223
224
# File 'lib/evoc/algorithm.rb', line 221

def self.rose(tx_store:,query:)
  qs = query.size
  self.cached_rule_range(qs,qs,tx_store: tx_store, query: query)
end

.rule_cacheObject

Targeted Association Rule Mining implementations



34
35
36
# File 'lib/evoc/algorithm.rb', line 34

def self.rule_cache
  return @@rule_range_cache
end

.rule_range(min, max, tx_store:, query:) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/evoc/algorithm.rb', line 65

def self.rule_range(min,max,tx_store:,query:)
  rule_store = Evoc::RuleStore.new(query: query)
  unique_rules = Set.new
  # min must be equal or smaller to max
  if min <= max
    # can only create rules where the antecedent is equal to or smaller in size than the query
    if min <= query.size
      #initial filter, we consider all txes where something in the query changed
      query_changed_in = tx_store.transactions_of_list(query)
      # create rules 
      query_changed_in.each do |tx_id| 
        tx = tx_store.get_tx(id:tx_id,id_type: :index)
        # the tx size must be larger than the min size
        # (so that we have items left for the consequent)
        if (tx.size > min) 
          # CONSTRUCT RULES in the desired range

          # 1. Find overlap
          query_overlap = (query & tx.items)
          consequents = (tx.items - query_overlap)
          # 2. Ignore exact overlaps (no items left for consequent)
          if consequents.size != 0
            # 3. Construct rules for overlaps that are at least min in size
            if query_overlap.size >= min
              # 4. Generate all permutations of query_overlap in the desired range
              antecedents = (min..max).map {|size| query_overlap.combination(size).to_a}.flatten(1)
              # 5. Add one rule for each antecedent/consequent combination
              antecedents.each do |antecedent|
                consequents.each do |consequent|
                  unique_rules << [antecedent,[consequent]]
                end
              end
            end
          end
        end
      end
    end
  end
  unique_rules.each {|lhs,rhs| rule_store << Evoc::Rule.new(lhs: lhs,rhs: rhs,tx_store:tx_store)}
  return rule_store
end

.tarmaq(depth, tx_store:, query:) ⇒ Object

TARMAQ find largest subsets in @query with evidence in @tx_store version



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/evoc/algorithm.rb', line 196

def self.tarmaq(depth,tx_store:,query:)
    largest_match = 0
    #initial filter, we consider all txes where something in the query changed
    query_changed_in = tx_store.transactions_of_list(query)
    # now find what subsets of the query changed in each tx
    query_changed_in.each do |tx_id|
        tx = tx_store.get_tx(id:tx_id,id_type: :index)
        largest_match_in_query = (query & tx.items)
        match_size = largest_match_in_query.size
        remainder_in_tx = tx.items - largest_match_in_query
        if remainder_in_tx.size > 0
          if match_size > largest_match
            largest_match = match_size
          end
        end
    end
    # now generate rules 
    max = largest_match
    min = ((max - depth) <= 0) ? 1 : max - depth
    self.cached_rule_range(min,max,tx_store: tx_store,query: query)
end

.tcharm(tx_store:, query:) ⇒ Object



230
231
232
# File 'lib/evoc/algorithm.rb', line 230

def self.tcharm(tx_store:, query:)
  Evoc::ClosedRules.closed_rules(tx_store: tx_store,query: query)
end