Class: TotalRecall::AccountGuesser

Inherits:
Object
  • Object
show all
Defined in:
lib/total_recall.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ AccountGuesser

Returns a new instance of AccountGuesser.



136
137
138
139
# File 'lib/total_recall.rb', line 136

# Builds a guesser with nothing learned yet: both lookup tables start
# out as separate empty hashes.
#
# @accounts and @tokens are the tables exposed by the +accounts+ and
# +tokens+ readers.
def initialize
  @accounts, @tokens = {}, {}
end

Instance Attribute Details

#accounts ⇒ Object (readonly)

Returns the value of attribute accounts.



134
135
136
# File 'lib/total_recall.rb', line 134

# Read-only access to the per-account table.
#
# @return [Hash] the hash stored in @accounts; #learn increments the
#   entry for an account once per token it processes
def accounts; @accounts; end

#tokens ⇒ Object (readonly)

Returns the value of attribute tokens.



134
135
136
# File 'lib/total_recall.rb', line 134

# Read-only access to the per-token table.
#
# @return [Hash] the hash stored in @tokens; #learn keeps, for each
#   token, a nested hash of per-account occurrence counts
def tokens; @tokens; end

Instance Method Details

#guess(data) ⇒ Object

Copied from reckon (github.com/iterationlabs/reckon).



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/total_recall.rb', line 142

# Picks the learned account whose token statistics best match +data+.
#
# Copied from reckon (github.com/iterationlabs/reckon).
#
# The query is tokenized with +tokenize+; for every query token a tf-idf
# weight is computed for the query itself and for each known account.
# Accounts are then ranked by the dot product of their weight vector with
# the query's weight vector (stored under :cosine, although the vectors
# are not normalized).
#
# @param data [Object] whatever +tokenize+ accepts (tokenize is defined
#   outside this listing)
# @return [Object, nil] the key from +accounts+ with the highest score, or
#   nil when there are no accounts or the query yields no tokens
def guess(data)
  query_tokens = tokenize(data)

  search_vector = []
  account_vectors = {}

  query_tokens.each do |token|
    # idf: log of (known accounts + 1) over (accounts that have seen this token + 1).
    idf = Math.log((accounts.keys.length + 1) / ((tokens[token] || {}).keys.length.to_f + 1))
    # Query-side term frequency: each token position weighs 1/len.
    tf = 1.0 / query_tokens.length.to_f
    search_vector << tf * idf

    accounts.each do |account, total_terms|
      # Account-side term frequency: occurrences of this token under the
      # account divided by all tokens learned for it; 0 if never seen.
      tf = (tokens[token] && tokens[token][account]) ? tokens[token][account] / total_terms.to_f : 0
      account_vectors[account] ||= []
      account_vectors[account] << tf * idf
    end
  end

  # Should I normalize the vectors?  Probably unnecessary due to tf-idf and short documents.
  account_vectors = account_vectors.to_a.map do |account, account_vector|
    { :cosine => (0...account_vector.length).to_a.inject(0) { |m, i| m + search_vector[i] * account_vector[i] },
      :account => account }
  end

  # Highest score first; an empty ranking yields nil.
  account_vectors.sort! { |a, b| b[:cosine] <=> a[:cosine] }
  account_vectors.first && account_vectors.first[:account]
end

#learn(account, data) ⇒ Object

Copied from reckon (github.com/iterationlabs/reckon).



171
172
173
174
175
176
177
178
179
# File 'lib/total_recall.rb', line 171

# Records the tokens of +data+ as evidence for +account+.
#
# Copied from reckon (github.com/iterationlabs/reckon).
#
# For every token produced by +tokenize+, the per-token count for this
# account in +tokens+ and the account's running total in +accounts+ are
# each incremented by one. The account is registered with a total of 0
# even when +data+ yields no tokens.
#
# @param account [Object] key identifying the account being trained
# @param data [Object] whatever +tokenize+ accepts (tokenize is defined
#   outside this listing)
# @return [Object] the value returned by +each+ on the tokenized data
def learn(account, data)
  accounts[account] ||= 0
  tokenize(data).each do |token|
    tokens[token] ||= {}
    tokens[token][account] ||= 0
    tokens[token][account] += 1
    accounts[account] += 1
  end
end