Class: TotalRecall::AccountGuesser
- Inherits: Object
- Inheritance chain: Object → TotalRecall::AccountGuesser
- Defined in:
- lib/total_recall.rb
Instance Attribute Summary collapse
-
#accounts ⇒ Object
readonly
Returns the value of attribute accounts.
-
#tokens ⇒ Object
readonly
Returns the value of attribute tokens.
Instance Method Summary collapse
-
#guess(data) ⇒ Object
Copied from reckon (github.com/iterationlabs/reckon).
-
#initialize ⇒ AccountGuesser
constructor
A new instance of AccountGuesser.
-
#learn(account, data) ⇒ Object
Copied from reckon (github.com/iterationlabs/reckon).
Constructor Details
#initialize ⇒ AccountGuesser
Returns a new instance of AccountGuesser.
136 137 138 139 |
# File 'lib/total_recall.rb', line 136
# Builds a guesser that has not learned anything yet: both lookup tables
# start out as separate, empty hashes.
def initialize
  @accounts = {}
  @tokens = {}
end
Instance Attribute Details
#accounts ⇒ Object (readonly)
Returns the value of attribute accounts.
134 135 136 |
# File 'lib/total_recall.rb', line 134
# Read-only access to the account table: a hash keyed by account whose
# value is incremented once per token learned for that account.
def accounts
  @accounts
end
#tokens ⇒ Object (readonly)
Returns the value of attribute tokens.
134 135 136 |
# File 'lib/total_recall.rb', line 134
# Read-only access to the token table: a hash keyed by token whose value
# is a nested hash of account => number of times that token was learned
# for that account.
def tokens
  @tokens
end
Instance Method Details
#guess(data) ⇒ Object
Copied from reckon (github.com/iterationlabs/reckon).
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
# File 'lib/total_recall.rb', line 142
# Picks the learned account whose token statistics best match +data+.
#
# Every token of the query contributes one dimension. In that dimension the
# query is weighted by tf*idf (tf = 1 / number of query tokens) and each
# account by its own tf*idf (tf = times the token was learned for the
# account / total tokens learned for the account). Accounts are ranked by
# the dot product of their vector with the query vector; the vectors are
# not length-normalized.
#
# @param data raw input handed to +tokenize+
# @return the best-scoring account key, or nil when there is nothing to
#   rank (no accounts learned, or +data+ produced no tokens). On equal
#   scores the account that was learned first wins.
def guess(data)
  query_tokens = tokenize(data)
  # Identical for every query token, so compute it once up front.
  query_tf = 1.0 / query_tokens.length.to_f
  account_total = accounts.length

  search_vector = []
  account_vectors = {}

  query_tokens.each do |token|
    learned_counts = tokens[token] || {}
    # The +1 on both sides keeps the ratio finite for tokens never seen.
    idf = Math.log((account_total + 1) / (learned_counts.length.to_f + 1))
    search_vector << query_tf * idf

    accounts.each do |account, total_terms|
      occurrences = learned_counts[account]
      account_tf = occurrences ? occurrences / total_terms.to_f : 0
      (account_vectors[account] ||= []) << account_tf * idf
    end
  end

  # Should I normalize the vectors?  Probably unnecessary due to tf-idf and short documents.
  scored = account_vectors.map do |account, account_vector|
    score = search_vector.zip(account_vector).inject(0) do |sum, (query_weight, account_weight)|
      sum + query_weight * account_weight
    end
    [account, score]
  end

  # Only the top entry is needed, so take the maximum instead of sorting.
  best_account, _best_score = scored.max_by { |_account, score| score }
  best_account
end
#learn(account, data) ⇒ Object
Copied from reckon (github.com/iterationlabs/reckon).
171 172 173 174 175 176 177 178 179 |
# File 'lib/total_recall.rb', line 171
# Records that +data+ belongs to +account+.
#
# For every token produced by +tokenize+, the per-token count for this
# account and the account's running total are each incremented by one.
# The account is registered with a total of 0 even when +data+ yields no
# tokens.
#
# @param account key under which the counts are stored
# @param data raw input handed to +tokenize+
# @return whatever +each+ returns for the token collection
def learn(account, data)
  accounts[account] ||= 0

  tokenize(data).each do |token|
    per_account = (tokens[token] ||= {})
    per_account[account] = (per_account[account] || 0) + 1
    accounts[account] += 1
  end
end