48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
# File 'lib/clusterer/document_array.rb', line 48
# Weights every entry of this array by a per-term factor and then rescales
# the array in place to unit Euclidean length.
#
# The array is first extended so that it has a slot for the last position
# known to the class-level @@term_array_position_mapper; slots that are nil
# are treated as a frequency of 0.
#
# @param idf [Object, nil] source of per-term weights. When nil, every term
#   is weighted by 1.0. Otherwise it must respond to #[] (and, when add_term
#   is true, to #<< and #increment_documents_count).
# @param add_term [Boolean] when true, increments the document count on idf
#   once and registers each term with `idf << term`, using the value that
#   call returns as the weight. Requires a non-nil idf.
# @return [self] this array, normalized and frozen.
def normalize!(idf = nil, add_term = false)
  idf.increment_documents_count if add_term

  # Make sure the array spans every known term position. With an empty
  # mapper the index would be -1, and assigning to self[-1] on an empty
  # array raises IndexError, so skip the padding in that case.
  last_position = @@term_array_position_mapper.size - 1
  self[last_position] ||= 0.0 if last_position >= 0

  sum_of_squares = 0.0
  each_with_index do |frequency, ind|
    weight =
      if idf
        # Presumably the mapper maps term => position, so this is a reverse
        # lookup of the term stored at position ind.
        # NOTE(review): if the mapper is a Hash, Hash#index is deprecated in
        # favour of Hash#key -- confirm and switch when the class is visible.
        term = @@term_array_position_mapper.index(ind)
        add_term ? (idf << term) : idf[term]
      else
        1.0
      end
    self[ind] = (frequency || 0) * weight
    sum_of_squares += self[ind]**2
  end

  norm = Math.sqrt(sum_of_squares)
  norm = 1 if norm.zero? # all-zero vector: avoid dividing by zero
  map! { |weighted| weighted / norm }

  @vector_length = 1.0
  freeze
end
|