Method: Clusterer::DocumentArray#normalize!

Defined in:
lib/clusterer/document_array.rb

#normalize!(idf = nil, add_term = false) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/clusterer/document_array.rb', line 48

# Weights every slot of this vector, rescales the vector to unit Euclidean
# length in place, and freezes it.
#
# Each slot (nil slots count as 0) is multiplied by a weight:
# * when +add_term+ is true, the value returned by <tt>idf << term</tt>;
# * otherwise, when +idf+ is given, <tt>idf[term]</tt>;
# * otherwise 1.0.
# Here +term+ is whatever @@term_array_position_mapper associates with the
# slot's position.
#
# The weighted slots are then divided by the square root of the sum of their
# squares. If that norm is zero the slots are divided by 1 instead, so an
# all-zero vector stays all-zero. @vector_length is set to 1.0 in every case.
#
# @param idf [Object, nil] weighting source; must respond to +[]+, and to
#   +<<+ and +increment_documents_count+ when +add_term+ is true
# @param add_term [Boolean] when true, first calls
#   <tt>idf.increment_documents_count</tt> and obtains weights via
#   <tt>idf << term</tt> (so +idf+ must not be nil in that case)
# @return [Object] the result of +freeze+, i.e. this (now frozen) vector
def normalize!(idf = nil, add_term = false)
  idf.increment_documents_count if add_term

  mapper = @@term_array_position_mapper
  last_position = mapper.size - 1
  # Pad the vector out to one slot per known term. Guarded because
  # `self[-1] ||= 0.0` raises IndexError on an empty array.
  self[last_position] ||= 0.0 if last_position >= 0

  # Position -> term lookup. Presumably the mapper is a Hash of
  # term => position (TODO confirm); Hash#index is long deprecated in favour
  # of Hash#key and unavailable in current Ruby, so prefer #key when present.
  term_for = mapper.method(mapper.respond_to?(:key) ? :key : :index)

  sum_of_squares = 0.0
  each_with_index do |frequency, ind|
    weight =
      if idf
        term = term_for.call(ind)
        # NOTE(review): `idf << term` runs for every slot, including slots
        # whose frequency is nil/0 -- confirm that is the intended accounting.
        add_term ? (idf << term) : idf[term]
      else
        1.0
      end
    self[ind] = (frequency || 0) * weight
    sum_of_squares += self[ind]**2
  end

  norm = Math.sqrt(sum_of_squares)
  norm = 1 if norm.zero? # avoid dividing an all-zero vector by zero
  map! { |value| value / norm }
  @vector_length = 1.0
  freeze
end