Method: LicenseMatcher::TFRubyMatcher#doc_tfidf_matrix

Defined in:
lib/license_matcher/tf_ruby_matcher.rb

#doc_tfidf_matrix(doc) ⇒ Object

Transforms a document into the TF-IDF matrix used for comparison.



59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/license_matcher/tf_ruby_matcher.rb', line 59

# Builds the TF-IDF score vector of +doc+ over the model's term list.
#
# One score is produced per entry of <tt>@model.terms</tt>, in the same order.
# A term that the document does not contain scores 0.0; otherwise the score is
# <tt>model.idf(term) * model.tf(doc, term)</tt>.
#
# @param doc [#term_count] document to score; it is also handed to +model.tf+
# @return [Object] whatever <tt>NArray[*scores]</tt> builds from the scores
#   (presumably a one-dimensional NArray -- confirm against the narray gem)
def doc_tfidf_matrix(doc)
  scores = @model.terms.map do |term|
    # Skip the idf/tf lookups entirely for terms absent from the document.
    next 0.0 unless doc.term_count(term) > 0

    model.idf(term) * model.tf(doc, term)
  end

  NArray[*scores]
end