Method: LicenseMatcher::TFRubyMatcher#match_text

Defined in:
lib/license_matcher/tf_ruby_matcher.rb

#match_text(text, min_confidence = DEFAULT_MIN_CONFIDENCE) ⇒ Object

Matches the given text against the SPDX licenses and returns the best match as a Match object (an empty Array is returned instead when the text is empty). Returns:

match - Match {label: String, score: float}


28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/license_matcher/tf_ruby_matcher.rb', line 28

# Scores the given text against every document in the model and reports
# the most similar one.
#
# @param text [#to_s] the text to match
# @param min_confidence [Numeric] the best score must be strictly greater
#   than this value to be reported as a match
# @return [Match, Array] Match built from the id of the best-scoring model
#   document and its score; Match.new("", 0.0) when the best score does not
#   exceed min_confidence or the model holds no documents; an empty Array
#   when text is nil or empty
def match_text(text, min_confidence = DEFAULT_MIN_CONFIDENCE)
  # NOTE: blank input yields an empty Array rather than a Match; kept that
  # way so existing callers that rely on it keep working.
  return [] if text.to_s.empty?

  documents = @model.documents
  n_docs = documents.size
  # Nothing to compare against: report "no match" instead of failing on a
  # nil document index further down.
  return Match.new("", 0.0) if n_docs.zero?

  query_doc = TfIdfSimilarity::Document.new(text, { id: "test" })

  # The model's matrix is read straight from its @matrix instance variable.
  model_matrix = @model.instance_variable_get(:@matrix)
  query_vector = doc_tfidf_matrix(query_doc)

  # Row i of the model matrix is scored against the query document.
  scores = Array.new(n_docs) { |i| cos_sim(model_matrix[i, true], query_vector) }

  # Single pass for the maximum; on ties the lowest document index wins.
  best_score, best_index = scores.each_with_index.max_by { |score, _index| score }

  return Match.new("", 0.0) unless best_score.to_f > min_confidence

  Match.new(documents[best_index].id, best_score)
end