Class: LicenseAuto::Similarity

Inherits:
Object
  • Object
show all
Defined in:
lib/license_auto/license/similarity.rb

Constant Summary collapse

SIM_RATIO =

Expected similarity ratio

0.85

Instance Method Summary collapse

Constructor Details

#initialize(license_content) ⇒ Similarity

Returns a new instance of Similarity.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/license_auto/license/similarity.rb', line 12

# Builds the similarity matrix used to compare +license_content+ against the
# license templates that exist on disk.
#
# Template names are taken from LICENSE_SORTED_FREQUENCY in order; names for
# which abs_filename_path returns nil (no template file) are skipped. The
# document built from +license_content+ is appended last, so it is the final
# entry of @license_template_documents.
#
# @param license_content text of the license to identify; handed unchanged to
#   TfIdfSimilarity::Document.new
def initialize(license_content)
  # Resolve every template path exactly once; compact drops the missing ones.
  template_paths = LICENSE_SORTED_FREQUENCY.map { |template_name|
    abs_filename_path(template_name)
  }.compact

  @license_template_documents = template_paths.map { |template_path|
    TfIdfSimilarity::Document.new(File.read(template_path))
  }
  @license_template_documents.push(
      TfIdfSimilarity::Document.new(license_content)
  )

  model = TfIdfSimilarity::TfIdfModel.new(@license_template_documents)
  @matrix = model.similarity_matrix
end

Instance Method Details

#abs_filename_path(template_name) ⇒ Object



30
31
32
33
34
35
36
37
38
39
# File 'lib/license_auto/license/similarity.rb', line 30

# Resolves the on-disk template file for +template_name+.
#
# The candidate path is "../templates/<template_name>.txt" expanded against
# __FILE__.
#
# @param template_name name interpolated into the template file name
# @return [String, nil] the expanded path when a regular file exists there;
#   otherwise nil, after logging an info message
def abs_filename_path(template_name)
  template_path = File.expand_path("../templates/#{template_name}.txt", __FILE__)
  return template_path if File.file?(template_path)

  LicenseAuto.logger.info("License template file not exist: #{template_path} !")
  nil
end

#most_license_sim ⇒ Object

TODO: what happens if every similarity ratio is 0.00?



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/license_auto/license/similarity.rb', line 42

# Finds the license template most similar to the target license document.
#
# The target is the last entry of @license_template_documents; its
# similarities to the preceding template documents are read from @matrix at
# [target_index, template_index].
#
# @return [Array] +[license_name, max_sim_ratio]+ for the best-matching
#   template, or +[nil, 0.0]+ when there are no template documents to
#   compare against
def most_license_sim
  target_index = @license_template_documents.count - 1

  # Exclusive range: with zero templates this is empty. The inclusive form
  # 0..(target_index - 1) would become 0..-1 and select the target itself.
  sim_ratios = (0...target_index).map { |template_index|
    @matrix[target_index, template_index]
  }
  return [nil, 0.0] if sim_ratios.empty?

  max_sim_ratio = sim_ratios.max
  sim_license_index = sim_ratios.index(max_sim_ratio)

  # The template documents were built only from names whose template file
  # exists, so apply the same filter here to keep names and matrix columns
  # aligned. Indexing LICENSE_SORTED_FREQUENCY directly would return the wrong
  # name whenever a template file is missing.
  template_names = LICENSE_SORTED_FREQUENCY.reject { |template_name|
    abs_filename_path(template_name).nil?
  }
  license_name = template_names[sim_license_index]

  LicenseAuto.logger.debug("License: #{license_name}, Ratio: #{max_sim_ratio}")

  [license_name, max_sim_ratio]
end