Class: LicenseAuto::Similarity
- Inherits:
-
Object
- Object
- LicenseAuto::Similarity
- Defined in:
- lib/license_auto/license/similarity.rb
Constant Summary collapse
- SIM_RATIO =
Expected similarity ratio
0.85
Instance Method Summary collapse
- #abs_filename_path(template_name) ⇒ Object
-
#initialize(license_content) ⇒ Similarity
constructor
A new instance of Similarity.
-
#most_license_sim ⇒ Object
TODO: what will happen if all is 0.00?
Constructor Details
#initialize(license_content) ⇒ Similarity
Returns a new instance of Similarity.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/license_auto/license/similarity.rb', line 12
#
# Builds one TfIdfSimilarity document per available license template, appends
# a document for the license text under test, and stores the resulting
# similarity matrix in @matrix.
#
# @param license_content [String] text of the license to compare against the
#   templates (passed straight to TfIdfSimilarity::Document.new)
def initialize(license_content)
  # LicenseAuto.logger.debug(license_content)

  # Resolve every template path exactly once. abs_filename_path returns nil
  # (after logging) for templates that have no file, and compact drops those.
  template_paths = LICENSE_SORTED_FREQUENCY.map { |template_name|
    abs_filename_path(template_name)
  }.compact

  @license_template_documents = template_paths.map { |abs_file|
    TfIdfSimilarity::Document.new(File.read(abs_file))
  }

  # The document under test is always the LAST entry, after all templates.
  @license_template_documents.push(
    TfIdfSimilarity::Document.new(license_content)
  )

  model = TfIdfSimilarity::TfIdfModel.new(@license_template_documents)
  @matrix = model.similarity_matrix

  # LicenseAuto.logger.debug(@license_template_documents)
  # LicenseAuto.logger.debug(@matrix[0, 2])
end
Instance Method Details
#abs_filename_path(template_name) ⇒ Object
30 31 32 33 34 35 36 37 38 39 |
# File 'lib/license_auto/license/similarity.rb', line 30
#
# Resolves a license template name to the absolute path of its text file,
# "templates/<template_name>.txt" in the directory containing this source file.
#
# @param template_name [#to_s] template base name, interpolated into the
#   file name without the ".txt" extension
# @return [String, nil] the absolute path when a regular file exists there;
#   nil otherwise (an info message is logged in that case)
def abs_filename_path(template_name)
  filename_path = "../templates/#{template_name}.txt"
  # Expanding relative to __FILE__ itself: the leading ".." strips this file's
  # own name, leaving its directory.
  resolved_path = File.expand_path(filename_path, __FILE__)
  return resolved_path if FileTest.file?(resolved_path)

  LicenseAuto.logger.info("License template file not exist: #{resolved_path} !")
  nil
end
#most_license_sim ⇒ Object
TODO: what will happen if all is 0.00?
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/license_auto/license/similarity.rb', line 42
#
# Finds the license template whose document is most similar to the license
# text under test.
#
# The license text is the last entry of @license_template_documents, so its
# row of @matrix is read against every document that precedes it. When several
# templates share the highest ratio (for example when all ratios are 0.0), the
# earliest one wins.
#
# @return [Array] a two-element array [license_name, max_sim_ratio];
#   [nil, 0.0] when there are no template documents to compare against
def most_license_sim
  license_file_index = @license_template_documents.count - 1

  # Exclusive range: with zero templates this is empty. An inclusive 0..-1
  # would instead span the whole array and compare the input with itself.
  sim_ratios = (0...license_file_index).map { |index|
    @matrix[license_file_index, index]
  }
  return [nil, 0.0] if sim_ratios.empty?

  max_sim_ratio = sim_ratios.max
  sim_license_index = sim_ratios.index(max_sim_ratio)

  # Template documents exist only for names whose file was found, so the index
  # must be looked up in that same filtered list, not in the full constant.
  available_names = LICENSE_SORTED_FREQUENCY.reject { |template_name|
    abs_filename_path(template_name).nil?
  }
  license_name = available_names[sim_license_index]

  debug = "License: #{license_name}, Ratio: #{max_sim_ratio}"
  LicenseAuto.logger.debug(debug)

  [license_name, max_sim_ratio]
end