Class: LicenseMatcher::UrlMatcher
- Inherits:
-
Object
- Object
- LicenseMatcher::UrlMatcher
- Defined in:
- lib/license_matcher/url_matcher.rb
Constant Summary collapse
- DEFAULT_LICENSE_JSON =
'data/licenses.json'
Instance Attribute Summary collapse
-
#url_index ⇒ Object
readonly
Returns the value of attribute url_index.
Instance Method Summary collapse
-
#initialize(license_json_file = DEFAULT_LICENSE_JSON) ⇒ UrlMatcher
constructor
A new instance of UrlMatcher.
- #match_text(url_txt, min_confidence = 0.0) ⇒ Object
-
#match_url(the_url) ⇒ Object
Matches a license URL against hard-coded license pages and the URLs listed in licenses.json, and returns the tuple [spdx_id, score] (an empty array when nothing matches).
- #process_spdx_item(lic) ⇒ Object
- #read_json_file(file_path) ⇒ Object
-
#read_license_url_index(spdx_licenses) ⇒ Object
Reads license URLs from licenses.json and builds a map of URL => spdx_id.
Constructor Details
#initialize(license_json_file = DEFAULT_LICENSE_JSON) ⇒ UrlMatcher
Returns a new instance of UrlMatcher.
7 8 9 10 11 12 |
# File 'lib/license_matcher/url_matcher.rb', line 7
# Loads the license JSON document and builds the URL index from it.
#
# @param license_json_file [String] path handed to read_json_file
# @raise [RuntimeError] when read_json_file returns nil
def initialize(license_json_file = DEFAULT_LICENSE_JSON)
  parsed_doc = read_json_file(license_json_file)
  if parsed_doc.nil?
    raise "Failed to read licenses.json"
  end

  @url_index = read_license_url_index(parsed_doc)
end
Instance Attribute Details
#url_index ⇒ Object (readonly)
Returns the value of attribute url_index.
3 4 5 |
# File 'lib/license_matcher/url_matcher.rb', line 3 def url_index @url_index end |
Instance Method Details
#match_text(url_txt, min_confidence = 0.0) ⇒ Object
14 15 16 17 18 19 20 21 |
# File 'lib/license_matcher/url_matcher.rb', line 14
# Looks up a URL with match_url and wraps the outcome in a Match.
#
# @param url_txt [#to_s] URL text; converted with to_s and stripped before lookup
# @param min_confidence [Float] accepted but not used by this method
# @return [Match] Match.new(spdx_id, score) on a hit, Match.new("", 0.0) otherwise
def match_text(url_txt, min_confidence = 0.0)
  cleaned_url = url_txt.to_s.strip
  spdx_id, score = match_url(cleaned_url)
  return Match.new("", 0.0) unless spdx_id

  Match.new(spdx_id.to_s, score.to_f)
end
#match_url(the_url) ⇒ Object
Matches a license URL against hard-coded license pages and the URLs listed in licenses.json, and returns the tuple [spdx_id, score] (an empty array when nothing matches)
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/license_matcher/url_matcher.rb', line 24
# Matches a license URL against hard-coded license pages, the
# choosealicense.com and creativecommons.org URL schemes, and finally the
# URLs stored in @url_index. The first rule that matches wins.
#
# @param the_url [#to_s] URL to look up; converted with to_s and stripped
# @return [Array] [spdx_id, 1.0] on a match, [] when nothing matches
def match_url(the_url)
  the_url = the_url.to_s.strip

  case the_url
  when %r{jquery\.org/license}i
    return ['mit', 1.0] # jQuery license page doesn't include any license text
  when %r{mozilla\.org/en-US/MPL}i
    return ['mpl-2.0', 1.0]
  when /fairlicense\.org/i
    return ['fair', 1.0]
  when %r{aforgenet\.com/framework/license}i
    return ['lgpl-3.0', 1.0]
  when %r{apache\.org/licenses}i
    return ['apache-2.0', 1.0]
  when %r{aws\.amazon\.com/apache2\.0}i
    return ['apache-2.0', 1.0]
  when %r{aws\.amazon\.com/asl}i
    return ['amazon', 1.0]
  when %r{choosealicense\.com/no-license}i
    return ['no-license', 1.0]
  # The separator must be matched explicitly: the former `zlib?license`
  # made the "b" optional and never matched ".../zlib_license.html".
  when %r{gzip\.org/zlib/zlib[_-]?license}i
    return ['zlib', 1.0]
  when %r{zlib\.net/zlib[_-]?license}i
    return ['zlib', 1.0]
  when %r{wtfpl\.net/about}i
    return ['wtfpl', 1.0]
  end

  # choosealicense.com: the license id is the single path segment after
  # /licenses/, so stop at the next slash, query string or fragment.
  match = the_url.match(%r{\bhttps?://(?:www\.)?choosealicense\.com/licenses/([^\s/?#]+)\b}i)
  return [match[1].downcase, 1.0] if match

  # creativecommons.org: the id spans several path segments (e.g. by/4.0),
  # which are joined with dashes; query string and fragment are excluded.
  match = the_url.match(%r{\bhttps?://(?:www\.)?creativecommons\.org/licenses/([^\s?#]+)/?\b}i)
  return ["cc-#{match[1].downcase.tr('/', '-')}", 1.0] if match

  # Check through the SPDX urls.
  @url_index.each do |lic_url, lic_id|
    # Normalize the indexed URL by dropping its leading scheme and "www.".
    bare_url = lic_url.to_s.strip.sub(%r{\Ahttps?://}i, '').sub(/\Awww\./i, '')
    # A blank entry would produce a pattern that matches every http(s) URL.
    next if bare_url.empty?

    # Escape the URL so ".", "?", "(" etc. are matched literally.
    matcher = Regexp.new("https?://(www\\.)?#{Regexp.escape(bare_url)}", Regexp::IGNORECASE)
    return [lic_id.to_s.downcase, 1.0] if matcher.match(the_url)
  end

  []
end
#process_spdx_item(lic) ⇒ Object
88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/license_matcher/url_matcher.rb', line 88
# Builds a url => license id map for a single license entry.
#
# The id is taken from lic[:id], stripped and downcased. URLs are read from
# the :url key of every item under lic[:links] and lic[:text]. Items whose
# URL is nil or blank are skipped so the index never gets an empty key.
#
# @param lic [Hash] license entry with :id and optional :links / :text lists
# @return [Hash] map of url => license id; empty when the id is blank
def process_spdx_item(lic)
  url_index = {}
  lic_id = lic[:id].to_s.strip.downcase
  return url_index if lic_id.empty?

  (lic[:links].to_a + lic[:text].to_a).each do |item|
    url = item[:url]
    next if url.to_s.strip.empty?

    url_index[url] = lic_id
  end

  url_index
end
#read_json_file(file_path) ⇒ Object
100 101 102 103 104 105 |
# File 'lib/license_matcher/url_matcher.rb', line 100
# Reads a file and parses it as JSON with symbolized keys.
#
# @param file_path [String] path of the JSON file
# @return [Object, nil] the parsed document, or nil when reading or parsing
#   raises a StandardError (the failure is reported through log.info)
def read_json_file(file_path)
  begin
    raw_json = File.read(file_path)
    JSON.parse(raw_json, symbolize_names: true)
  rescue StandardError
    log.info "Failed to read json file `#{file_path}`"
    nil
  end
end
#read_license_url_index(spdx_licenses) ⇒ Object
Reads license URLs from licenses.json and builds a map of URL => spdx_id
81 82 83 84 85 |
# File 'lib/license_matcher/url_matcher.rb', line 81
# Merges the per-license maps returned by process_spdx_item into one hash.
#
# @param spdx_licenses [#each] license entries, each passed to process_spdx_item
# @return [Hash] combined map; later entries overwrite earlier ones on equal keys
def read_license_url_index(spdx_licenses)
  spdx_licenses.each_with_object({}) do |license_entry, combined|
    combined.merge!(process_spdx_item(license_entry))
  end
end