Class: LicenseMatcher::UrlMatcher

Inherits:
Object
  • Object
show all
Defined in:
lib/license_matcher/url_matcher.rb

Constant Summary collapse

DEFAULT_LICENSE_JSON =
'data/licenses.json'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(license_json_file = DEFAULT_LICENSE_JSON) ⇒ UrlMatcher

Returns a new instance of UrlMatcher.



7
8
9
10
11
12
# File 'lib/license_matcher/url_matcher.rb', line 7

# Builds a matcher backed by the license definitions in a JSON file.
#
# @param license_json_file [String] path to the licenses JSON document;
#   defaults to DEFAULT_LICENSE_JSON
# @raise [RuntimeError] when the JSON document could not be read
#   (read_json_file returned nil)
def initialize(license_json_file = DEFAULT_LICENSE_JSON)
  parsed_licenses = read_json_file(license_json_file)
  if parsed_licenses.nil?
    raise "Failed to read licenses.json"
  end

  # URL => license id lookup consulted by match_url.
  @url_index = read_license_url_index(parsed_licenses)
end

Instance Attribute Details

#url_index ⇒ Object (readonly)

Returns the value of attribute url_index.



3
4
5
# File 'lib/license_matcher/url_matcher.rb', line 3

# Read-only accessor for the URL index assigned in the constructor.
#
# @return [Object] the current value of @url_index
def url_index; @url_index; end

Instance Method Details

#match_text(url_txt, min_confidence = 0.0) ⇒ Object



14
15
16
17
18
19
20
21
# File 'lib/license_matcher/url_matcher.rb', line 14

# Wraps the result of match_url in a Match object.
#
# @param url_txt [#to_s] text holding the license URL; it is stringified and
#   stripped before matching
# @param min_confidence [Float] accepted for interface compatibility; this
#   method does not currently read it
# @return [Match] Match(id, score) for a recognised URL, otherwise
#   Match("", 0.0)
def match_text(url_txt, min_confidence = 0.0)
  license_id, confidence = match_url(url_txt.to_s.strip)

  # match_url yields no id when nothing matched.
  return Match.new("", 0.0) unless license_id

  Match.new(license_id.to_s, confidence.to_f)
end

#match_url(the_url) ⇒ Object

Matches a license URL against the URLs in licenses.json and returns a tuple [spdx_id, score], or an empty array when nothing matches.



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/license_matcher/url_matcher.rb', line 24

# Resolves a license URL to a license id.
#
# Lookup order:
# 1. a fixed list of well-known license pages,
# 2. choosealicense.com/licenses/<id> URLs,
# 3. creativecommons.org/licenses/<path> URLs (reported as "cc-<path>" with
#    slashes turned into dashes),
# 4. the URLs stored in @url_index, compared regardless of scheme and "www."
#    prefix.
#
# @param the_url [#to_s] the URL (or text containing it) to look up
# @return [Array] [license_id, 1.0] on a match, [] when nothing matches
def match_url(the_url)
  the_url = the_url.to_s.strip

  case the_url
  when /jquery\.org\/license/i
    return ['mit', 1.0] # jQuery's license page doesn't include any license text
  when /mozilla\.org\/en-US\/MPL/i
    return ['mpl-2.0', 1.0]
  when /fairlicense\.org/i
    return ['fair', 1.0]
  when /aforgenet\.com\/framework\/license/i
    return ['lgpl-3.0', 1.0]
  when /apache\.org\/licenses/i
    return ['apache-2.0', 1.0]
  when /aws\.amazon\.com\/apache2\.0/i
    return ['apache-2.0', 1.0]
  when /aws\.amazon\.com\/asl/i
    return ['amazon', 1.0]
  when /choosealicense\.com\/no-license/i
    return ['no-license', 1.0]
  # The separator is optional so both "zlib_license" (the published page
  # name) and "zliblicense" are recognised. The previous `zlib?license`
  # made the "b" optional instead and never matched the real URL.
  when /gzip\.org\/zlib\/zlib[_-]?license/i
    return ['zlib', 1.0]
  when /zlib\.net\/zlib[_-]?license/i
    return ['zlib', 1.0]
  when /wtfpl\.net\/about/i
    return ['wtfpl', 1.0]
  end

  # does the url match choosealicense.com?
  match = the_url.match(/\bhttps?:\/\/(www\.)?choosealicense\.com\/licenses\/([\S|^\/]+)[\/]?\b/i)
  return [match[2].to_s.downcase, 1.0] if match

  match = the_url.match(/\bhttps?:\/\/(www\.)?creativecommons\.org\/licenses\/([\S|^\/]+)[\/]?\b/i)
  return ["cc-#{match[2].to_s.gsub(/\//, '-')}", 1.0] if match

  # check through the indexed (SPDX) urls
  hit = @url_index.find do |lic_url, _lic_id|
    # normalizes urls in the file: drop the scheme and any "www."
    bare_url = lic_url.to_s.strip.gsub(/https?:\/\//i, '').gsub(/www\./, '')

    # A blank entry would turn into a pattern that matches every http(s) URL.
    next false if bare_url.empty?

    # Escape the stored URL so ".", "?", "(" etc. are compared literally
    # instead of being interpreted as regexp syntax.
    /https?:\/\/(www\.)?#{Regexp.escape(bare_url)}/i.match(the_url)
  end

  return [] if hit.nil?

  [hit.last.to_s.downcase, 1.0]
end

#process_spdx_item(lic) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
# File 'lib/license_matcher/url_matcher.rb', line 88

# Builds the URL => license id entries for a single license record.
#
# The id is taken from lic[:id] (stripped and downcased); URLs are collected
# from the :url field of every entry in lic[:links] and lic[:text].
#
# @param lic [Hash] one license record with symbol keys
# @return [Hash] map of URL => license id; empty when the record has no id
def process_spdx_item(lic)
  url_index = {}
  lic_id = lic[:id].to_s.strip.downcase

  return url_index if lic_id.empty?

  (lic[:links].to_a + lic[:text].to_a).each do |entry|
    url = entry[:url]

    # Entries without a URL would otherwise be indexed under nil / "" and
    # later be compared against every looked-up URL.
    next if url.to_s.strip.empty?

    url_index[url] = lic_id
  end

  url_index
end

#read_json_file(file_path) ⇒ Object



100
101
102
103
104
105
# File 'lib/license_matcher/url_matcher.rb', line 100

# Loads a JSON document from disk with symbolized keys.
#
# Any StandardError raised while reading or parsing is logged at info level
# and swallowed.
#
# @param file_path [String] path of the JSON file
# @return [Object, nil] the parsed document, or nil when reading/parsing failed
def read_json_file(file_path)
  begin
    raw_json = File.read(file_path)
    JSON.parse(raw_json, symbolize_names: true)
  rescue StandardError
    log.info "Failed to read json file `#{file_path}`"
    nil
  end
end

#read_license_url_index(spdx_licenses) ⇒ Object

Reads license URLs from licenses.json and builds a map of URL => spdx_id.



81
82
83
84
85
# File 'lib/license_matcher/url_matcher.rb', line 81

# Merges the per-license URL maps produced by process_spdx_item into a
# single lookup table. Later records overwrite earlier ones on a shared URL.
#
# @param spdx_licenses [Enumerable] the license records to index
# @return [Hash] combined URL => license id map
def read_license_url_index(spdx_licenses)
  spdx_licenses.each_with_object({}) do |license, combined|
    combined.merge!(process_spdx_item(license))
  end
end