60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
# File 'lib/wikimedia/commoner.rb', line 60
# Collects metadata about a Wikimedia Commons file by combining the JSON
# returned by +json_get(info_uri(title))+ with values scraped from the file's
# HTML description page.
#
# @param title [String] a page title or URL containing "File:..."; anything
#   before "File:" and any "#fragment" after it is ignored
# @return [nil] when +title+ does not contain "File:"
# @return [Hash] +{}+ when no JSON response or no page entry is available,
#   +{ description: 'missing' }+ when the page entry carries a "missing" key,
#   otherwise a hash with the keys :categories, :url, :page_url, :description,
#   :author, :author_url, :licence, :licence_url, :longitude, :latitude and
#   :openplaques_id (values that could not be found are nil or '')
def details(title)
  title = title.to_s[/File:[^#]*/]
  return nil unless title

  # NOTE(review): json_get / info_uri are defined elsewhere; presumably they
  # query the MediaWiki API and return the parsed JSON (or nil on failure).
  response = json_get(info_uri(title))
  return {} if response.nil?

  page = ((response['query'] || {})['pages'] || {}).values.first
  return {} if page.nil?
  return { description: 'missing' } unless page['missing'].nil?

  # The "categories" key may be absent, so fall back to an empty list.
  categories = (page['categories'] || []).map do |category|
    category['title'].gsub(/^Category:/, '')
  end

  imageinfo = page['imageinfo'].first
  extmetadata = imageinfo['extmetadata'] || {}
  descriptionurl = imageinfo['descriptionurl']
  licence = (extmetadata['LicenseShortName'] || {})['value']
  licence_url = (extmetadata['LicenseUrl'] || {})['value']
  if categories.include?('CC-PD-Mark')
    licence = 'CC-PD-Mark'
    licence_url = 'http://creativecommons.org/publicdomain/mark/1.0'
  end
  licence_url = 'https://en.wikipedia.org/wiki/Public_domain' if licence == 'Public domain' && licence_url.nil?

  # NOTE(review): `:verify => false` turns off TLS certificate verification
  # for this request. Kept so existing deployments keep working; consider
  # removing it once the CA bundle in use is confirmed to be adequate.
  party = HTTParty.get(descriptionurl, :verify => false)
  doc = Nokogiri::HTML(party.to_s)

  # Prefer the "creator" span; otherwise use the second cell of the author
  # row (the first cell is the one matched by the fileinfotpl_aut id).
  author_node = doc.xpath('//span[@id="creator"]')[0]
  author_node ||= doc.xpath('//tr[td/@id="fileinfotpl_aut"]/td')[1]
  author_name = author_node ? Sanitize.clean(author_node.content) : ''

  author_link = doc.xpath('//span[@id="creator"]/*/a/@href')[0] ||
                doc.xpath('//tr[td/@id="fileinfotpl_aut"]/td/a/@href')[0]
  author_url = author_link ? author_link.content : ''
  author_url = "http://commons.wikimedia.org#{author_url}" if author_url.start_with?('/wiki/User:')

  # Non-bang strip: strip! returns nil when there is nothing to remove,
  # which would discard a description that is already clean.
  description_node = doc.xpath('//td[@class="description"]')[0]
  description = description_node ? Sanitize.clean(description_node.content)[0, 255].strip : ''

  # String#[regexp, 1] yields nil instead of raising when the link does not
  # carry the expected "params=<lat>_N_<lon>_E" segment.
  geohack = doc.xpath("//a[contains(@href, 'tools.wmflabs.org/geohack')]/@href")[0]
  latitude = geohack && geohack.value[/params=(-?\d+(?:\.\d+)?)/, 1]
  longitude = geohack && geohack.value[/params=.*_[NS]_(-?\d+(?:\.\d+)?)/, 1]

  plaque_link = doc.xpath("//a[contains(@href, 'openplaques.org/plaques')]/@href")[0]
  openplaques_id = plaque_link && plaque_link.value[%r{plaques/(\d*)}, 1]

  {
    categories: categories,
    url: imageinfo['url'],
    page_url: "https://commons.wikimedia.org/wiki/#{title}",
    description: description,
    author: author_name,
    author_url: author_url,
    licence: licence,
    licence_url: licence_url,
    longitude: longitude,
    latitude: latitude,
    openplaques_id: openplaques_id
  }
end
|