5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
# File 'lib/logstash/filters/jstor.rb', line 5
# Extracts resource metadata from a JSTOR URL.
#
# The input is percent-decoded, parsed as a URI, and its path is matched
# against the known JSTOR URL shapes below (first match wins):
#
# * /journal/<id>                                  -> TOC of a journal
# * /stable/10.x/<title>.<n>.<n>.issue-<n>         -> TOC of an issue
# * /stable/[10.x/]<id>                            -> TOC (DOI kept if present)
# * /action/showPublication?journalCode=<id>       -> TOC of a journal
# * /stable/(get_image|pdf|pdfplus)/[10.x/]<id>    -> article / section / cover
# * /stable/(info|view)/<digits>                   -> abstract / preview
#
# @param input [String] the URL (path plus optional query string) to analyse
# @return [Hash] always contains "provider" => "jstor"; the other keys
#   ("rtype", "mime", "unit_id", "title_id", "doi", "vol", "issue",
#   "publication_date", "first_page") are present only when the matching
#   branch sets them. In the get_image/pdf/pdfplus branch some of them may be
#   present with a nil value when the identifier has no dotted suffix.
# @raise [URI::InvalidURIError] if the decoded input cannot be parsed as a URI
def Jstor.parse(input)
  # URI.unescape was removed in Ruby 3.0; the default parser still provides
  # the same percent-decoding (it does not turn "+" into a space).
  uri = URI(URI::DEFAULT_PARSER.unescape(input))
  url = uri.path
  params = uri.query ? CGI.parse(uri.query) : {}
  data = { "provider" => "jstor" }
  doi_prefix = "10.2307"

  if (match = /^\/journal\/([a-z0-9]+)$/i.match(url))
    data["rtype"] = "TOC"
    data["mime"] = "MISC"
    data["unit_id"] = match[1]
    data["title_id"] = match[1]
  elsif (match = /^\/stable\/10\.[0-9]+\/(([a-z]+)\.([0-9]+)\.([0-9]+)\.issue-([0-9]+))$/i.match(url))
    data["rtype"] = "TOC"
    data["mime"] = "MISC"
    data["unit_id"] = match[1]
    data["title_id"] = match[2]
    data["issue"] = match[5]
    # A first number of four or more digits is taken as the publication
    # date, and the volume is then the second number.
    if match[3].length >= 4
      data["publication_date"] = match[3]
      data["vol"] = match[4]
    else
      data["vol"] = match[3]
    end
  elsif (match = /^\/stable\/((10\.[0-9]+\/)?([a-z0-9]+))$/i.match(url))
    # This pattern also covers issue identifiers such as /stable/i1234567,
    # so no dedicated branch is needed for them.
    data["rtype"] = "TOC"
    data["mime"] = "MISC"
    data["unit_id"] = match[3]
    data["title_id"] = match[3]
    data["doi"] = match[1] if match[2]
  elsif /^\/action\/showPublication$/i.match(url)
    # CGI.parse answers [] for keys that are absent, so the value itself has
    # to be tested rather than the array.
    journal_code = (params["journalCode"] || []).first
    if journal_code
      data["title_id"] = journal_code
      data["unit_id"] = journal_code
      data["rtype"] = 'TOC'
      data["mime"] = 'MISC'
    end
  elsif (match = /^\/stable\/(get_image|pdf|pdfplus)\/((10\.[0-9]+\/)?([a-z0-9.]+?))(?:\.pdf)?$/i.match(url))
    data["unit_id"] = match[4]
    # Identifiers given without a DOI prefix get the default JSTOR one.
    data["doi"] = match[3] ? match[2] : "#{doi_prefix}/#{match[2]}"

    # The URL pattern is case-insensitive, so normalise before dispatching.
    case match[1].downcase
    when 'get_image'
      data["rtype"] = "ARTICLE_SECTION"
      data["mime"] = "GIF"
    when 'pdf'
      data["rtype"] = "ARTICLE"
      data["mime"] = "PDF"
    when 'pdfplus'
      data["rtype"] = "ARTICLE"
      data["mime"] = "PDFPLUS"
    end

    # Identifier layout: <title>[.<date>].<vol>.<issue>.<suffix>
    # NOTE(review): unlike the URL pattern this one is case-sensitive, so an
    # upper-case identifier yields nil fields - confirm whether intended.
    id_parts = /^([a-z0-9]+)((?:\.(\d+))?\.(\d+)\.(\d+)\.(\w+))?/.match(match[4]) || []
    data["title_id"] = id_parts[1]
    data["publication_date"] = id_parts[3]
    data["vol"] = id_parts[4]
    data["issue"] = id_parts[5]

    suffix = id_parts[6]
    case suffix
    when 'cover'
      data["rtype"] = 'COVER'
    when 'toc'
      data["rtype"] = 'TOC'
    else
      # Only a suffix that starts with digits is a page number; String#to_i
      # would silently turn anything else into 0.
      leading_digits = suffix && suffix[/\A\d+/]
      data["first_page"] = leading_digits.to_i.to_s if leading_digits
    end
  elsif (match = /^\/stable\/(info|view)\/([0-9]+)$/i.match(url))
    data["rtype"] = match[1].downcase == 'info' ? "ABS" : "PREVIEW"
    data["mime"] = "MISC"
    data["unit_id"] = match[2]
    data["title_id"] = match[2]
  end

  data
end
|