6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
# File 'lib/logstash/filters/emerald.rb', line 6
# Builds a description hash for a URL by matching its path against the
# route patterns listed below.
#
# The input is percent-decoded before it is parsed, so escaped characters in
# the path take part in the pattern matching. Only the path component is
# inspected; the query string is ignored.
#
# @param input [String] the URL (or bare path) to analyse, possibly
#   percent-encoded
# @return [Hash] always contains "provider" => "emerald". Depending on the
#   pattern that matched it may also contain 'rtype', 'mime', 'title_id',
#   'unit_id', 'doi' and 'publication_date'. When nothing matches, only the
#   "provider" key is present.
# @raise [URI::InvalidURIError] if the decoded input cannot be parsed as a URI
def Emerald.parse(input)
  # URI.unescape was removed in Ruby 3.0. URI::DEFAULT_PARSER.unescape is the
  # implementation it used to delegate to, so the decoding is unchanged while
  # the call keeps working on current Ruby versions.
  path = URI(URI::DEFAULT_PARSER.unescape(input)).path

  data = {
    "provider" => "emerald"
  }

  if (match = /^\/series\/([a-z]+)$/.match(path))
    # /series/<title>
    data['rtype'] = 'BOOKSERIE'
    data['mime'] = 'MISC'
    data['title_id'] = match[1]
    data['unit_id'] = 'series/' + match[1]
  elsif (match = /^\/doi\/([a-z]+)\/([0-9]{2}\.[0-9]{4,5})\/(([A-Z]{1})([0-9]+)([-])([0-9]+)[(]([0-9]{4})[)]([0-9]+))$/.match(path))
    # /doi/<kind>/<prefix>/<L><digits>-<digits>(<year>)<digits>
    case match[1]
    when 'abs'
      data['rtype'] = 'ABS'
      data['mime'] = 'MISC'
    when 'book'
      data['rtype'] = 'BOOKSERIE'
      data['mime'] = 'MISC'
    when 'full'
      data['mime'] = 'HTML'
      data['rtype'] = 'ARTICLE'
    when 'pdfplus'
      data['mime'] = 'PDFPLUS'
      data['rtype'] = 'ARTICLE'
    else
      data['rtype'] = 'ARTICLE'
      data['mime'] = 'MISC'
    end
    # Group 8 is the four-digit number found between the parentheses.
    data['publication_date'] = match[8]
    # Groups 5-7 are "<digits>-<digits>" following the leading capital letter.
    data['title_id'] = match[5] + match[6] + match[7]
    doi = match[2] + '/' + match[3]
    data['doi'] = doi
    data['unit_id'] = doi
  elsif (match = /^\/loi\/([a-z]+)$/.match(path))
    # /loi/<title>
    # NOTE(review): no 'rtype' is set for this route - confirm this is intended.
    data['mime'] = 'MISC'
    data['title_id'] = match[1]
    data['unit_id'] = 'loi/' + match[1]
  elsif (match = /^\/toc\/([a-z]+)\/([0-9]+)\/([0-9]+)/.match(path))
    # /toc/<title>/<number>/<number>; the pattern has no end anchor, so
    # anything may follow the second number.
    data['rtype'] = 'TOC'
    data['mime'] = 'MISC'
    data['title_id'] = match[1]
    data['unit_id'] = match[1] + '/' + match[2] + '/' + match[3]
  elsif (match = /^\/doi\/([a-z]+)\/([0-9]{2}\.[0-9]{4,5})\/(([A-Z]+)([-])([0-9]+)([-])([0-9]+)([-])([0-9]+))$/.match(path))
    # /doi/<kind>/<prefix>/<LETTERS>-<digits>-<digits>-<digits>
    case match[1]
    when 'abs'
      data['rtype'] = 'ABS'
      data['mime'] = 'MISC'
    when 'full'
      data['mime'] = 'HTML'
      data['rtype'] = 'ARTICLE'
    when 'pdfplus'
      data['mime'] = 'PDFPLUS'
      data['rtype'] = 'ARTICLE'
    else
      # NOTE(review): unlike the first DOI route, 'mime' stays unset here.
      data['rtype'] = 'ARTICLE'
    end
    data['title_id'] = match[4]
    doi = match[2] + '/' + match[3]
    data['doi'] = doi
    data['unit_id'] = doi
  elsif (match = /^\/doi\/([a-z]+)\/([0-9]{2}\.[0-9]{4,5})\/([0-9]+)$/.match(path))
    # /doi/<kind>/<prefix>/<digits>
    case match[1]
    when 'abs'
      data['rtype'] = 'ABS'
      data['mime'] = 'MISC'
    when 'full'
      data['mime'] = 'HTML'
      data['rtype'] = 'ARTICLE'
    when 'pdfplus'
      data['mime'] = 'PDFPLUS'
      data['rtype'] = 'ARTICLE'
    else
      # NOTE(review): unlike the first DOI route, 'mime' stays unset here.
      data['rtype'] = 'ARTICLE'
    end
    data['title_id'] = match[3]
    doi = match[2] + '/' + match[3]
    data['doi'] = doi
    data['unit_id'] = doi
  end

  data
end
|