Module: Jstor

Defined in:
lib/logstash/filters/jstor.rb

Class Method Summary collapse

Class Method Details

.parse(input) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/logstash/filters/jstor.rb', line 5

def Jstor.parse (input)

    uri = URI(URI.unescape(input))

    url = uri.path
    params = {}
    if (uri.query)
        params = CGI::parse(uri.query)
    end

    data = {
        "provider" => "jstor"
    }
    doi_prefix = "10.2307"


    if (match = /^\/journal\/([a-z0-9]+)$/i.match(url))
        data["rtype"] = "TOC"
        data["mime"] = "MISC"
        data["unit_id"] = match[1]
        data["title_id"] = match[1]

    elsif (match = /^\/stable\/10\.[0-9]+\/(([a-z]+)\.([0-9]+)\.([0-9]+)\.issue-([0-9]+))$/i.match(url))
        data["rtype"] = "TOC"
        data["mime"] = "MISC"
        data["unit_id"] = match[1]
        data["title_id"] = match[2]
        data["issue"] = match[5]

        if match[3].length >= 4
            data["publication_date"] = match[3]
            data["vol"] = match[4]
        else
            data["vol"] = match[3]
        end

    elsif (match = /^\/stable\/((10\.[0-9]+\/)?([a-z0-9]+))$/i.match(url))
        data["rtype"] = "TOC"
        data["mime"] = "MISC"
        data["unit_id"] = match[3]
        data["title_id"] = match[3]

        if match[2]
            data["doi"] = match[1]
        end

    elsif (match =  /^\/stable\/(i[0-9]+)$/i.match(url))
        data["rtype"] = "TOC"
        data["mime"] = "MISC"
        data["unit_id"] = match[1]
        data["title_id"] = match[1]

    elsif (/^\/action\/showPublication$/i.match(url))
        if (params["journalCode"])
            data["title_id"] = params["journalCode"][0]
            data["unit_id"] = params["journalCode"][0]
            data["rtype"] = 'TOC'
            data["mime"] = 'MISC'
        end

    elsif (match =  /^\/stable\/(get_image|pdf|pdfplus)\/((10\.[0-9]+\/)?([a-z0-9.]+?))(?:\.pdf)?$/i.match(url))
        data["unit_id"] = match[4]
        data["doi"] = match[3] ? match[2] : doi_prefix + "/" + match[2] 
        
        case match[1]
        when 'get_image'
            data["rtype"] = "ARTICLE_SECTION"
            data["mime"] = "GIF"
        when 'pdf'
            data["rtype"] = "ARTICLE"
            data["mime"] = "PDF"
        when 'pdfplus'
            data["rtype"] = "ARTICLE"
            data["mime"] = "PDFPLUS"
        end

        idPattern = /^([a-z0-9]+)((?:\.(\d+))?\.(\d+)\.(\d+)\.(\w+))?/.match(match[4]) || [];

        data["title_id"] = idPattern[1]
        data["publication_date"] = idPattern[3]
        data["vol"] = idPattern[4]
        data["issue"] = idPattern[5]

        if (idPattern[6] == 'cover')
            data["rtype"] = 'COVER'
        elsif (idPattern[6] == 'toc')
            data["rtype"] = 'TOC'
        else
            if (idPattern[6] != nil)
                first_page = idPattern[6].to_i
                
                unless (first_page.to_f.nan?)
                    data["first_page"] = first_page.to_s
                end
            end
        end

    elsif (match = /^\/stable\/(info|view)\/([0-9]+)$/i.match(url))
        data["rtype"] = match[1] === 'info' ? "ABS" : "PREVIEW"
        data["mime"] = "MISC"
        data["unit_id"] = match[2]
        data["title_id"] = match[2]
        data["issue"] = match[5]
    end

    return data
end