Module: TandF

Defined in:
lib/logstash/filters/tandf.rb

Class Method Summary collapse

Class Method Details

.parse(input) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/logstash/filters/tandf.rb', line 6

# Extracts resource metadata from a request URL for the "tandf" provider.
#
# The input is percent-decoded, parsed as a URI, and its path is matched
# against the following layouts:
#
#   /doi/(full|pdf|abs)/<prefix>/<suffix>  - article HTML, article PDF, abstract
#   /toc/<title>/current                   - table of contents
#   /loi/<title>                           - also reported as a table of contents
#
# When the DOI suffix starts with eight digits, those digits are formatted
# as "NNNN-NNNN" and reported as both "print_identifier" and "title_id".
#
# @param input [String] the (possibly percent-encoded) URL to analyse
# @return [Hash{String => String}] always contains "provider" => "tandf";
#   "doi", "unit_id", "print_identifier", "title_id", "rtype" and "mime"
#   are added when the path matches one of the layouts above
# @raise [URI::InvalidURIError] if the decoded input is not a valid URI
def TandF.parse(input)
  # URI.unescape was removed in Ruby 3.0. The default parser's #unescape
  # performs the same percent-decoding and, like the old call, leaves '+'
  # untouched (unlike CGI.unescape / URI.decode_www_form_component).
  uri  = URI(URI::DEFAULT_PARSER.unescape(input))
  path = uri.path

  data = { 'provider' => 'tandf' }

  if (match = %r{\A/doi/(full|pdf|abs)/([0-9.]+/([0-9a-z.]+))\z}.match(path))
    kind, doi, suffix = match.captures

    data['doi']     = doi
    data['unit_id'] = suffix

    # A suffix that begins with eight digits is split 4-4 with a hyphen.
    if suffix =~ /\A[0-9]{8}/
      data['print_identifier'] = "#{suffix[0, 4]}-#{suffix[4, 4]}"
      data['title_id']         = data['print_identifier']
    end

    case kind
    when 'full'
      data['rtype'] = 'ARTICLE'
      data['mime']  = 'HTML'
    when 'pdf'
      data['rtype'] = 'ARTICLE'
      data['mime']  = 'PDF'
    when 'abs'
      data['rtype'] = 'ABS'
      data['mime']  = 'HTML'
    end
  elsif (match = %r{\A/toc/([a-zA-Z0-9]+)/current\z}.match(path) ||
                 %r{\A/loi/([a-zA-Z0-9]+)\z}.match(path))
    # Both listing pages yield the same fields, keyed by the title code.
    data['rtype']    = 'TOC'
    data['mime']     = 'HTML'
    data['title_id'] = match[1]
    data['unit_id']  = match[1]
  end

  data
end