Class: Opener::Core::ResourceSwitcher

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/core/resource_switcher.rb

Overview

Class for downloading and extracting external resources such as models/lexicons.

Resource paths specified using the `--resource-path` option are stored in the environment variable `RESOURCE_PATH`. Webservice/daemon code should read this variable instead of re-parsing the CLI arguments.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ ResourceSwitcher

Returns a new instance of ResourceSwitcher.



17
18
19
# File 'lib/opener/core/resource_switcher.rb', line 17

##
# Builds a new HTTPClient and stores it in @http.
#
def initialize
  @http = HTTPClient.new
end

Instance Attribute Details

#http ⇒ HTTPClient (readonly)

Returns:

  • (HTTPClient)


14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/opener/core/resource_switcher.rb', line 14

##
# Downloads and extracts external resources (e.g. models/lexicons) and adds
# the CLI options that control this to a Slop instance.
#
class ResourceSwitcher
  # @return [HTTPClient]
  attr_reader :http

  def initialize
    @http = HTTPClient.new
  end

  ##
  # Adds extra CLI options to the given Slop instance.
  #
  # The run block that was already registered on `slop` (if any) is wrapped:
  # resources are downloaded first, `RESOURCE_PATH` is exported, and only
  # then is the original run block invoked.
  #
  # @param [Slop] slop
  #
  def bind(slop)
    slop.separator "\nResource Options:\n"

    slop.on :'resource-url=',
      'URL pointing to a .zip/.tar.gz file to download',
      :as => String

    slop.on :'resource-path=',
      'Path where the resources should be saved',
      :as => String

    # Hijack Slop's run block so we can inject our own code before it.  This
    # is quite grotesque, but sadly the only way.
    old_runner = slop.instance_variable_get(:@runner)

    slop.run do |opts, args|
      resource_path = opts[:'resource-path']
      resource_url  = opts[:'resource-url']

      if resource_path && resource_url
        download_and_extract(resource_url, resource_path)
      end

      # Allow daemons/webservices to use the path without having to re-parse
      # CLI options. Assigning nil removes the variable from the environment.
      ENV['RESOURCE_PATH'] = resource_path

      # No run block may have been registered before we were bound.
      old_runner.call(opts, args) if old_runner
    end
  end

  ##
  # Downloads the archive at `url` into `path`, extracts it there and removes
  # the downloaded archive afterwards. The archive is also removed when the
  # download or the extraction fails, so no partial file is left behind.
  #
  # @param [String] url
  # @param [String] path
  #
  def download_and_extract(url, path)
    filename  = filename_from_url(url)
    temp_path = File.join(path, filename)
    finished  = false

    create_directory(path)

    begin
      download(url, temp_path)

      Archive.extract(temp_path, path)

      finished = true
    ensure
      # On failure the file may never have been created, so only clean up
      # what actually exists to avoid masking the original error.
      remove_file(temp_path) if finished || File.exist?(temp_path)
    end
  end

  ##
  # Downloads the given file, writing every received chunk to `path`.
  #
  # @param [String] url
  # @param [String] path
  #
  def download(url, path)
    # 'wb' opens the file in binary mode: no newline translation and a
    # binary (ASCII-8BIT) external encoding.
    File.open(path, 'wb') do |handle|
      http.get(url) do |chunk|
        handle.write(chunk)
      end
    end
  end

  ##
  # Returns the filename of the file located at `url`, taken from the
  # Content-Disposition response header.
  #
  # Surrounding quotes and trailing header parameters are stripped and only
  # the base name is returned, so a remote server cannot make us write
  # outside of the resource directory.
  #
  # @param [String] url
  # @return [String]
  # @raise [RuntimeError] when the header or a usable filename is missing.
  #
  def filename_from_url(url)
    headers = get_headers(url)

    # HTTP header names are case-insensitive, don't depend on exact casing.
    _, disposition = headers.find do |name, _value|
      name.to_s.downcase == 'content-disposition'
    end

    unless disposition
      raise "The URL #{url.inspect} did not return a Content-Disposition " \
        "header. This header is required to figure out the filename"
    end

    # Accept both filename="quoted name" and filename=token (up to the next
    # parameter separator).
    matches  = disposition.match(/filename=\s*(?:"([^"]*)"|([^;]+))/i)
    raw_name = matches && (matches[1] || matches[2])
    filename = File.basename(raw_name.to_s.strip)

    if filename.empty? || ['.', '..', File::SEPARATOR].include?(filename)
      raise 'No filename could be found in the Content-Disposition header'
    end

    filename
  end

  ##
  # Creates the path. This method mainly exists to make testing a bit
  # easier.
  #
  # @param [String] path
  #
  def create_directory(path)
    FileUtils.mkdir_p(path)
  end

  ##
  # Removes the given file, mainly exists to make testing easier.
  #
  # @param [String] path
  #
  def remove_file(path)
    File.unlink(path)
  end

  ##
  # Performs a HEAD request and returns the response headers.
  #
  # @param [String] url
  # @return [Hash]
  #
  def get_headers(url)
    http.head(url).headers
  end
end

Instance Method Details

#bind(slop) ⇒ Object

Adds extra CLI options to the given Slop instance.

Parameters:

  • slop (Slop)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/opener/core/resource_switcher.rb', line 26

##
# Adds extra CLI options to the given Slop instance.
#
# The run block that was already registered on `slop` (if any) is wrapped:
# resources are downloaded first, `RESOURCE_PATH` is exported, and only then
# is the original run block invoked.
#
# @param [Slop] slop
#
def bind(slop)
  slop.separator "\nResource Options:\n"

  slop.on :'resource-url=',
    'URL pointing to a .zip/.tar.gz file to download',
    :as => String

  slop.on :'resource-path=',
    'Path where the resources should be saved',
    :as => String

  # Hijack Slop's run block so we can inject our own code before it.  This
  # is quite grotesque, but sadly the only way.
  old_runner = slop.instance_variable_get(:@runner)

  slop.run do |opts, args|
    resource_path = opts[:'resource-path']
    resource_url  = opts[:'resource-url']

    if resource_path && resource_url
      download_and_extract(resource_url, resource_path)
    end

    # Allow daemons/webservices to use the path without having to re-parse
    # CLI options. Assigning nil removes the variable from the environment.
    ENV['RESOURCE_PATH'] = resource_path

    # No run block may have been registered before we were bound.
    old_runner.call(opts, args) if old_runner
  end
end

#create_directory(path) ⇒ Object

Creates the path. This method mainly exists to make testing a bit easier.

Parameters:

  • path (String)


114
115
116
# File 'lib/opener/core/resource_switcher.rb', line 114

##
# Ensures the directory `path` exists, creating missing parent directories
# along the way. Kept as a separate method so it is easy to stub in tests.
#
# @param [String] path
#
def create_directory(path)
  # FileUtils.makedirs is an alias of FileUtils.mkdir_p.
  FileUtils.makedirs(path)
end

#download(url, path) ⇒ Object

Downloads the given file.

Parameters:

  • url (String)
  • path (String)


77
78
79
80
81
82
83
# File 'lib/opener/core/resource_switcher.rb', line 77

##
# Downloads the given file, writing every received chunk to `path`.
#
# @param [String] url
# @param [String] path
#
def download(url, path)
  # 'wb' opens the file in binary mode: no newline translation and a binary
  # (ASCII-8BIT) external encoding.
  File.open(path, 'wb') do |handle|
    http.get(url) do |chunk|
      handle.write(chunk)
    end
  end
end

#download_and_extract(url, path) ⇒ Object

Parameters:

  • url (String)
  • path (String)


58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/opener/core/resource_switcher.rb', line 58

##
# Downloads the archive at `url` into `path`, extracts it there and removes
# the downloaded archive afterwards. The archive is also removed when the
# download or the extraction fails, so no partial file is left behind.
#
# @param [String] url
# @param [String] path
#
def download_and_extract(url, path)
  filename  = filename_from_url(url)
  temp_path = File.join(path, filename)
  finished  = false

  create_directory(path)

  begin
    download(url, temp_path)

    Archive.extract(temp_path, path)

    finished = true
  ensure
    # On failure the file may never have been created, so only clean up what
    # actually exists to avoid masking the original error.
    remove_file(temp_path) if finished || File.exist?(temp_path)
  end
end

#filename_from_url(url) ⇒ String

Returns the filename of the file located at `url`.

Parameters:

  • url (String)

Returns:

  • (String)


91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/opener/core/resource_switcher.rb', line 91

##
# Returns the filename of the file located at `url`, taken from the
# Content-Disposition response header.
#
# Surrounding quotes and trailing header parameters are stripped and only the
# base name is returned, so a remote server cannot make us write outside of
# the resource directory.
#
# @param [String] url
# @return [String]
# @raise [RuntimeError] when the header or a usable filename is missing.
#
def filename_from_url(url)
  headers = get_headers(url)

  # HTTP header names are case-insensitive, don't depend on exact casing.
  _, disposition = headers.find do |name, _value|
    name.to_s.downcase == 'content-disposition'
  end

  unless disposition
    raise "The URL #{url.inspect} did not return a Content-Disposition " \
      "header. This header is required to figure out the filename"
  end

  # Accept both filename="quoted name" and filename=token (up to the next
  # parameter separator).
  matches  = disposition.match(/filename=\s*(?:"([^"]*)"|([^;]+))/i)
  raw_name = matches && (matches[1] || matches[2])
  filename = File.basename(raw_name.to_s.strip)

  if filename.empty? || ['.', '..', File::SEPARATOR].include?(filename)
    raise 'No filename could be found in the Content-Disposition header'
  end

  filename
end

#get_headers(url) ⇒ Hash

Parameters:

  • url (String)

Returns:

  • (Hash)


131
132
133
# File 'lib/opener/core/resource_switcher.rb', line 131

##
# Sends a HEAD request for `url` through the `http` client and hands back
# the headers of the response.
#
# @param [String] url
# @return [Hash]
#
def get_headers(url)
  response = http.head(url)

  response.headers
end

#remove_file(path) ⇒ Object

Removes the given file, mainly exists to make testing easier.

Parameters:

  • path (String)


123
124
125
# File 'lib/opener/core/resource_switcher.rb', line 123

##
# Deletes the file at `path`. Kept as a separate method so it is easy to
# stub in tests.
#
# @param [String] path
#
def remove_file(path)
  # File.delete and File.unlink are the same operation.
  File.delete(path)
end