Class: Opener::Core::ResourceSwitcher
- Inherits:
-
Object
- Object
- Opener::Core::ResourceSwitcher
- Defined in:
- lib/opener/core/resource_switcher.rb
Overview
Class for downloading and extracting external resources such as models/lexicons.
Resource paths specified using the `--resource-path` option are stored in the environment variable `RESOURCE_PATH`. This variable should be used in webservice/daemon code instead of said code re-parsing CLI arguments.
Instance Attribute Summary collapse
- #http ⇒ HTTPClient readonly
Instance Method Summary collapse
-
#bind(slop) ⇒ Object
Adds extra CLI options to the given Slop instance.
-
#create_directory(path) ⇒ Object
Creates the path.
-
#download(url, path) ⇒ Object
Downloads the given file.
- #download_and_extract(url, path) ⇒ Object
-
#filename_from_url(url) ⇒ String
Returns the filename of the file located at `url`.
- #get_headers(url) ⇒ Hash
-
#initialize ⇒ ResourceSwitcher
constructor
A new instance of ResourceSwitcher.
-
#remove_file(path) ⇒ Object
Removes the given file, mainly exists to make testing easier.
Constructor Details
#initialize ⇒ ResourceSwitcher
Returns a new instance of ResourceSwitcher.
17 18 19 |
# File 'lib/opener/core/resource_switcher.rb', line 17
##
# Creates the HTTP client that is exposed through the `http` attribute.
#
def initialize
  @http = HTTPClient.new
end
Instance Attribute Details
#http ⇒ HTTPClient (readonly)
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
# File 'lib/opener/core/resource_switcher.rb', line 14
##
# Downloads and extracts external resources (such as models/lexicons) and
# registers the CLI options that trigger this.
#
# @!attribute [r] http
#  @return [HTTPClient]
#
class ResourceSwitcher
  attr_reader :http

  def initialize
    @http = HTTPClient.new
  end

  ##
  # Adds extra CLI options to the given Slop instance.
  #
  # The run block that was registered on `slop` (if any) is wrapped so that
  # the resource handling happens before it is called.
  #
  # @param [Slop] slop
  #
  def bind(slop)
    slop.separator "\nResource Options:\n"

    slop.on :'resource-url=',
      'URL pointing to a .zip/.tar.gz file to download',
      :as => String

    slop.on :'resource-path=',
      'Path where the resources should be saved',
      :as => String

    # Hijack Slop's run block so we can inject our own code before it. This
    # is quite grotesque, but sadly the only way.
    old_runner = slop.instance_variable_get(:@runner)

    slop.run do |opts, args|
      if opts[:'resource-path'] && opts[:'resource-url']
        download_and_extract(opts[:'resource-url'], opts[:'resource-path'])
      end

      # Allow daemons/webservices to use the path without having to re-parse
      # CLI options.
      ENV['RESOURCE_PATH'] = opts[:'resource-path']

      # No run block may have been registered before `bind` was called.
      old_runner.call(opts, args) if old_runner
    end
  end

  ##
  # Downloads the archive at `url` into `path`, extracts it there and removes
  # the downloaded archive afterwards.
  #
  # @param [String] url
  # @param [String] path
  #
  def download_and_extract(url, path)
    filename  = filename_from_url(url)
    temp_path = File.join(path, filename)

    create_directory(path)

    begin
      download(url, temp_path)

      Archive.extract(temp_path, path)
    ensure
      # Also clean up when the download or extraction failed, so no partial
      # archive is left between the resources.
      remove_file(temp_path) if File.exist?(temp_path)
    end
  end

  ##
  # Downloads the given file.
  #
  # @param [String] url
  # @param [String] path
  #
  def download(url, path)
    # "wb" disables EOL translation (relevant on Windows) and uses a binary
    # external encoding, archives must be written byte-for-byte.
    File.open(path, 'wb') do |handle|
      http.get(url) do |chunk|
        handle.write(chunk)
      end
    end
  end

  ##
  # Returns the filename of the file located at `url`.
  #
  # The name is taken from the `filename` parameter of the
  # Content-Disposition header. Surrounding quotes, trailing parameters and
  # any directory components are removed.
  #
  # @param [String] url
  # @return [String]
  # @raise [RuntimeError] When the header or a usable filename is missing.
  #
  def filename_from_url(url)
    # HTTP header names are case-insensitive, so don't depend on the casing
    # used by the server.
    _, disposition = get_headers(url).find do |name, _value|
      name.to_s.casecmp('Content-Disposition') == 0
    end

    unless disposition
      raise "The URL #{url.inspect} did not return a Content-Disposition " \
        "header. This header is required to figure out the filename"
    end

    matches  = disposition.to_s.match(/filename=(?:"([^"]*)"|([^;]+))/)
    raw_name = matches ? (matches[1] || matches[2]).strip : ''

    # The name is chosen by the remote server and is joined onto the resource
    # path later on, never let it point outside of that directory.
    filename = File.basename(raw_name)

    if ['', '.', '..', File::SEPARATOR].include?(filename)
      raise 'No filename could be found in the Content-Disposition header'
    end

    filename
  end

  ##
  # Creates the path. This method mainly exists to make testing a bit
  # easier.
  #
  # @param [String] path
  #
  def create_directory(path)
    FileUtils.mkdir_p(path)
  end

  ##
  # Removes the given file, mainly exists to make testing easier.
  #
  # @param [String] path
  #
  def remove_file(path)
    File.unlink(path)
  end

  ##
  # @param [String] url
  # @return [Hash]
  #
  def get_headers(url)
    http.head(url).headers
  end
end
Instance Method Details
#bind(slop) ⇒ Object
Adds extra CLI options to the given Slop instance.
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/opener/core/resource_switcher.rb', line 26
##
# Adds extra CLI options to the given Slop instance.
#
# The run block that was registered on `slop` (if any) is wrapped: when both
# options are given the resource is downloaded and extracted first, then
# `RESOURCE_PATH` is exported and finally the original run block is called.
#
# @param [Slop] slop
#
def bind(slop)
  slop.separator "\nResource Options:\n"

  slop.on :'resource-url=',
    'URL pointing to a .zip/.tar.gz file to download',
    :as => String

  slop.on :'resource-path=',
    'Path where the resources should be saved',
    :as => String

  # Hijack Slop's run block so we can inject our own code before it. This
  # is quite grotesque, but sadly the only way.
  old_runner = slop.instance_variable_get(:@runner)

  slop.run do |opts, args|
    if opts[:'resource-path'] && opts[:'resource-url']
      download_and_extract(opts[:'resource-url'], opts[:'resource-path'])
    end

    # Allow daemons/webservices to use the path without having to re-parse
    # CLI options.
    ENV['RESOURCE_PATH'] = opts[:'resource-path']

    # No run block may have been registered before `bind` was called.
    old_runner.call(opts, args) if old_runner
  end
end
#create_directory(path) ⇒ Object
Creates the path. This method mainly exists to make testing a bit easier.
114 115 116 |
# File 'lib/opener/core/resource_switcher.rb', line 114
##
# Creates the given directory, including any missing parent directories.
# Kept as a separate method so it is easy to replace in tests.
#
# @param [String] path
#
def create_directory(path)
  FileUtils.mkdir_p(path)
end
#download(url, path) ⇒ Object
Downloads the given file.
77 78 79 80 81 82 83 |
# File 'lib/opener/core/resource_switcher.rb', line 77
##
# Downloads the given file, writing every chunk yielded by the HTTP client
# to `path`.
#
# @param [String] url
# @param [String] path
#
def download(url, path)
  # "wb" disables EOL translation (relevant on Windows) and uses a binary
  # external encoding, archives must be written byte-for-byte.
  File.open(path, 'wb') do |handle|
    http.get(url) do |chunk|
      handle.write(chunk)
    end
  end
end
#download_and_extract(url, path) ⇒ Object
58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/opener/core/resource_switcher.rb', line 58
##
# Downloads the archive at `url` into `path`, extracts it there and removes
# the downloaded archive afterwards.
#
# @param [String] url
# @param [String] path
#
def download_and_extract(url, path)
  filename  = filename_from_url(url)
  temp_path = File.join(path, filename)

  create_directory(path)

  begin
    download(url, temp_path)

    Archive.extract(temp_path, path)
  ensure
    # Also clean up when the download or extraction failed, so no partial
    # archive is left between the resources.
    remove_file(temp_path) if File.exist?(temp_path)
  end
end
#filename_from_url(url) ⇒ String
Returns the filename of the file located at `url`.
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/opener/core/resource_switcher.rb', line 91
##
# Returns the filename of the file located at `url`.
#
# The name is taken from the `filename` parameter of the Content-Disposition
# header. Surrounding quotes, trailing parameters and any directory
# components are removed.
#
# @param [String] url
# @return [String]
# @raise [RuntimeError] When the header or a usable filename is missing.
#
def filename_from_url(url)
  # HTTP header names are case-insensitive, so don't depend on the casing
  # used by the server.
  _, disposition = get_headers(url).find do |name, _value|
    name.to_s.casecmp('Content-Disposition') == 0
  end

  unless disposition
    raise "The URL #{url.inspect} did not return a Content-Disposition " \
      "header. This header is required to figure out the filename"
  end

  matches  = disposition.to_s.match(/filename=(?:"([^"]*)"|([^;]+))/)
  raw_name = matches ? (matches[1] || matches[2]).strip : ''

  # The name is chosen by the remote server and is joined onto the resource
  # path later on, never let it point outside of that directory.
  filename = File.basename(raw_name)

  if ['', '.', '..', File::SEPARATOR].include?(filename)
    raise 'No filename could be found in the Content-Disposition header'
  end

  filename
end
#get_headers(url) ⇒ Hash
131 132 133 |
# File 'lib/opener/core/resource_switcher.rb', line 131
##
# Sends a HEAD request to `url` and returns the headers of the response.
#
# @param [String] url
# @return [Hash]
#
def get_headers(url)
  response = http.head(url)

  response.headers
end
#remove_file(path) ⇒ Object
Removes the given file, mainly exists to make testing easier.
123 124 125 |
# File 'lib/opener/core/resource_switcher.rb', line 123
##
# Deletes the file at `path`. Kept as a separate method so it is easy to
# replace in tests.
#
# @param [String] path
#
def remove_file(path)
  File.delete(path)
end