Class: RStore::FileCrawler
Instance Attribute Summary collapse
-
#config ⇒ Object
readonly
Returns the value of attribute config.
-
#data_hash ⇒ Object
Returns the value of attribute data_hash (the source comment on this accessor reads `attr_reader :file_options_hash`).
-
#file_options ⇒ Object
readonly
Returns the value of attribute file_options.
-
#file_paths ⇒ Object
Returns the value of attribute file_paths.
-
#file_type ⇒ Object
readonly
Returns the value of attribute file_type.
-
#parse_options ⇒ Object
readonly
Returns the value of attribute parse_options.
-
#path ⇒ Object
readonly
Returns the value of attribute path.
Instance Method Summary collapse
- #can_read?(path) ⇒ Boolean
- #file_options_hash=(file_paths) ⇒ Object
-
#initialize(file_or_folder, file_type, options = {}) ⇒ FileCrawler
constructor
A new instance of FileCrawler.
- #parse_directory(option) ⇒ Object
- #verify_and_format_url(url) ⇒ Object
Constructor Details
#initialize(file_or_folder, file_type, options = {}) ⇒ FileCrawler
Returns a new instance of FileCrawler.
21 22 23 24 25 26 27 28 29 30 |
# File 'lib/rstore/file_crawler.rb', line 21
#
# Creates a crawler for the given file or folder.
#
# NOTE(review): the identifiers `options`, `file_options`, `parse_options`
# and `file_options_hash` were missing from the extracted listing. They are
# restored here from the documented signature and from the attribute and
# method names listed for this class; confirm against the original source.
#
# @param file_or_folder [Object] stored as @path and passed to Configuration.new
# @param file_type [Object] stored as @file_type
# @param options [Hash] passed to Configuration.new; defaults to an empty hash
def initialize(file_or_folder, file_type, options = {})
  @path = file_or_folder
  @file_type = file_type
  @config = Configuration.new(file_or_folder, options)
  @file_options = @config.file_options
  @parse_options = @config.parse_options

  # Each assignment goes through the corresponding writer and feeds the
  # result of the previous one, so the order matters.
  self.file_paths = @path
  self.file_options_hash = @file_paths
  self.data_hash = @file_options_hash
end
Instance Attribute Details
#config ⇒ Object (readonly)
Returns the value of attribute config.
17 18 19 |
# File 'lib/rstore/file_crawler.rb', line 17
#
# Returns the value of attribute config.
def config
  @config
end
#data_hash ⇒ Object
Returns the value of attribute data_hash (the source comment on this accessor reads `attr_reader :file_options_hash`).
12 13 14 |
# File 'lib/rstore/file_crawler.rb', line 12
#
# Returns the value of the @data_hash instance variable.
def data_hash
  @data_hash
end
#file_options ⇒ Object (readonly)
Returns the value of attribute file_options.
14 15 16 |
# File 'lib/rstore/file_crawler.rb', line 14
#
# Returns the value of attribute file_options.
#
# NOTE(review): the method name was missing from the extracted listing and
# is restored from the attribute heading and the instance variable it reads.
def file_options
  @file_options
end
#file_paths ⇒ Object
Returns the value of attribute file_paths.
16 17 18 |
# File 'lib/rstore/file_crawler.rb', line 16
#
# Returns the value of attribute file_paths.
def file_paths
  @file_paths
end
#file_type ⇒ Object (readonly)
Returns the value of attribute file_type.
16 17 18 |
# File 'lib/rstore/file_crawler.rb', line 16
#
# Returns the value of attribute file_type.
def file_type
  @file_type
end
#parse_options ⇒ Object (readonly)
Returns the value of attribute parse_options.
14 15 16 |
# File 'lib/rstore/file_crawler.rb', line 14
#
# Returns the value of attribute parse_options.
#
# NOTE(review): the method name was missing from the extracted listing and
# is restored from the attribute heading and the instance variable it reads.
def parse_options
  @parse_options
end
#path ⇒ Object (readonly)
Returns the value of attribute path.
15 16 17 |
# File 'lib/rstore/file_crawler.rb', line 15
#
# Returns the value of attribute path.
def path
  @path
end
Instance Method Details
#can_read?(path) ⇒ Boolean
108 109 110 |
# File 'lib/rstore/file_crawler.rb', line 108
#
# Tells whether +path+ ends with a dot followed by @file_type.
#
# The extension is compared as a literal suffix of the whole string. The
# earlier regex interpolated @file_type unescaped, so regex metacharacters
# in the type changed the match, and its `$` anchor accepted a match at the
# end of any line of a multi-line string.
#
# @param path [#to_s] path to test; nil yields false
# @return [Boolean] true when the path ends with ".<file_type>"
def can_read?(path)
  path.to_s.end_with?(".#{@file_type}")
end
#file_options_hash=(file_paths) ⇒ Object
81 82 83 84 85 86 87 88 |
# File 'lib/rstore/file_crawler.rb', line 81
#
# Builds @file_options_hash: one entry per path, each holding the current
# @file_options and @parse_options under :file_options and :parse_options.
#
# NOTE(review): the method name was missing from the extracted listing and
# is restored from the method heading `#file_options_hash=(file_paths)`.
#
# Both hashes keep their default blocks, so reading an unknown path (or an
# unknown key of an entry) stores a new entry (or nil) as a side effect.
# Every entry references the same @file_options / @parse_options objects;
# nothing is copied.
#
# @param file_paths [#each] the paths to create entries for
def file_options_hash=(file_paths)
  entries = Hash.new do |outer, missing_path|
    outer[missing_path] = Hash.new { |inner, missing_key| inner[missing_key] = nil }
  end

  file_paths.each do |path|
    entries[path][:file_options] = @file_options
    entries[path][:parse_options] = @parse_options
  end

  @file_options_hash = entries
end
#parse_directory(option) ⇒ Object
91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/rstore/file_crawler.rb', line 91
#
# Lists the files of type @file_type relative to the current working
# directory, leaving out directories.
#
# The earlier version looped with `each` and `next if File.directory?`;
# `each` returns its receiver, so directories whose names matched the
# pattern were returned as well. `reject` applies the filter.
#
# @param option [Object] truthy to search subdirectories recursively,
#   falsy to look at the current directory only
# @return [Array<String>] matching paths as produced by Dir.glob
def parse_directory(option)
  # Recursive pattern when requested, otherwise only the current directory.
  pattern = option ? "**/*.{#{@file_type}}" : "*.{#{@file_type}}"
  Dir.glob(pattern).reject { |file| File.directory?(file) }
end
#verify_and_format_url(url) ⇒ Object
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/rstore/file_crawler.rb', line 113
#
# Tries to open +url+ and returns the address that could be opened.
#
# When opening fails, the address is adjusted and tried again: an address
# starting with 'www' gets an 'http://' prefix, and an address starting with
# 'http:' is switched to 'https:'. Any other failing address raises. This
# gives at most two retries for a single call.
#
# @param url [String] the address to check
# @return [String] the (possibly adjusted) address that was opened
# @raise [ArgumentError] when no variant of the address can be opened; the
#   message quotes +url+ exactly as it was passed in
def verify_and_format_url(url)
  # Adjust a separate variable so the error message can show the original url.
  address = url
  begin
    # Only reachability matters; the block form closes the handle at once.
    open(address) {}
    address
  rescue
    case address
    when /^www/
      # open-uri does not recognize URLs starting with 'www'
      address = 'http://' + address
      retry
    when /^http:/
      # open-uri does not redirect from http to https on a valid https URL.
      # Replace the scheme only: gsub(/http/) also rewrote any later "http"
      # in the host, path or query.
      address = address.sub(/^http:/, 'https:')
      retry
    else
      raise ArgumentError, "Could not connect to #{url}. Please check if this URL is correct."
    end
  end
end