Class: JunglePath::DBAccess::IO::ChunkedFileReader

Inherits:
Object
  • Object
show all
Defined in:
lib/jungle_path/db_access/io/chunked_file_reader.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data_file) ⇒ ChunkedFileReader

Returns a new instance of ChunkedFileReader.



9
10
11
12
13
14
15
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 9

# Builds a reader positioned at the first line of the file.
#
# The reader starts at line 1 with a chunk size of 1000 lines, is not yet
# done, and has no current line.
#
# @param data_file the file to read; it is handed to File.open by #each
def initialize(data_file)
	@data_file = data_file

	# Chunk window: first line of the next chunk and how many lines it spans.
	@start_at = 1
	@chunk_size = 1000

	# Progress state maintained by #each.
	@done = false
	@line = nil
end

Instance Attribute Details

#chunk_size ⇒ Object (readonly)

Yields chunks of lines. Each chunk is treated as entirely good or entirely bad. If a one-line chunk is bad, start_at moves forward by 1 (the chunk size). If a chunk was good, start_at moves forward by the chunk size.



8
9
10
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 8

# Returns the current chunk size: the maximum number of lines a single #each
# pass yields. It starts at 1000, is halved (never below 1) by #was_bad_chunk
# and doubled (never above 1000000) by #was_good_chunk.
def chunk_size
  @chunk_size
end

#data_file ⇒ Object (readonly)

Yields chunks of lines. Each chunk is treated as entirely good or entirely bad. If a one-line chunk is bad, start_at moves forward by 1 (the chunk size). If a chunk was good, start_at moves forward by the chunk size.



8
9
10
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 8

# Returns the data file given to the constructor; #each passes this value to
# File.open.
def data_file
  @data_file
end

#done ⇒ Object (readonly)

Yields chunks of lines. Each chunk is treated as entirely good or entirely bad. If a one-line chunk is bad, start_at moves forward by 1 (the chunk size). If a chunk was good, start_at moves forward by the chunk size.



8
9
10
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 8

# Returns the completion flag: false initially, set to true by #each when a
# pass yields no lines at all.
def done
  @done
end

#line ⇒ Object (readonly)

Yields chunks of lines. Each chunk is treated as entirely good or entirely bad. If a one-line chunk is bad, start_at moves forward by 1 (the chunk size). If a chunk was good, start_at moves forward by the chunk size.



8
9
10
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 8

# Returns the line most recently yielded by #each, or nil if the current pass
# has not yielded anything yet (#each resets it to nil when it starts).
def line
  @line
end

#start_at ⇒ Object (readonly)

Yields chunks of lines. Each chunk is treated as entirely good or entirely bad. If a one-line chunk is bad, start_at moves forward by 1 (the chunk size). If a chunk was good, start_at moves forward by the chunk size.



8
9
10
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 8

# Returns the 1-based line number at which the next chunk begins. It starts
# at 1 and is advanced by #was_good_chunk (by the chunk size) and by
# #was_bad_chunk (by one, when the chunk size is already 1).
def start_at
  @start_at
end

Instance Method Details

#each ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 36

# Streams one chunk of the data file to the given block.
#
# Lines are counted from 1. Lines before @start_at are skipped, then up to
# @chunk_size lines are yielded, each one also being stored in @line. Reading
# stops as soon as the chunk is complete instead of scanning on to the end of
# the file. When a pass yields nothing (for example because @start_at lies
# past the last line), @done is set to true.
#
# Progress messages are written to standard output.
#
# @yield [String] each line of the chunk, line terminator included
# @return [true, nil] true when this pass set @done, nil otherwise
def each
	@line = nil
	pointer = 0
	yield_size = 0
	File.open(@data_file, 'r') do |file|
		puts "reading: #{@data_file}."
		file.each_line do |text|
			pointer += 1
			puts "starting chunk at #{pointer}." if pointer == @start_at
			puts "    at line number #{pointer}." if (pointer % 10000).zero?
			next unless pointer >= @start_at && yield_size < @chunk_size

			@line = text
			yield_size += 1
			yield text
			# The chunk is full: no later line could be yielded, so stop reading.
			break if yield_size >= @chunk_size
		end
		# An empty pass means there is nothing left to read from @start_at on.
		@done = true if yield_size.zero?
	end
end

#was_bad_chunk ⇒ Object



17
18
19
20
21
22
23
24
25
26
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 17

# Records that the chunk just read was bad.
#
# A bad one-line chunk is skipped by moving @start_at forward one line. Any
# larger chunk leaves @start_at where it is and halves @chunk_size (never
# going below 1), so the next pass covers a smaller range from the same start.
def was_bad_chunk
	# Already down to a single line: step over it.
	return @start_at += 1 if @chunk_size == 1

	@chunk_size /= 2
	@chunk_size = 1 if @chunk_size < 1
end

#was_good_chunk ⇒ Object



28
29
30
31
32
33
34
# File 'lib/jungle_path/db_access/io/chunked_file_reader.rb', line 28

# Records that the chunk just read was good.
#
# @start_at moves forward by the size of the chunk that succeeded, and
# @chunk_size is then doubled, capped at 1,000,000 lines.
def was_good_chunk
	@start_at += @chunk_size
	@chunk_size *= 2
	@chunk_size = 1_000_000 if @chunk_size > 1_000_000
end