Class: Fairy::PGroupBy::CommandMergeSortBuffer
Instance Attribute Summary
#log_id
Instance Method Summary
collapse
Constructor Details
Returns a new instance of CommandMergeSortBuffer.
315
316
317
318
319
320
321
322
323
324
|
# File 'lib/fairy/node/p-group-by.rb', line 315
# Sets up an empty buffer.
#
# njob   - passed through to the superclass constructor (implicit super).
# policy - passed through to the superclass constructor as well;
#          policy[:threshold], when truthy, replaces the configured
#          CONF.GROUP_BY_CMSB_THRESHOLD as the limit that #push compares
#          @key_values_size against.
def initialize(njob, policy)
  super

  # nil until #init_2ndmemory creates the list of buffers.
  @buffers = nil
  @key_values_size = 0
  @threshold = policy[:threshold] || CONF.GROUP_BY_CMSB_THRESHOLD
end
|
Instance Method Details
#each(&block) ⇒ Object
386
387
388
389
390
391
392
|
# File 'lib/fairy/node/p-group-by.rb', line 386
# Iterates over the buffered groups.
#
# When @buffers has been set up, iteration is delegated to
# #each_2ndmemory with the caller's block; otherwise the superclass
# implementation runs with the same arguments and block.
def each(&block)
  return each_2ndmemory(&block) if @buffers

  super
end
|
#each_2ndmemory(&block) ⇒ Object
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
|
# File 'lib/fairy/node/p-group-by.rb', line 394
# Merges every buffer file with the external `sort -m` command and yields
# one KeyValueStream per distinct key.
#
# Any entries still held in @key_values are written out first with
# #store_2ndmemory so the merge sees them too. Each merged line is
# "<encoded key><whitespace><encoded values>"; both fields are base64 text
# of a Marshal dump in which ":" stands for the newlines emitted by
# pack("m"). Consecutive lines with an equal key are concatenated into the
# same stream; push_eos is called on a stream right before it is yielded.
#
# NOTE: Marshal.load is applied to the buffer contents, so the files listed
# in @buffers must only ever contain what #store_2ndmemory wrote.
#
# Yields a KeyValueStream for each key, in merge order.
def each_2ndmemory(&block)
  store_2ndmemory(@key_values) unless @key_values.empty?

  paths = @buffers.collect{|b| b.path}
  Log::debug(self, paths.join(" "))

  decode = lambda{|field| Marshal.load(field.tr(":", "\n").unpack("m").first)}

  # The buffers are ordered by Ruby's byte-wise String comparison, so the
  # merge has to compare bytes as well: force the C locale instead of
  # inheriting whatever collation the environment specifies.
  merge_env = {"LC_ALL" => "C"}
  # Array form: no shell is involved, so paths containing spaces or shell
  # metacharacters are passed through verbatim.
  merge_cmd = ["sort", "-m", "-k1,1", *paths]

  IO::popen(merge_env, merge_cmd) do |io|
    key = nil
    values = nil
    io.each do |line|
      mk, mv = line.split(/\s+/)
      k = decode.call(mk)
      v = decode.call(mv)
      # `values` doubles as the "a group is open" flag; without checking it
      # a first record whose key is nil would match the initial key = nil
      # and call concat on nil.
      if values && key == k
        values.concat v
      else
        if values
          values.push_eos
          yield values
        end
        key = k
        values = KeyValueStream.new(key, self)
        values.concat v
      end
    end
    # Emit the last open group, if any line was read at all.
    if values
      values.push_eos
      yield values
    end
  end
end
|
#init_2ndmemory ⇒ Object
326
327
328
329
330
331
332
333
|
# File 'lib/fairy/node/p-group-by.rb', line 326
# Prepares the state used for buffering to files.
#
# Loads "fairy/share/fast-tempfile" on first use, picks the buffer
# directory (@policy[:buffer_dir] when truthy, CONF.TMP_DIR otherwise) and
# starts with an empty list of buffers.
def init_2ndmemory
  # Required here rather than at file load, so it is only pulled in when
  # this method actually runs.
  require "fairy/share/fast-tempfile"

  @buffer_dir = @policy[:buffer_dir] || CONF.TMP_DIR
  @buffers = []
end
|
#open_buffer(&block) ⇒ Object
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
|
# File 'lib/fairy/node/p-group-by.rb', line 335
# Opens a new FastTempfile buffer and appends it to @buffers.
#
# #init_2ndmemory is invoked first when @buffers has not been set up yet.
# The file name prefix is "mod-group-by-buffer-<@njob.no>-" and the file
# is opened with @buffer_dir as its directory argument.
#
# With a block:    yields buffer.io, closes the buffer afterwards (also
#                  when the block raises) and returns the block's value.
# Without a block: returns the buffer, left open.
def open_buffer(&block)
  init_2ndmemory unless @buffers

  tempfile = FastTempfile.open("mod-group-by-buffer-#{@njob.no}-", @buffer_dir)
  @buffers << tempfile
  return tempfile unless block_given?

  begin
    yield tempfile.io
  ensure
    tempfile.close
  end
end
|
#push(value) ⇒ Object
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
|
# File 'lib/fairy/node/p-group-by.rb', line 354
# Adds a value through the superclass and writes the accumulated entries
# out with #store_2ndmemory once more than @threshold values have been
# pushed since the last reset.
#
# value - forwarded unchanged to the superclass #push (implicit super).
#
# Returns the result of #store_2ndmemory when a write-out happened,
# nil otherwise.
def push(value)
  super

  @key_values_mutex.synchronize do
    # The counter is bumped inside the critical section: it is compared and
    # reset under this mutex, so incrementing it outside could lose updates
    # when several threads push at once.
    @key_values_size += 1
    next unless @key_values_size > @threshold

    # Detach the current table and start a fresh one before writing.
    spilled = @key_values
    @key_values_size = 0
    @key_values = {}
    # The write-out runs while the mutex is still held.
    store_2ndmemory(spilled) if spilled
  end
end
|
#store_2ndmemory(key_values) ⇒ Object
371
372
373
374
375
376
377
378
379
380
381
382
383
384
|
# File 'lib/fairy/node/p-group-by.rb', line 371
# Writes key_values to a new buffer obtained from #open_buffer, one line
# per key, ordered by the encoded key.
#
# Every key and every value collection is Marshal-dumped, base64-encoded
# with pack("m"), and the newlines produced by the encoder are replaced
# with ":" so each field stays on one line. A line is
# "<encoded key>\t<encoded values>".
#
# key_values - enumerable of key / values pairs (iterated with two block
#              parameters).
#
# Returns whatever the final Log::info call returns.
def store_2ndmemory(key_values)
  Log::info(self, "start store")

  encode = lambda { |obj| [Marshal.dump(obj)].pack("m").tr("\n", ":") }
  records = key_values.map { |key, values| [encode.call(key), encode.call(values)] }
  records = records.sort_by { |pair| pair.first }

  open_buffer do |io|
    records.each { |k, v| io.puts "#{k}\t#{v}" }
  end

  # Drop the reference to the encoded records before logging
  # (presumably so they can be collected early).
  records = nil
  Log::info(self, "end store")
end
|