Class: Fairy::PGroupBy::CommandMergeSortBuffer

Inherits:
OnMemoryBuffer show all
Defined in:
lib/fairy/node/p-group-by.rb

Direct Known Subclasses

DirectKBMergeSortBuffer, MergeSortBuffer

Instance Attribute Summary

Attributes inherited from OnMemoryBuffer

#log_id

Instance Method Summary collapse

Constructor Details

#initialize(njob, policy) ⇒ CommandMergeSortBuffer

Returns a new instance of CommandMergeSortBuffer.



315
316
317
318
319
320
321
322
323
324
# File 'lib/fairy/node/p-group-by.rb', line 315

# Sets up the buffer in its in-memory state.
#
# njob::   forwarded unchanged to the superclass initializer.
# policy:: option hash; +policy[:threshold]+ overrides the configured
#          CONF.GROUP_BY_CMSB_THRESHOLD when it is set.
def initialize(njob, policy)
	super

	# nil means nothing has been spilled to a buffer file yet
	# (#each checks this to choose its iteration path).
	@buffers = nil

	# Pushes counted since the last spill (see #push).
	@key_values_size = 0

	@threshold = policy[:threshold] || CONF.GROUP_BY_CMSB_THRESHOLD
end

Instance Method Details

#each(&block) ⇒ Object



386
387
388
389
390
391
392
# File 'lib/fairy/node/p-group-by.rb', line 386

# Iterates over the buffered groups.
#
# While nothing has been spilled (@buffers is nil) the superclass
# implementation is used; once buffer files exist, iteration is
# delegated to #each_2ndmemory.
def each(&block)
	return super unless @buffers

	each_2ndmemory(&block)
end

#each_2ndmemory(&block) ⇒ Object



394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/fairy/node/p-group-by.rb', line 394

# Iterates over the groups once data has been spilled to buffer files.
#
# Any entries still held in @key_values are first written out as one more
# sorted run. All runs are then merged by an external `sort -m` on the
# first field, and consecutive records with equal keys are folded into a
# single KeyValueStream, which is yielded after its end-of-stream marker
# has been pushed.
#
# Yields one KeyValueStream per distinct key, in the order the merge
# emits them.
#
# NOTE(review): @key_values is not cleared after the final spill, so a
# second call would write the same entries again -- confirm callers only
# iterate once.
def each_2ndmemory(&block)
	store_2ndmemory(@key_values) unless @key_values.empty?

	paths = @buffers.collect{|b| b.path}
	Log::debug(self, paths.join(" "))

	# With no run files `sort` would fall back to reading stdin; there is
	# nothing to merge in that case.
	return if paths.empty?

	# * Array form: no shell is involved, so paths containing spaces or
	#   shell metacharacters are passed through verbatim.
	# * LC_ALL=C: the run files are ordered by Ruby's bytewise String
	#   comparison (see #store_2ndmemory); the merge must use the same
	#   ordering, otherwise records of one key may not come out adjacent.
	command = [{"LC_ALL" => "C"}, "sort", "-m", "-k1,1", *paths]

	IO::popen(command) do |io|
	  key = nil
	  values = nil
	  io.each do |line|
	    # Each record is "<encoded key>\t<encoded values>\n".
	    mk, mv = line.split(/\s+/)
	    # Undo the newline -> ":" substitution, then base64 + Marshal.
	    k = Marshal.load(mk.tr(":", "\n").unpack("m").first)
	    v = Marshal.load(mv.tr(":", "\n").unpack("m").first)

	    # `values &&` keeps a nil key on the very first record from being
	    # mistaken for "same key as before" while no stream exists yet.
	    if values && key == k
	      values.concat v
	    else
	      if values
	        values.push_eos
	        yield values
	      end
	      key = k
	      values = KeyValueStream.new(key, self)
	      values.concat v
	    end
	  end

	  # Flush the last group.
	  if values
	    values.push_eos
	    yield values
	  end
	end
end

#init_2ndmemory ⇒ Object



326
327
328
329
330
331
332
333
# File 'lib/fairy/node/p-group-by.rb', line 326

# Prepares spilling to buffer files: loads the FastTempfile support code
# on first use, picks the directory for buffer files and creates the
# (initially empty) list of buffers.
def init_2ndmemory
	# Loaded lazily, only when a spill actually happens.
	require "fairy/share/fast-tempfile"

	# A policy-supplied directory takes precedence over the configured one.
	@buffer_dir = @policy[:buffer_dir] || CONF.TMP_DIR
	@buffers = []
end

#open_buffer(&block) ⇒ Object



335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# File 'lib/fairy/node/p-group-by.rb', line 335

# Creates a new buffer file and registers it in @buffers.
#
# With a block, the buffer's io object is yielded and the buffer is closed
# when the block finishes (also when it raises); the block's value is
# returned. Without a block, the still-open buffer itself is returned.
def open_buffer(&block)
	# The first spill sets up the buffer directory and the buffer list.
	init_2ndmemory unless @buffers

	buffer = FastTempfile.open("mod-group-by-buffer-#{@njob.no}-", @buffer_dir)
	@buffers.push buffer
	return buffer unless block_given?

	begin
	  # Workaround for ruby BUG#2390: yield the buffer's io
	  # instead of the buffer itself.
	  yield buffer.io
	ensure
	  buffer.close
	end
end

#push(value) ⇒ Object



354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
# File 'lib/fairy/node/p-group-by.rb', line 354

# Adds a value and spills the in-memory table once too many values have
# accumulated.
#
# The value is first handed to the superclass implementation (presumably
# this is what files it into @key_values -- defined outside this view).
# The push counter is then advanced; when it exceeds @threshold the current
# table is swapped for an empty one, the counter is reset, and the old
# table is written out with #store_2ndmemory.
#
# Returns the result of #store_2ndmemory when a spill happened, else nil.
def push(value)
	super

	@key_values_mutex.synchronize do
	  # The counter is updated under the same lock that guards its
	  # check/reset below, so concurrent pushes cannot lose increments.
	  @key_values_size += 1

	  if @key_values_size > @threshold
	    spilled = @key_values
	    # Reset before writing so the buffer is back in a clean state even
	    # if the write raises.
	    @key_values_size = 0
	    @key_values = {}
	    store_2ndmemory(spilled)
	  end
	end
end

#store_2ndmemory(key_values) ⇒ Object



371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/fairy/node/p-group-by.rb', line 371

# Writes one sorted run of key/values pairs to a new buffer file.
#
# Every key and every values object is Marshal-dumped and base64-encoded,
# with the newlines produced by the encoder replaced by ":" so that each
# record fits on one line. Records are ordered by their encoded key and
# written as "<encoded key>\t<encoded values>".
#
# key_values:: enumerable of [key, values] pairs (a Hash in #push).
def store_2ndmemory(key_values)
	Log::info(self, "start store")

	encode = lambda{|obj| [Marshal.dump(obj)].pack("m").tr("\n", ":")}
	records = key_values.map{|key, values| [encode.call(key), encode.call(values)]}
	records = records.sort_by{|record| record.first}

	open_buffer do |io|
	  records.each{|encoded_key, encoded_values| io.puts "#{encoded_key}\t#{encoded_values}"}
	end

	Log::info(self, "end store")
end