Class: Fairy::PGroupBy::CommandMergeSortBuffer

Inherits:
OnMemoryBuffer show all
Defined in:
lib/fairy/node/p-group-by.rb

Direct Known Subclasses

DirectKBMergeSortBuffer, MergeSortBuffer

Instance Attribute Summary

Attributes inherited from OnMemoryBuffer

#log_id

Instance Method Summary collapse

Constructor Details

#initialize(njob, policy) ⇒ CommandMergeSortBuffer

Returns a new instance of CommandMergeSortBuffer.



315
316
317
318
319
320
321
322
323
324
# File 'lib/fairy/node/p-group-by.rb', line 315

# Sets up the buffer's bookkeeping on top of the inherited initialization.
#
# njob   - forwarded unchanged to the superclass initializer.
# policy - indexed with :threshold; a nil/false value falls back to
#          CONF.GROUP_BY_CMSB_THRESHOLD.
#
# @buffers starts out as nil, which #each reads as "nothing has been spilled
# to buffer files yet".
def initialize(njob, policy)
  super

  @buffers = nil
  @key_values_size = 0
  @threshold = policy[:threshold] || CONF.GROUP_BY_CMSB_THRESHOLD
end

Instance Method Details

#each(&block) ⇒ Object



386
387
388
389
390
391
392
# File 'lib/fairy/node/p-group-by.rb', line 386

# Iterates over the buffered groups.
#
# While @buffers is unset, the inherited #each is used. Once @buffers is set,
# iteration is delegated to #each_2ndmemory with the same block.
def each(&block)
  return super unless @buffers

  each_2ndmemory(&block)
end

#each_2ndmemory(&block) ⇒ Object



394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/fairy/node/p-group-by.rb', line 394

# Iterates over the groups after data has been spilled to buffer files.
#
# Whatever is still held in @key_values is first written out through
# store_2ndmemory. The files listed in @buffers are then merged on their first
# field (the encoded key) by the external `sort -m` command, and consecutive
# lines that decode to an equal key are gathered into one KeyValueStream.
#
# Yields each KeyValueStream after push_eos has been called on it.
#
# Notes on the merge command:
# * It runs with LC_ALL=C. store_2ndmemory orders the lines with Ruby's
#   String comparison, and `sort -m` only merges correctly when it compares
#   keys the same way its inputs were ordered; a locale-aware collation could
#   interleave keys differently and split one key into several groups.
# * It is passed to IO.popen as an argument array, so no shell is involved and
#   buffer paths containing spaces or shell metacharacters are passed intact.
def each_2ndmemory(&block)
  store_2ndmemory(@key_values) unless @key_values.empty?

  paths = @buffers.collect { |b| b.path }
  Log::debug(self, paths.join(" "))

  merge_command = [{ "LC_ALL" => "C" }, "sort", "-m", "-k1,1", *paths]
  IO.popen(merge_command) do |io|
    key = nil
    values = nil
    io.each do |line|
      mk, mv = line.split(/\s+/)
      # Undo the encoding applied by store_2ndmemory: ":" back to newline,
      # base64-decode, then unmarshal. The data comes from this object's own
      # buffer files.
      k = Marshal.load(mk.tr(":", "\n").unpack("m").first)
      v = Marshal.load(mv.tr(":", "\n").unpack("m").first)
      # `values` is checked as well so that a nil key on the very first line
      # is not mistaken for a continuation of a (non-existent) current group.
      if values && key == k
        values.concat v
      else
        if values
          values.push_eos
          yield values
        end
        key = k
        values = KeyValueStream.new(key, self)
        values.concat v
      end
    end
    # Flush the last group, if any line was read at all.
    if values
      values.push_eos
      yield values
    end
  end
end

#init_2ndmemory ⇒ Object



326
327
328
329
330
331
332
333
# File 'lib/fairy/node/p-group-by.rb', line 326

# Prepares the object for spilling to buffer files.
#
# Loads "fairy/share/fast-tempfile" on demand, picks the buffer directory
# (@policy[:buffer_dir], falling back to CONF.TMP_DIR when that is nil/false)
# and resets @buffers to an empty list.
def init_2ndmemory
  # Loaded here rather than at file level, so it is only required once a
  # spill actually happens.
  require "fairy/share/fast-tempfile"

  @buffer_dir = @policy[:buffer_dir] || CONF.TMP_DIR
  @buffers = []
end

#open_buffer(&block) ⇒ Object



335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# File 'lib/fairy/node/p-group-by.rb', line 335

# Opens a new FastTempfile in @buffer_dir and appends it to @buffers.
#
# If @buffers has not been set yet, init_2ndmemory is called first.
#
# With a block: yields the buffer's underlying IO (buffer.io), closes the
# buffer when the block finishes or raises, and returns the block's value.
# Without a block: returns the still-open buffer.
def open_buffer(&block)
  init_2ndmemory unless @buffers

  tempfile = FastTempfile.open("mod-group-by-buffer-#{@njob.no}-", @buffer_dir)
  @buffers.push(tempfile)
  return tempfile unless block_given?

  begin
    # Workaround for ruby BUG#2390: yield the underlying IO
    # instead of the buffer object itself (previously: yield buffer).
    yield tempfile.io
  ensure
    tempfile.close
  end
end

#push(value) ⇒ Object



354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
# File 'lib/fairy/node/p-group-by.rb', line 354

# Adds a value through the inherited #push and spills to a buffer file once
# more than @threshold values have been pushed since the last spill.
#
# The counter update, the threshold check and the swap of @key_values all
# happen under @key_values_mutex, so concurrent pushes cannot lose an
# increment or observe a half-reset state. The increment is done after
# `super` rather than around it so that this method never holds the mutex
# while the inherited #push runs.
# NOTE(review): presumably the inherited #push is what adds `value` to
# @key_values -- confirm against OnMemoryBuffer.
#
# Returns the result of store_2ndmemory when a spill happened, otherwise nil.
def push(value)
  super

  @key_values_mutex.synchronize do
    @key_values_size += 1
    if @key_values_size > @threshold
      # Detach the current table and start a fresh one before writing it out.
      spilled = @key_values
      @key_values_size = 0
      @key_values = {}
      store_2ndmemory(spilled)
    end
  end
end

#store_2ndmemory(key_values) ⇒ Object



371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/fairy/node/p-group-by.rb', line 371

# Writes a key => values table to a new buffer file.
#
# Each key and each values object is marshalled, base64-encoded, and has its
# newlines replaced by ":" so that one pair fits on a single line. Pairs are
# ordered by their encoded key and written as "<key>\t<values>" lines through
# open_buffer. Log::info is called before and after the write.
def store_2ndmemory(key_values)
  Log::info(self, "start store")

  encode = lambda { |obj| [Marshal.dump(obj)].pack("m").tr("\n", ":") }
  rows = key_values.map { |key, values| [encode.call(key), encode.call(values)] }
  rows = rows.sort_by(&:first)

  open_buffer do |io|
    rows.each { |encoded_key, encoded_values| io.puts "#{encoded_key}\t#{encoded_values}" }
  end

  Log::info(self, "end store")
end