Class: PEROBS::FlatFile

Inherits:
Object
  • Object
show all
Defined in:
lib/perobs/FlatFile.rb

Overview

The FlatFile class manages the storage file of the FlatFileDB. It contains a sequence of blobs. Each blob consists of a header and the actual blob data bytes.

Constant Summary collapse

INDEX_BTREE_ORDER =

The number of entries in a single BTree node of the index file.

65

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dir, progressmeter) ⇒ FlatFile

Create a new FlatFile object for a database in the given path.

Parameters:

  • dir (String)

    Directory path for the data base file



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/perobs/FlatFile.rb', line 49

# Create a new FlatFile object for a database in the given path. Nothing is
# opened here; the blob file handle and the mark list start out as nil.
# @param dir [String] Directory path for the data base file
# @param progressmeter [Object] Progress reporter that is handed on to the
#        index and the space list
def initialize(dir, progressmeter)
  @db_dir = dir
  @progressmeter = progressmeter
  @f = nil
  @marks = nil
  @index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)

  # PEROBS version 4.1.0 and earlier stored the space list in
  # 'database_spaces.blobs'. That format is deprecated, but when such a file
  # is present we keep using the SpaceTree that understands it. Newly
  # created DBs use the SpaceManager format.
  legacy_spaces_file = File.join(@db_dir, 'database_spaces.blobs')
  space_list_class =
    File.exist?(legacy_spaces_file) ? SpaceTree : SpaceManager
  @space_list = space_list_class.new(@db_dir, @progressmeter)
end

Class Method Details

.insert_header_checksums(db_dir) ⇒ Object



803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
# File 'lib/perobs/FlatFile.rb', line 803

# Rewrite the blob file 'database.blobs' in the given directory so that every
# blob header carries an additional CRC32 checksum of the header itself.
#
# The old file is read entry by entry (21 byte header packed as 'CQQL'
# followed by the blob data). Entries that have bit 0 of the flags set and
# bit 3 cleared are copied to 'database_v4.blobs' with the header checksum
# appended; all other entries are dropped. Finally the old file is renamed
# to 'database_v3.blobs' and the new file takes its place.
#
# @param db_dir [String] Directory that holds the blob file
def FlatFile::insert_header_checksums(db_dir)
  old_file_name = File.join(db_dir, 'database.blobs')
  new_file_name = File.join(db_dir, 'database_v4.blobs')
  bak_file_name = File.join(db_dir, 'database_v3.blobs')

  entries = 0
  # The block form of File.open guarantees that both files are closed again,
  # even when a fatal error or an exception aborts the conversion half way.
  File.open(old_file_name, 'rb') do |old_file|
    File.open(new_file_name, 'wb') do |new_file|
      while (buf = old_file.read(21))
        flags, length, id, crc = *buf.unpack('CQQL')
        blob_data = old_file.read(length)

        # Some basic sanity checking to ensure all reserved bits are 0. Older
        # versions of PEROBS used to set bit 1 despite it being reserved now.
        unless flags & 0xF0 == 0
          PEROBS.log.fatal "Blob file #{old_file_name} contains illegal " +
            "flag byte #{'%02x' % flags} at #{old_file.pos - 21}"
        end

        # Check if the blob is valid and current.
        if flags & 0x1 == 1 && flags & 0x8 == 0
          # Make sure the bit 1 is not set anymore.
          flags = flags & 0x05
          header_str = [ flags, length, id, crc ].pack('CQQL')
          # The checksum covers the packed header fields only.
          header_crc = Zlib.crc32(header_str, 0)
          header_str += [ header_crc ].pack('L')

          new_file.write(header_str + blob_data)
          entries += 1
        end
      end
    end
  end
  PEROBS.log.info "Header checksum added to #{entries} entries"

  # Keep the original file as a backup and move the converted file in place.
  File.rename(old_file_name, bak_file_name)
  File.rename(new_file_name, old_file_name)
end

Instance Method Details

#check(repair = false) ⇒ Integer

Check (and repair) the FlatFile.

Parameters:

  • repair (Boolean) (defaults to: false)

    True if errors should be fixed.

Returns:

  • (Integer)

    Number of errors found



458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
# File 'lib/perobs/FlatFile.rb', line 458

# Check (and repair) the FlatFile.
#
# The blob file is scanned first: the data of every valid blob must be
# readable, must inflate if it is flagged as compressed, must match the
# checksum in its header and its ID must be unique. If the scan found no
# corrupted blobs, the index and the space list are checked against the
# blob file as well.
#
# @param repair [Boolean] True if errors should be fixed.
# @return [Integer] Number of errors found
def check(repair = false)
  errors = 0
  # Nothing to check if the blob file is not open.
  return errors unless @f

  t = Time.now
  PEROBS.log.info "Checking FlatFile database" +
    "#{repair ? ' in repair mode' : ''}..."

  # First check the database blob file. Each entry should be readable and
  # correct and all IDs must be unique. We use a shadow index to keep
  # track of the already found IDs.
  new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
                        @progressmeter)
  new_index.erase
  new_index.open

  corrupted_blobs = 0
  end_of_last_healthy_blob = nil
  @progressmeter.start('Checking blobs file', @f.size) do |pm|
    # The return value of each_blob_header is used as the number of
    # corrupted blobs (presumably counted while scanning for headers).
    corrupted_blobs = each_blob_header do |header|
      if header.is_valid?
        # We have a non-deleted entry.
        begin
          @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
          buf = @f.read(header.length)
          # IO#read returns nil instead of a short String when the file
          # ends right where the blob data should start. Treat that as a
          # premature end of file as well instead of crashing on nil.
          if buf.nil? || buf.bytesize != header.length
            PEROBS.log.error "Premature end of file in blob with ID " +
              "#{header.id}."
            discard_damaged_blob(header) if repair
            errors += 1
            next
          end

          # Uncompress the data if the compression bit is set in the mark
          # byte.
          if header.is_compressed?
            begin
              buf = Zlib.inflate(buf)
            rescue Zlib::BufError, Zlib::DataError
              PEROBS.log.error "Corrupted compressed block with ID " +
                "#{header.id} found."
              discard_damaged_blob(header) if repair
              errors += 1
              next
            end
          end

          # Headers without a CRC are accepted without a checksum test.
          if header.crc && checksum(buf) != header.crc
            PEROBS.log.error "Checksum failure while checking blob " +
              "with ID #{header.id}"
            discard_damaged_blob(header) if repair
            errors += 1
            next
          end
        rescue IOError => e
          PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
            e.message
        end

        # Check if the ID has already been found in the file.
        if (previous_address = new_index.get(header.id))
          PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
            "Addresses: #{previous_address}, #{header.addr}"
          errors += 1
          previous_header = FlatFileBlobHeader.read(@f, previous_address,
                                                    header.id)
          if repair
            # We have two blobs with the same ID and we must discard one of
            # them.
            if header.is_outdated?
              discard_damaged_blob(header)
            elsif previous_header.is_outdated?
              discard_damaged_blob(previous_header)
            else
              PEROBS.log.error "None of the blobs with same ID have " +
                "the outdated flag set. Deleting the smaller one."
              errors += 1
              discard_damaged_blob(header.length < previous_header.length ?
                                   header : previous_header)
            end
            next
          end
        else
          # ID is unique so far. Add it to the shadow index.
          new_index.insert(header.id, header.addr)
        end
      end
      # Only reached for entries that were not skipped with 'next' above.
      end_of_last_healthy_blob = header.addr +
        FlatFileBlobHeader::LENGTH + header.length

      pm.update(header.addr)
    end

    if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
      # The blob file ends with a corrupted blob header.
      PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
        'bytes found at the end of FlatFile.'
      corrupted_blobs += 1
      if repair
        PEROBS.log.error "Truncating FlatFile to " +
          "#{end_of_last_healthy_blob} bytes by discarding " +
          "#{@f.size - end_of_last_healthy_blob} bytes"
        @f.truncate(end_of_last_healthy_blob)
      end
    end

    errors += corrupted_blobs
  end

  # We no longer need the new index.
  new_index.close
  new_index.erase

  if repair && corrupted_blobs > 0
    # Corrupted areas were found: compact the blob file and rebuild the
    # index and the space list from scratch.
    erase_index_files
    defragmentize
    regenerate_index_and_spaces
  elsif corrupted_blobs == 0
    # Now we check the index data. It must be correct and the entries must
    # match the blob file. All entries in the index must be in the blob file
    # and vice versa.
    begin
      index_ok = @index.check do |id, address|
        has_id_at?(id, address)
      end
      x_check_errs = 0
      space_check_ok = true
      # The three checks are chained with &&, so a later check only runs
      # when the earlier ones passed.
      unless index_ok && (space_check_ok = @space_list.check(self)) &&
        (x_check_errs = cross_check_entries) == 0
        errors += 1 unless index_ok && space_check_ok
        errors += x_check_errs
        regenerate_index_and_spaces if repair
      end
    rescue PEROBS::FatalError
      errors += 1
      regenerate_index_and_spaces if repair
    end
  end

  sync if repair
  PEROBS.log.info "check_db completed in #{Time.now - t} seconds. " +
    "#{errors} errors found."

  errors
end

#clear_all_marksObject

Clear all marks.



338
339
340
341
342
343
344
# File 'lib/perobs/FlatFile.rb', line 338

# Clear all marks. An existing mark list is emptied; if there is none yet, a
# new IDList named 'marks' is created in the database directory.
def clear_all_marks
  return @marks.clear if @marks

  @marks = IDList.new(@db_dir, 'marks', 8)
end

#closeObject

Close the flat file. This method must be called to ensure that all data is really written into the filesystem.



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/perobs/FlatFile.rb', line 92

# Close the flat file. This method must be called to ensure that all data is
# really written into the filesystem.
#
# The space list and the index are closed if they are open, the mark list
# is erased and the blob file is flushed, unlocked, synced and closed.
def close
  # Space list first, then the index.
  [ @space_list, @index ].each do |store|
    store.close if store.is_open?
  end

  if @marks
    @marks.erase
    @marks = nil
  end

  blob_file = @f
  return unless blob_file

  blob_file.flush
  blob_file.flock(File::LOCK_UN)
  blob_file.fsync
  blob_file.close
  @f = nil
end

#defragmentizeObject

Eliminate all the holes in the file. This is an in-place implementation. No additional space will be needed on the file system.



348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
# File 'lib/perobs/FlatFile.rb', line 348

# Eliminate all the holes in the file. This is an in-place implementation.
# No additional space will be needed on the file system.
#
# All blob headers are visited in file order. Deleted entries and corrupted
# areas add to the running gap ('distance'); every valid entry that has a
# gap in front of it is copied down by that distance. At the end the file
# is truncated right behind the last valid entry and synced.
def defragmentize
  distance = 0
  new_file_size = 0
  deleted_blobs = 0
  corrupted_blobs = 0
  valid_blobs = 0

  # Iterate over all entries.
  @progressmeter.start('Defragmentizing blobs file', @f.size) do |pm|
    each_blob_header do |header|
      # If we have stumbled over a corrupted blob we treat it similar to a
      # deleted blob and reuse the space.
      if header.corruption_start
        distance += header.addr - header.corruption_start
        corrupted_blobs += 1
      end

      # Total size of the current entry
      entry_bytes = FlatFileBlobHeader::LENGTH + header.length
      if header.is_valid?
        # We have found a valid entry.
        valid_blobs += 1
        if distance > 0
          begin
            # Read current entry into a buffer
            @f.seek(header.addr)
            buf = @f.read(entry_bytes)
            # Write the buffer right after the end of the previous entry.
            @f.seek(header.addr - distance)
            @f.write(buf)
            # Mark the space between the relocated current entry and the
            # next valid entry as deleted space.
            FlatFileBlobHeader.new(@f, @f.pos, 0,
                                   distance - FlatFileBlobHeader::LENGTH,
                                   0, 0).write
            @f.flush
          rescue IOError => e
            PEROBS.log.fatal "Error while moving blob for ID " +
              "#{header.id}: #{e.message}"
          end
        end
        # The file must at least reach to the end of this (possibly moved)
        # entry.
        new_file_size = header.addr - distance +
          FlatFileBlobHeader::LENGTH + header.length
      else
        deleted_blobs += 1
        distance += entry_bytes
      end

      pm.update(header.addr)
    end
  end

  # The sizes are labelled KiB, so they are divided by 1024 (not 1000).
  PEROBS.log.info "#{distance / 1024} KiB/#{deleted_blobs} blobs of " +
    "#{@f.size / 1024} KiB/#{valid_blobs} blobs or " +
    "#{'%.1f' % (distance.to_f / @f.size * 100.0)}% reclaimed"
  if corrupted_blobs > 0
    PEROBS.log.info "#{corrupted_blobs} corrupted blob(s) found. Space " +
      "was recycled."
  end

  # Cut off everything behind the last valid entry.
  @f.flush
  @f.truncate(new_file_size)
  @f.flush

  sync
end

#delete_obj_by_address(addr, id) ⇒ Object

Delete the blob that is stored at the specified address.

Parameters:

  • addr (Integer)

    Address of the blob to delete

  • id (Integer)

    ID of the blob to delete



137
138
139
140
141
142
# File 'lib/perobs/FlatFile.rb', line 137

# Delete the blob that is stored at the specified address. The index entry
# is removed, the flags of the blob header are cleared and the freed space
# is handed to the space list. Index and space list are only touched while
# they are open.
# @param addr [Integer] Address of the blob to delete
# @param id [Integer] ID of the blob to delete
def delete_obj_by_address(addr, id)
  if @index.is_open?
    @index.remove(id)
  end

  doomed = FlatFileBlobHeader.read(@f, addr, id)
  doomed.clear_flags

  return unless @space_list.is_open?

  @space_list.add_space(addr, doomed.length)
end

#delete_obj_by_id(id) ⇒ Boolean

Delete the blob for the specified ID.

Parameters:

  • id (Integer)

    ID of the object to be deleted

Returns:

  • (Boolean)

    True if object was deleted, false otherwise



125
126
127
128
129
130
131
132
# File 'lib/perobs/FlatFile.rb', line 125

# Delete the blob for the specified ID.
# @param id [Integer] ID of the object to be deleted
# @return [Boolean] True if object was deleted, false otherwise
def delete_obj_by_id(id)
  addr = find_obj_addr_by_id(id)
  # Unknown IDs have no address, so there is nothing to delete.
  return false unless addr

  delete_obj_by_address(addr, id)
  true
end

#delete_unmarked_objects(&block) ⇒ Object

Delete all unmarked objects.



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/perobs/FlatFile.rb', line 145

# Delete all unmarked objects. Every valid blob whose ID is not in the mark
# list is deleted. If a block is given, it is called with the ID of each
# deleted blob.
# @return [Integer] Number of deleted objects
def delete_unmarked_objects(&block)
  # The index and the space list are not maintained while we sweep. The
  # blob file gets defragmentized afterwards, so we rebuild both at the end
  # and finish with an empty space list.
  clear_index_files

  swept = 0
  @progressmeter.start('Sweeping unmarked objects', @f.size) do |meter|
    each_blob_header do |header|
      unmarked = header.is_valid? && !@marks.include?(header.id)
      if unmarked
        delete_obj_by_address(header.addr, header.id)
        block.call(header.id) if block
        swept += 1
      end

      meter.update(header.addr)
    end
  end
  defragmentize

  # Update the index file and create a new, empty space list.
  regenerate_index_and_spaces

  swept
end

#find_obj_addr_by_id(id) ⇒ Integer

Find the address of the object with the given ID.

Parameters:

  • id (Integer)

    ID of the object

Returns:

  • (Integer)

    Offset in the flat file or nil if not found



268
269
270
# File 'lib/perobs/FlatFile.rb', line 268

# Find the address of the object with the given ID by looking it up in the
# index.
# @param id [Integer] ID of the object
# @return [Integer] Offset in the flat file or nil if not found
def find_obj_addr_by_id(id)
  address = @index.get(id)
  address
end

#has_id_at?(id, address) ⇒ Boolean

Returns:

  • (Boolean)


780
781
782
783
784
785
786
787
# File 'lib/perobs/FlatFile.rb', line 780

# Check if the blob file has a valid blob with the given ID at the given
# address. A header that cannot be read (PEROBS::FatalError) counts as no
# match.
# @param id [Integer] Expected ID of the blob
# @param address [Integer] Offset of the blob header in the flat file
# @return [Boolean]
def has_id_at?(id, address)
  blob_header = begin
    FlatFileBlobHeader.read(@f, address)
  rescue PEROBS::FatalError
    return false
  end

  blob_header.is_valid? && blob_header.id == id
end

#has_space?(address, size) ⇒ Boolean

Returns:

  • (Boolean)


775
776
777
778
# File 'lib/perobs/FlatFile.rb', line 775

# Check if the blob file has a non-valid (free) entry with exactly the given
# data size at the given address.
# @param address [Integer] Offset of the blob header in the flat file
# @param size [Integer] Expected length stored in the header
# @return [Boolean]
def has_space?(address, size)
  candidate = FlatFileBlobHeader.read(@f, address)
  # A valid blob occupies the address, so it cannot be free space.
  return false if candidate.is_valid?

  candidate.length == size
end

#inspectObject



789
790
791
792
793
794
795
796
797
798
799
800
801
# File 'lib/perobs/FlatFile.rb', line 789

def inspect
  s = '['
  each_blob_header do |header|
    s << "{ :pos => #{header.addr}, :flags => #{header.flags}, " +
         ":length => #{header.length}, :id => #{header.id}, " +
         ":crc => #{header.crc}"
    if header.is_valid?
      s << ", :value => #{@f.read(header.length)}"
    end
    s << " }\n"
  end
  s + ']'
end

#is_marked_by_id?(id) ⇒ Boolean

Return true if the object with the given ID is marked, false otherwise.

Parameters:

  • id (Integer)

    ID of the object

Returns:

  • (Boolean)


333
334
335
# File 'lib/perobs/FlatFile.rb', line 333

# Return true if the object with the given ID is marked, false otherwise.
# The answer comes straight from the mark list.
# @param id [Integer] ID of the object
# @return [Boolean]
def is_marked_by_id?(id)
  marked = @marks.include?(id)
  marked
end

#item_counterInteger

Returns Number of items stored in the DB.

Returns:

  • (Integer)

    Number of items stored in the DB.



284
285
286
# File 'lib/perobs/FlatFile.rb', line 284

# Report how many entries the index holds.
# @return [Integer] Number of items stored in the DB.
def item_counter
  stored_items = @index.entries_count
  stored_items
end

#mark_obj_by_id(id) ⇒ Object

Mark the object with the given ID.

Parameters:

  • id (Integer)

    ID of the object



327
328
329
# File 'lib/perobs/FlatFile.rb', line 327

# Mark the object with the given ID by inserting the ID into the mark list.
# @param id [Integer] ID of the object
def mark_obj_by_id(id)
  mark_list = @marks
  mark_list.insert(id)
end

#openObject

Open the flat file for reading and writing.



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/perobs/FlatFile.rb', line 66

# Open the flat file for reading and writing.
#
# The blob file 'database.blobs' in the database directory is opened, or
# created if it does not exist yet. An exclusive, non-blocking lock is then
# requested for the file and the handle is switched to synchronous writes.
# Finally the index files are opened.
def open
  file_name = File.join(@db_dir, 'database.blobs')
  new_db_created = false
  begin
    if File.exist?(file_name)
      @f = File.open(file_name, 'rb+')
    else
      PEROBS.log.info "New FlatFile database '#{file_name}' created"
      @f = File.open(file_name, 'wb+')
      new_db_created = true
    end
  rescue IOError, SystemCallError => e
    # File.open reports most failures (missing directory, insufficient
    # permissions, ...) as Errno::* exceptions. Those are SystemCallErrors,
    # not IOErrors, so both families must be rescued here.
    PEROBS.log.fatal "Cannot open FlatFile database #{file_name}: " +
      e.message
  end
  # With LOCK_NB flock returns false instead of blocking when another
  # process already holds the lock.
  unless @f.flock(File::LOCK_NB | File::LOCK_EX)
    PEROBS.log.fatal "FlatFile database '#{file_name}' is locked by " +
      "another process"
  end
  @f.sync = true

  # The argument is true when the blob file already existed.
  open_index_files(!new_db_created)
end

#read_obj_by_address(addr, id) ⇒ String

Read the object at the specified address.

Parameters:

  • addr (Integer)

    Offset in the flat file

  • id (Integer)

    ID of the data blob

Returns:

  • (String)

    Raw object data



292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# File 'lib/perobs/FlatFile.rb', line 292

# Read the object at the specified address. The blob header is read first
# and must carry the requested ID. The blob data is inflated if the header
# flags it as compressed and is then verified against the header checksum.
# @param addr [Integer] Offset in the flat file
# @param id [Integer] ID of the data blob
# @return [String] Raw object data
def read_obj_by_address(addr, id)
  header = FlatFileBlobHeader.read(@f, addr, id)
  unless header.id == id
    PEROBS.log.fatal "Database index corrupted: Index for object " \
      "#{id} points to object with ID #{header.id}"
  end

  payload = nil

  # The blob data starts right behind the header.
  begin
    @f.seek(addr + FlatFileBlobHeader::LENGTH)
    payload = @f.read(header.length)
  rescue IOError => read_error
    PEROBS.log.fatal "Cannot read blob for ID #{id}: #{read_error.message}"
  end

  # Uncompress the data if the compression bit is set in the flags byte.
  if header.is_compressed?
    begin
      payload = Zlib.inflate(payload)
    rescue Zlib::BufError, Zlib::DataError
      PEROBS.log.fatal "Corrupted compressed block with ID " \
        "#{header.id} found."
    end
  end

  # The checksum is computed over the uncompressed data.
  unless checksum(payload) == header.crc
    PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
  end

  payload
end

#read_obj_by_id(id) ⇒ String or nil

Read the object with the given ID.

Parameters:

  • id (Integer)

    ID of the object

Returns:

  • (String or nil)

    Raw object data if found, otherwise nil



275
276
277
278
279
280
281
# File 'lib/perobs/FlatFile.rb', line 275

# Read the object with the given ID. The address is looked up first; the
# blob is only read when an address was found.
# @param id [Integer] ID of the object
# @return [String or nil] Raw object data if found, otherwise nil
def read_obj_by_id(id)
  addr = find_obj_addr_by_id(id)
  addr ? read_obj_by_address(addr, id) : nil
end

#refreshObject

This method iterates over all entries in the FlatFile and removes the entry and inserts it again. This is useful to update all entries in case the storage format has changed.



418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# File 'lib/perobs/FlatFile.rb', line 418

# This method iterates over all entries in the FlatFile and removes the
# entry and inserts it again. This is useful to update all entries in case
# the storage format has changed.
def refresh
  # Iterating over the entries while we are rearranging them may look
  # scary. A re-inserted item that lands before or at the current entry is
  # harmless. One that lands behind the current entry is simply read again,
  # unless it was appended behind the original end of the file. Hence we
  # remember the file size from before the first rewrite.
  original_size = @f.size

  # The index and the space list are not maintained during this operation.
  # The blob file is defragmentized at the end, which leaves us with an
  # empty space list anyway.
  clear_index_files

  @progressmeter.start('Converting objects to new storage format',
                       @f.size) do |meter|
    each_blob_header do |header|
      if header.is_valid?
        raw_obj = read_obj_by_address(header.addr, header.id)
        delete_obj_by_address(header.addr, header.id)
        write_obj_by_id(header.id, raw_obj)
      end

      # Blobs at or behind the original file end were appended by this very
      # loop. They need no further processing.
      break unless header.addr < original_size

      meter.update(header.addr)
    end
  end

  # Reclaim the space saved by compressing entries.
  defragmentize

  # Recreate the index file and create an empty space list.
  regenerate_index_and_spaces
end

#regenerate_index_and_spacesObject

This method clears the index tree and the free space list and regenerates them from the FlatFile.



741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
# File 'lib/perobs/FlatFile.rb', line 741

# This method clears the index tree and the free space list and regenerates
# them from the FlatFile. Valid blobs go into the index, non-valid entries
# with a length above zero go into the space list. A valid blob whose ID is
# already in the index is reported, discarded and its space registered.
def regenerate_index_and_spaces
  PEROBS.log.warn "Re-generating FlatFileDB index and space files"
  # Both stores are opened on demand before they are emptied.
  @index.open unless @index.is_open?
  @index.clear
  @space_list.open unless @space_list.is_open?
  @space_list.clear

  @progressmeter.start('Re-generating database index', @f.size) do |meter|
    each_blob_header do |header|
      if !header.is_valid?
        # Deleted entry: remember it as reusable space.
        @space_list.add_space(header.addr, header.length) if header.length > 0
      elsif (first_pos = @index.get(header.id))
        PEROBS.log.error "FlatFile contains multiple blobs for ID " \
          "#{header.id}. First blob is at address #{first_pos}. " \
          "Other blob found at address #{header.addr}."
        @space_list.add_space(header.addr, header.length) if header.length > 0
        discard_damaged_blob(header)
      else
        @index.insert(header.id, header.addr)
      end

      meter.update(header.addr)
    end
  end

  sync
end

#repairInteger

Repair the FlatFile. In contrast to the repair functionality in the check() method this method is much faster. It simply re-creates the index and space list from the blob file.

Parameters:

  • repair (Boolean)

    True if errors should be fixed.

Returns:

  • (Integer)

    Number of errors found



609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
# File 'lib/perobs/FlatFile.rb', line 609

# Repair the FlatFile. In contrast to the repair functionality in the
# check() method this method is much faster. It simply re-creates the index
# and space list from the blob file.
#
# Corrupted areas in front of a blob are turned into deleted entries,
# damaged or duplicate blobs are discarded and trailing garbage is cut off.
#
# @return [Integer] Number of errors found
def repair
  errors = 0
  # Nothing to repair if the blob file is not open.
  return errors unless @f

  t = Time.now
  PEROBS.log.info "Repairing FlatFile database"

  # Erase and re-open the index and space list files. We purposely don't
  # close the files as it would trigger needless flushing.
  clear_index_files(true)

  # Now we scan the blob file and re-index all blobs and spaces. Corrupted
  # blobs will be skipped.
  corrupted_blobs = 0
  end_of_last_healthy_blob = nil
  @progressmeter.start('Re-indexing blobs file', @f.size) do |pm|
    # The return value of each_blob_header is used as the number of
    # corrupted blobs (presumably counted while scanning for headers).
    corrupted_blobs = each_blob_header do |header|
      if header.corruption_start
        # The blob is preceded by a corrupted area. We create a new
        # header of a deleted blob for this area and write the new blob
        # over it.
        if (data_length = header.addr - header.corruption_start -
            FlatFileBlobHeader::LENGTH) <= 0
          PEROBS.log.error "Found a corrupted blob that is too small to " +
            "fit a header (#{data_length}). File must be defragmented."
        else
          new_header = FlatFileBlobHeader.new(@f, header.corruption_start,
                                              0, data_length, 0, 0)
          new_header.write
          @space_list.add_space(header.corruption_start, data_length)
        end
      end

      if header.is_valid?
        # We have a non-deleted entry.
        begin
          @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
          buf = @f.read(header.length)
          # IO#read returns nil instead of a short String when the file
          # ends right where the blob data should start. Treat that as a
          # premature end of file as well instead of crashing on nil.
          if buf.nil? || buf.bytesize != header.length
            PEROBS.log.error "Premature end of file in blob with ID " +
              "#{header.id}."
            discard_damaged_blob(header)
            errors += 1
            next
          end

          # Uncompress the data if the compression bit is set in the mark
          # byte.
          if header.is_compressed?
            begin
              buf = Zlib.inflate(buf)
            rescue Zlib::BufError, Zlib::DataError
              PEROBS.log.error "Corrupted compressed block with ID " +
                "#{header.id} found."
              discard_damaged_blob(header)
              errors += 1
              next
            end
          end

          # Headers without a CRC are accepted without a checksum test.
          if header.crc && checksum(buf) != header.crc
            PEROBS.log.error "Checksum failure while checking blob " +
              "with ID #{header.id}"
            discard_damaged_blob(header)
            errors += 1
            next
          end
        rescue IOError => e
          PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
            e.message
        end

        # Check if the ID has already been found in the file.
        if (previous_address = @index.get(header.id))
          PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
            "Addresses: #{previous_address}, #{header.addr}"
          errors += 1
          previous_header = FlatFileBlobHeader.read(@f, previous_address,
                                                    header.id)
          # We have two blobs with the same ID and we must discard one of
          # them.
          # NOTE(review): when the previously indexed blob is the one that
          # gets discarded, the index entry is not redirected to the
          # surviving blob here - confirm whether discard_damaged_blob
          # takes care of that.
          if header.is_outdated?
            discard_damaged_blob(header)
          elsif previous_header.is_outdated?
            discard_damaged_blob(previous_header)
          else
            PEROBS.log.error "None of the blobs with same ID have " +
              "the outdated flag set. Deleting the smaller one."
            errors += 1
            discard_damaged_blob(header.length < previous_header.length ?
                                 header : previous_header)
          end
        else
          # ID is unique so far. Add it to the index.
          @index.insert(header.id, header.addr)
        end

      else
        # Deleted entry: register it as reusable space.
        if header.length > 0
          @space_list.add_space(header.addr, header.length)
        end
      end
      # Only reached for entries that were not skipped with 'next' above.
      end_of_last_healthy_blob = header.addr +
        FlatFileBlobHeader::LENGTH + header.length

      pm.update(header.addr)
    end

    if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
      # The blob file ends with a corrupted blob header.
      PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
        'bytes found at the end of FlatFile.'
      corrupted_blobs += 1

      PEROBS.log.error "Truncating FlatFile to " +
        "#{end_of_last_healthy_blob} bytes by discarding " +
        "#{@f.size - end_of_last_healthy_blob} bytes"
      @f.truncate(end_of_last_healthy_blob)
    end

    errors += corrupted_blobs
  end

  sync
  PEROBS.log.info "FlatFile repair completed in #{Time.now - t} seconds. " +
    "#{errors} errors found."

  errors
end

#syncObject

Force outstanding data to be written to the filesystem.



111
112
113
114
115
116
117
118
119
120
# File 'lib/perobs/FlatFile.rb', line 111

# Force outstanding data to be written to the filesystem. The blob file is
# flushed and synced first, then the index and finally the space list.
def sync
  begin
    @f.flush
    @f.fsync
  rescue IOError => io_error
    PEROBS.log.fatal "Cannot sync flat file database: #{io_error.message}"
  end

  @index.sync
  @space_list.sync
end

#write_obj_by_id(id, raw_obj) ⇒ Integer

Write the given object into the file. This method never uses in-place updates for existing objects. A new copy is inserted first and only when the insert was successful, the old copy is deleted and the index updated.

Parameters:

  • id (Integer)

    ID of the object

  • raw_obj (String)

    Raw object as String

Returns:

  • (Integer)

    position of the written blob in the blob file



178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/perobs/FlatFile.rb', line 178

# Write the given object into the file. This method never uses in-place
# updates for existing objects. A new copy is inserted first and only when
# the insert was successful, the old copy is deleted and the index updated.
# @param id [Integer] ID of the object
# @param raw_obj [String] Raw object as String
# @return [Integer] position of the written blob in the blob file
def write_obj_by_id(id, raw_obj)
  # Check if we have already an object with the given ID. We'll mark it as
  # outdated and save the header for later deletion. In case this
  # operation is aborted or interrupted we ensure that we either have the
  # old or the new version available.
  if (old_addr = find_obj_addr_by_id(id))
    old_header = FlatFileBlobHeader.read(@f, old_addr)
    old_header.set_outdated_flag
  end

  # The checksum is taken from the uncompressed data.
  crc = checksum(raw_obj)

  # If the raw_obj is larger than 256 bytes we will compress it to
  # save some space in the database file. For smaller strings the
  # performance impact of compression is not compensated by writing
  # less data to the storage.
  compressed = false
  raw_obj_bytesize = raw_obj.bytesize
  if raw_obj_bytesize > 256
    raw_obj = Zlib.deflate(raw_obj)
    raw_obj_bytesize = raw_obj.bytesize
    compressed = true
  end

  # A length of -1 is treated below as "no free space was reused", i. e.
  # the blob gets appended (see the 'not appended' case further down).
  addr, length = find_free_blob(raw_obj_bytesize)
  begin
    if length != -1
      # Just a safeguard so we don't overwrite current data.
      header = FlatFileBlobHeader.read(@f, addr)
      if header.length != length
        PEROBS.log.fatal "Length in free list (#{length}) and header " +
          "(#{header.length}) for address #{addr} don't match."
      end
      if raw_obj_bytesize > header.length
        PEROBS.log.fatal "Object (#{raw_obj_bytesize}) is longer than " +
          "blob space (#{header.length})."
      end
      if header.is_valid?
        PEROBS.log.fatal "Entry at address #{addr} with flags: " +
          "#{header.flags} is already used for ID #{header.id}."
      end
    end
    # Build the flags byte: always valid, optionally compressed.
    flags = 1 << FlatFileBlobHeader::VALID_FLAG_BIT
    flags |= (1 << FlatFileBlobHeader::COMPRESSED_FLAG_BIT) if compressed
    FlatFileBlobHeader.new(@f, addr, flags, raw_obj_bytesize, id, crc).write
    # NOTE(review): this relies on the file position being right behind
    # the header after the header was written - confirm in
    # FlatFileBlobHeader#write.
    @f.write(raw_obj)
    if length != -1 && raw_obj_bytesize < length
      # The new object was not appended and it did not completely fill the
      # free space. So we have to write a new header to mark the remaining
      # empty space.
      unless length - raw_obj_bytesize >= FlatFileBlobHeader::LENGTH
        PEROBS.log.fatal "Not enough space to append the empty space " +
          "header (space: #{length} bytes, object: #{raw_obj_bytesize} " +
          "bytes)."
      end
      space_address = @f.pos
      space_length = length - FlatFileBlobHeader::LENGTH - raw_obj_bytesize
      FlatFileBlobHeader.new(@f, space_address, 0, space_length,
                             0, 0).write
      # Register the new space with the space list. A remainder that only
      # fits the header itself (length 0) is not registered.
      if @space_list.is_open? && space_length > 0
        @space_list.add_space(space_address, space_length)
      end
    end

    # Once the blob has been written we can update the index as well.
    @index.insert(id, addr) if @index.is_open?

    if old_addr
      # If we had an existing object stored for the ID we have to mark
      # this entry as deleted now.
      old_header.clear_flags
      # And register the newly freed space with the space list.
      if @space_list.is_open?
        @space_list.add_space(old_addr, old_header.length)
      end
    else
      # No old copy was touched, so flush the new blob explicitly.
      @f.flush
    end
  rescue IOError => e
    PEROBS.log.fatal "Cannot write blob for ID #{id} to FlatFileDB: " +
      e.message
  end

  addr
end