Class: FilesHunter::SegmentsAnalyzer

Inherits:
Object
  • Object
show all
Defined in:
lib/fileshunter/SegmentsAnalyzer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ SegmentsAnalyzer

Constructor

Parameters
  • options (map<Symbol,Object>): Options [default = {}]

    • :block_size (Fixnum): Block size in bytes to read from the file at once [default = 134217728]



75
76
77
78
79
80
81
82
83
# File 'lib/fileshunter/SegmentsAnalyzer.rb', line 75

# Constructor
#
# Parameters::
# * *options* (<em>map<Symbol,Object></em>): Options [default = {}]
#   * *:block_size* (_Fixnum_): Block size in bytes to read from the file at once [default = 134217728]
def initialize(options = {})
  # A missing (or nil/false) :block_size falls back to 134217728 bytes (128 * 1024 * 1024)
  @block_size = options[:block_size] || 134217728
  # Decoder plugins live in the Decoders directory next to this source file
  decoders_dir = "#{File.dirname(__FILE__)}/Decoders"
  @plugins = RUtilAnts::Plugins::PluginsManager.new
  @plugins.parse_plugins_from_dir(:Decoders, decoders_dir, 'FilesHunter::Decoders')
  # The following variables may be accessed in a multithreaded environment
  @parsing_cancelled = false
  # Both counters stay nil until get_segments starts analyzing a file
  @nbr_bytes = @nbr_bytes_decoded = nil
end

Instance Attribute Details

#parsing_cancelled ⇒ Object (readonly)

Is the parsing being cancelled?

Boolean


68
69
70
# File 'lib/fileshunter/SegmentsAnalyzer.rb', line 68

# Is the parsing being cancelled?
# The flag is set by cancel_parsing and reset to false each time get_segments starts.
#
# Result::
# * _Boolean_: Current value of the cancellation flag
def parsing_cancelled
  @parsing_cancelled
end

Instance Method Details

#add_bytes_decoded(nbr_bytes) ⇒ Object

Add some bytes as being decoded

Parameters
  • nbr_bytes (Fixnum): Number of bytes that have just been decoded



165
166
167
168
# File 'lib/fileshunter/SegmentsAnalyzer.rb', line 165

# Add some bytes as being decoded.
# The given amount is accumulated into the running total reported by progression.
# The counter is initialized to 0 by get_segments, so this is meant to be called
# while a get_segments analysis is in progress.
#
# Parameters::
# * *nbr_bytes* (_Fixnum_): Number of bytes that have just been decoded
def add_bytes_decoded(nbr_bytes)
  # Accumulate rather than overwrite: each call reports a newly decoded chunk
  @nbr_bytes_decoded += nbr_bytes
  #puts "Progression: #{@nbr_bytes_decoded} / #{@nbr_bytes}"
end

#cancel_parsing ⇒ Object

Cancel the parsing. This method must be called from a thread other than the one that is currently calling get_segments.



157
158
159
# File 'lib/fileshunter/SegmentsAnalyzer.rb', line 157

# Cancel the parsing.
# This method has to be called from a different thread than the one that is currently calling get_segments.
# It only raises the cancellation flag; get_segments resets that flag to false when it starts.
# NOTE(review): the code that reacts to the flag (presumably by raising CancelParsingError,
# which get_segments rescues) is not visible here -- confirm.
def cancel_parsing
  @parsing_cancelled = true
end

#get_segments(file_name) ⇒ Object

Get segments by analyzing a given file

Parameters
  • file_name (String): File to analyze

Result
  • list<Segment>: List of segments for this file



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/fileshunter/SegmentsAnalyzer.rb', line 91

# Get segments by analyzing a given file.
# The file starts as a single :unknown segment spanning its whole size; each decoder
# is then given the remaining unknown segments in turn.
# If a CancelParsingError is raised during the analysis, it is logged and the segments
# found so far are returned.
#
# Parameters::
# * *file_name* (_String_): File to analyze
# Result::
# * <em>list<Segment></em>: List of segments for this file
def get_segments(file_name)
  segments = []

  # Reset any cancellation requested during a previous analysis
  @parsing_cancelled = false

  # Decoders are tried in this exact order.
  # This is important as some containers can include segments of other containers.
  # A given format MUST NOT be able to include a format listed BEFORE it.
  # A given format CAN include a format listed AFTER it.
  decoder_names = [
    'CFBF', # includes Thumbs.db, DOC, XLS, PPT, MSI
    'ASF', # includes WMV
    'CAB', # includes CAB, MSU, MZZ
    'EXE', # includes DLL, EXE, OCX, OBJ, DRV, SYS, FON. Cannot detect data concatenated after some EXE files.
    'MPG_Video', # not generic enough
    'M2V', # not generic enough
    'EBML', # includes MKV, WEBM
    'MP4', # includes 3GP, MOV, M4A and many others
    'OGG',
    'RIFF', # includes AVI, WAV, ANI
    'FLAC',
    'BMP',
    'MP3',
    'Text', # includes TXT, LOG, SRT, RTF, HTML, XML (both ASCII-8BIT and UTF-16)
    'JPEG', # includes JPG, THM
    'TIFF',
    'ICO' # includes ICO, CUR
  ]

  File.open(file_name, 'rb') do |io|
    reader = IOBlockReader.init(io, :block_size => @block_size)

    # Progress counters, exposed through progression
    @nbr_bytes = File.size(file_name)
    @nbr_bytes_decoded = 0
    log_debug "File size: #{@nbr_bytes}"
    # Initially the whole file is one unknown segment
    segments << Segment.new(0, @nbr_bytes, :unknown, false, false, {})

    begin
      decoder_names.each do |decoder_name|
        @plugins.access_plugin(:Decoders, decoder_name) do |plugin|
          log_debug "[#{file_name}] - Try #{decoder_name}"
          # Profiling hook (disabled):
          # require 'ruby-prof'
          # RubyProf.start
          # NOTE(review): foreach_unknown_segment is defined elsewhere; presumably it yields each
          # :unknown segment's bounds and rebuilds the list from the block's result -- confirm.
          segments = foreach_unknown_segment(segments) do |begin_offset, end_offset|
            log_debug "[#{file_name}] - Try #{decoder_name} for segment [#{begin_offset}, #{end_offset}]"
            reader.set_limits(begin_offset, end_offset)
            plugin.setup(self, reader, begin_offset, end_offset)
            begin
              plugin.find_segments
            rescue AccessDataError
              # A decoder reading out of its range is logged, and whatever it found so far is kept
              log_err "Decoder #{decoder_name} exceeded data ranges: #{$!}.\n#{$!.backtrace.join("\n")}"
            end
            # Value of the block: the segments this decoder found in the current range
            plugin.segments_found
          end
          # result = RubyProf.stop
          # RubyProf::FlatPrinter.new(result).print(STDOUT)
        end
      end
    rescue CancelParsingError
      log_info "[#{file_name}] - Parsing cancelled"
    end
  end

  segments
end

#progression ⇒ Object

Get the current progression

Result
  • Fixnum: Total number of bytes

  • Fixnum: Total number of bytes decoded



175
176
177
# File 'lib/fileshunter/SegmentsAnalyzer.rb', line 175

# Get the current progression.
# Both values are nil until get_segments has started analyzing a file.
#
# Result::
# * _Fixnum_: Total number of bytes
# * _Fixnum_: Total number of bytes decoded
def progression
  [@nbr_bytes, @nbr_bytes_decoded]
end