Module: Python::Pickle

Defined in:
lib/python/pickle.rb,
lib/python/pickle/tuple.rb,
lib/python/pickle/version.rb,
lib/python/pickle/protocol.rb,
lib/python/pickle/py_class.rb,
lib/python/pickle/protocol0.rb,
lib/python/pickle/protocol1.rb,
lib/python/pickle/protocol2.rb,
lib/python/pickle/protocol3.rb,
lib/python/pickle/protocol4.rb,
lib/python/pickle/protocol5.rb,
lib/python/pickle/py_object.rb,
lib/python/pickle/byte_array.rb,
lib/python/pickle/exceptions.rb,
lib/python/pickle/instruction.rb,
lib/python/pickle/deserializer.rb,
lib/python/pickle/instructions/dup.rb,
lib/python/pickle/instructions/get.rb,
lib/python/pickle/instructions/int.rb,
lib/python/pickle/instructions/pop.rb,
lib/python/pickle/instructions/put.rb,
lib/python/pickle/instructions/dict.rb,
lib/python/pickle/instructions/ext1.rb,
lib/python/pickle/instructions/ext2.rb,
lib/python/pickle/instructions/ext4.rb,
lib/python/pickle/instructions/list.rb,
lib/python/pickle/instructions/long.rb,
lib/python/pickle/instructions/mark.rb,
lib/python/pickle/instructions/none.rb,
lib/python/pickle/instructions/stop.rb,
lib/python/pickle/instructions/build.rb,
lib/python/pickle/instructions/float.rb,
lib/python/pickle/instructions/frame.rb,
lib/python/pickle/instructions/long1.rb,
lib/python/pickle/instructions/long4.rb,
lib/python/pickle/instructions/proto.rb,
lib/python/pickle/instructions/tuple.rb,
lib/python/pickle/instructions/append.rb,
lib/python/pickle/instructions/global.rb,
lib/python/pickle/instructions/reduce.rb,
lib/python/pickle/instructions/string.rb,
lib/python/pickle/instructions/tuple1.rb,
lib/python/pickle/instructions/tuple2.rb,
lib/python/pickle/instructions/tuple3.rb,
lib/python/pickle/instructions/appends.rb,
lib/python/pickle/instructions/bin_get.rb,
lib/python/pickle/instructions/bin_put.rb,
lib/python/pickle/instructions/memoize.rb,
lib/python/pickle/instructions/new_obj.rb,
lib/python/pickle/instructions/bin_int1.rb,
lib/python/pickle/instructions/new_true.rb,
lib/python/pickle/instructions/pop_mark.rb,
lib/python/pickle/instructions/set_item.rb,
lib/python/pickle/instructions/add_items.rb,
lib/python/pickle/instructions/bin_bytes.rb,
lib/python/pickle/instructions/bin_float.rb,
lib/python/pickle/instructions/empty_set.rb,
lib/python/pickle/instructions/has_value.rb,
lib/python/pickle/instructions/new_false.rb,
lib/python/pickle/instructions/set_items.rb,
lib/python/pickle/instructions/bin_bytes8.rb,
lib/python/pickle/instructions/bin_string.rb,
lib/python/pickle/instructions/empty_dict.rb,
lib/python/pickle/instructions/empty_list.rb,
lib/python/pickle/instructions/frozen_set.rb,
lib/python/pickle/instructions/new_obj_ex.rb,
lib/python/pickle/instructions/bin_unicode.rb,
lib/python/pickle/instructions/byte_array8.rb,
lib/python/pickle/instructions/empty_tuple.rb,
lib/python/pickle/instructions/next_buffer.rb,
lib/python/pickle/instructions/bin_unicode8.rb,
lib/python/pickle/instructions/long_bin_get.rb,
lib/python/pickle/instructions/stack_global.rb,
lib/python/pickle/instructions/readonly_buffer.rb,
lib/python/pickle/instructions/short_bin_bytes.rb,
lib/python/pickle/instructions/short_bin_string.rb,
lib/python/pickle/instructions/short_bin_unicode.rb,
lib/python/pickle/instructions/has_length_and_value.rb

Overview

A modern Ruby implementation of the Python Pickle serialization format.

Defined Under Namespace

Modules: Instructions Classes: ByteArray, DeserializationError, Deserializer, Error, Instruction, InvalidFormat, Protocol, Protocol0, Protocol1, Protocol2, Protocol3, Protocol4, Protocol5, PyClass, PyObject, Tuple

Constant Summary collapse

PROTOCOL_VERSIONS =

This constant is part of a private API. You should avoid using this constant if possible, as it may be removed or be changed in the future.

Mapping of protocol versions to protocol parsers.

{
  0 => Protocol0,
  1 => Protocol1,
  2 => Protocol2,
  3 => Protocol3,
  4 => Protocol4,
  5 => Protocol5
}
DEFAULT_PROTOCOL =

The default protocol version to use.

4
HIGHEST_PROTOCOL =

The highest protocol version supported.

5
VERSION =

python-pickle version

'0.1.0'

Class Method Summary collapse

Class Method Details

.dump(object, output = nil, protocol: DEFAULT_PROTOCOL) ⇒ Object

Serializes the Ruby object into Python Pickle data.

Parameters:

  • object (Object)

    The Ruby object to serialize.

  • output (IO) (defaults to: nil)

    The optional output to write the Pickle data to.

  • protocol (Integer) (defaults to: DEFAULT_PROTOCOL)

    The desired Python Pickle protocol to use.



134
135
136
137
138
# File 'lib/python/pickle.rb', line 134

#
# Serializes the Ruby object into Python Pickle data.
#
# NOTE(review): the body shown here only validates `protocol`. It never
# touches `object` or `output` and implicitly returns nil, so serialization
# appears to be unimplemented -- confirm before relying on it.
#
# @param [Object] object
#   The Ruby object to serialize.
#
# @param [IO, nil] output
#   The optional output to write the Pickle data to.
#
# @param [Integer] protocol
#   The desired Python Pickle protocol to use.
#
# @raise [ArgumentError]
#   The protocol version was less than 0 or greater than HIGHEST_PROTOCOL.
#
def self.dump(object,output=nil, protocol: DEFAULT_PROTOCOL)
  unless protocol.between?(0,HIGHEST_PROTOCOL)
    raise(ArgumentError,"protocol must be between 0 and #{HIGHEST_PROTOCOL}, but was #{protocol.inspect}")
  end
end

.infer_protocol_version(io) ⇒ Integer

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Infers the protocol version from the IO stream.

Parameters:

  • io (IO)

    The IO stream to inspect.

Returns:

  • (Integer)

    The inferred Python Pickle protocol version.

Raises:

  • (InvalidFormat)

    Could not determine the Pickle version from the opcodes at the start of the IO stream.



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/python/pickle.rb', line 155

#
# Infers the protocol version from the IO stream.
#
# Reads opcodes from the front of the stream until one identifies the
# protocol: a PROTO opcode carries the version explicitly, while opcodes that
# only exist in protocol 0 or protocol 1 identify those protocols. Opcodes
# shared by protocols 0 and 1 are skipped by recursing on the next opcode.
# Every byte that was read is pushed back with `ungetbyte` before returning
# or raising, so the caller can parse the stream from where it started.
#
# @param [IO] io
#   The IO stream to inspect. Must respond to `getbyte`, `ungetbyte`, `gets`
#   and `pos`.
#
# @return [Integer]
#   The inferred Python Pickle protocol version.
#
# @raise [InvalidFormat]
#   Could not determine the Pickle version, either because an unrecognized
#   opcode was encountered or because the stream ended prematurely.
#
# @api private
#
def self.infer_protocol_version(io)
  opcode = io.getbyte

  begin
    case opcode
    when 0x80 # PROTO (added in protocol 2)
      version = io.getbyte

      # a PROTO opcode with no version byte means the stream was truncated
      if version.nil?
        raise(InvalidFormat,"unexpected end of stream after PROTO opcode at position #{io.pos}")
      end

      io.ungetbyte(version)
      return version
    when 48,  # POP (protocol 0)
         50,  # DUP (protocol 0)
         70,  # FLOAT (protocol 0)
         83,  # STRING (protocol 0)
         86,  # UNICODE (protocol 0)
         100, # DICT (protocol 0)
         103, # GET (protocol 0)
         108, # LIST (protocol 0)
         112  # PUT (protocol 0)
      0
    when 41,  # EMPTY_TUPLE (protocol 1)
         71,  # BINFLOAT (protocol 1)
         75,  # BININT1 (protocol 1)
         84,  # BINSTRING (protocol 1)
         85,  # SHORT_BINSTRING (protocol 1)
         88,  # BINUNICODE (protocol 1)
         93,  # EMPTY_LIST (protocol 1)
         101, # APPENDS (protocol 1)
         113, # BINPUT (protocol 1)
         117, # SETITEMS (protocol 1)
         125  # EMPTY_DICT (protocol 1)
      1
    when 46 # STOP
      # if we've read all the way to the end of the stream and still cannot
      # find any protocol 0 or protocol 1 specific opcodes, assume protocol 0
      0
    when 73, # INT  (identical in both protocol 0 and 1)
         76  # LONG (identical in both protocol 0 and 1)
      # skip the newline-terminated argument; nil if the stream ended here
      chars = io.gets

      begin
        infer_protocol_version(io)
      ensure
        # push the read bytes back into the IO stream (nothing to push back
        # when `gets` hit the end of the stream)
        chars.bytes.reverse_each { |b| io.ungetbyte(b) } if chars
      end
    when 40,  # MARK    (identical in both protocol 0 and 1)
         78,  # NONE    (identical in both protocol 0 and 1)
         82,  # REDUCE  (identical in both protocol 0 and 1)
         97,  # APPEND  (identical in both protocol 0 and 1)
         98,  # BUILD   (identical in both protocol 0 and 1)
         115, # SETITEM (identical in both protocol 0 and 1)
         116  # TUPLE   (identical in both protocol 0 and 1)
      infer_protocol_version(io)
    when 99 # GLOBAL
      # skip the two newline-terminated arguments; either may be nil if the
      # stream ended prematurely
      first_nl_string  = io.gets
      second_nl_string = io.gets

      begin
        infer_protocol_version(io)
      ensure
        # push the read bytes back into the IO stream, last read first
        second_nl_string.bytes.reverse_each { |b| io.ungetbyte(b) } if second_nl_string
        first_nl_string.bytes.reverse_each  { |b| io.ungetbyte(b) } if first_nl_string
      end
    else
      raise(InvalidFormat,"cannot infer protocol version from opcode (#{opcode.inspect}) at position #{io.pos}")
    end
  ensure
    # always restore the opcode so the stream is left unconsumed
    io.ungetbyte(opcode)
  end
end

.load(data, **kwargs) ⇒ Object

Deserializes the Python Pickle stream into a Ruby object.

Parameters:

  • data (String, IO)

    The Python pickle stream to parse.

  • protocol (Integer, nil)

    The explicit protocol version to use. If nil the protocol version will be inferred by inspecting the first two bytes of the stream.

Raises:

  • (DeserializationError)

    No object could be deserialized from the stream.



93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/python/pickle.rb', line 93

#
# Deserializes the Python Pickle stream into a Ruby object.
#
# Parses the stream one instruction at a time, feeds each instruction to a
# Deserializer, and returns as soon as the deserializer reports `:halt`.
#
# @param [String, IO] data
#   The Python pickle stream to parse.
#
# @param [Integer, nil] protocol
#   The explicit protocol version to use. Forwarded to {parse}; if nil the
#   protocol version is inferred from the stream.
#
# @param [Hash{Symbol => Object}] kwargs
#   Additional keyword arguments passed to `Deserializer.new`.
#   NOTE(review): assumes `Deserializer.new` does not itself take a
#   `protocol:` keyword -- confirm against its definition.
#
# @return [Object]
#   The object returned by the deserializer alongside the `:halt` status.
#
# @raise [DeserializationError]
#   The stream ended without the deserializer ever reporting `:halt`.
#
def self.load(data, protocol: nil, **kwargs)
  deserializer = Deserializer.new(**kwargs)

  parse(data, protocol: protocol) do |instruction|
    status, object = deserializer.execute(instruction)

    return object if status == :halt
  end

  raise(DeserializationError,"failed to deserialize any object data from stream")
end

.load_file(path, **kwargs) ⇒ Object

Deserializes a Python Pickle file.

Parameters:

  • path (String)

    The path of the file.

Returns:

  • (Object)

    The deserialized object.



116
117
118
# File 'lib/python/pickle.rb', line 116

#
# Deserializes a Python Pickle file.
#
# The file is opened in binary mode and is closed again once {load} returns
# or raises.
#
# @param [String] path
#   The path of the file.
#
# @param [Hash{Symbol => Object}] kwargs
#   Additional keyword arguments passed through to {load}.
#
# @return [Object]
#   The deserialized object.
#
def self.load_file(path,**kwargs)
  # block form guarantees the file handle is closed, even on error
  File.open(path,'rb') { |file| load(file,**kwargs) }
end

.parse(data, protocol: nil) {|instruction| ... } ⇒ Array<Instruction>

Parses a Python pickle stream.

Parameters:

  • data (String, IO)

    The Python pickle stream to parse.

  • protocol (Integer, nil) (defaults to: nil)

    The explicit protocol version to use. If nil the protocol version will be inferred by inspecting the first two bytes of the stream.

Yields:

  • (instruction)

    If a block is given, it will be passed each parsed Pickle instruction.

Yield Parameters:

  • instruction (Instruction)

    A parsed Pickle instruction from the Pickle stream.

Returns:

  • (Array<Instruction>)

    All parsed Pickle instructions from the Pickle stream.



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/python/pickle.rb', line 59

#
# Parses a Python pickle stream.
#
# @param [String, IO, StringIO] data
#   The Python pickle stream to parse. A String is wrapped in a StringIO.
#
# @param [Integer, nil] protocol
#   The explicit protocol version to use. If nil the protocol version is
#   inferred via {infer_protocol_version}.
#
# @yield [instruction]
#   If a block is given, it is handed on to the protocol parser's `read`.
#
# @yieldparam [Instruction] instruction
#   A parsed Pickle instruction from the Pickle stream.
#
# @return [Array<Instruction>]
#   Whatever the protocol parser's `read` returns.
#   NOTE(review): presumably all parsed instructions -- confirm against the
#   Protocol class.
#
# @raise [ArgumentError]
#   `data` was not a String or IO-like object, or the explicit `protocol`
#   was less than 0 or greater than HIGHEST_PROTOCOL.
#
def self.parse(data, protocol: nil, &block)
  io = case data
       when String       then StringIO.new(data)
       when IO, StringIO then data # StringIO is not a subclass of IO
       else
         # report the offending argument (`io` is still unassigned here)
         raise(ArgumentError,"argument must be either an IO object or a String: #{data.inspect}")
       end

  if protocol
    if (protocol < 0) || (protocol > HIGHEST_PROTOCOL)
      raise(ArgumentError,"protocol must be between 0 and #{HIGHEST_PROTOCOL}, but was #{protocol.inspect}")
    end
  else
    protocol = infer_protocol_version(io)
  end

  parser = PROTOCOL_VERSIONS.fetch(protocol).new(io)

  return parser.read(&block)
end