Class: Avro::Schema

Inherits:
Object
  • Object
show all
Defined in:
lib/avro/schema.rb

Defined Under Namespace

Classes: ArraySchema, BytesSchema, EnumSchema, Field, FixedSchema, MapSchema, NamedSchema, PrimitiveSchema, RecordSchema, UnionSchema

Constant Summary collapse

PRIMITIVE_TYPES =

Sets of strings, for backwards compatibility. See below for sets of symbols, for better performance.

Set.new(%w[null boolean string bytes int long float double])
# String names of the schema types for which real_parse reads a 'name' property.
NAMED_TYPES =
Set.new(%w[fixed enum record error])
# Every type string that real_parse lets through before converting it to a symbol:
# the primitive names, the named types, plus array, map, union and request.
VALID_TYPES =
PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])
# Symbol counterpart of PRIMITIVE_TYPES.
PRIMITIVE_TYPES_SYM =
Set.new(PRIMITIVE_TYPES.map(&:to_sym))
# Symbol counterpart of NAMED_TYPES.
NAMED_TYPES_SYM =
Set.new(NAMED_TYPES.map(&:to_sym))
# Symbol counterpart of VALID_TYPES.
VALID_TYPES_SYM =
Set.new(VALID_TYPES.map(&:to_sym))
# Pattern a schema name must match: one or more dot-separated identifiers, each
# starting with a letter or underscore and continuing with letters, digits or
# underscores.
#
# Anchored with \A and \z so that the *whole* string has to match. The line
# anchors ^ and $ would accept any string whose first line is a valid name,
# e.g. "ok\n<anything>", and would also accept a trailing newline.
NAME_REGEX =
/\A([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*\z/.freeze
# Lower bound of a signed 32-bit integer (-2**31).
INT_MIN_VALUE =
-(1 << 31)
# Upper bound of a signed 32-bit integer (2**31 - 1).
INT_MAX_VALUE =
(1 << 31) - 1
# Lower bound of a signed 64-bit integer (-2**63).
LONG_MIN_VALUE =
-(1 << 63)
# Upper bound of a signed 64-bit integer (2**63 - 1).
LONG_MAX_VALUE =
(1 << 63) - 1
# Options used by Schema.validate when the caller does not pass any.
DEFAULT_VALIDATE_OPTIONS =
{ recursive: true, encoded: false }.freeze
# Name of the decimal logical type.
DECIMAL_LOGICAL_TYPE =
'decimal'
# Starting value of the CRC-64 fingerprint; initFPTable also uses it as the
# mask that is XOR-ed in while building the lookup table.
CRC_EMPTY =
0xc15d213aa4d7a795
# Bytes that single_object_encoding_header places in front of the schema fingerprint.
SINGLE_OBJECT_MAGIC_NUMBER =
[0xC3, 0x01].freeze
@@fp_table =

The Java library caches this value once it has been initialized, so this pattern mimics that.

nil

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(type, logical_type = nil) ⇒ Schema

Returns a new instance of Schema.



126
127
128
129
# File 'lib/avro/schema.rb', line 126

# Stores the schema type as a symbol together with an optional logical type.
#
# @param type [Symbol, #to_sym] schema type; anything that is not already a
#   Symbol is converted with #to_sym
# @param logical_type [Object, nil] logical type annotation, kept as given
def initialize(type, logical_type=nil)
  @type_sym =
    if type.is_a?(Symbol)
      type
    else
      type.to_sym
    end
  @logical_type = logical_type
end

Instance Attribute Details

#logical_type ⇒ Object (readonly)

Returns the value of attribute logical_type.



132
133
134
# File 'lib/avro/schema.rb', line 132

# Reader for @logical_type, the value #initialize stores from its second
# argument (nil when none was given).
def logical_type
  @logical_type
end

#type_sym ⇒ Object (readonly)

Returns the value of attribute type_sym.



131
132
133
# File 'lib/avro/schema.rb', line 131

# Reader for @type_sym, the schema type that #initialize stores as a Symbol.
def type_sym
  @type_sym
end

Class Method Details

.parse(json_string) ⇒ Object



44
45
46
# File 'lib/avro/schema.rb', line 44

# Parses a JSON schema definition.
#
# The string is decoded with MultiJson and handed to real_parse together with
# a fresh, empty names hash.
#
# @param json_string [String] JSON text of the schema
# @return [Object] whatever real_parse returns for the decoded data
def self.parse(json_string)
  decoded = MultiJson.load(json_string)
  known_names = {}
  real_parse(decoded, known_names)
end

.real_parse(json_obj, names = nil, default_namespace = nil) ⇒ Object

Build Avro Schema from data parsed out of JSON string.



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/avro/schema.rb', line 49

# Build an Avro schema object from data parsed out of a JSON string.
#
# Dispatch, in order:
# * Hash  - the 'type' property selects the schema class
# * Array - a union of the listed schemas
# * a member of PRIMITIVE_TYPES - a primitive schema
# * anything else - UnknownSchemaError
#
# @param json_obj [Hash, Array, Object] decoded JSON schema
# @param names [Object, nil] passed through to the schema constructors
# @param default_namespace [Object, nil] namespace used when a named type
#   has no 'namespace' key of its own
# @raise [SchemaParseError] when 'type' is missing or not usable, or a name is invalid
# @raise [UnknownSchemaError] when json_obj is none of the accepted shapes
def self.real_parse(json_obj, names=nil, default_namespace=nil)
  unless json_obj.is_a?(Hash)
    # JSON array (union)
    return UnionSchema.new(json_obj, names, default_namespace) if json_obj.is_a?(Array)
    # Bare primitive type name
    return PrimitiveSchema.new(json_obj) if PRIMITIVE_TYPES.include?(json_obj)

    raise UnknownSchemaError.new(json_obj)
  end

  type = json_obj['type']
  logical_type = json_obj['logicalType']
  raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?

  # Check that the type is valid before calling #to_sym, since symbols are never garbage
  # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
  raise SchemaParseError, "Unknown type: #{type}" unless VALID_TYPES.include?(type)

  type_sym = type.to_sym

  if PRIMITIVE_TYPES_SYM.include?(type_sym)
    return PrimitiveSchema.new(type_sym, logical_type) unless type_sym == :bytes

    return BytesSchema.new(type_sym, logical_type, json_obj['precision'], json_obj['scale'])
  end

  unless NAMED_TYPES_SYM.include?(type_sym)
    case type_sym
    when :array
      return ArraySchema.new(json_obj['items'], names, default_namespace)
    when :map
      return MapSchema.new(json_obj['values'], names, default_namespace)
    end

    raise SchemaParseError.new("Unknown Valid Type: #{type}")
  end

  # Named types: fixed, enum, record, error
  name = json_obj['name']
  validate_name = !Avro.disable_schema_name_validation
  if validate_name && name !~ NAME_REGEX
    raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
  end

  # An explicit 'namespace' key wins even when its value is nil.
  namespace = default_namespace
  namespace = json_obj['namespace'] if json_obj.include?('namespace')
  aliases = json_obj['aliases']

  case type_sym
  when :fixed
    FixedSchema.new(name, namespace, json_obj['size'], names, logical_type, aliases,
                    json_obj['precision'], json_obj['scale'])
  when :enum
    EnumSchema.new(name, namespace, json_obj['symbols'], names, json_obj['doc'],
                   json_obj['default'], aliases)
  when :record, :error
    RecordSchema.new(name, namespace, json_obj['fields'], names, type_sym, json_obj['doc'], aliases)
  else
    raise SchemaParseError.new("Unknown named type: #{type}")
  end
end

.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS) ⇒ Object

Determines whether a Ruby datum is a valid instance of a schema.



119
120
121
122
123
124
# File 'lib/avro/schema.rb', line 119

# Reports whether a datum is valid for a schema.
#
# Delegates to SchemaValidator.validate! and turns its ValidationError into a
# false return value; any other error propagates to the caller.
#
# @param expected_schema [Object] schema to validate against
# @param logical_datum [Object] value to check
# @param options [Hash] forwarded unchanged to SchemaValidator.validate!
# @return [Boolean] true when validation raised nothing, false on ValidationError
def self.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS)
  begin
    SchemaValidator.validate!(expected_schema, logical_datum, options)
  rescue SchemaValidator::ValidationError
    return false
  end

  true
end

Instance Method Details

#==(other, _seen = nil) ⇒ Object



209
210
211
# File 'lib/avro/schema.rb', line 209

# Two schemas are equal here when the other object is a Schema with the same type symbol.
#
# @param other [Object] object to compare with
# @param _seen [Object, nil] accepted but not used by this implementation
# @return [Boolean]
def ==(other, _seen=nil)
  return false unless other.is_a?(Schema)

  type_sym == other.type_sym
end

#be_read?(other_schema) ⇒ Boolean

Returns:

  • (Boolean)


201
202
203
# File 'lib/avro/schema.rb', line 201

# Asks other_schema whether it can read data written with this schema.
#
# @param other_schema [#read?] the prospective reader
# @return [Object] the result of other_schema.read?(self)
def be_read?(other_schema)
  reader = other_schema
  reader.read?(self)
end

#crc_64_avro_fingerprint ⇒ Object



170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/avro/schema.rb', line 170

# Computes a table-driven CRC-64 fingerprint over the bytes of the schema's
# parsing form.
#
# The lookup table is built on first use via initFPTable and kept in
# @@fp_table; the running value starts at CRC_EMPTY.
#
# @return [Integer] the fingerprint
def crc_64_avro_fingerprint
  canonical_bytes = Avro::SchemaNormalization.to_parsing_form(self).unpack("C*")

  initFPTable unless @@fp_table

  canonical_bytes.inject(CRC_EMPTY) do |fingerprint, byte|
    (fingerprint >> 8) ^ @@fp_table[(fingerprint ^ byte) & 0xff]
  end
end

#hash(_seen = nil) ⇒ Object



213
214
215
# File 'lib/avro/schema.rb', line 213

def hash(_seen=nil)
  type_sym.hash
end

#initFPTable ⇒ Object



159
160
161
162
163
164
165
166
167
168
# File 'lib/avro/schema.rb', line 159

# Builds the 256-entry lookup table used by crc_64_avro_fingerprint and stores
# it in @@fp_table.
#
# Each entry is its index pushed through eight shift/XOR rounds, with
# CRC_EMPTY XOR-ed in whenever the low bit is set.
#
# The table is built in a local first and assigned to @@fp_table only once it
# is complete. crc_64_avro_fingerprint treats any non-nil @@fp_table as ready,
# so publishing an array still filled with nil would let a concurrent caller
# index a nil entry.
#
# @return [Integer] the number of entries (256), which is what the original
#   `256.times` loop evaluated to
def initFPTable
  table = Array.new(256) do |index|
    fp = index
    8.times do
      # -(fp & 1) is 0 or -1 (all bits set), so this selects either 0 or CRC_EMPTY.
      fp = (fp >> 1) ^ (CRC_EMPTY & -(fp & 1))
    end
    fp
  end

  @@fp_table = table
  table.length
end

#md5_fingerprint ⇒ Object

Returns the MD5 fingerprint of the schema as an Integer.



143
144
145
146
# File 'lib/avro/schema.rb', line 143

# Returns the MD5 fingerprint of the schema as an Integer.
#
# The digest is taken over the schema's parsing form and its hexadecimal
# representation is read as a base-16 number.
#
# @return [Integer]
def md5_fingerprint
  canonical = SchemaNormalization.to_parsing_form(self)
  hex_digest = Digest::MD5.hexdigest(canonical)
  hex_digest.to_i(16)
end

#mutual_read?(other_schema) ⇒ Boolean

Returns:

  • (Boolean)


205
206
207
# File 'lib/avro/schema.rb', line 205

# Asks SchemaCompatibility whether this schema and other_schema can read each other.
#
# @param other_schema [Object] schema to compare with
# @return [Object] the result of SchemaCompatibility.mutual_read?(other_schema, self)
def mutual_read?(other_schema)
  counterpart = other_schema
  SchemaCompatibility.mutual_read?(counterpart, self)
end

#read?(writers_schema) ⇒ Boolean

Returns:

  • (Boolean)


197
198
199
# File 'lib/avro/schema.rb', line 197

# Asks SchemaCompatibility whether this schema, acting as the reader, can read
# data written with writers_schema.
#
# @param writers_schema [Object] schema the data was written with
# @return [Object] the result of SchemaCompatibility.can_read?(writers_schema, self)
def read?(writers_schema)
  readers_schema = self
  SchemaCompatibility.can_read?(writers_schema, readers_schema)
end

#sha256_fingerprint ⇒ Object

Returns the SHA-256 fingerprint of the schema as an Integer.



149
150
151
152
# File 'lib/avro/schema.rb', line 149

# Returns the SHA-256 fingerprint of the schema as an Integer.
#
# The digest is taken over the schema's parsing form and its hexadecimal
# representation is read as a base-16 number.
#
# @return [Integer]
def sha256_fingerprint
  canonical = SchemaNormalization.to_parsing_form(self)
  hex_digest = Digest::SHA256.hexdigest(canonical)
  hex_digest.to_i(16)
end

#single_object_encoding_header ⇒ Object



184
185
186
# File 'lib/avro/schema.rb', line 184

def single_object_encoding_header
  [SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
end

#single_object_schema_fingerprint ⇒ Object



187
188
189
190
191
192
193
194
195
# File 'lib/avro/schema.rb', line 187

# Splits crc_64_avro_fingerprint into eight bytes, least-significant byte first.
#
# @return [Array<Integer>] eight values in 0..255
def single_object_schema_fingerprint
  fingerprint = crc_64_avro_fingerprint
  # Byte i is bits 8*i .. 8*i+7 of the fingerprint.
  Array.new(8) { |byte_index| (fingerprint >> (8 * byte_index)) & 0xff }
end

#subparse(json_obj, names = nil, namespace = nil) ⇒ Object



217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'lib/avro/schema.rb', line 217

# Resolves a nested schema.
#
# A String is first looked up in names by its full name (built with
# Name.make_fullname); on a hit the already-known schema is returned.
# Otherwise the data is parsed with Schema.real_parse.
#
# @param json_obj [Object] decoded JSON for the nested schema, or a type name
# @param names [#include?, #[], nil] known schemas keyed by full name
# @param namespace [Object, nil] namespace used to build the full name and
#   passed on to real_parse
# @raise [SchemaParseError] re-raised unchanged when real_parse raises one;
#   any other StandardError from real_parse is replaced by a SchemaParseError
#   naming the bad sub-schema
def subparse(json_obj, names=nil, namespace=nil)
  if names && json_obj.is_a?(String)
    fullname = Name.make_fullname(json_obj, namespace)
    return names[fullname] if names.include?(fullname)
  end

  # Only the parse itself is guarded; lookup errors above propagate as they are.
  begin
    Schema.real_parse(json_obj, names, namespace)
  rescue SchemaParseError
    raise
  rescue StandardError
    raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
  end
end

#to_avro(_names = nil) ⇒ Object



231
232
233
234
235
# File 'lib/avro/schema.rb', line 231

# Builds the hash form of this schema.
#
# @param _names [Object, nil] accepted but not used by this implementation
# @return [Hash] 'type' => type, plus 'logicalType' when a logical type is set
def to_avro(_names=nil)
  { 'type' => type }.tap do |props|
    props['logicalType'] = logical_type if logical_type
  end
end

#to_s ⇒ Object



237
238
239
# File 'lib/avro/schema.rb', line 237

# Serialises the schema by dumping the result of #to_avro with MultiJson.
#
# @return [Object] whatever MultiJson.dump returns
def to_s
  avro_form = to_avro
  MultiJson.dump(avro_form)
end

#type ⇒ Object

Returns the type as a string (rather than a symbol), for backwards compatibility. Deprecated in favor of #type_sym.



136
# File 'lib/avro/schema.rb', line 136

# Returns the type as a string (rather than a symbol), for backwards
# compatibility. Deprecated in favor of #type_sym.
#
# @return [String] @type_sym converted with #to_s
def type
  @type_sym.to_s
end

#type_adapter ⇒ Object



138
139
140
# File 'lib/avro/schema.rb', line 138

# Returns the adapter for this schema's type and logical type, memoised in
# @type_adapter.
#
# LogicalTypes.type_adapter is consulted once; when it returns nil or false,
# LogicalTypes::Identity is used instead.
#
# @return [Object] the cached adapter
def type_adapter
  return @type_adapter if @type_adapter

  resolved = LogicalTypes.type_adapter(type, logical_type, self)
  @type_adapter = resolved || LogicalTypes::Identity
end