Class: Avro::Schema
- Inherits:
-
Object
- Object
- Avro::Schema
- Defined in:
- lib/avro/schema.rb
Direct Known Subclasses
ArraySchema, Field, MapSchema, NamedSchema, PrimitiveSchema, UnionSchema
Defined Under Namespace
Classes: ArraySchema, BytesSchema, EnumSchema, Field, FixedSchema, MapSchema, NamedSchema, PrimitiveSchema, RecordSchema, UnionSchema
Constant Summary collapse
# Sets of strings, for backwards compatibility. See below for sets of symbols,
# for better performance.
PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double])
NAMED_TYPES = Set.new(%w[fixed enum record error])
VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])

# Symbol counterparts of the three string sets above.
PRIMITIVE_TYPES_SYM = Set.new(PRIMITIVE_TYPES.map(&:to_sym))
NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))

# Pattern for a (possibly dot-separated) name. It is anchored with \A and \z so
# that the entire string has to match: ^ and $ match at line boundaries, which
# would accept any string whose first line is a valid name, whatever follows
# the newline.
NAME_REGEX = /\A([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*\z/.freeze

# Smallest and largest values of 32-bit and 64-bit signed integers.
INT_MIN_VALUE = -(1 << 31)
INT_MAX_VALUE = (1 << 31) - 1
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1

# Options used by Schema.validate when the caller does not pass any.
DEFAULT_VALIDATE_OPTIONS = { recursive: true, encoded: false }.freeze

DECIMAL_LOGICAL_TYPE = 'decimal'

# Starting value of the CRC-64 fingerprint; also the constant mixed into the
# lookup table built by #initFPTable.
CRC_EMPTY = 0xc15d213aa4d7a795

# Leading bytes of the header returned by #single_object_encoding_header.
SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01].freeze
- @@fp_table =
The Java library caches this value after it is initialized, so this pattern mimics that.
nil
Instance Attribute Summary collapse
-
#logical_type ⇒ Object
readonly
Returns the value of attribute logical_type.
-
#type_sym ⇒ Object
readonly
Returns the value of attribute type_sym.
Class Method Summary collapse
- .parse(json_string) ⇒ Object
-
.real_parse(json_obj, names = nil, default_namespace = nil) ⇒ Object
Build Avro Schema from data parsed out of JSON string.
-
.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS) ⇒ Object
Determine if a Ruby datum is an instance of a schema.
Instance Method Summary collapse
- #==(other, _seen = nil) ⇒ Object
- #be_read?(other_schema) ⇒ Boolean
- #crc_64_avro_fingerprint ⇒ Object
- #hash(_seen = nil) ⇒ Object
- #initFPTable ⇒ Object
-
#initialize(type, logical_type = nil) ⇒ Schema
constructor
A new instance of Schema.
-
#md5_fingerprint ⇒ Object
Returns the MD5 fingerprint of the schema as an Integer.
- #mutual_read?(other_schema) ⇒ Boolean
- #read?(writers_schema) ⇒ Boolean
-
#sha256_fingerprint ⇒ Object
Returns the SHA-256 fingerprint of the schema as an Integer.
- #single_object_encoding_header ⇒ Object
- #single_object_schema_fingerprint ⇒ Object
- #subparse(json_obj, names = nil, namespace = nil) ⇒ Object
- #to_avro(_names = nil) ⇒ Object
- #to_s ⇒ Object
-
#type ⇒ Object
Returns the type as a string (rather than a symbol), for backwards compatibility.
- #type_adapter ⇒ Object
Constructor Details
#initialize(type, logical_type = nil) ⇒ Schema
Returns a new instance of Schema.
126 127 128 129 |
# File 'lib/avro/schema.rb', line 126
#
# Stores the schema type as a symbol together with the optional logical type.
#
# @param type [String, Symbol] schema type; converted with #to_sym
#   (a Symbol is its own #to_sym, so symbols are stored unchanged)
# @param logical_type [Object, nil] logical type, stored as given
def initialize(type, logical_type=nil)
  @logical_type = logical_type
  @type_sym = type.to_sym
end
Instance Attribute Details
#logical_type ⇒ Object (readonly)
Returns the value of attribute logical_type.
132 133 134 |
# File 'lib/avro/schema.rb', line 132
#
# Reader for the logical type given to the constructor.
#
# @return [Object, nil] the value of @logical_type
def logical_type
  @logical_type
end
#type_sym ⇒ Object (readonly)
Returns the value of attribute type_sym.
131 132 133 |
# File 'lib/avro/schema.rb', line 131
#
# Reader for the schema type stored by the constructor.
#
# @return [Symbol] the value of @type_sym
def type_sym
  @type_sym
end
Class Method Details
.parse(json_string) ⇒ Object
44 45 46 |
# File 'lib/avro/schema.rb', line 44
#
# Decodes a JSON string with MultiJson and hands the result to .real_parse
# along with a fresh, empty names hash.
#
# @param json_string [String] JSON text of the schema
# @return [Object] whatever .real_parse returns for the decoded data
def self.parse(json_string)
  decoded = MultiJson.load(json_string)
  real_parse(decoded, {})
end
.real_parse(json_obj, names = nil, default_namespace = nil) ⇒ Object
Build Avro Schema from data parsed out of JSON string.
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/avro/schema.rb', line 49
#
# Build Avro Schema from data parsed out of JSON string.
#
# Dispatch, in order:
# * Hash   - the 'type' entry selects a primitive, named, array or map schema
# * Array  - a union schema
# * member of PRIMITIVE_TYPES - a primitive schema
# * anything else - UnknownSchemaError
#
# @param json_obj [Hash, Array, Object] decoded JSON schema description
# @param names [Hash, nil] passed through to the schema constructors
# @param default_namespace [String, nil] namespace used when a named type
#   has no 'namespace' key of its own
# @raise [SchemaParseError] on a missing or unknown 'type', or an invalid name
# @raise [UnknownSchemaError] when json_obj matches none of the cases above
def self.real_parse(json_obj, names=nil, default_namespace=nil)
  unless json_obj.is_a?(Hash)
    # JSON array (union)
    return UnionSchema.new(json_obj, names, default_namespace) if json_obj.is_a?(Array)
    return PrimitiveSchema.new(json_obj) if PRIMITIVE_TYPES.include?(json_obj)

    raise UnknownSchemaError.new(json_obj)
  end

  type = json_obj['type']
  logical_type = json_obj['logicalType']
  raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?

  # Check that the type is valid before calling #to_sym, since symbols are never garbage
  # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
  raise SchemaParseError, "Unknown type: #{type}" unless VALID_TYPES.include?(type)

  kind = type.to_sym

  if PRIMITIVE_TYPES_SYM.include?(kind)
    return PrimitiveSchema.new(kind, logical_type) unless kind == :bytes

    return BytesSchema.new(kind, logical_type, json_obj['precision'], json_obj['scale'])
  end

  unless NAMED_TYPES_SYM.include?(kind)
    case kind
    when :array
      return ArraySchema.new(json_obj['items'], names, default_namespace)
    when :map
      return MapSchema.new(json_obj['values'], names, default_namespace)
    else
      raise SchemaParseError.new("Unknown Valid Type: #{type}")
    end
  end

  # Named types: fixed, enum, record, error.
  name = json_obj['name']
  if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
    raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
  end
  namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
  aliases = json_obj['aliases']

  case kind
  when :fixed
    FixedSchema.new(name, namespace, json_obj['size'], names, logical_type, aliases,
                    json_obj['precision'], json_obj['scale'])
  when :enum
    EnumSchema.new(name, namespace, json_obj['symbols'], names, json_obj['doc'],
                   json_obj['default'], aliases)
  when :record, :error
    RecordSchema.new(name, namespace, json_obj['fields'], names, kind, json_obj['doc'], aliases)
  else
    raise SchemaParseError.new("Unknown named type: #{type}")
  end
end
.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS) ⇒ Object
Determine if a Ruby datum is an instance of a schema.
119 120 121 122 123 124 |
# File 'lib/avro/schema.rb', line 119
#
# Determine if a Ruby datum is an instance of a schema.
#
# Delegates to SchemaValidator.validate! and turns its ValidationError into a
# boolean result; any other exception propagates to the caller.
#
# @param expected_schema [Object] schema to validate against
# @param logical_datum [Object] datum to check
# @param options [Hash] forwarded unchanged to SchemaValidator.validate!
# @return [Boolean] true when validation passes, false when
#   SchemaValidator::ValidationError is raised
def self.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS)
  SchemaValidator.validate!(expected_schema, logical_datum, options)
  true
rescue SchemaValidator::ValidationError
  false
end
Instance Method Details
#==(other, _seen = nil) ⇒ Object
209 210 211 |
# File 'lib/avro/schema.rb', line 209
#
# Two schemas are equal here when the other object is a Schema and both
# have the same type symbol.
#
# @param other [Object] object to compare with
# @param _seen [Object, nil] unused by this implementation
# @return [Boolean]
def ==(other, _seen=nil)
  return false unless other.is_a?(Schema)

  type_sym == other.type_sym
end
#be_read?(other_schema) ⇒ Boolean
201 202 203 |
# File 'lib/avro/schema.rb', line 201
#
# Asks the other schema whether it can read this one; the mirror image of
# #read?.
#
# @param other_schema [#read?] schema that would do the reading
# @return [Boolean] the result of other_schema.read?(self)
def be_read?(other_schema)
  other_schema.read?(self)
end
#crc_64_avro_fingerprint ⇒ Object
170 171 172 173 174 175 176 177 178 179 180 181 |
# File 'lib/avro/schema.rb', line 170
#
# Computes a table-driven CRC over the bytes of this schema's parsing form,
# starting from CRC_EMPTY. The lookup table is built on first use.
#
# @return [Integer] the resulting fingerprint
def crc_64_avro_fingerprint
  octets = Avro::SchemaNormalization.to_parsing_form(self).unpack("C*")

  initFPTable unless @@fp_table

  octets.reduce(CRC_EMPTY) do |fp, octet|
    (fp >> 8) ^ @@fp_table[(fp ^ octet) & 0xff]
  end
end
#hash(_seen = nil) ⇒ Object
213 214 215 |
# File 'lib/avro/schema.rb', line 213
#
# Hash code derived solely from the type symbol.
#
# @param _seen [Object, nil] unused by this implementation
# @return [Integer] type_sym.hash
def hash(_seen=nil)
  type_sym.hash
end
#initFPTable ⇒ Object
159 160 161 162 163 164 165 166 167 168 |
# File 'lib/avro/schema.rb', line 159
#
# Builds the 256-entry lookup table used by #crc_64_avro_fingerprint and
# stores it in @@fp_table.
#
# The table is computed in a local variable and assigned only once it is
# complete. #crc_64_avro_fingerprint treats a non-nil @@fp_table as ready, so
# assigning an empty array first and filling it afterwards would let a
# concurrent caller index into nil entries.
#
# @return [Array<Integer>] the freshly built table
def initFPTable
  table = Array.new(256) do |i|
    # Eight shift/xor rounds per entry, one for each bit of the index byte.
    8.times.reduce(i) { |fp, _| (fp >> 1) ^ (CRC_EMPTY & -(fp & 1)) }
  end
  @@fp_table = table
end
#md5_fingerprint ⇒ Object
Returns the MD5 fingerprint of the schema as an Integer.
143 144 145 146 |
# File 'lib/avro/schema.rb', line 143
#
# Returns the MD5 fingerprint of the schema as an Integer.
#
# The digest is taken over the schema's parsing form and its hexadecimal
# representation is converted to an Integer.
#
# @return [Integer]
def md5_fingerprint
  hex_digest = Digest::MD5.hexdigest(SchemaNormalization.to_parsing_form(self))
  hex_digest.hex
end
#mutual_read?(other_schema) ⇒ Boolean
205 206 207 |
# File 'lib/avro/schema.rb', line 205
#
# Delegates to SchemaCompatibility.mutual_read?, passing the other schema
# first and this schema second.
#
# @param other_schema [Object] schema to compare with
# @return [Boolean] the result of SchemaCompatibility.mutual_read?
def mutual_read?(other_schema)
  SchemaCompatibility.mutual_read?(other_schema, self)
end
#read?(writers_schema) ⇒ Boolean
197 198 199 |
# File 'lib/avro/schema.rb', line 197
#
# Delegates to SchemaCompatibility.can_read?, passing the writer's schema
# first and this schema second.
#
# @param writers_schema [Object] schema the data was written with
# @return [Boolean] the result of SchemaCompatibility.can_read?
def read?(writers_schema)
  SchemaCompatibility.can_read?(writers_schema, self)
end
#sha256_fingerprint ⇒ Object
Returns the SHA-256 fingerprint of the schema as an Integer.
149 150 151 152 |
# File 'lib/avro/schema.rb', line 149
#
# Returns the SHA-256 fingerprint of the schema as an Integer.
#
# The digest is taken over the schema's parsing form and its hexadecimal
# representation is converted to an Integer.
#
# @return [Integer]
def sha256_fingerprint
  hex_digest = Digest::SHA256.hexdigest(SchemaNormalization.to_parsing_form(self))
  hex_digest.hex
end
#single_object_encoding_header ⇒ Object
184 185 186 |
# File 'lib/avro/schema.rb', line 184
#
# Concatenates SINGLE_OBJECT_MAGIC_NUMBER with the bytes returned by
# #single_object_schema_fingerprint.
#
# @return [Array<Integer>] magic bytes followed by the fingerprint bytes
def single_object_encoding_header
  parts = [SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint]
  parts.flatten
end
#single_object_schema_fingerprint ⇒ Object
187 188 189 190 191 192 193 194 195 |
# File 'lib/avro/schema.rb', line 187
#
# Splits the value of #crc_64_avro_fingerprint into eight bytes, least
# significant byte first.
#
# @return [Array<Integer>] eight values in 0..255
def single_object_schema_fingerprint
  fingerprint = crc_64_avro_fingerprint
  Array.new(8) { |index| (fingerprint >> (8 * index)) & 0xff }
end
#subparse(json_obj, names = nil, namespace = nil) ⇒ Object
217 218 219 220 221 222 223 224 225 226 227 228 229 |
# File 'lib/avro/schema.rb', line 217
#
# Resolves a nested schema description.
#
# A String is first looked up by its full name in +names+ (when names are
# given); if found, the stored entry is returned. Everything else goes
# through Schema.real_parse.
#
# @param json_obj [Object] nested schema description
# @param names [Hash, nil] known entries keyed by full name
# @param namespace [String, nil] namespace used to build the full name and
#   passed on to Schema.real_parse
# @raise [SchemaParseError] re-raised unchanged when the parser raises one;
#   any other StandardError from the parser is replaced by a SchemaParseError
#   naming this class and the offending description
def subparse(json_obj, names=nil, namespace=nil)
  if names && json_obj.is_a?(String)
    fullname = Name.make_fullname(json_obj, namespace)
    return names[fullname] if names.include?(fullname)
  end

  begin
    Schema.real_parse(json_obj, names, namespace)
  rescue SchemaParseError
    raise
  rescue StandardError
    raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
  end
end
#to_avro(_names = nil) ⇒ Object
231 232 233 234 235 |
# File 'lib/avro/schema.rb', line 231
#
# Builds the hash form of this schema: always a 'type' entry, plus a
# 'logicalType' entry when a logical type is set.
#
# @param _names [Object, nil] unused by this implementation
# @return [Hash{String => Object}]
def to_avro(_names=nil)
  {'type' => type}.tap do |avro|
    avro['logicalType'] = logical_type if logical_type
  end
end
#to_s ⇒ Object
237 238 239 |
# File 'lib/avro/schema.rb', line 237
#
# Serializes the result of #to_avro with MultiJson.
#
# @return [Object] whatever MultiJson.dump returns for the #to_avro value
def to_s
  MultiJson.dump(to_avro)
end
#type ⇒ Object
Returns the type as a string (rather than a symbol), for backwards compatibility. Deprecated in favor of #type_sym.
136 |
# File 'lib/avro/schema.rb', line 136
#
# Returns the type as a string (rather than a symbol), for backwards
# compatibility.
#
# @deprecated Use #type_sym instead.
# @return [String]
def type
  @type_sym.to_s
end
#type_adapter ⇒ Object
138 139 140 |
# File 'lib/avro/schema.rb', line 138
#
# Returns the adapter for this schema's type and logical type, memoized in
# @type_adapter. Falls back to LogicalTypes::Identity when
# LogicalTypes.type_adapter returns nil or false.
#
# @return [Object] the memoized adapter
def type_adapter
  return @type_adapter if @type_adapter

  @type_adapter = LogicalTypes.type_adapter(type, logical_type, self) || LogicalTypes::Identity
end