Class: JSON::Lexer

Inherits:
Object show all
Defined in:
lib/json/lexer.rb

Instance Method Summary collapse

Constructor Details

#initialize(s) ⇒ Lexer

This method will initialize the lexer to contain a string.

Parameters
s

the string to initialize the lexer object with



37
38
39
40
# File 'lib/json/lexer.rb', line 37

# Sets up a lexer over the given string, with the read position at the
# first character.
#
# @param s the string to scan; it is stored by reference, not copied
def initialize(s)
  # Position of the next character to be read from @source.
  @index = 0
  @source = s
end

Instance Method Details

#backObject

Backs up the lexer status one character.



43
44
45
# File 'lib/json/lexer.rb', line 43

# Moves the read position one character backwards. The position never
# drops below zero: at the start of the source this is a no-op.
#
# @return [Integer, nil] the new position, or nil when nothing was changed
def back
  return nil unless @index > 0

  @index -= 1
end

#eachObject



287
288
289
290
291
# File 'lib/json/lexer.rb', line 287

# Yields successive results of #nextvalue to the given block.
#
# Iteration ends as soon as #nextvalue returns nil or false, so a value
# that is itself nil or false also terminates the loop.
#
# @yield [value] each truthy value returned by #nextvalue
# @return [nil]
def each
  value = nextvalue
  while value
    yield value
    value = nextvalue
  end
end

#more?Boolean

Returns:

  • (Boolean)


47
48
49
# File 'lib/json/lexer.rb', line 47

# Reports whether unread characters remain in the source.
#
# @return [Boolean] true while the read position is before the end of @source
def more?
  @source.length > @index
end

#nextcharObject

Consumes the next character.



52
53
54
55
56
# File 'lib/json/lexer.rb', line 52

# Reads one character and advances the read position.
#
# The position is advanced even when the source is exhausted, in which
# case the NUL character "\0" is returned as an end-of-input marker.
#
# @return [String] a one-character string, or "\0" at end of input
def nextchar
  ch = if more?
         @source[@index, 1]
       else
         "\0"
       end
  @index += 1
  ch
end

#nextchars(n) ⇒ Object

Read the next n characters from the string in the lexer.

Parameters
n

the number of characters to read from the lexer



69
70
71
72
73
74
# File 'lib/json/lexer.rb', line 69

# Reads exactly n characters starting at the current position and moves
# the position past them.
#
# @param n [Integer] how many characters to read
# @return [String] the n characters that were consumed
# @raise [RuntimeError] "substring bounds error" when fewer than n
#   characters remain; the position is left untouched in that case
def nextchars(n)
  start = @index
  stop = start + n
  raise "substring bounds error" if stop > @source.length

  @index = stop
  @source[start, n]
end

#nextcleanObject

Reads the next significant character, skipping whitespace as well as `//` line comments and `/* ... */` block comments.



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/json/lexer.rb', line 78

# Returns the next significant character, skipping characters that sort
# at or below a space as well as `//` line comments and `/* ... */`
# block comments.
#
# @return [String] the next significant character; "\0" when that is what
#   #nextchar produced; or '/' when a slash does not start a comment
# @raise [RuntimeError] "unclosed comment" when a block comment is still
#   open when "\0" is read
def nextclean
  loop do
    ch = nextchar

    if ch != '/'
      return ch if ch == "\0" || ch[0] > " "[0]

      # Whitespace or another low character: keep scanning.
      next
    end

    case nextchar
    when '/'
      # Line comment: consume up to and including the line terminator
      # (or the end-of-input marker).
      ch = nextchar
      ch = nextchar until ch == "\n" || ch == "\r" || ch == "\0"
    when '*'
      # Block comment: consume until the closing "*/".
      loop do
        ch = nextchar
        raise "unclosed comment" if ch == "\0"
        next unless ch == '*'
        break if nextchar == '/'

        # The character after '*' was not '/'; re-read it, since it may
        # itself be the '*' that starts the terminator.
        back
      end
    else
      # A lone slash: un-read the character that followed it.
      back
      return '/'
    end
  end
end

#nextmatch(char) ⇒ Object

Consumes the next character and checks that it matches a specified character.



60
61
62
63
64
# File 'lib/json/lexer.rb', line 60

# Consumes one character and verifies that it is the expected one.
#
# @param char [String] the character that must come next
# @return [String] the character that was read
# @raise [RuntimeError] when the character read differs from char; the
#   character has already been consumed at that point
def nextmatch(char)
  actual = nextchar
  return actual if actual == char

  raise "Expected '#{char}' and instead saw '#{actual}'."
end

#nextstring(quot) ⇒ Object

Reads the next string, given the quote character that terminates it (usually ' or ").

Parameters

quot: the next matching quote character to use



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/json/lexer.rb', line 165

# Reads the body of a quoted string. The opening quote must already have
# been consumed; reading stops after the matching closing quote.
#
# Backslash escapes \b, \t, \n, \f and \r are translated to the
# corresponding control characters, \uXXXX is converted through #utf8str,
# and any other escaped character is copied literally (so \" \\ and \/
# yield the character itself).
#
# @param quot [String] the quote character that terminates the string
# @return [String] the decoded string contents, without the quotes
# @raise [RuntimeError] "Unterminated string" when the end of input, a raw
#   newline or a raw carriage return is reached before the closing quote
# @raise [RuntimeError] "substring bounds error" (from #nextchars) when
#   fewer than four characters follow \u
# @raise [ArgumentError] when the four characters after \u are not
#   hexadecimal digits
def nextstring(quot)
  buf = ""
  loop do
    c = nextchar
    case c
    when "\0", "\n", "\r"
      # Each terminator is listed on its own: a pattern such as /\n\r/
      # would need two characters and can never match the single
      # character held in c.
      raise "Unterminated string"
    when "\\"
      chr = nextchar
      case chr
      when 'b' then buf << "\b"
      when 't' then buf << "\t"
      when 'n' then buf << "\n"
      when 'f' then buf << "\f"
      when 'r' then buf << "\r"
      when 'u' then buf << utf8str(Integer("0x" + nextchars(4)))
      else buf << chr
      end
    else
      return buf if c == quot

      buf << c
    end
  end
end

#nextto(regex) ⇒ Object

Reads characters up to (but not including) the next character that matches a regular expression.



200
201
202
203
204
205
206
207
208
209
210
# File 'lib/json/lexer.rb', line 200

# Collects characters up to, but not including, the first character that
# matches regex, or the end of the current line, or the end of input.
#
# The terminating character is pushed back so the next read sees it again,
# except for the end-of-input marker "\0".
#
# @param regex [Regexp] pattern tested against each single character
# @return [String] the characters read before the terminator, with a
#   trailing line break removed
def nextto(regex)
  buf = ""
  loop do
    c = nextchar
    # The comparisons need double-quoted literals: '\0', '\n' and '\r' in
    # single quotes are two-character strings that never equal c, which
    # made this loop spin forever once the input ran out.
    if regex =~ c || c == "\0" || c == "\n" || c == "\r"
      back unless c == "\0"
      return buf.chomp
    end
    buf << c
  end
end

#nextvalueObject

Reads the next value from the string. This can return a string, an Integer, a Float, true, false, nil, a JSON array, or a JSON object.



215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/json/lexer.rb', line 215

# Reads the next value from the source.
#
# The first significant character (as returned by #nextclean) selects the
# kind of value:
# * a single or double quote starts a string, read with #nextstring;
# * '{' and '[' are pushed back and handed to Hash#from_json /
#   Array#from_json, which are not defined in this method (presumably core
#   extensions supplied elsewhere in the library);
# * anything else is read as a bare token up to the next delimiter
#   (double quote, space, colon, comma, ']', '}', '/' or end of input).
#
# A bare token is interpreted as true, false, null (nil), a number when it
# starts with a digit, '.', '+' or '-', and otherwise returned as a string.
# Numbers containing a decimal point or an exponent become Floats; all
# others become Integers.
#
# @return [String, Integer, Float, true, false, nil, Object] the decoded
#   value; nil is also returned for an empty token (e.g. at end of input)
# @raise [ArgumentError] when a token that starts like a number cannot be
#   converted by Integer() or Float()
def nextvalue
  c = nextclean

  case c
  when /\"|\'/
    return nextstring(c)
  when '{'
    back
    return Hash.new.from_json(self)
  when '['
    back
    return Array.new.from_json(self)
  end

  # Bare token: accumulate until a delimiter, then un-read the delimiter.
  buf = ""
  while (c =~ /"| |:|,|\]|\}|\/|\0/).nil?
    buf << c
    c = nextchar
  end
  back
  s = buf.chomp

  case s
  when "true" then return true
  when "false" then return false
  when "null" then return nil
  when /\A[0-9.+-]/
    # The character class is anchored as a whole, so only tokens that
    # *begin* like a number are converted; a word such as "foo-bar" stays
    # a string. Exponent-only forms such as "1e5" go to Float() because
    # Integer() rejects them; requiring decimal digits before the 'e'
    # keeps hexadecimal tokens like "0x1e" on the Integer() path.
    return Float(s) if s =~ /\.|\A[-+]?\d+[eE]/

    return Integer(s)
  end

  s == "" ? nil : s
end

#skippast(to) ⇒ Object

Skip past the next instance of the character specified

Parameters
to

the character to skip past



282
283
284
285
# File 'lib/json/lexer.rb', line 282

# Moves the read position to just after the next occurrence of the given
# text, searching from the current position. When the text does not occur,
# the position is moved to the end of the source.
#
# @param to [String] the text to skip past
# @return [Integer] the new read position
def skippast(to)
  found = @source.index(to, @index)
  @index = found ? found + to.length : @source.length
end

#skipto(to) ⇒ Object

Skip to the next instance of the character specified

Parameters
to

Character to skip to



260
261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'lib/json/lexer.rb', line 260

# Advances the read position to the next occurrence of the given
# character, leaving that character unread.
#
# When the end of input is reached without finding the character, the
# read position is restored to where it was before the call.
#
# @param to [String] the single character to look for
# @return [String] the character found, or "\0" when it does not occur
def skipto(to)
  start = @index
  loop do
    c = nextchar
    # Must be the double-quoted NUL: '\0' in single quotes is a
    # two-character string that never equals the end-of-input marker, so
    # a missing target used to loop forever.
    if c == "\0"
      @index = start
      return c
    end
    if c == to
      back
      return c
    end
  end
end

#unescapeObject



275
276
277
# File 'lib/json/lexer.rb', line 275

# Replaces the source with the result of passing it through CGI.unescape.
# The read position is not changed.
#
# @return [String] the new source text
def unescape
  decoded = CGI.unescape(@source)
  @source = decoded
end

#utf8str(code) ⇒ Object

Given a Unicode code point, return a string giving its UTF-8 representation based on RFC 2279.



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/json/lexer.rb', line 109

# Encodes a code point as a UTF-8 string.
#
# The encoding is delegated to Array#pack with the "U" directive rather
# than assembling the bytes by hand; the hand-written version masked the
# final continuation byte with five bits instead of six and so produced
# wrong output for many code points above U+07FF (U+20AC came out as
# E2 82 8C instead of E2 82 AC).
#
# @param code [Integer] the code point to encode
# @return [String] a UTF-8 encoded string holding that single code point
# @raise [RangeError] when code is outside the range accepted by
#   Array#pack("U"), e.g. a negative number
def utf8str(code)
  [code].pack("U")
end