Class: SearchLingo::Tokenizer

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/search_lingo/tokenizer.rb

Constant Summary collapse

SIMPLE_TOKEN =

Pattern for matching a simple token (a term without a modifier).

/#{TERM}/
COMPOUND_TOKEN =

Pattern for matching a compound token (a term with an optional modifier).

/(?:#{MODIFIER}:[[:space:]]*)?#{TERM}/
DELIMITER =

Pattern for matching the delimiter between tokens.

/[[:space:]]*/

Instance Method Summary collapse

Constructor Details

#initialize(query) ⇒ Tokenizer

:nodoc:



23
24
25
# File 'lib/search_lingo/tokenizer.rb', line 23

# Wraps +query+ in a StringScanner after trimming its leading and trailing
# whitespace with String#strip. The scanner is stored in +@scanner+.
def initialize(query) # :nodoc:
  trimmed = query.strip
  @scanner = StringScanner.new(trimmed)
end

Instance Method Details

#each ⇒ Object

Iterates over the query string. If called with a block, it yields each token. If called without a block, it returns an Enumerator.



30
31
32
33
34
35
36
# File 'lib/search_lingo/tokenizer.rb', line 30

# Iterates over the query string, yielding each token returned by #next.
# Without a block, returns an Enumerator for this method.
#
# The delimiter is skipped before every end-of-string check. The constructor
# trims the query with String#strip, which only removes ASCII whitespace,
# whereas DELIMITER matches any [[:space:]] character. Without the extra
# skip, a query ending in non-ASCII whitespace (e.g. U+3000) would leave the
# scanner short of the end after the last token, and the following call to
# #next would raise StopIteration out of this method.
def each
  return to_enum(__callee__) unless block_given?

  scanner.skip DELIMITER
  until scanner.eos?
    yield self.next
    # Consume trailing delimiters only after the block has run, so a
    # #simplify call made from inside the block still rewinds the token
    # that was just yielded.
    scanner.skip DELIMITER
  end
end

#next ⇒ Object

Returns a Token for the next token in the query string. When the end of the query string is reached raises StopIteration.

Raises:

  • (StopIteration)


41
42
43
44
45
46
# File 'lib/search_lingo/tokenizer.rb', line 41

# Skips any leading delimiter, then scans one compound token and wraps the
# matched text in a Token.
#
# Raises StopIteration when no compound token matches at the current
# scanner position.
def next
  scanner.skip(DELIMITER)
  matched = scanner.scan(COMPOUND_TOKEN)
  return Token.new(matched) if matched

  raise StopIteration
end

#simplify ⇒ Object

Rewinds the query string from the last returned token and returns a Token for the next simple token.



53
54
55
56
# File 'lib/search_lingo/tokenizer.rb', line 53

# Moves the scanner back to where it was before its most recent match, then
# scans a simple token from that position and wraps the result in a Token.
def simplify
  scanner.unscan
  simple = scanner.scan(SIMPLE_TOKEN)
  Token.new(simple)
end