Class: Elasticfusion::Search::Query::Lexer

Inherits:
Object
  • Object
show all
Defined in:
lib/elasticfusion/search/query/lexer.rb

Constant Summary collapse

# Regular expressions for the lexical elements of a search query, keyed by
# token name. #match and #skip look patterns up here by name; the two
# *_until entries are handed to #match_until as terminator patterns.
TOKENS =
{
  whitespace: /\s+/, # one or more whitespace characters
  and: /AND|,/, # the word AND or a comma
  or: /OR|\|/, # the word OR or a pipe character
  not: /NOT|-/, # the word NOT or a dash
  field_query_delimiter: /:/, # a single colon
  field_qualifier: /less than|more than|earlier than|later than/, # one of four comparison phrases

  # Terminator for #safe_string: optional whitespace followed by an
  # AND/OR operator, a double quote, or either parenthesis.
  safe_string_until: /(\s*)(AND|OR|,|\||"|\(|\))/,
  # A double-quoted span; a backslash escapes the character that follows it.
  quoted_string: /"(?:[^\\]|\\.)*?"/,
  # Terminator for #string_with_balanced_parentheses: an AND/OR operator only,
  # so quotes and parentheses stay part of the matched text.
  string_with_balanced_parentheses_until: /AND|OR|,|\|/
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(string, searchable_fields) ⇒ Lexer

Returns a new instance of Lexer.



21
22
23
24
# File 'lib/elasticfusion/search/query/lexer.rb', line 21

# Sets up a scanner over the query text and, when any searchable fields are
# given, a pattern that recognizes "<field>:" at the scan pointer.
#
# @param string [String] the raw query text to tokenize
# @param searchable_fields [Array<#to_s>] field names that may introduce a
#   field query; when empty, no field pattern is built and #match_field
#   always returns nil
def initialize(string, searchable_fields)
  @scanner = StringScanner.new(string)
  return unless searchable_fields.any?

  # Field names must match literally: escape them so that characters such as
  # ".", "+" or "(" in a name are not interpreted as regex syntax.
  alternatives = searchable_fields.map { |field| Regexp.escape(field.to_s) }
  @field_regex = /(#{alternatives.join('|')}):/
end

Instance Method Details

#left_parentheses ⇒ Object



42
43
44
# File 'lib/elasticfusion/search/query/lexer.rb', line 42

# Consumes a single "(" if one is at the scan pointer.
#
# @return [Integer, nil] the length of the skipped text, or nil when the
#   next character is not an opening parenthesis
def left_parentheses
  opening_paren = /\(/
  @scanner.skip(opening_paren)
end

#match(token) ⇒ Object



26
27
28
# File 'lib/elasticfusion/search/query/lexer.rb', line 26

# Scans the token named +token+ at the scan pointer.
#
# @param token [Symbol] a key of TOKENS
# @return [String, nil] the matched text, or nil when the pattern does not
#   match at the current position
def match(token)
  pattern = TOKENS[token]
  @scanner.scan(pattern)
end

#match_field ⇒ Object



34
35
36
37
38
39
40
# File 'lib/elasticfusion/search/query/lexer.rb', line 34

# Scans a "<field>:" prefix at the scan pointer and returns the field name.
#
# @return [String, nil] the field name without the trailing ":", or nil when
#   no field pattern was built or it does not match here
def match_field
  return unless @field_regex

  with_delimiter = @scanner.scan(@field_regex)
  # The pattern always ends in ":", so dropping the last character leaves the name.
  with_delimiter[0...-1] if with_delimiter
end

#match_until(regex) ⇒ Object

StringScanner#scan_until returns everything up to and including the regex. To avoid including the pattern, we use a lookahead.



84
85
86
# File 'lib/elasticfusion/search/query/lexer.rb', line 84

# Scans at least one character, stopping right before the first place where
# +regex+ matches (or at the end of the input if it never does).
#
# StringScanner#scan_until would include the terminator in the result, so the
# terminator is wrapped in a lookahead instead and is left unconsumed.
#
# @param regex [Regexp] the terminator pattern
# @return [String, nil] the text before the terminator, or nil when nothing
#   could be scanned (e.g. at the end of the input)
def match_until(regex)
  # Interpolating the Regexp itself (rather than regex.source) embeds it as
  # an option-scoped group, so flags such as /i on the terminator are kept.
  @scanner.scan(/.+?(?=#{regex}|\z)/)
end

#quoted_string ⇒ Object

May contain any characters except for quotes (the latter are allowed when escaped).



56
57
58
59
60
61
62
63
64
# File 'lib/elasticfusion/search/query/lexer.rb', line 56

# Scans a double-quoted string and returns its unescaped contents.
#
# The contents may hold any characters; a literal quote or backslash inside
# them is written with a preceding backslash.
#
# @return [String, nil] the text between the quotes with \" and \\ unescaped,
#   or nil when no quoted string starts at the scan pointer
def quoted_string
  quoted = match(:quoted_string)
  return unless quoted

  inner = quoted[1...-1] # drop the surrounding quotes
  quotes_unescaped = inner.gsub(/\\"/, '"')
  quotes_unescaped.gsub(/\\\\/, '\\')
end

#right_parentheses(expected_count) ⇒ Object



46
47
48
# File 'lib/elasticfusion/search/query/lexer.rb', line 46

# Consumes up to +expected_count+ consecutive ")" characters at the scan pointer.
#
# @param expected_count [Integer] the maximum number of closing parentheses to consume
# @return [Integer] how many characters were skipped (0 when none are present)
def right_parentheses(expected_count)
  closing_parens = /\){,#{expected_count}}/
  @scanner.skip(closing_parens)
end

#safe_string ⇒ Object

May contain words, numbers, spaces, dashes, and underscores.



51
52
53
# File 'lib/elasticfusion/search/query/lexer.rb', line 51

# Scans an unquoted string up to (not including) the next operator, double
# quote, or parenthesis, as defined by TOKENS[:safe_string_until].
#
# @return [String, nil] the scanned text, or nil when nothing could be scanned
def safe_string
  terminator = TOKENS[:safe_string_until]
  match_until(terminator)
end

#skip(token) ⇒ Object



30
31
32
# File 'lib/elasticfusion/search/query/lexer.rb', line 30

# Advances past the token named +token+ if it is at the scan pointer.
#
# @param token [Symbol] a key of TOKENS
# @return [Integer, nil] the length of the skipped text, or nil when the
#   pattern does not match at the current position
def skip(token)
  pattern = TOKENS[token]
  @scanner.skip(pattern)
end

#string_with_balanced_parentheses ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/elasticfusion/search/query/lexer.rb', line 66

# Scans a string up to the next AND/OR operator, keeping only as many closing
# parentheses as the string itself opens. Any text from the first surplus ")"
# onwards is handed back to the scanner so the caller can consume it.
#
# @return [String, nil] the scanned text with surrounding whitespace
#   stripped, or nil when nothing could be scanned
def string_with_balanced_parentheses
  string = match_until(TOKENS[:string_with_balanced_parentheses_until])
  return unless string

  opening_parens = string.count('(')

  # A limit of -1 keeps trailing empty fields, so a run of ")" at the end of
  # the string is counted paren by paren instead of being dropped by split.
  # Taking opening_parens + 1 fields keeps at most opening_parens ")".
  balanced = string.split(')', -1)[0..opening_parens].join(')')

  # StringScanner#pos is a byte offset, so rewind by bytes rather than by
  # characters to stay correct for multibyte input.
  @scanner.pos -= string.bytesize - balanced.bytesize

  balanced.strip
end