Class: Elasticfusion::Search::Query::Lexer
- Inherits:
-
Object
- Object
- Elasticfusion::Search::Query::Lexer
- Defined in:
- lib/elasticfusion/search/query/lexer.rb
Constant Summary collapse
- TOKENS =
{
  # One or more whitespace characters.
  whitespace: /\s+/,
  # The keyword "AND" or a comma.
  and: /AND|,/,
  # The keyword "OR" or a pipe character.
  or: /OR|\|/,
  # The keyword "NOT" or a hyphen.
  not: /NOT|-/,
  # The colon placed between a field name and its query.
  field_query_delimiter: /:/,
  # One of the phrases "less than", "more than", "earlier than", "later than".
  # NOTE(review): presumably these select a range comparison — confirm against the parser.
  field_qualifier: /less than|more than|earlier than|later than/,
  # Stop pattern passed to #match_until by #safe_string: optional whitespace
  # followed by AND, OR, a comma, a pipe, a double quote, or a parenthesis.
  safe_string_until: /(\s*)(AND|OR|,|\||"|\(|\))/,
  # A double-quoted run in which a backslash escapes the following character;
  # the lazy quantifier ends the match at the first unescaped quote.
  quoted_string: /"(?:[^\\]|\\.)*?"/,
  # Stop pattern used by #string_with_balanced_parentheses: AND, OR, a comma,
  # or a pipe. Parentheses and quotes are not stop characters here.
  string_with_balanced_parentheses_until: /AND|OR|,|\|/
}.freeze
Instance Method Summary collapse
-
#initialize(string, searchable_fields) ⇒ Lexer
constructor
A new instance of Lexer.
- #left_parentheses ⇒ Object
- #match(token) ⇒ Object
- #match_field ⇒ Object
-
#match_until(regex) ⇒ Object
StringScanner#scan_until returns everything up to and including the regex.
-
#quoted_string ⇒ Object
May contain any characters except for quotes (the latter are allowed when escaped).
- #right_parentheses(expected_count) ⇒ Object
-
#safe_string ⇒ Object
May contain words, numbers, spaces, dashes, and underscores.
- #skip(token) ⇒ Object
- #string_with_balanced_parentheses ⇒ Object
Constructor Details
#initialize(string, searchable_fields) ⇒ Lexer
Returns a new instance of Lexer.
21 22 23 24 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 21
#
# Wraps the query string in a StringScanner and, when at least one searchable
# field is given, builds the regex that recognizes a "<field>:" prefix.
#
# @param string [String] the query text to tokenize
# @param searchable_fields [Array<#to_s>] names that may prefix a field query
def initialize(string, searchable_fields)
  @scanner = StringScanner.new(string)
  return unless searchable_fields.any?

  # Escape each name so that regex metacharacters inside a field name
  # (e.g. "." or "+") are matched literally instead of acting as pattern syntax.
  alternatives = searchable_fields.map { |field| Regexp.escape(field.to_s) }
  @field_regex = /(#{alternatives.join('|')}):/
end
Instance Method Details
#left_parentheses ⇒ Object
42 43 44 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 42
#
# Consumes one "(" at the current scanner position, if present.
#
# @return [Integer, nil] the result of StringScanner#skip: the length of the
#   skipped text, or nil when the next character is not "("
def left_parentheses
  # Explicit call parentheses: `skip /.../` without them makes Ruby warn about
  # an ambiguous first argument, since "/" could also be read as division.
  @scanner.skip(/\(/)
end
#match(token) ⇒ Object
26 27 28 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 26
#
# Scans the token registered under +token+ in TOKENS at the current position.
#
# @param token [Symbol] a key of TOKENS
# @return [String, nil] the matched text, or nil when the token does not
#   match at the current position
def match(token)
  pattern = TOKENS[token]
  @scanner.scan(pattern)
end
#match_field ⇒ Object
34 35 36 37 38 39 40 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 34
#
# Scans a "<field>:" prefix using the regex built in the constructor.
#
# @return [String, nil] the field name without the trailing ":", or nil when
#   no field regex exists or no field prefix is found at the current position
def match_field
  prefix = @field_regex && @scanner.scan(@field_regex)

  # Drop the last character, i.e. the field query delimiter (":").
  prefix[0..-2] if prefix
end
#match_until(regex) ⇒ Object
StringScanner#scan_until returns everything up to and including the text matched by the regex. To exclude the matched text from the result, we scan with a lookahead instead.
84 85 86 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 84
#
# Scans one or more characters up to, but not including, the first place
# where +regex+ matches, or up to the end of the input if it never does.
#
# StringScanner#scan_until would include the matched text in its result, so
# the stop pattern is wrapped in a lookahead instead.
#
# Only +regex.source+ is interpolated, so flags on +regex+ (such as /i) are
# not carried over.
#
# @param regex [Regexp] the stop pattern
# @return [String, nil] the scanned text, or nil when nothing can be scanned
#   (for example at the end of the input)
def match_until(regex)
  # Explicit call parentheses avoid Ruby's "ambiguous first argument" warning
  # for a regex literal passed to a method without them.
  @scanner.scan(/.+?(?=#{regex.source}|\z)/)
end
#quoted_string ⇒ Object
May contain any characters except for quotes (the latter are allowed when escaped).
56 57 58 59 60 61 62 63 64 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 56
#
# Scans a double-quoted string and returns its unescaped contents.
# May contain any characters except for quotes (the latter are allowed when
# escaped).
#
# @return [String, nil] the text between the quotes with \" turned into "
#   and \\ turned into \, or nil when no quoted string starts here
def quoted_string
  quoted = match(:quoted_string)
  return unless quoted

  inner = quoted[1..-2] # ignore quotes
  with_plain_quotes = inner.gsub(/\\"/, '"')
  with_plain_quotes.gsub(/\\\\/, '\\')
end
#right_parentheses(expected_count) ⇒ Object
46 47 48 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 46
#
# Consumes up to +expected_count+ consecutive ")" characters at the current
# scanner position.
#
# @param expected_count [Integer] the maximum number of ")" to consume
# @return [Integer] how many characters were skipped; 0 when the next
#   character is not ")" (the pattern also matches the empty string)
def right_parentheses(expected_count)
  # Explicit call parentheses avoid Ruby's "ambiguous first argument" warning
  # for a regex literal passed to a method without them.
  @scanner.skip(/\){,#{expected_count}}/)
end
#safe_string ⇒ Object
May contain words, numbers, spaces, dashes, and underscores.
51 52 53 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 51
#
# Scans an unquoted string up to the TOKENS[:safe_string_until] stop pattern.
# May contain words, numbers, spaces, dashes, and underscores.
#
# @return [String, nil] whatever #match_until returns for that stop pattern
def safe_string
  stop_pattern = TOKENS[:safe_string_until]
  match_until(stop_pattern)
end
#skip(token) ⇒ Object
30 31 32 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 30
#
# Advances past the token registered under +token+ in TOKENS, if it matches
# at the current position.
#
# @param token [Symbol] a key of TOKENS
# @return [Integer, nil] the length of the skipped text, or nil when the
#   token does not match at the current position
def skip(token)
  pattern = TOKENS[token]
  @scanner.skip(pattern)
end
#string_with_balanced_parentheses ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/elasticfusion/search/query/lexer.rb', line 66
#
# Scans text up to the TOKENS[:string_with_balanced_parentheses_until] stop
# pattern, then trims it so that it keeps no more ")" separators than it has
# "(" characters. The scanner is rewound over the trimmed-off tail so that
# the tail can be scanned again.
#
# @return [String, nil] the kept text with surrounding whitespace stripped,
#   or nil when nothing could be scanned
def string_with_balanced_parentheses
  string = match_until TOKENS[:string_with_balanced_parentheses_until]
  return unless string

  opening_parens = string.count('(')

  # Splitting on ")" and re-joining the first (opening_parens + 1) pieces
  # keeps at most one ")" per "(".
  balanced = string.split(')')[0..opening_parens].join(')')
  # String#split drops trailing empty pieces, so a closing ")" at the very end
  # has to be restored by hand. Core String#end_with? is used here because
  # #ends_with? only exists when ActiveSupport is loaded.
  balanced += ')' if opening_parens > 0 && string.end_with?(')')

  # StringScanner#pos is a byte offset, so the rewind distance is measured in
  # bytes; a character count would rewind too little over multibyte text.
  cutoff = string.bytesize - balanced.bytesize
  @scanner.pos -= cutoff

  balanced.strip
end