Class: Kleene::NFA
- Inherits:
-
Object
- Object
- Kleene::NFA
- Defined in:
- lib/kleene/nfa.rb
Instance Attribute Summary collapse
-
#alphabet ⇒ Object
: Set(Char).
-
#current_states ⇒ Object
: Set(State).
-
#final_states ⇒ Object
: Set(State).
-
#start_state ⇒ Object
: State.
-
#states ⇒ Object
: Set(State).
-
#transitions ⇒ Object
: Hash(State, Hash(Char, Set(NFATransition))).
Instance Method Summary collapse
- #accept? ⇒ Boolean
- #add_state(new_state) ⇒ Object
- #add_states(states) ⇒ Object
- #add_transition(token, from_state, to_state) ⇒ Object
-
#all_transitions ⇒ Object
: Array(NFATransition).
- #deep_clone ⇒ Object
-
#epsilon_closure(state_set) ⇒ Object
Determine the epsilon closure of the given state set That is, determine what states are reachable on an epsilon transition from the current state set (@current_states).
- #error_states ⇒ Object
- #graphviz ⇒ Object
-
#handle_token!(input_token) ⇒ Object
process another input token.
-
#initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, initial_states = nil) ⇒ NFA
constructor
A new instance of NFA.
-
#match?(input) ⇒ Boolean
: MatchRef?.
-
#matches(input) ⇒ Object
Returns an array of matches found anywhere in the input string.
-
#matches_at_offset(input, input_start_offset) ⇒ Object
Returns an array of matches found in the input string, each of which begins at the offset input_start_offset.
- #next_states(state_set, input_token) ⇒ Object
-
#reachable_states(start_state) ⇒ Object
Returns a set of State objects which are reachable through any transition path from the NFA’s start_state.
- #regex_pattern ⇒ Object
- #remove_state(state) ⇒ Object
- #reset_current_states ⇒ Object
- #set_regex_pattern(pattern) ⇒ Object
-
#to_dfa ⇒ Object
This implements the subset construction algorithm presented on page 118 of the first edition of the dragon book.
- #to_s(verbose = false) ⇒ Object
-
#transitions_from(state_set) ⇒ Object
def transitions_from(state) # : Set(NFATransition) @transitions&.values&.reduce {|memo, set_of_transisions| memo | set_of_transisions} || Set.new end.
- #update_final_states ⇒ Object
Constructor Details
permalink #initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, initial_states = nil) ⇒ NFA
Returns a new instance of NFA.
33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/kleene/nfa.rb', line 33 def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, initial_states = nil) @start_state = start_state @transitions = transitions @alphabet = alphabet + all_transitions.map(&:token) @states = initial_states || reachable_states(start_state) @current_states = Set.new @final_states = Set.new update_final_states reset_current_states end |
Instance Attribute Details
permalink #alphabet ⇒ Object
: Set(Char)
25 26 27 |
# File 'lib/kleene/nfa.rb', line 25 def alphabet @alphabet end |
permalink #current_states ⇒ Object
: Set(State)
29 30 31 |
# File 'lib/kleene/nfa.rb', line 29 def current_states @current_states end |
permalink #final_states ⇒ Object
: Set(State)
30 31 32 |
# File 'lib/kleene/nfa.rb', line 30 def final_states @final_states end |
permalink #start_state ⇒ Object
: State
27 28 29 |
# File 'lib/kleene/nfa.rb', line 27 def start_state @start_state end |
permalink #states ⇒ Object
: Set(State)
26 27 28 |
# File 'lib/kleene/nfa.rb', line 26 def states @states end |
permalink #transitions ⇒ Object
: Hash(State, Hash(Char, Set(NFATransition)))
28 29 30 |
# File 'lib/kleene/nfa.rb', line 28 def transitions @transitions end |
Instance Method Details
permalink #accept? ⇒ Boolean
176 177 178 |
# File 'lib/kleene/nfa.rb', line 176 def accept? @current_states.any?(&:final?) end |
permalink #add_state(new_state) ⇒ Object
[View source]
97 98 99 |
# File 'lib/kleene/nfa.rb', line 97 def add_state(new_state) @states << new_state end |
permalink #add_states(states) ⇒ Object
[View source]
101 102 103 |
# File 'lib/kleene/nfa.rb', line 101 def add_states(states) @states.merge(states) end |
permalink #add_transition(token, from_state, to_state) ⇒ Object
[View source]
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/kleene/nfa.rb', line 110 def add_transition(token, from_state, to_state) # # make sure states EITHER have a single outbound epsilon transition OR non-epsilon outbound transitions; they can't have both # if token == NFATransition::Epsilon # # make sure from_state doesn't have any outbound non-epsilon transitions # raise "Error: Non-epsilon transitions are already present on #{from_state.to_s}! States may EITHER have a single outbound epsilon transision OR have outbound non-epsilon transitions, but not both." if transitions_from(from_state).any? {|t| !t.epsilon? } # else # # make sure from_state doesn't have any outbound epsilon transition # raise "Error: Epsilon transitions are already present on #{from_state.to_s}! States may EITHER have a single outbound epsilon transision OR have outbound non-epsilon transitions, but not both." if transitions_from(from_state).any?(&:epsilon?) # end @alphabet << token # alphabet is a set, so there will be no duplications @states << from_state @states << to_state new_transition = NFATransition.new(token, from_state, to_state) char_transition_map = @transitions[from_state] ||= Hash.new set_of_transisions = char_transition_map[token] ||= Set.new set_of_transisions << new_transition new_transition end |
permalink #all_transitions ⇒ Object
: Array(NFATransition)
47 48 49 |
# File 'lib/kleene/nfa.rb', line 47 def all_transitions() # : Array(NFATransition) transitions.flat_map {|state, char_transition_map| char_transition_map.values.flat_map(&:to_a) } end |
permalink #deep_clone ⇒ Object
[View source]
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/kleene/nfa.rb', line 66 def deep_clone old_states = @states.to_a new_states = old_states.map(&:dup) state_mapping = old_states.zip(new_states).to_h new_transitions = transitions.map {|state, char_transition_map| [ state_mapping[state], char_transition_map.map {|char, set_of_transisions| [ char, set_of_transisions.map {|transition| NFATransition.new(transition.token, state_mapping[transition.from], state_mapping[transition.to])}.to_set ] }.to_h ] }.to_h NFA.new(state_mapping[@start_state], @alphabet.clone, new_transitions, new_states.to_set).set_regex_pattern(regex_pattern) end |
permalink #epsilon_closure(state_set) ⇒ Object
Determine the epsilon closure of the given state set That is, determine what states are reachable on an epsilon transition from the current state set (@current_states). Returns a Set of State objects.
202 203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/kleene/nfa.rb', line 202 def epsilon_closure(state_set) # : Set(State) state_set = state_set.is_a?(State) ? Set[state_set] : state_set visited_states = Set.new() unvisited_states = state_set while !unvisited_states.empty? epsilon_transitions = unvisited_states.compact_map {|state| @transitions.dig(state, NFATransition::Epsilon) }.flat_map(&:to_a) destination_states = epsilon_transitions.map(&:to).to_set visited_states.merge(unvisited_states) # add the unvisited states to the visited_states unvisited_states = destination_states - visited_states end visited_states end |
permalink #error_states ⇒ Object
[View source]
93 94 95 |
# File 'lib/kleene/nfa.rb', line 93 def error_states @states.select(&:error?).to_set end |
permalink #graphviz ⇒ Object
[View source]
267 268 269 270 271 272 273 274 275 276 277 278 |
# File 'lib/kleene/nfa.rb', line 267 def graphviz retval = "digraph G { " all_transitions.each do |t| transition_label = t.epsilon? ? "ε" : t.token retval += "#{t.from.id} -> #{t.to.id} [label=\"#{transition_label}\"];" end @final_states.each do |s| retval += "#{s.id} [color=lightblue2, style=filled, shape=doublecircle];" end retval += " }" retval end |
permalink #handle_token!(input_token) ⇒ Object
process another input token
172 173 174 |
# File 'lib/kleene/nfa.rb', line 172 def handle_token!(input_token) @current_states = next_states(@current_states, input_token) end |
permalink #match?(input) ⇒ Boolean
: MatchRef?
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# File 'lib/kleene/nfa.rb', line 154 def match?(input) # : MatchRef? # puts "match?(\"#{input}\")" # puts self.to_s reset_current_states # puts @current_states.map(&:id) input.each_char.with_index do |char, index| # puts char handle_token!(char) # puts @current_states.map(&:id) end if accept? MatchRef.new(input, 0...input.size) end end |
permalink #matches(input) ⇒ Object
Returns an array of matches found anywhere in the input string
148 149 150 151 152 |
# File 'lib/kleene/nfa.rb', line 148 def matches(input) (0...input.size).reduce([]) do |memo, offset| memo + matches_at_offset(input, offset) end end |
permalink #matches_at_offset(input, input_start_offset) ⇒ Object
Returns an array of matches found in the input string, each of which begins at the offset input_start_offset
133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/kleene/nfa.rb', line 133 def matches_at_offset(input, input_start_offset) reset_current_states matches = [] (input_start_offset...input.size).each do |offset| token = input[offset] handle_token!(token) if accept? matches << MatchRef.new(input, input_start_offset..offset) end end matches end |
permalink #next_states(state_set, input_token) ⇒ Object
[View source]
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
# File 'lib/kleene/nfa.rb', line 180 def next_states(state_set, input_token) # Retrieve a list of states in the epsilon closure of the given state set epsilon_reachable_states = epsilon_closure(state_set) # puts "epsilon_reachable_states = #{epsilon_reachable_states.map(&:id)}" # Build an array of outbound transitions from each state in the epsilon-closure # Filter the outbound transitions, selecting only those that accept the input we are given. outbound_transitions = epsilon_reachable_states.compact_map {|state| @transitions.dig(state, input_token) }.flat_map(&:to_a) # puts "outbound_transitions = #{outbound_transitions.inspect}" # Build an array of epsilon-closures of each transition's destination state. destination_state_epsilon_closures = outbound_transitions.map {|transition| epsilon_closure(transition.to) } # Union each of the epsilon-closures (each is a set) together to form a flat array of states in the epsilon-closure of all of our current states. next_states = destination_state_epsilon_closures.reduce {|combined_state_set, individual_state_set| combined_state_set.merge(individual_state_set) } next_states || Set.new end |
permalink #reachable_states(start_state) ⇒ Object
Returns a set of State objects which are reachable through any transition path from the NFA’s start_state.
216 217 218 219 220 221 222 223 224 225 226 |
# File 'lib/kleene/nfa.rb', line 216 def reachable_states(start_state) visited_states = Set.new() unvisited_states = Set[start_state] while !unvisited_states.empty? outbound_transitions = unvisited_states.flat_map {|state| @transitions[state]&.values&.flat_map(&:to_a) || Array.new } destination_states = outbound_transitions.map(&:to).to_set visited_states.merge(unvisited_states) # add the unvisited states to the visited_states unvisited_states = destination_states - visited_states end visited_states end |
permalink #regex_pattern ⇒ Object
[View source]
299 300 301 |
# File 'lib/kleene/nfa.rb', line 299 def regex_pattern @regex_pattern || "<<empty>>" end |
permalink #remove_state(state) ⇒ Object
[View source]
105 106 107 108 |
# File 'lib/kleene/nfa.rb', line 105 def remove_state(state) raise "Unable to remove state from NFA: at least one transition leads to or from the state." if all_transitions.any? {|transition| transition.from == state || transition.to == state } @states.delete(state) end |
permalink #reset_current_states ⇒ Object
[View source]
89 90 91 |
# File 'lib/kleene/nfa.rb', line 89 def reset_current_states @current_states = epsilon_closure(@start_state) end |
permalink #set_regex_pattern(pattern) ⇒ Object
[View source]
294 295 296 297 |
# File 'lib/kleene/nfa.rb', line 294 def set_regex_pattern(pattern) @regex_pattern = pattern self end |
permalink #to_dfa ⇒ Object
This implements the subset construction algorithm presented on page 118 of the first edition of the dragon book. I found a similar explanation at: web.cecs.pdx.edu/~harry/compilers/slides/LexicalPart3.pdf
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/kleene/nfa.rb', line 230 def to_dfa state_map = Hash.new # this map contains (nfa_state_set => dfa_state) pairs dfa_transitions = Hash.new dfa_alphabet = @alphabet - Set[NFATransition::Epsilon] visited_state_sets = Set.new() nfa_start_state_set = epsilon_closure(@start_state) unvisited_state_sets = Set[nfa_start_state_set] dfa_start_state = State.new(nfa_start_state_set.any?(&:final?), nfa_start_state_set.any?(&:error?)) state_map[nfa_start_state_set] = dfa_start_state until unvisited_state_sets.empty? # take one of the unvisited state sets state_set = unvisited_state_sets.first current_dfa_state = state_map[state_set] # Figure out the set of next-states for each token in the alphabet # Add each set of next-states to unvisited_state_sets dfa_alphabet.each do |token| next_nfa_state_set = next_states(state_set, token) unvisited_state_sets << next_nfa_state_set # this new DFA state, next_dfa_state, represents the next nfa state set, next_nfa_state_set next_dfa_state = state_map[next_nfa_state_set] ||= State.new(next_nfa_state_set.any?(&:final?), next_nfa_state_set.any?(&:error?)) char_transition_map = dfa_transitions[current_dfa_state] ||= Hash.new char_transition_map[token] = DFATransition.new(token, current_dfa_state, next_dfa_state) end visited_state_sets << state_set unvisited_state_sets = unvisited_state_sets - visited_state_sets end # `state_map.invert` is sufficient to convert from a (nfa_state_set => dfa_state) mapping to a (dfa_state => nfa_state_set) mapping, because the mappings are strictly one-to-one. DFA.new(state_map[nfa_start_state_set], dfa_alphabet, dfa_transitions, state_map.invert, origin_nfa: self).set_regex_pattern(regex_pattern) end |
permalink #to_s(verbose = false) ⇒ Object
[View source]
280 281 282 283 284 285 286 287 288 289 290 291 292 |
# File 'lib/kleene/nfa.rb', line 280 def to_s(verbose = false) if verbose retval = states.map(&:to_s).join("\n") retval += "\n" all_transitions.each do |t| transition_label = t.epsilon? ? "epsilon" : t.token retval += "#{t.from.id} -> #{transition_label} -> #{t.to.id}\n" end retval else regex_pattern end end |
permalink #transitions_from(state_set) ⇒ Object
def transitions_from(state) # : Set(NFATransition)
@transitions[state]&.values&.reduce {|memo, set_of_transisions| memo | set_of_transisions} || Set.new
end
54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/kleene/nfa.rb', line 54 def transitions_from(state_set) # : Set(NFATransition) case state_set when State @transitions[state_set]&.values&.reduce {|memo, set_of_transisions| memo | set_of_transisions} || Set.new when Set state_set.map {|state| transitions_from(state) }.reduce {|memo, state_set| memo | state_set } else raise "boom" end end |
permalink #update_final_states ⇒ Object
[View source]
85 86 87 |
# File 'lib/kleene/nfa.rb', line 85 def update_final_states @final_states = @states.select { |s| s.final? }.to_set end |