Supplement to "Hyper-Minimization in O(n^2)"

def hyper_minimize(self): """Alters the DFA into a smallest possible DFA recognizing a finitely different language. In other words, if D is the original DFA and D' the result of this function, then the symmetric difference of L(D) and L(D') will be a finite set, and there exists no smaller automaton than D' with this property. See "The DFAs of Finitely Different Regular Languages" for context. """ # Step 1: Classical minimization self.minimize() # Step 2: Partition states into equivalence classes state_classes = self.f_equivalence_classes() # Step 3: Find preamble and kernel parts (preamble, kernel) = self.preamble_and_kernel() # Step 4: Merge (f_merge_states in the paper) # (Could be done more efficiently) for sc in state_classes: pres = filter(lambda s: s in preamble, sc) kers = filter(lambda s: s in kernel, sc) if len(kers): rep = kers[0] for p_state in pres: self.state_merge(p_state, rep) else: rep = pres[0] for p_state in pres[1:]: self.state_merge(p_state, rep)

def f_equivalence_classes(self): """Returns a partition of the states into finite-difference equivalence clases, using the experimental O(n^2) algorithm.""" sd = symmetric_difference(self, self) self_pairs = [(x, x) for x in self.states] fd_equiv_pairs = sd.right_finite_states(self_pairs) sets = UnionFind() for state in self.states: sets.make_set(state) for (state1, state2) in fd_equiv_pairs: set1, set2 = sets.find(state1), sets.find(state2) if set1 != set2: sets.union(set1, set2) state_classes = sets.as_lists() return state_classes

def right_finite_states(self, sink_states): """Given a DFA (self) and a list of states (sink_states) that are assumed to induce the empty language, return the topologically-ordered set of states in the DFA that induce finite languages. """ #Step 1: Build the states' profiles inbound = self.state_hash([]) outbound = self.state_hash([]) is_sink_state = self.state_subset_hash(sink_states) for state in self.states: if is_sink_state[state]: continue for c in self.alphabet: next = self.delta(state, c) inbound[next].append(state) outbound[state].append(next) #Step 2: Pluck to_pluck = sink_states plucked = [] while len(to_pluck): state = to_pluck.pop() plucked.append(state) for incoming in inbound[state]: outbound[incoming].remove(state) if (len(outbound[incoming]) == 0) and (incoming != state): to_pluck.append(incoming) plucked.reverse() return plucked

def preamble_and_kernel(self): """Returns the partition of the state-set into the preamble and kernel as a 2-tuple. A state is in the preamble iff there are finitely many strings that reach it from the start state. See "The DFAs of Finitely Different Regular Languages" for context. """ #O(n^2): can this be improved? reachable = {} for q in self.states: reachable[q] = self.reachable_from(q, inclusive=False) in_fin = self.state_hash(True) for q in reachable[self.start]: if q in reachable[q]: for next in reachable[q]: in_fin[next] = False preamble = filter(lambda x: in_fin[x], self.states) kernel = filter(lambda x: not in_fin[x], self.states) return (preamble, kernel)

def DFCA_minimize(self, l=None): """DFCA minimization" Input: "self" is a DFA accepting a finite language Result: "self" is DFCA-minimized, and the returned value is the length of the longest word accepted by the original DFA See "Minimal cover-automata for finite languages" for context on DFCAs, and "An O(n^2) Algorithm for Constructing Minimal Cover Automata for Finite Languages" for the source of this algorithm (Campeanu, Paun, Santean, and Yu). We follow their algorithm exactly, except that "l" is optionally calculated for you, and the state- ordering is automatically created. There exists a faster, O(n*logn)-time algorithm due to Korner, from CIAA 2002. """ assert(self.is_finite()) self.minimize() ###Step 0: Numbering the states and computing "l" n = len(self.states) - 1 state_order = self.pluck_leaves() if l==None: l = self.longest_word_length() #We're giving each state a numerical name so that the algorithm can # run on an "ordered" DFA -- see the paper for why. These functions # allow us to copiously convert between names. def nn(q): # "numerical name" return state_order.index(q) def rn(n): # "real name" return state_order[n] ###Step 1: Computing the gap function # 1.1 level = self.levels() #holds real names gap = {} #holds numerical names # 1.2 for i in range(n): gap[(i, n)] = l if level[rn(n)] <= l: for q in self.accepts: gap[(nn(q), n)] = 0 # 1.3 for i in range(n-1): for j in range(i+1, n): if (rn(i) in self.accepts)^(rn(j) in self.accepts): gap[(i,j)] = 0 else: gap[(i,j)] = l # 1.4 def level_range(i, j): return l - max(level[rn(i)], level[rn(j)]) for i in range(n-2, -1, -1): for j in range(n, i, -1): for char in self.alphabet: i2 = nn(self.delta(rn(i), char)) j2 = nn(self.delta(rn(j), char)) if i2 != j2: if i2 < j2: g = gap[(i2, j2)] else: g = gap[(j2, i2)] if g+1 <= level_range(i, j): gap[(i,j)] = min(gap[(i,j)], g+1) ###Step 2: Merging states # 2.1 P = {} for i in range(n+1): P[i] = False # 2.2 for i in range(n): if P[i] == False: for j in range(i+1, n+1): if (P[j] == False) and (gap[(i,j)] == l): self.state_merge(rn(j), rn(i)) P[j] = True return l

Download code:

Inline code:

hyper-minimize

f_equivalence_classes

right_finite_states

preamble_and_kernel

DFCA_minimize