This page presents Python source code to supplement the paper "Hyper-Minimization in O(n^2)", accepted to CIAA 2008. Since Python is a high-level scripting language, the implementation details will never exactly match the theoretical presentation, but it comes quite close. You can contact the author at [email protected]

• DFA.py
• UnionFind.py

### hyper-minimize

def hyper_minimize(self):
    """Alter the DFA into a smallest possible DFA recognizing a finitely
    different language.

    In other words, if D is the original DFA and D' the result of this
    function, then the symmetric difference of L(D) and L(D') will be a
    finite set, and there exists no smaller automaton than D' with this
    property.

    See "The DFAs of Finitely Different Regular Languages" for context.
    """
    # Step 1: Classical minimization
    self.minimize()
    # Step 2: Partition states into finite-difference equivalence classes
    state_classes = self.f_equivalence_classes()
    # Step 3: Find preamble and kernel parts
    (preamble, kernel) = self.preamble_and_kernel()
    # Step 4: Merge (f_merge_states in the paper)
    # (Could be done more efficiently)
    # List comprehensions rather than py2 filter(), so pres/kers are real
    # lists under Python 3 as well.
    for sc in state_classes:
        pres = [s for s in sc if s in preamble]
        kers = [s for s in sc if s in kernel]
        if kers:
            # BUG FIX: the representative must be a single state, not the
            # whole list -- state_merge takes states, and the preamble
            # branch below already uses pres[1:] against a pres[0] rep.
            rep = kers[0]
            # Merge every preamble state of the class into the kernel rep.
            for p_state in pres:
                self.state_merge(p_state, rep)
        else:
            # Class lies entirely in the preamble: merge all into the first.
            rep = pres[0]
            for p_state in pres[1:]:
                self.state_merge(p_state, rep)

### f_equivalence_classes

def f_equivalence_classes(self):
    """Partition the states into finite-difference equivalence classes,
    using the experimental O(n^2) algorithm, and return the partition
    as a list of lists of states.
    """
    # Two states are f-equivalent exactly when the corresponding pair
    # induces a finite language in the symmetric-difference automaton of
    # the DFA with itself; the diagonal pairs (q, q) trivially do.
    sd = symmetric_difference(self, self)
    diagonal = [(q, q) for q in self.states]
    equivalent_pairs = sd.right_finite_states(diagonal)
    # Group equivalent states with a union-find structure.
    partition = UnionFind()
    for q in self.states:
        partition.make_set(q)
    for (p, q) in equivalent_pairs:
        root_p = partition.find(p)
        root_q = partition.find(q)
        if root_p != root_q:
            partition.union(root_p, root_q)
    return partition.as_lists()

### right_finite_states

def right_finite_states(self, sink_states):
    """Given a DFA (self) and a list of states (sink_states) that are
    assumed to induce the empty language, return the topologically-ordered
    list of states in the DFA that induce finite languages.
    """
    # Step 1: Build the states' profiles -- for every non-sink state,
    # record its outgoing edges and the reverse (incoming) edges.
    # Parallel edges are kept as duplicates, so each single removal in
    # Step 2 cancels exactly one of them.
    predecessors = self.state_hash([])
    successors = self.state_hash([])
    is_sink = self.state_subset_hash(sink_states)
    for source in self.states:
        if is_sink[source]:
            continue
        for symbol in self.alphabet:
            target = self.delta(source, symbol)
            predecessors[target].append(source)
            successors[source].append(target)
    # Step 2: Pluck -- repeatedly remove states all of whose outgoing
    # edges lead to already-plucked states; exactly the right-finite
    # states get plucked. (Deliberately reuses the caller's list.)
    pending = sink_states
    plucked = []
    while pending:
        current = pending.pop()
        plucked.append(current)
        for source in predecessors[current]:
            successors[source].remove(current)
            if not successors[source] and source != current:
                pending.append(source)
    plucked.reverse()
    return plucked

### preamble_and_kernel

def preamble_and_kernel(self):
    """Return the partition of the state-set into the preamble and kernel
    as a 2-tuple (preamble, kernel) of lists.

    A state is in the preamble iff there are finitely many strings that
    reach it from the start state; otherwise it is in the kernel.
    See "The DFAs of Finitely Different Regular Languages" for context.
    """
    # O(n^2): can this be improved?
    # reachable[q] = states reachable from q (excluding q itself unless
    # q lies on a cycle, since inclusive=False).
    reachable = {}
    for q in self.states:
        reachable[q] = self.reachable_from(q, inclusive=False)
    # A state is in the kernel iff it is reachable from some state q that
    # both is reachable from the start and lies on a cycle (q in
    # reachable[q]); everything else stays marked finitely-reachable.
    in_fin = self.state_hash(True)
    for q in reachable[self.start]:
        if q in reachable[q]:
            for succ in reachable[q]:
                in_fin[succ] = False
    # List comprehensions instead of py2 filter(): under Python 3, filter
    # returns lazy iterators, which would break callers that index or
    # len() the results (e.g. hyper_minimize).
    preamble = [x for x in self.states if in_fin[x]]
    kernel = [x for x in self.states if not in_fin[x]]
    return (preamble, kernel)

### DFCA_minimize

def DFCA_minimize(self, l=None):
    """DFCA minimization.

    Input: "self" is a DFA accepting a finite language.
    Result: "self" is DFCA-minimized, and the returned value is the
    length of the longest word accepted by the original DFA.

    See "Minimal cover-automata for finite languages" for context on
    DFCAs, and "An O(n^2) Algorithm for Constructing Minimal Cover
    Automata for Finite Languages" for the source of this algorithm
    (Campeanu, Paun, Santean, and Yu). We follow their algorithm
    exactly, except that "l" is optionally calculated for you, and the
    state-ordering is automatically created.

    There exists a faster, O(n*logn)-time algorithm due to Korner,
    from CIAA 2002.
    """
    assert(self.is_finite())
    self.minimize()
    ###Step 0: Numbering the states and computing "l"
    # n is the index of the last state; after ordering, states carry
    # numerical names 0..n.
    n = len(self.states) - 1
    state_order = self.pluck_leaves()
    if l==None:
        l = self.longest_word_length()
    #We're giving each state a numerical name so that the algorithm can
    # run on an "ordered" DFA -- see the paper for why. These functions
    # allow us to copiously convert between names.
    def nn(q):
        # "numerical name": position of state q in the ordering
        return state_order.index(q)
    def rn(n):
        # "real name": the state sitting at position n of the ordering
        return state_order[n]
    ###Step 1: Computing the gap function
    # 1.1
    level = self.levels() #holds real names
    gap = {} #holds numerical names (pairs (i, j) with i < j)
    # 1.2 -- initialize every state's gap against the last state n
    for i in range(n):
        gap[(i, n)] = l
    if level[rn(n)] <= l:
        for q in self.accepts:
            gap[(nn(q), n)] = 0
    # 1.3 -- base values for the remaining pairs: 0 when exactly one of
    # the two states accepts (they are distinguished by the empty word),
    # l otherwise.
    for i in range(n-1):
        for j in range(i+1, n):
            if (rn(i) in self.accepts)^(rn(j) in self.accepts):
                gap[(i,j)] = 0
            else:
                gap[(i,j)] = l
    # 1.4 -- propagate gaps backwards through the transitions
    def level_range(i, j):
        # NOTE(review): presumably level[q] is q's distance from the
        # start state, making this the word-length budget left below l
        # for the pair -- confirm against DFA.levels().
        return l - max(level[rn(i)], level[rn(j)])
    for i in range(n-2, -1, -1):
        for j in range(n, i, -1):
            for char in self.alphabet:
                # Successor pair under char, in numerical names; gap is
                # stored only for ordered pairs, hence the i2/j2 swap.
                i2 = nn(self.delta(rn(i), char))
                j2 = nn(self.delta(rn(j), char))
                if i2 != j2:
                    if i2 < j2:
                        g = gap[(i2, j2)]
                    else:
                        g = gap[(j2, i2)]
                    if g+1 <= level_range(i, j):
                        gap[(i,j)] = min(gap[(i,j)], g+1)
    ###Step 2: Merging states
    # 2.1 -- P[i] marks whether state i has already been merged away
    P = {}
    for i in range(n+1):
        P[i] = False
    # 2.2 -- merge each still-live later state j into the earliest live
    # state i it is similar to (gap == l per the paper's criterion).
    for i in range(n):
        if P[i] == False:
            for j in range(i+1, n+1):
                if (P[j] == False) and (gap[(i,j)] == l):
                    self.state_merge(rn(j), rn(i))
                    P[j] = True
    return l