- DFA.py
- UnionFind.py
- See also python-automata on Google Code

def hyper_minimize(self):
"""Alters the DFA into a smallest possible DFA recognizing a finitely different language.
In other words, if D is the original DFA and D' the result of this function, then the
symmetric difference of L(D) and L(D') will be a finite set, and there exists no smaller
automaton than D' with this property.
See "The DFAs of Finitely Different Regular Languages" for context.
"""
# Step 1: Classical minimization
self.minimize()
# Step 2: Partition states into equivalence classes
state_classes = self.f_equivalence_classes()
# Step 3: Find preamble and kernel parts
(preamble, kernel) = self.preamble_and_kernel()
# Step 4: Merge (f_merge_states in the paper)
# (Could be done more efficiently)
for sc in state_classes:
pres = filter(lambda s: s in preamble, sc)
kers = filter(lambda s: s in kernel, sc)
if len(kers):
rep = kers[0]
for p_state in pres:
self.state_merge(p_state, rep)
else:
rep = pres[0]
for p_state in pres[1:]:
self.state_merge(p_state, rep)

def f_equivalence_classes(self):
"""Returns a partition of the states into finite-difference equivalence clases, using
the experimental O(n^2) algorithm."""
sd = symmetric_difference(self, self)
self_pairs = [(x, x) for x in self.states]
fd_equiv_pairs = sd.right_finite_states(self_pairs)
sets = UnionFind()
for state in self.states:
sets.make_set(state)
for (state1, state2) in fd_equiv_pairs:
set1, set2 = sets.find(state1), sets.find(state2)
if set1 != set2:
sets.union(set1, set2)
state_classes = sets.as_lists()
return state_classes

def right_finite_states(self, sink_states):
"""Given a DFA (self) and a list of states (sink_states) that are assumed to induce the
empty language, return the topologically-ordered set of states in the DFA that induce
finite languages.
"""
#Step 1: Build the states' profiles
inbound = self.state_hash([])
outbound = self.state_hash([])
is_sink_state = self.state_subset_hash(sink_states)
for state in self.states:
if is_sink_state[state]:
continue
for c in self.alphabet:
next = self.delta(state, c)
inbound[next].append(state)
outbound[state].append(next)
#Step 2: Pluck
to_pluck = sink_states
plucked = []
while len(to_pluck):
state = to_pluck.pop()
plucked.append(state)
for incoming in inbound[state]:
outbound[incoming].remove(state)
if (len(outbound[incoming]) == 0) and (incoming != state):
to_pluck.append(incoming)
plucked.reverse()
return plucked

def preamble_and_kernel(self):
"""Returns the partition of the state-set into the preamble and
kernel as a 2-tuple. A state is in the preamble iff there
are finitely many strings that reach it from the start state.
See "The DFAs of Finitely Different Regular Languages" for context.
"""
#O(n^2): can this be improved?
reachable = {}
for q in self.states:
reachable[q] = self.reachable_from(q, inclusive=False)
in_fin = self.state_hash(True)
for q in reachable[self.start]:
if q in reachable[q]:
for next in reachable[q]:
in_fin[next] = False
preamble = filter(lambda x: in_fin[x], self.states)
kernel = filter(lambda x: not in_fin[x], self.states)
return (preamble, kernel)

def DFCA_minimize(self, l=None):
"""DFCA minimization"
Input: "self" is a DFA accepting a finite language
Result: "self" is DFCA-minimized, and the returned value is the length of the longest
word accepted by the original DFA
See "Minimal cover-automata for finite languages" for context on DFCAs, and
"An O(n^2) Algorithm for Constructing Minimal Cover Automata for Finite Languages"
for the source of this algorithm (Campeanu, Paun, Santean, and Yu). We follow their
algorithm exactly, except that "l" is optionally calculated for you, and the state-
ordering is automatically created.
There exists a faster, O(n*logn)-time algorithm due to Korner, from CIAA 2002.
"""
assert(self.is_finite())
self.minimize()
###Step 0: Numbering the states and computing "l"
n = len(self.states) - 1
state_order = self.pluck_leaves()
if l==None:
l = self.longest_word_length()
#We're giving each state a numerical name so that the algorithm can
# run on an "ordered" DFA -- see the paper for why. These functions
# allow us to copiously convert between names.
def nn(q): # "numerical name"
return state_order.index(q)
def rn(n): # "real name"
return state_order[n]
###Step 1: Computing the gap function
# 1.1
level = self.levels() #holds real names
gap = {} #holds numerical names
# 1.2
for i in range(n):
gap[(i, n)] = l
if level[rn(n)] <= l:
for q in self.accepts:
gap[(nn(q), n)] = 0
# 1.3
for i in range(n-1):
for j in range(i+1, n):
if (rn(i) in self.accepts)^(rn(j) in self.accepts):
gap[(i,j)] = 0
else:
gap[(i,j)] = l
# 1.4
def level_range(i, j):
return l - max(level[rn(i)], level[rn(j)])
for i in range(n-2, -1, -1):
for j in range(n, i, -1):
for char in self.alphabet:
i2 = nn(self.delta(rn(i), char))
j2 = nn(self.delta(rn(j), char))
if i2 != j2:
if i2 < j2:
g = gap[(i2, j2)]
else:
g = gap[(j2, i2)]
if g+1 <= level_range(i, j):
gap[(i,j)] = min(gap[(i,j)], g+1)
###Step 2: Merging states
# 2.1
P = {}
for i in range(n+1):
P[i] = False
# 2.2
for i in range(n):
if P[i] == False:
for j in range(i+1, n+1):
if (P[j] == False) and (gap[(i,j)] == l):
self.state_merge(rn(j), rn(i))
P[j] = True
return l