k4st
-
Python,
pasted
on Apr 14:
|
|
def edit_distance(A, B):
"""Calculate the edit distance (levenshtein) between strings A and B.
Also, generate the longest common subsequence from the resulting matrix
after edit distance has been calculated."""
# first, create an empty matrix. Note, we only care about populating the
# first row and first column with the proper values. All other cells will
# have their default values replaced
M = [[i+j for j in range(0, len(A)+1)] for i in range(0, len(B)+1)]
# now, go row by row and calculate the edit distance
column_range = range(1, len(A)+1)
# each row
for i in range(1, len(B)+1):
# each column
for j in column_range:
cost = int(B[i-1] is A[j-1]) ^ 1
M[i][j] = min(M[i-1][j]+1, M[i][j-1]+1, M[i-1][j-1]+cost)
print "edit distance:", M[i][j]
# print out the matrix in a nice way
print
print "edit distance matrix:"
for row in M:
print row
print
# now, build the longest common subsequence from the resulting matrix
sub = ""
while (i > 0) and (j > 0):
# characters match, move diagonally (left and up)
if B[i-1] is A[j-1]:
i = i-1
j = j-1
sub = B[i] + sub
# characters don't match, lets go to the cell with the smallest
# edit distance
else:
# move up?
if M[i-1][j] < M[i][j-1]:
i = i-1
# move left
else:
j = j-1
print "longest common subsequence:", sub
if __name__ == "__main__":
edit_distance("abcdef", "xzceejf")
|
Output:
|
|
edit distance: 4
edit distance matrix:
[0, 1, 2, 3, 4, 5, 6]
[1, 1, 2, 3, 4, 5, 6]
[2, 2, 2, 3, 4, 5, 6]
[3, 3, 3, 2, 3, 4, 5]
[4, 4, 4, 3, 3, 3, 4]
[5, 5, 5, 4, 4, 3, 4]
[6, 6, 6, 5, 5, 4, 4]
[7, 7, 7, 6, 6, 5, 4]
longest common subsequence: cef
|
|