
def edit_distance(A, B):
    """Calculate the edit distance (levenshtein) between strings A and B.
       Also, generate the longest common subsequence from the resulting matrix
       after edit distance has been calculated."""
    
    # first, create an empty matrix. Note, we only care about populating the
    # first row and first column with the proper values. All other cells will
    # have their default values replaced
    M = [[i+j for j in range(0, len(A)+1)] for i in range(0, len(B)+1)]
    
    # now, go row by row and calculate the edit distance
    column_range = range(1, len(A)+1)
    
    # each row
    for i in range(1, len(B)+1):
        
        # each column
        for j in column_range:
            cost = int(B[i-1] is A[j-1]) ^ 1
            M[i][j] = min(M[i-1][j]+1, M[i][j-1]+1, M[i-1][j-1]+cost)
    
    print "edit distance:", M[i][j]
    
    # print out the matrix in a nice way
    print
    print "edit distance matrix:"
    for row in M:
        print row
    
    print
    
    # now, build the longest common subsequence from the resulting matrix
    sub = ""
    while (i > 0) and (j > 0):
        
        # characters match, move diagonally (left and up)
        if B[i-1] is A[j-1]:
            i = i-1
            j = j-1
            sub = B[i] + sub
        
        # characters don't match, lets go to the cell with the smallest
        # edit distance
        else:
            # move up?
            if M[i-1][j] < M[i][j-1]:
                i = i-1
            
            # move left
            else:
                j = j-1
    
    print "longest common subsequence:", sub

if __name__ == "__main__":
    edit_distance("abcdef", "xzceejf")


