
def morris_pratt(P, T):
    """Search for a pattern, P, in some text, T, using the Morris-Pratt
       algorithm."""
    
    def index(P):
        """Find all borders, that is, matching prefixes and suffixes, that
           are in P by matching P against itself. The worst-case border is:
           AA .. AAAB, all the same letter followed by a single different
           letter.
           
           !!! z is there simply for debugging purposes. If you want to see
               how good/bad a pattern is, print z.
           """
        F = range(0, len(P))
        i = 1
        j = 0
        
        z = 0
        
        # go over the characters in the pattern starting at the 2nd character
        while i < len(P):

            z = z+1

            # does the current character in the pattern match the jth character
            # from the beginning of P?
            if P[i] is P[j]:

                F[i] = F[i-1]+1
                j = j+1
                i = i+1
            
            # try to extend the border from the position before j
            elif j > 0:
                j = F[j-1]

            # no it doesn't, so set j=0 to tell us to start matching the
            # pattern against itself from the beginning and state that this
            # chatacter in the pattern ins't part of a prefix
            else:
                F[i] = 0
                i = i+1
        
        print "\t", z, "character comparisons (indexing)"
        
        # we've indexed the pattern, return the failure function
        return F
           
    # using the results of the failure function, search for P in T.
    def search(P, F, T):
        """Search for P in T using the failure function of P, F, to take
           advantage of information we've figured out along the way."""
        
        i = 0 # where we are in the text
        m = 0 # where we started matching from in the text
        j = 0 # where we are in the pattern, this is also useful for the
        z = 0 # comparison count
        r = -1 # what to return
        
        pattern_len = len(P)
        
        while i < len(T):
            
            z = z+1
            
            # yay we have a match, move the pattern and text indexes along
            if T[i] is P[j]:
                i = i+1
                j = j+1
                                
                # we've matched the whole pattern
                if j is pattern_len:
                    r = m
                    break
            
            # no match, lets move past already matched characters
            else:
                j = F[j-1] # tell us where to start matching from in the
                           # pattern the next time we loop
                i = i >= (m+j+1) and i+1 or (m+j+1)
                m = i # reset where we should match from
        
        print "\t", z, "character comparisons (matching)"
        return r

    # search the text
    print "pattern length:", len(P)
    print "text length:", len(T)
    
    i = search(P, index(P), T)
    print "pattern matched at index:", i

if __name__ == "__main__":
    morris_pratt("aaab","baabbaabaaaab")

