codepad
[
create a new paste
]
login
|
about
Language:
C
C++
D
Haskell
Lua
OCaml
PHP
Perl
Plain Text
Python
Ruby
Scheme
Tcl
def morris_pratt(P, T): """Search for a pattern, P, in some text, T, using the Morris-Pratt algorithm.""" def index(P): """Find all borders, that is, matching prefixes and suffixes, that are in P by matching P against itself. The worst-case border is: AA .. AAAB, all the same letter followed by a single different letter. !!! z is there simply for debugging purposes. If you want to see how good/bad a pattern is, print z. """ F = range(0, len(P)) i = 1 j = 0 z = 0 # go over the characters in the pattern starting at the 2nd character while i < len(P): z = z+1 # does the current character in the pattern match the jth character # from the beginning of P? if P[i] is P[j]: F[i] = F[i-1]+1 j = j+1 i = i+1 # try to extend the border from the position before j elif j > 0: j = F[j-1] # no it doesn't, so set j=0 to tell us to start matching the # pattern against itself from the beginning and state that this # chatacter in the pattern ins't part of a prefix else: F[i] = 0 i = i+1 print "\t", z, "character comparisons (indexing)" # we've indexed the pattern, return the failure function return F # using the results of the failure function, search for P in T. def search(P, F, T): """Search for P in T using the failure function of P, F, to take advantage of information we've figured out along the way.""" i = 0 # where we are in the text m = 0 # where we started matching from in the text j = 0 # where we are in the pattern, this is also useful for the z = 0 # comparison count r = -1 # what to return pattern_len = len(P) while i < len(T): z = z+1 # yay we have a match, move the pattern and text indexes along if T[i] is P[j]: i = i+1 j = j+1 # we've matched the whole pattern if j is pattern_len: r = m break # no match, lets move past already matched characters else: j = F[j-1] # tell us where to start matching from in the # pattern the next time we loop i = i >= (m+j+1) and i+1 or (m+j+1) m = i # reset where we should match from print "\t", z, "character comparisons (matching)" return r # search the text print "pattern length:", len(P) print "text length:", len(T) i = search(P, index(P), T) print "pattern matched at index:", i if __name__ == "__main__": morris_pratt("aaab","baabbaabaaaab")
Private
[
?
]
Run code