def boyer_moore(P, T):
"""Search for P in T using the Boyer Moore algorithm assuming a ascii
character set."""
pattern_length = len(P)
length = len(T) - pattern_length + 1
# create a failure function. this list will keep track of the last position
# of each character in the pattern. this will help us jump ahead while
# trying to match the pattern in the text
fail = [-1 for i in range(0, 128)]
i = 0
for c in P:
fail[ord(c)] = i
i = i+1
# j is the current position in the pattern, starting from the end. i is
# the current position in the text, starting from the beginning.
j = pattern_length - 1 # j will decrease as characters are matched
i = 0 # i will increase as characters aren't matched
# while there are still characters to look at. length is the text length
# minus the pattern length. We don't need go over those characters with i
# because j will get them.
while i < length:
# we've matched the pattern, it starts at position i
if j < 0:
return i
# we've matched a character
if P[j] is T[i+j]:
# matched a character, keep going
j = j-1
# skip ahead if we can
else:
# get the last position in the pattern of the current character in
# the text. pos will be -1 if the character doesn't exist in the
# pattern
pos = fail[ord(T[i+j])]
# the current character in the text exists in the pattern. If its
# last position is beyond where we are currently looking then we
# will shift past it.
if pos > j:
i = i+j+1
# the character in the text exists in the pattern, lets align the
# text such to that character
elif -1 < pos <= j:
i = i+(j-pos)
# the character in the text doesn't exist in the pattern, lets
# shift beyond it
else:
i = i+j+1
j = pattern_length - 1
# the pattern was not found in the text
return -1
print "Found 'abc' in 'abababc' at index:", boyer_moore("abc", "abababc")