############################################################################## ###################### code from lecture ############################# ############################################################################## def fingerprint(text, basis=2**16, r=2**32-3): """ used to compute karp-rabin fingerprint of the pattern employs Horner method (modulo r) """ partial_sum = 0 for ch in text: partial_sum =(partial_sum*basis + ord(ch)) % r return partial_sum def text_fingerprint(text, m, basis=2**16, r=2**32-3): """ computes karp-rabin fingerprint of the text """ f=[] b_power = pow(basis, m-1, r) list.append(f, fingerprint(text[0:m], basis, r)) # f[0] equals first text fingerprint for s in range(1, len(text)-m+1): new_fingerprint = ( (f[s-1] - ord(text[s-1])*b_power)*basis +ord(text[s+m-1]) ) % r # compute f[s], based on f[s-1] list.append(f,new_fingerprint)# append f[s] to existing f return f def find_matches_KR(pattern, text, basis=2**16, r=2**32-3): """ find all occurances of pattern in text using coin flipping Karp-Rabin algorithm """ if len(pattern) > len(text): return [] p = fingerprint(pattern, basis, r) f = text_fingerprint(text, len(pattern), basis, r) matches = [s for (s,f_s) in enumerate(f) if f_s == p] # list comprehension return matches ############################################################################## #################### end code from lecture ############################# ############################################################################## def find_matches_KR_safe(pattern, text, basis=2**16, r=2**32-3): """ a safe version of KR checks every suspect for a match """ if len(pattern) > len(text): return [] p = fingerprint(pattern, basis, r) f = text_fingerprint(text, len(pattern), basis, r) matches = [s for (s,f_s) in enumerate(f) if f_s == p \ and text[s:s+len(pattern)]==pattern] #the additional safety check return matches ################################################################## #competition between unsafe and safe versions import time text = "a"*100000 print("text = 'a'*",len(text)) for pattern in ["a"*100, "a"*1000, "a"*10000]: print("pattern = 'a'*",len(pattern)) for f in [find_matches_KR, find_matches_KR_safe]: t0=time.clock() f(pattern, text) t1=time.clock() print (f.__name__, t1-t0) print("") #newline import random def gen_str(size): ''' Generate a random lowercase English string of length size''' s="" for i in range(size): s+=random.choice("abcdefghijklmnopqrstuvwxyz") return s text = gen_str(100000) pattern = gen_str(10000) for f in [find_matches_KR, find_matches_KR_safe]: t0=time.clock() f(pattern, text) t1=time.clock() print (f.__name__, t1-t0)