Python 3.3.2 (v3.3.2:d047928ae3f6, May 16 2013, 00:06:53) [MSC v.1600 64 bit (AMD64)] on win32 Type "copyright", "credits" or "license()" for more information. #KARP-RABIN ALG for STRING MATCHING >>> ================================ RESTART ================================ >>> text = 'a'* 100000 pattern = 'a'* 100 find_matches_KR 0.17408052153081352 find_matches_KR_safe 0.2430849675745154 pattern = 'a'* 1000 find_matches_KR 0.1748447612703074 find_matches_KR_safe 0.5113821474245203 pattern = 'a'* 10000 find_matches_KR 0.16161522838854836 find_matches_KR_safe 2.8439123401620545 >>> text="abracadabra" >>> pattern = "bra" >>> text_fingerprint(text, 3) [6422933, 7471495, 6357433, 6488452, 6357389, 6553988, 6357390, 6422933, 7471495] >>> find_matches_KR(pattern,text) [1, 8] >>> ord(" ") 32 >> ================================ RESTART ================================ >>> random str of len n= 100000 , random pattern of length m= 10000 find_matches_KR 0.1554137835459103 find_matches_KR_safe 0.17127844422508687 >>> find_matches_KR("da","abracadabra", basis=2**16, r=2**16) [2, 4, 6, 9] >>> find_matches_KR_safe("da","abracadabra", basis=2**16, r=2**16) [6] >>> find_matches_KR("Humpty", "Humpty Dumpty", r=2**(16*5)) [0, 7] >>> find_matches_KR("Humpty", "Humpty Dumpty", r=2**(16*6)) [0] >>> #HUFFMAN COMPRESSION >>> ================================ RESTART ================================ >>> >>> t= build_huffman_tree (char_count("live and let live")) >>> t [['e', ['i', 'v']], [[['t', 'n'], ['a', 'd']], ['l', ' ']]] >>> d = generate_code(t) >>> d {'t': '1000', 'v': '011', 'i': '010', 'l': '110', 'n': '1001', ' ': '111', 'a': '1010', 'd': '1011', 'e': '00'} >>> compress("tell", d) '100000110110' >>> str_to_bin("tell") '1110100110010111011001101100' >>> compress("rtr", d) Traceback (most recent call last): File "", line 1, in compress("rtr", d) File "C:\work\Python\11\Huffman.py", line 68, in compress return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128) File "C:\work\Python\11\Huffman.py", line 68, in return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128) KeyError: 'r' >>> missing = [c for c in "rtr" if c not in d] >>> missing ['r', 'r'] >>> corpus = "live and let live" >>> corpus += str.join("", missing) >>> corpus 'live and let liverr' >>> t= build_huffman_tree (char_count(corpus)) >>> t [[['i', 'r'], ['v', ['t', 'n']]], [[['a', 'd'], 'l'], [' ', 'e']]] >>> d_new = generate_code(t) >>> compress("rtr", d_new) '0010110001' >>> compression_ratio("ab", "ab") 0.14285714285714285 >>> 1/7 0.14285714285714285 >>> compression_ratio("ab","abcd") 0.2857142857142857 >>> asci '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f' >>> len(asci) 128 >>> compression_ratio ("ab",asci) 1.0 >>> compression_ratio("hello", "a"*100+asci) 1.1428571428571428 >>> 8/7 1.1428571428571428 >>> d = generate_code(build_huffman_tree(char_count("a"*100 + asci))) >>> d {'\x1b': '10000011', '\x1f': '10000101', '\x13': '10000111', '\x17': '10001001', '\x0b': '10001011', '\x0f': '10001101', '\x03': '10001111', '\x07': '10010001', ';': '10010011', '?': '10010101', '3': '10010111', '7': '10011001', '+': '10011011', '/': '10011101', '#': '10011111', "'": '10100001', '[': '10100011', '_': '10100101', 'S': '10100111', 'W': '10101001', 'K': '10101011', 'O': '10101101', 'C': '10101111', 'G': '10110001', '{': '10110011', '\x7f': '10110101', 's': '10110111', 'w': '10111001', 'k': '10111011', 'o': '10111101', 'c': '10111110', 'g': '11000000', '\x18': '11000001', '\x1c': '11000011', '\x10': '11000101', '\x14': '11000111', '\x08': '11001001', '\x0c': '11001011', '\x00': '11001101', '\x04': '11001111', '8': '11010001', '<': '11010011', '0': '11010101', '4': '11010111', '(': '11011001', ',': '11011011', ' ': '11011101', '$': '11011111', 'X': '11100001', '\\': '11100011', 'P': '11100101', 'T': '11100111', 'H': '11101001', 'L': '11101011', '@': '11101101', 'D': '11101111', 'x': '11110001', '|': '11110011', 'p': '11110101', 't': '11110111', 'h': '11111001', 'l': '11111011', '`': '11111101', 'd': '11111111', '\x19': '10000010', '\x1d': '10000100', '\x11': '10000110', '\x15': '10001000', '\t': '10001010', '\r': '10001100', '\x01': '10001110', '\x05': '10010000', '9': '10010010', '=': '10010100', '1': '10010110', '5': '10011000', ')': '10011010', '-': '10011100', '!': '10011110', '%': '10100000', 'Y': '10100010', ']': '10100100', 'Q': '10100110', 'U': '10101000', 'I': '10101010', 'M': '10101100', 'A': '10101110', 'E': '10110000', 'y': '10110010', '}': '10110100', 'q': '10110110', 'u': '10111000', 'i': '10111010', 'm': '10111100', 'a': '0', 'e': '10111111', '\x1a': '11000010', '\x1e': '11000100', '\x12': '11000110', '\x16': '11001000', '\n': '11001010', '\x0e': '11001100', '\x02': '11001110', '\x06': '11010000', ':': '11010010', '>': '11010100', '2': '11010110', '6': '11011000', '*': '11011010', '.': '11011100', '"': '11011110', '&': '11100000', 'Z': '11100010', '^': '11100100', 'R': '11100110', 'V': '11101000', 'J': '11101010', 'N': '11101100', 'B': '11101110', 'F': '11110000', 'z': '11110010', '~': '11110100', 'r': '11110110', 'v': '11111000', 'j': '11111010', 'n': '11111100', 'b': '11111110', 'f': '1000000'} >>> d["a"] '0' >>> d["h"] '11111001' >>>