Python 3.2 (r32:88445, Feb 20 2011, 21:29:02) [MSC v.1500 32 bit (Intel)] on win32 Type "copyright", "credits" or "license()" for more information. >>> ================================ RESTART ================================ >>> text = 'a'* 100000 pattern = 'a'* 100 find_matches_KR 0.17912779417495797 find_matches_KR_safe 0.2306938451674724 pattern = 'a'* 1000 find_matches_KR 0.17534966036186167 find_matches_KR_safe 0.44744932666023185 pattern = 'a'* 10000 find_matches_KR 0.169008630985223 find_matches_KR_safe 1.953182597229536 >>> ================================ RESTART ================================ >>> text = 'a'* 100000 pattern = 'a'* 30000 find_matches_KR 0.15880118841919855 find_matches_KR_safe 4.1814001500190665 pattern = 'a'* 40000 find_matches_KR 0.14909995544126442 find_matches_KR_safe 4.722397399669511 pattern = 'a'* 50000 find_matches_KR 0.14969695869167765 find_matches_KR_safe 4.896815936103611 #maximum of m(n-m) is at m=n/2 pattern = 'a'* 60000 find_matches_KR 0.13291326132231873 find_matches_KR_safe 4.671044783624735 pattern = 'a'* 70000 find_matches_KR 0.12548773656986967 find_matches_KR_safe 4.108046134355064 >>> ================================ RESTART ================================ >>> random str of len n= 100000 , random pattern of length m= 10000 find_matches_KR 0.1732013426287419 find_matches_KR_safe 0.18781856353251603 #complexity for random strings is pretty much the same in both cases >>> find_matches_KR("da", "abracadabra", basis = 2**16, r = 2**16) [2, 4, 6, 9] >>> find_matches_KR_safe("da", "abracadabra", basis = 2**16, r = 2**16) [6] >>> fingerprint("da",basis = 2**16, r = 2**16) 97 >>> fingerprint("ca",basis = 2**16, r = 2**16) 97 >>> find_matches_KR("Humpty", "Humpty Dumpty", basis = 2**16, r = (2**16)**5) [0, 7] >>> find_matches_KR("Humpty", "Humpty Dumpty", basis = 2**16, r = (2**16)**6) [0] >>> find_matches_KR("Humpty", "Humpty Dumpty", basis = 2**16, r = (2**16)**5-3) [0] >>> Python 3.2 (r32:88445, Feb 20 2011, 21:29:02) [MSC v.1500 32 bit (Intel)] on win32 Type "copyright", "credits" or "license()" for more information. >>> ================================ RESTART ================================ >>> >>> char_count("live and let live") {'a': 1, ' ': 3, 'e': 3, 'd': 1, 'i': 2, 'l': 3, 'n': 1, 't': 1, 'v': 2} >>> build_huffman_tree(char_count("live and let live")) [['l', ['i', 'v']], [[['a', 'd'], ['n', 't']], [' ', 'e']]] >>> generate_code(build_huffman_tree(char_count("live and let live"))) {'a': '1000', ' ': '110', 'e': '111', 'd': '1001', 'i': '010', 'l': '00', 'n': '1010', 't': '1011', 'v': '011'} >>> d = generate_code(build_huffman_tree(char_count("live and let live"))) >>> compress("let", d) '001111011' >>> d["l"] '00' >>> d["e"] '111' >>> d["t"] '1011' >>> dd = build_decoding_dict(d) >>> dd {'00': 'l', '010': 'i', '011': 'v', '111': 'e', '110': ' ', '1010': 'n', '1011': 't', '1001': 'd', '1000': 'a'} >>> decompress('001111011', dd) 'let' >>> compression_ratio("ab", "ab") 0.14285714285714285 >>> 1/7 0.14285714285714285 >>> compression_ratio("ab", "abcd") 0.2857142857142857 >>> 2/7 0.2857142857142857 >>> compression_ratio("ab", "ab"+"e"*17) 0.2857142857142857 >>> compression_ratio("ab", asci + "xyz"*100) 1.2857142857142858 >>> 9/7 1.2857142857142858 >>> ================================ RESTART ================================ >>> >>> compression_ratio(ma, ma) 0.639795918367347 >>> compression_ratio(ma, pi) Traceback (most recent call last): File "", line 1, in compression_ratio(ma, pi) File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 95, in compression_ratio len_compress = len(compress(text, d)) File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 68, in compress return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128) File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 68, in return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128) KeyError: '"' >>> compression_ratio(ma, pi+asci) #drop all asci into corpus. May affect distributions 0.7119897959183673 >>> ma '"La Isla Bonita"\n\n\n [Spoken:]\n \nComo puede ser verdad\n [or Como puede ese olvidar]\n How could it be true? \n\nLast night I dreamt of San Pedro\n Just like I\'d never gone, I knew the song\n A young girl with eyes like the desert\n It all seems like yesterday, not far away\n \n[Chorus:]\n \nTropical the island breeze\n All of nature wild and free\n This is where I long to be\n La isla bonita\n And when the samba played\n The sun would set so high\n Ring through my ears and sting my eyes\n Your Spanish lullaby\n \nThe beautiful island\n \nI fell in love with San Pedro\n Warm wind carried on the sea, he called to me\n Te dijo te amo\n I prayed that the days would last\n They went so fast\n \nHe told you, "I love you"\n \n[chorus]\n \nI want to be where the sun warms the sky\n When it\'s time for siesta you can watch them go by\n Beautiful faces, no cares in this world\n Where a girl loves a boy, and a boy loves a girl\n \nLast night I dreamt of San Pedro\n It all seems like yesterday, not far away\n \n[chorus twice]\n\n La la la la la la la\n Te dijo te amo\n La la la la la la la\n El dijo que te ama\n\n He told you, "I love you"\n \nHe said he loves you' >>> len(ma) 1120 >>> len(pi) 594 >>> compression_ratio(ma, pi+asci) 0.7119897959183673 >>> compression_ratio(ma, ma) 0.639795918367347 >>> compression_ratio(ma, ad) Traceback (most recent call last): File "", line 1, in compression_ratio(ma, ad) File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 95, in compression_ratio len_compress = len(compress(text, d)) File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 68, in compress return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128) File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 68, in return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128) KeyError: '[' >>> compression_ratio(ma, ad+asci) 0.6716836734693877 >>>