Python 3.2 (r32:88445, Feb 20 2011, 21:29:02) [MSC v.1500 32 bit (Intel)] on win32
Type "copyright", "credits" or "license()" for more information.
>>> ================================ RESTART ================================
>>> 
text = 'a'* 100000
pattern = 'a'* 100
find_matches_KR 0.17912779417495797
find_matches_KR_safe 0.2306938451674724

pattern = 'a'* 1000
find_matches_KR 0.17534966036186167
find_matches_KR_safe 0.44744932666023185

pattern = 'a'* 10000
find_matches_KR 0.169008630985223
find_matches_KR_safe 1.953182597229536

>>> ================================ RESTART ================================
>>> 
text = 'a'* 100000
pattern = 'a'* 30000
find_matches_KR 0.15880118841919855
find_matches_KR_safe 4.1814001500190665

pattern = 'a'* 40000
find_matches_KR 0.14909995544126442
find_matches_KR_safe 4.722397399669511

pattern = 'a'* 50000
find_matches_KR 0.14969695869167765 
find_matches_KR_safe 4.896815936103611 #maximum of m(n-m) is at m=n/2

pattern = 'a'* 60000
find_matches_KR 0.13291326132231873
find_matches_KR_safe 4.671044783624735

pattern = 'a'* 70000
find_matches_KR 0.12548773656986967
find_matches_KR_safe 4.108046134355064

>>> ================================ RESTART ================================
>>> 
random str of len n= 100000  , random pattern of length m= 10000
find_matches_KR 0.1732013426287419
find_matches_KR_safe 0.18781856353251603
#complexity for random strings is pretty much the same in both cases

>>> find_matches_KR("da", "abracadabra", basis = 2**16, r = 2**16)
[2, 4, 6, 9]
>>> find_matches_KR_safe("da", "abracadabra", basis = 2**16, r = 2**16)
[6]
>>> fingerprint("da",basis = 2**16, r = 2**16)
97
>>> fingerprint("ca",basis = 2**16, r = 2**16)
97
>>> find_matches_KR("Humpty", "Humpty Dumpty", basis = 2**16, r = (2**16)**5)
[0, 7]
>>> find_matches_KR("Humpty", "Humpty Dumpty", basis = 2**16, r = (2**16)**6)
[0]
>>> find_matches_KR("Humpty", "Humpty Dumpty", basis = 2**16, r = (2**16)**5-3)
[0]
>>> 


Python 3.2 (r32:88445, Feb 20 2011, 21:29:02) [MSC v.1500 32 bit (Intel)] on win32
Type "copyright", "credits" or "license()" for more information.
>>> ================================ RESTART ================================
>>> 
>>> char_count("live and let live")
{'a': 1, ' ': 3, 'e': 3, 'd': 1, 'i': 2, 'l': 3, 'n': 1, 't': 1, 'v': 2}
>>> build_huffman_tree(char_count("live and let live"))
[['l', ['i', 'v']], [[['a', 'd'], ['n', 't']], [' ', 'e']]]
>>> generate_code(build_huffman_tree(char_count("live and let live")))
{'a': '1000', ' ': '110', 'e': '111', 'd': '1001', 'i': '010', 'l': '00', 'n': '1010', 't': '1011', 'v': '011'}
>>> d = generate_code(build_huffman_tree(char_count("live and let live")))
>>> compress("let", d)
'001111011'
>>> d["l"]
'00'
>>> d["e"]
'111'
>>> d["t"]
'1011'
>>> dd = build_decoding_dict(d)
>>> dd
{'00': 'l', '010': 'i', '011': 'v', '111': 'e', '110': ' ', '1010': 'n', '1011': 't', '1001': 'd', '1000': 'a'}
>>> decompress('001111011', dd)
'let'
>>> compression_ratio("ab", "ab")
0.14285714285714285
>>> 1/7
0.14285714285714285
>>> compression_ratio("ab", "abcd")
0.2857142857142857
>>> 2/7
0.2857142857142857
>>> compression_ratio("ab", "ab"+"e"*17)
0.2857142857142857
>>> compression_ratio("ab", asci + "xyz"*100)
1.2857142857142858
>>> 9/7
1.2857142857142858
>>> ================================ RESTART ================================
>>> 
>>> compression_ratio(ma, ma)
0.639795918367347
>>> compression_ratio(ma, pi)
Traceback (most recent call last):
  File "<pyshell#20>", line 1, in <module>
    compression_ratio(ma, pi)
  File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 95, in compression_ratio
    len_compress = len(compress(text, d))
  File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 68, in compress
    return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128)
  File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 68, in <genexpr>
    return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128)
KeyError: '"'

>>> compression_ratio(ma, pi+asci) #drop all asci into corpus. May affect distributions
0.7119897959183673
>>> ma
'"La Isla Bonita"\n\n\n [Spoken:]\n \nComo puede ser verdad\n [or Como puede ese olvidar]\n How could it be true? \n\nLast night I dreamt of San Pedro\n Just like I\'d never gone, I knew the song\n A young girl with eyes like the desert\n It all seems like yesterday, not far away\n \n[Chorus:]\n \nTropical the island breeze\n All of nature wild and free\n This is where I long to be\n La isla bonita\n And when the samba played\n The sun would set so high\n Ring through my ears and sting my eyes\n Your Spanish lullaby\n \nThe beautiful island\n \nI fell in love with San Pedro\n Warm wind carried on the sea, he called to me\n Te dijo te amo\n I prayed that the days would last\n They went so fast\n \nHe told you, "I love you"\n \n[chorus]\n \nI want to be where the sun warms the sky\n When it\'s time for siesta you can watch them go by\n Beautiful faces, no cares in this world\n Where a girl loves a boy, and a boy loves a girl\n \nLast night I dreamt of San Pedro\n It all seems like yesterday, not far away\n \n[chorus twice]\n\n La la la la la la la\n Te dijo te amo\n La la la la la la la\n El dijo que te ama\n\n He told you, "I love you"\n \nHe said he loves you'
>>> len(ma)
1120
>>> len(pi)
594
>>> compression_ratio(ma, pi+asci)
0.7119897959183673
>>> compression_ratio(ma, ma)
0.639795918367347
>>> compression_ratio(ma, ad)
Traceback (most recent call last):
  File "<pyshell#27>", line 1, in <module>
    compression_ratio(ma, ad)
  File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 95, in compression_ratio
    len_compress = len(compress(text, d))
  File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 68, in compress
    return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128)
  File "C:\Documents and Settings\eladlibm\Desktop\T11\Huffman.py", line 68, in <genexpr>
    return "".join(encoding_dict[ch] for ch in text if ord(ch)<=128)
KeyError: '['
>>> compression_ratio(ma, ad+asci)
0.6716836734693877
>>>