Python 3.2.3 (default, Apr 11 2012, 07:15:24) [MSC v.1500 32 bit (Intel)] on win32 Type "copyright", "credits" or "license()" for more information. >>> ================================ RESTART ================================ >>> >>> find_matches_KR("da", "abracadabra", b=2**16, r=2**16) Traceback (most recent call last): File "", line 1, in find_matches_KR("da", "abracadabra", b=2**16, r=2**16) TypeError: find_matches_KR() got an unexpected keyword argument 'b' >>> find_matches_KR("da", "abracadabra", basis=2**16, r=2**16) [2, 4, 6, 9] # three "matches" are false positive >>> find_matches_KR_safe("da", "abracadabra", basis=2**16, r=2**16) [6] #no false positives >>> find_matches_KR("bra", "abracada1ra", basis=2**16, r=2**32) [1, 8] #the "b" in "bra" can be replaced by any char >>> find_matches_KR("Humpty", "Humpty Dumpty", basis=2**16, r=2**(16*5)) [0, 7] >>> find_matches_KR("Humpty", "Humpty Dumpty", basis=2**16, r=2**(16*6)) [0] >>> ord("r") 114 >>> ================================ RESTART ================================ >>> fingerprint_sum("abc") 294 >>> 97+98+99 294 >>> fingerprint_sum("cba") 294 >>> text_fingerprint_sum("TGCAGATGACAG") Traceback (most recent call last): File "", line 1, in text_fingerprint_sum("TGCAGATGACAG") TypeError: text_fingerprint_sum() takes at least 2 arguments (1 given) >>> text_fingerprint_sum("TGCAGATGACAG", 3) #fingerprint is a simple sum [222, 203, 203, 201, 220, 220, 220, 203, 197, 203] #many false positives >>> text_fingerprint("TGCAGATGACAG", 3) [4653375, 4391190, 4260112, 4653316, 4260137, 5505290, 4653373, 4260120, 4391172, 4260112] >>> #chance for false positives is very low Python 3.2.3 (default, Apr 11 2012, 07:15:24) [MSC v.1500 32 bit (Intel)] on win32 Type "copyright", "credits" or "license()" for more information. >>> ================================ RESTART ================================ >>> >>> char_count("live and let live") {'a': 1, ' ': 3, 'e': 3, 'd': 1, 'i': 2, 'l': 3, 'n': 1, 't': 1, 'v': 2} >>> build_huffman_list(char_count("live and let live")) [['l', ['i', 'v']], [[['a', 'd'], ['n', 't']], [' ', 'e']]] >>> build_huffman_tree(char_count("live and let live")) livadnt e >>> formatBFS(build_huffman_tree(char_count("live and let live"))) livadnt e liv adnt e l iv adnt e i v ad nt e a d n t >>> >>> >>> >>> generate_code(build_huffman_list(char_count("live and let live"))) {'a': '1000', ' ': '110', 'e': '111', 'd': '1001', 'i': '010', 'l': '00', 'n': '1010', 't': '1011', 'v': '011'} >>> >>> d=generate_code(build_huffman_list(char_count("live and let live"))) >>> d {'a': '1000', ' ': '110', 'e': '111', 'd': '1001', 'i': '010', 'l': '00', 'n': '1010', 't': '1011', 'v': '011'} >>> e=build_decoding_dict(d) >>> e {'00': 'l', '010': 'i', '011': 'v', '111': 'e', '110': ' ', '1010': 'n', '1011': 't', '1001': 'd', '1000': 'a'} >>> compress("tell",d) '10111110000' >>> len(_) 11 #much less then 4*7=28 bits in unicode >>> decompress(10111110000, e) Traceback (most recent call last): File "", line 1, in decompress(10111110000, e) File "F:\TAU\intro1001\recitations\2013a\T10\huffman.py", line 150, in decompress for bit in bits: TypeError: 'int' object is not iterable >>> decompress("10111110000", e) 'tell' >>> asc = gen_full_asci() >>> asc '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f' >>> compress("tell",d+asc) Traceback (most recent call last): File "", line 1, in compress("tell",d+asc) TypeError: unsupported operand type(s) for +: 'dict' and 'str' >>> dd=generate_code(build_huffman_list(char_count("live and let live"+asc))) >>> dd {'\x00': '11110110', '\x02': '0110010', '\x04': '11110111', '\x06': '0110011', '\x08': '11111000', '\n': '0110100', '\x0c': '11111001', '\x0e': '0110101', '\x10': '11111010', '\x12': '0110110', '\x14': '11111011', '\x16': '0110111', '\x18': '11111100', '\x1a': '0111000', '\x1c': '11111101', '\x1e': '0111001', ' ': '111001', '"': '0111010', '$': '11111110', '&': '0111011', '(': '11111111', '*': '0111100', ',': '0000000', '.': '0111101', '0': '0000001', '2': '0111110', '4': '0000010', '6': '0111111', '8': '0000011', ':': '1000000', '<': '0000100', '>': '1000001', '@': '0000101', 'B': '1000010', 'D': '0000110', 'F': '1000011', 'H': '0000111', 'J': '1000100', 'L': '0001000', 'N': '1000101', 'P': '0001001', 'R': '1000110', 'T': '0001010', 'V': '1000111', 'X': '0001011', 'Z': '1001000', '\\': '0001100', '^': '1001001', '`': '0001101', 'b': '1001010', 'd': '1110001', 'f': '1001011', 'h': '0001110', 'j': '1001100', 'l': '111010', 'n': '1111001', 'p': '0001111', 'r': '1001101', 't': '1111000', 'v': '110110', 'x': '0010000', 'z': '1001110', '|': '0010001', '~': '1001111', '\x01': '1010000', '\x03': '0010010', '\x05': '1010001', '\x07': '0010011', '\t': '1010010', '\x0b': '0010100', '\r': '1010011', '\x0f': '0010101', '\x11': '1010100', '\x13': '0010110', '\x15': '1010101', '\x17': '0010111', '\x19': '1010110', '\x1b': '0011000', '\x1d': '1010111', '\x1f': '0011001', '!': '1011000', '#': '0011010', '%': '1011001', "'": '0011011', ')': '1011010', '+': '0011100', '-': '1011011', '/': '0011101', '1': '1011100', '3': '0011110', '5': '1011101', '7': '0011111', '9': '1011110', ';': '0100000', '=': '1011111', '?': '0100001', 'A': '1100000', 'C': '0100010', 'E': '1100001', 'G': '0100011', 'I': '1100010', 'K': '0100100', 'M': '1100011', 'O': '0100101', 'Q': '1100100', 'S': '0100110', 'U': '1100101', 'W': '0100111', 'Y': '1100110', '[': '0101000', ']': '1100111', '_': '0101001', 'a': '1111010', 'c': '0101010', 'e': '111011', 'g': '0101011', 'i': '110111', 'k': '0101100', 'm': '1101000', 'o': '0101101', 'q': '1101001', 's': '0101110', 'u': '1101010', 'w': '0101111', 'y': '1101011', '{': '0110000', '}': '1110000', '\x7f': '0110001'} >>> compress("tell",d) '10111110000' >>> compress("tell",dd) '1111000111011111010111010' >>> len(_) 25 #addition of ascii chars decreased compression efficiency >>> dd["l"] '111010' #a shorter, 6-bit code >>> dd["w"] '0101111' >>> [code for code in dd if len(dd[code])>7] ['\x00', '\x04', '\x08', '\x0c', '\x10', '\x14', '\x18', '\x1c', '$', '('] >>> compress("((((((",dd) '111111111111111111111111111111111111111111111111' >>> len(_) 48 >>> 48/42 1.1428571428571428 #compression ratio = 8/7 >1 >>> ma = open("madonna.txt").read() >>> ma '"La Isla Bonita"\n\n\n [Spoken:]\n \nComo puede ser verdad\n [or Como puede ese olvidar]\n How could it be true? \n\nLast night I dreamt of San Pedro\n Just like I\'d never gone, I knew the song\n A young girl with eyes like the desert\n It all seems like yesterday, not far away\n \n[Chorus:]\n \nTropical the island breeze\n All of nature wild and free\n This is where I long to be\n La isla bonita\n And when the samba played\n The sun would set so high\n Ring through my ears and sting my eyes\n Your Spanish lullaby\n \nThe beautiful island\n \nI fell in love with San Pedro\n Warm wind carried on the sea, he called to me\n Te dijo te amo\n I prayed that the days would last\n They went so fast\n \nHe told you, "I love you"\n \n[chorus]\n \nI want to be where the sun warms the sky\n When it\'s time for siesta you can watch them go by\n Beautiful faces, no cares in this world\n Where a girl loves a boy, and a boy loves a girl\n \nLast night I dreamt of San Pedro\n It all seems like yesterday, not far away\n \n[chorus twice]\n\n La la la la la la la\n Te dijo te amo\n La la la la la la la\n El dijo que te ama\n\n He told you, "I love you"\n \nHe said he loves you' >>> ad = open("adelle.txt").read() Traceback (most recent call last): File "", line 1, in ad = open("adelle.txt").read() IOError: [Errno 2] No such file or directory: 'adelle.txt' >>> ad = open("adele.txt").read() >>> ad '"Someone Like You"\n\n\n I heard that you\'re settled down\n That you found a girl and you\'re married now.\n I heard that your dreams came true.\n Guess she gave you things I didn\'t give to you.\n \nOld friend, why are you so shy?\n Ain\'t like you to hold back or hide from the light.\n \nI hate to turn up out of the blue uninvited\n But I couldn\'t stay away, I couldn\'t fight it.\n I had hoped you\'d see my face and that you\'d be reminded\n That for me it isn\'t over.\n \nNever mind, I\'ll find someone like you\n I wish nothing but the best for you too\n Don\'t forget me, I beg\n I remember you said,\n "Sometimes it lasts in love but sometimes it hurts instead,\n Sometimes it lasts in love but sometimes it hurts instead, "\n Yeah\n \nYou know how the time flies\n Only yesterday was the time of our lives\n We were born and raised\n In a summer haze\n Bound by the surprise of our glory days\n \nI hate to turn up out of the blue uninvited\n But I couldn\'t stay away, I couldn\'t fight it.\n I\'d hoped you\'d see my face and that you\'d be reminded\n That for me it isn\'t over.\n \nNever mind, I\'ll find someone like you\n I wish nothing but the best for you too\n Don\'t forget me, I beg\n I remember you said,\n "Sometimes it lasts in love but sometimes it hurts instead."\n Yeah\n \nNothing compares\n No worries or cares\n Regrets and mistakes\n They are memories made.\n Who would have known how bittersweet this would taste?\n \nNever mind, I\'ll find someone like you\n I wish nothing but the best for you\n Don\'t forget me, I beg\n I remember you said,\n "Sometimes it lasts in love but sometimes it hurts instead."\n \nNever mind, I\'ll find someone like you\n I wish nothing but the best for you too\n Don\'t forget me, I beg\n I remember you said,\n "Sometimes it lasts in love but sometimes it hurts instead,\n Sometimes it lasts in love but sometimes it hurts instead."\n Yeah ' >>> pi=open("piaf.txt").read() >>> pi "Non, rien de rien, non je ne regrette de rien,\nNi le bien qu'on m'a fait, ni le mal, tout ca m'est bien egal\nNon, rien de rien, non je ne regrette de rien\nC'est paye, balaye, oublie, je me fous de passe\n\nAvec mes souvenirs, j'ai allume le feu,\nMes chagrins, mes plaisirs, je n'ai plus besoin d'eux\nBalayes mes amours avec leurs tremolos,\nBalayes pour toujours, je repars a zero\n\nNon, rien de rien, non je ne regrette de rien,\nNi le bien qu'on m'a fait, ni le mal, tout ca m'est bien egal\nNon, rien de rien, non je ne regrette de rien\nCar ma vie, car mes joies, aujourdhui, ca commence avec toi!" >>> compression_ratio(ma,ma) 0.639795918367347 >>> compression_ratio(pi,pi) 0.6058201058201058 >>> compression_ratio(ma,pi) Traceback (most recent call last): File "", line 1, in compression_ratio(ma,pi) File "F:\TAU\intro1001\recitations\2013a\T10\huffman.py", line 199, in compression_ratio len_compress = len(compress(text, d)) File "F:\TAU\intro1001\recitations\2013a\T10\huffman.py", line 140, in compress return ''.join(encoding_dict[ch] for ch in text if ord(ch)<=128) File "F:\TAU\intro1001\recitations\2013a\T10\huffman.py", line 140, in return ''.join(encoding_dict[ch] for ch in text if ord(ch)<=128) KeyError: '"' >>> compression_ratio(pi,pi) 0.6058201058201058 >>> compression_ratio(ma,pi+asc) 0.7119897959183673 >>> compression_ratio(ma,ad+asc) 0.6716836734693877 >>> compression_ratio(ma,ad) Traceback (most recent call last): File "", line 1, in compression_ratio(ma,ad) File "F:\TAU\intro1001\recitations\2013a\T10\huffman.py", line 199, in compression_ratio len_compress = len(compress(text, d)) File "F:\TAU\intro1001\recitations\2013a\T10\huffman.py", line 140, in compress return ''.join(encoding_dict[ch] for ch in text if ord(ch)<=128) File "F:\TAU\intro1001\recitations\2013a\T10\huffman.py", line 140, in return ''.join(encoding_dict[ch] for ch in text if ord(ch)<=128) KeyError: '[' >>> compression_ratio(ma,ma+asc) 0.6590561224489796 >>>