csnotes/370/homework/huffman.py
2019-05-05 21:15:06 -07:00

96 lines
2.2 KiB
Python

import queue
import heapq
class Node:
    """One node of a Huffman tree.

    Fields set by the constructor:
      c      -- the character held by this node
      freq   -- the node's frequency (taken from the ``weight`` argument)
      left   -- left child, initially None
      right  -- right child, initially None
      code   -- initialised to ''; nothing in this class fills it in

    Nodes order by frequency via ``<`` so they can be stored in a heapq heap.
    """

    def __init__(self, c, weight):
        self.c = c
        self.freq = weight
        self.left = None
        self.right = None
        self.code = ''

    @property
    def weight(self):
        """Read-only alias for ``freq``, named after the constructor argument."""
        return self.freq

    def __repr__(self):
        return f'{self.c}|{self.freq}'

    def __lt__(self, other):
        # The frequency is stored as ``freq``; comparing on a ``weight``
        # attribute that was never assigned raised AttributeError.
        return self.freq < other.freq
def frequencyMap(string):
    """Count how often each character occurs in ``string``.

    Returns a list holding one Node per distinct character, where the node's
    ``c`` is the character and its ``freq`` is the number of occurrences.
    The list is sorted by character. An empty input yields an empty list.
    """
    # Tally in a dict: one lookup per character instead of rescanning the
    # whole node list for every character of the input.
    counts = {}
    for ch in string:
        counts[ch] = counts.get(ch, 0) + 1
    # Sort the charmap alphabetically
    return [Node(ch, counts[ch]) for ch in sorted(counts)]
def encode(freqs):
    """Build a Huffman tree out of the nodes in ``freqs``.

    The nodes are copied into a min-heap, then the two smallest are
    repeatedly popped and re-inserted under a new '*' parent whose frequency
    is their sum, until at most one entry remains.

    Returns the heap list itself: a one-element list holding the root for
    non-empty input, or an empty list when ``freqs`` is empty.
    """
    # add things to our min heap (a copy, so the caller's list is not reordered)
    heap = list(freqs)
    heapq.heapify(heap)
    # now we can merge all the nodes together
    while len(heap) > 1:
        # pop the two smallest items from the heap
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        # setup the new root node; Node keeps its frequency in ``freq``
        # (it has no ``weight`` attribute to read)
        root = Node('*', left.freq + right.freq)
        root.left = left
        root.right = right
        # re-insert the new subtree into the minheap
        heapq.heappush(heap, root)
    # return the heap itself so we can do stuff with it
    return heap
def decode(root, binaryStr):
    """Decode ``binaryStr`` by walking the tree rooted at ``root``.

    For each character of ``binaryStr``, '0' steps to the left child and
    anything else steps to the right child. Reaching a node with no children
    emits that node's ``c`` and restarts the walk from ``root``. Bits left
    over after the last completed character are dropped.

    The decoded text is printed, and also returned so callers can use it.
    """
    if root is not None and root.left is None and root.right is None:
        # Single-symbol tree: the root is already a leaf, so there is no
        # child to step to. Count each bit as one occurrence of that symbol
        # instead of dereferencing a missing child.
        decoded = root.c * len(binaryStr)
    else:
        pieces = []
        curr = root
        for bit in binaryStr:
            curr = curr.left if bit == '0' else curr.right
            # check if we're at a leaf
            if curr.left is None and curr.right is None:
                pieces.append(curr.c)
                curr = root
        decoded = ''.join(pieces)
    print(decoded)
    return decoded
def printEncoding(text, heap):
    """Emit the encoding of ``text``, one character at a time.

    Each character of ``text`` is handed to ``heap.printChar`` in order, and
    a bare ``print()`` then ends the line.
    """
    # NOTE(review): ``printChar`` is not defined on anything visible in this
    # file -- confirm which object type callers are expected to pass here.
    for symbol in text:
        heap.printChar(symbol)
    # finish the output with a newline
    print()
if __name__ == "__main__":
    # Two lines arrive on stdin: the text that defines the character
    # frequencies, then the bit string to decode.
    text = input()
    binary = input()
    # Count the characters, merge them into a tree, and keep the first
    # entry of the list encode() returns as the tree root.
    tree_root = encode(frequencyMap(text))[0]
    # decode the binary
    decode(tree_root, binary)