From f93153cdb011771d751ec22c9a29bab5c430d18e Mon Sep 17 00:00:00 2001 From: Medium Fries Date: Thu, 18 Apr 2019 14:01:31 -0700 Subject: [PATCH] final submission for homework 4 --- 370/homework/huffman.py | 144 +++++++++++++++------------------------- 1 file changed, 53 insertions(+), 91 deletions(-) diff --git a/370/homework/huffman.py b/370/homework/huffman.py index e427916..24197ee 100644 --- a/370/homework/huffman.py +++ b/370/homework/huffman.py @@ -1,90 +1,20 @@ import queue -class Node(): +import heapq + +class Node: def __init__(self, c, weight): self.c = c self.weight = weight self.left = None self.right = None + + self.code = '' def __repr__(self): return f'{self.c}|{self.weight}' -class MinHeap(): - def __init__(self): - self.data = [] - - def empty(self): - return self.size() == 0 - - def size(self): - return len(self.data) - - def print(self): - for x in self.data: - print(x.c + str(x.freq)) - - def insert(self, val): - self.data.append(val) - self.__heapifyUp(len(self.data) - 1) - - def extractMin(self): - temp = self.data[0] - self.__swap(0, -1) - self.data.remove(self.data[-1]) - self.__heapifyDown(0) - return temp - - def __swap(self,i,j): - self.data[i], self.data[j] = self.data[j], self.data[i] - - def __heapifyUp(self, idx): - if idx > 0: - parent = (idx - 1) // 2 - if (self.data[parent].freq > self.data[idx].freq): - self.__swap(parent, idx) - self.__heapifyUp(parent) - - def __heapifyDown(self, idx): - data = self.data - left = 2 * idx + 1 - right = 2 * idx + 2 - mini = idx - if (left < len(data) and (data[left].freq < data[mini].freq)): - mini = left - if (right < len(data) and (data[right].freq < data[mini].freq)): - mini = right - if (mini is not idx): - self.__swap(mini, idx) - self.__heapifyDown(mini) - - def printChar(self, c): - # Traverse through the tree until we get to a leaf then back out - tmp = self.data[0] - # keep going until we hit a leaf - while tmp.left is not None or tmp.right is not None: - # 0 if left : 1 if right - 
continue - - -def main(): - pq = MinHeap() - pq.insert(Node(' ', 1)) - pq.insert(Node('d', 4)) - pq.insert(Node('e', 10)) - pq.insert(Node('i', 13)) - pq.insert(Node('s', 2)) - pq.insert(Node('m', 8)) - pq.insert(Node('o', 6)) - - - assert(list(map(lambda x: x.freq, pq.data)) == [1, 2, 6, 13, 4, 10, 8]) - assert(pq.extractMin().freq == 1) - assert(list(map(lambda x: x.freq, pq.data)) == [2, 4, 6, 13, 8, 10]) - assert(pq.extractMin().freq == 2) - assert(list(map(lambda x: x.freq, pq.data)) == [4, 8, 6, 13, 10]) - assert(pq.extractMin().freq == 4) - pq.print() - print("Heap works") + def __lt__(self, other): + return self.weight < other.weight def frequencyMap(string): ret = [] @@ -99,25 +29,50 @@ def frequencyMap(string): if k.c == i: k.weight += 1 - # Sort the charmap based on the frequencies - ret.sort(key=lambda x: x.weight) + # Sort the charmap alphabetically + ret.sort(key=lambda x: x.c) return ret -def buildMinHeap(freqs): - heap = MinHeap() - _queue = queue.Queue() - while _queue.qsize() >= 2: - left = _queue.get() - right = _queue.get() - weight = left.weight + right.weight - root = Node('*', weight) - # Once there is one item in the queue left we can simply return the new thing +def encode(freqs): + # add things to our min heap + heap = [i for i in freqs] + heapq.heapify(heap) + + # now we can merge all the nodes together + while len(heap) > 1: + # pop two items from the queue + left = heapq.heappop(heap) + right = heapq.heappop(heap) + + # setup the new root node + root = Node('*', left.weight + right.weight) + root.left = left + root.right = right + + # re-insert the new subtree into the minheap + heapq.heappush(heap, root) + # return the heap itself so we can do stuff with it return heap +def decode(root, binaryStr): + string = '' + curr = root + for i in binaryStr: + if i == '0': + curr = curr.left + else: + curr = curr.right + # check if we're at a leaf + if curr.left is None and curr.right is None: + string += curr.c + curr = root + print(string) + 
def printEncoding(text, heap): + # prints out the encoding for a given string for i in text: heap.printChar(i) @@ -126,8 +81,15 @@ def printEncoding(text, heap): if __name__ == "__main__": text = input() binary = input() + #print(f'{text}\n{binary}\n===================') + # calculate the frequency of each character frequencies = frequencyMap(text) - print(frequencies) - heap = buildMinHeap(frequencies) - printEncoding(binary, heap) + + # build up our heap to display info from + heap = encode(frequencies)[0] + #print(heap) + + # decode the binary + decode(heap, binary) +