"""Huffman-coding homework: a hand-rolled min-heap plus Huffman tree helpers.

Originally added as ``370/homework/huffman.py``.  The companion sample input
``370/homework/input.dat`` holds two lines (no trailing newline):

    i love computer science
    1100101110101100110111111011100011010101101100100111
"""
import queue  # kept: part of the module's original import surface


class Node():
    """A tree node carrying a symbol ``c`` and its ``weight`` (frequency).

    ``left`` / ``right`` are child links; a node with neither is a leaf.
    """

    def __init__(self, c, weight):
        self.c = c
        self.weight = weight
        self.left = None
        self.right = None

    @property
    def freq(self):
        """Alias of ``weight``; some callers in this file use this name."""
        return self.weight

    @freq.setter
    def freq(self, value):
        self.weight = value

    def __repr__(self):
        return f'{self.c}|{self.weight}'


class MinHeap():
    """Array-backed binary min-heap of ``Node`` objects ordered by weight.

    ``data[0]`` is always the lightest node.  When the heap holds a single
    Huffman root (see ``buildMinHeap``), ``printChar`` walks the tree that
    hangs below ``data[0]``.
    """

    def __init__(self):
        self.data = []

    def empty(self):
        """Return True when the heap holds no nodes."""
        return self.size() == 0

    def size(self):
        """Return the number of nodes stored in the heap."""
        return len(self.data)

    def print(self):
        """Print every node, in array order, as symbol followed by weight."""
        for x in self.data:
            print(f'{x.c}{x.weight}')

    def insert(self, val):
        """Append ``val`` and sift it up to restore the heap order."""
        self.data.append(val)
        self.__heapifyUp(len(self.data) - 1)

    def extractMin(self):
        """Remove and return the lightest node.

        Raises:
            IndexError: if the heap is empty.
        """
        if not self.data:
            raise IndexError("extractMin from an empty heap")
        # Move the minimum to the end so it can be popped in O(1), then
        # repair the heap from the root.
        self.__swap(0, -1)
        smallest = self.data.pop()
        self.__heapifyDown(0)
        return smallest

    def __swap(self, i, j):
        self.data[i], self.data[j] = self.data[j], self.data[i]

    def __heapifyUp(self, idx):
        # Bubble the node at idx towards the root while it is lighter than
        # its parent.
        while idx > 0:
            parent = (idx - 1) // 2
            if self.data[parent].weight <= self.data[idx].weight:
                return
            self.__swap(parent, idx)
            idx = parent

    def __heapifyDown(self, idx):
        # Push the node at idx down, always swapping with the lighter child,
        # until neither child is lighter.
        data = self.data
        size = len(data)
        while True:
            left = 2 * idx + 1
            right = left + 1
            mini = idx
            if left < size and data[left].weight < data[mini].weight:
                mini = left
            if right < size and data[right].weight < data[mini].weight:
                mini = right
            if mini == idx:
                return
            self.__swap(mini, idx)
            idx = mini

    @staticmethod
    def __findCode(root, target):
        # Depth-first search for the leaf whose symbol is target.  The path
        # records '0' for every left step and '1' for every right step.
        # Returns None when no leaf carries target.
        stack = [(root, "")]
        while stack:
            node, path = stack.pop()
            if node.left is None and node.right is None:
                if node.c == target:
                    # A tree that is a single leaf has an empty path; give
                    # that lone symbol the one-bit code "0".
                    return path or "0"
                continue
            if node.right is not None:
                stack.append((node.right, path + "1"))
            if node.left is not None:
                stack.append((node.left, path + "0"))
        return None

    def printChar(self, c):
        """Print the bit path (0 = left, 1 = right) from the root to leaf ``c``.

        The tree searched is the one rooted at ``data[0]``.  Nothing is
        printed when the heap is empty or no leaf carries ``c``.  No newline
        is emitted, so consecutive calls concatenate their codes.
        """
        if not self.data:
            return
        code = self.__findCode(self.data[0], c)
        if code is not None:
            print(code, end="")


def main():
    """Self-check of MinHeap insert/extract ordering on a fixed sample."""
    pq = MinHeap()
    pq.insert(Node(' ', 1))
    pq.insert(Node('d', 4))
    pq.insert(Node('e', 10))
    pq.insert(Node('i', 13))
    pq.insert(Node('s', 2))
    pq.insert(Node('m', 8))
    pq.insert(Node('o', 6))

    assert [x.weight for x in pq.data] == [1, 2, 6, 13, 4, 10, 8]
    assert pq.extractMin().weight == 1
    assert [x.weight for x in pq.data] == [2, 4, 6, 13, 8, 10]
    assert pq.extractMin().weight == 2
    assert [x.weight for x in pq.data] == [4, 8, 6, 13, 10]
    assert pq.extractMin().weight == 4
    pq.print()
    print("Heap works")


def frequencyMap(string):
    """Count symbol occurrences in ``string``.

    Returns a list with one ``Node`` per distinct symbol, sorted by ascending
    weight; symbols with equal weight keep their first-seen order.
    """
    ret = []
    by_char = {}  # symbol -> its Node, so each character is one O(1) lookup
    for ch in string:
        node = by_char.get(ch)
        if node is None:
            node = Node(ch, 1)
            by_char[ch] = node
            ret.append(node)
        else:
            node.weight += 1

    # list.sort is stable, which preserves first-seen order among ties.
    ret.sort(key=lambda x: x.weight)
    return ret


def buildMinHeap(freqs):
    """Build a Huffman tree from ``freqs`` and return it inside a MinHeap.

    Args:
        freqs: iterable of leaf ``Node`` objects (e.g. from ``frequencyMap``).

    Returns:
        A ``MinHeap`` whose only element is the tree root, or an empty heap
        when ``freqs`` is empty.  Internal nodes use the symbol ``'*'`` and
        the sum of their children's weights.
    """
    heap = MinHeap()
    for node in freqs:
        heap.insert(node)

    # Repeatedly merge the two lightest subtrees until one root remains.
    while heap.size() >= 2:
        left = heap.extractMin()
        right = heap.extractMin()
        root = Node('*', left.weight + right.weight)
        root.left = left
        root.right = right
        heap.insert(root)
    return heap


def printEncoding(text, heap):
    """Print the code of every character of ``text`` on one line."""
    for i in text:
        heap.printChar(i)

    print()  # newline for the meme


if __name__ == "__main__":
    text = input()
    binary = input()
    # calculate the frequency of each character
    frequencies = frequencyMap(text)
    print(frequencies)
    heap = buildMinHeap(frequencies)
    # NOTE(review): the second input line looks like a bit string, while
    # printEncoding emits codes for symbols found in the tree; characters
    # that are not leaves print nothing. Confirm whether decoding the bits
    # was the intended behaviour here.
    printEncoding(binary, heap)