proper frequency map
This commit is contained in:
parent
8d464608aa
commit
04021086ee
133
370/homework/huffman.py
Normal file
133
370/homework/huffman.py
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
import queue
|
||||||
|
class Node:
    """One node of the tree built in this file: a symbol plus its weight.

    Attributes:
        c: the symbol passed to the constructor.
        weight: the weight passed to the constructor.
        left: left child; ``None`` until a caller assigns one.
        right: right child; ``None`` until a caller assigns one.
    """

    def __init__(self, c, weight):
        self.c, self.weight = c, weight
        # A freshly created node has no children.
        self.left = self.right = None

    def __repr__(self):
        # Rendered as "<symbol>|<weight>".
        return '{}|{}'.format(self.c, self.weight)
|
||||||
|
|
||||||
|
class MinHeap():
    """Array-backed binary min-heap ordered by each item's ``weight``.

    Stored items must expose a ``weight`` attribute. ``print`` additionally
    reads ``c``; ``printChar`` reads ``c``, ``left`` and ``right``.
    """

    def __init__(self):
        # Implicit binary tree: the children of index i sit at 2i+1 and 2i+2.
        self.data = []

    def empty(self):
        """Return True when the heap holds no items."""
        return self.size() == 0

    def size(self):
        """Return the number of stored items."""
        return len(self.data)

    def print(self):
        """Print every stored item as its symbol followed by its weight."""
        for x in self.data:
            print(x.c + str(x.weight))

    def insert(self, val):
        """Add ``val`` and restore the heap order."""
        self.data.append(val)
        self.__heapifyUp(len(self.data) - 1)

    def extractMin(self):
        """Remove and return the item with the smallest weight.

        Raises:
            IndexError: if the heap is empty.
        """
        temp = self.data[0]
        self.__swap(0, -1)
        # pop() drops exactly the last slot; remove() would search by
        # equality and could delete an earlier, equal element instead.
        self.data.pop()
        self.__heapifyDown(0)
        return temp

    def __swap(self, i, j):
        self.data[i], self.data[j] = self.data[j], self.data[i]

    def __heapifyUp(self, idx):
        # Bubble the item at idx towards the root while it is lighter than
        # its parent.
        while idx > 0:
            parent = (idx - 1) // 2
            if self.data[parent].weight <= self.data[idx].weight:
                break
            self.__swap(parent, idx)
            idx = parent

    def __heapifyDown(self, idx):
        # Sink the item at idx until neither child is lighter than it.
        data = self.data
        while True:
            left = 2 * idx + 1
            right = 2 * idx + 2
            mini = idx
            if left < len(data) and data[left].weight < data[mini].weight:
                mini = left
            if right < len(data) and data[right].weight < data[mini].weight:
                mini = right
            # Indices are compared by value; identity checks on ints are
            # only accidentally correct.
            if mini == idx:
                return
            self.__swap(mini, idx)
            idx = mini

    def printChar(self, c):
        """Print the branch bits leading from the root to the leaf holding ``c``.

        The item at ``self.data[0]`` is treated as the root of a tree linked
        through ``left``/``right``. Each step to a left child contributes
        ``0`` and each step to a right child contributes ``1``. Nothing is
        printed when the root is itself a leaf or when no leaf holds ``c``.
        No trailing newline is written.

        NOTE(review): the previous body looped forever on any root with
        children; this traversal follows the in-code comments (0 = left,
        1 = right) -- confirm it matches the intended output format.

        Raises:
            IndexError: if the heap is empty.
        """
        root = self.data[0]
        # Depth-first walk that carries the bits taken so far with each node.
        pending = [(root, '')]
        while pending:
            node, bits = pending.pop()
            if node.left is None and node.right is None:
                if node.c == c:
                    print(bits, end='')
                    return
                continue
            # Push right first so the left subtree is explored first.
            if node.right is not None:
                pending.append((node.right, bits + '1'))
            if node.left is not None:
                pending.append((node.left, bits + '0'))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Self-check for MinHeap: insert seven nodes, extract three, verify order.

    The expected lists are the heap's backing array after each step.
    Prints the remaining items and ``Heap works`` when every check passes.

    Raises:
        AssertionError: if the heap's layout or an extracted weight differs
            from the expected value.
    """
    pq = MinHeap()
    pq.insert(Node(' ', 1))
    pq.insert(Node('d', 4))
    pq.insert(Node('e', 10))
    pq.insert(Node('i', 13))
    pq.insert(Node('s', 2))
    pq.insert(Node('m', 8))
    pq.insert(Node('o', 6))

    # Node stores its ordering key as ``weight``; there is no ``freq``.
    assert [x.weight for x in pq.data] == [1, 2, 6, 13, 4, 10, 8]
    assert pq.extractMin().weight == 1
    assert [x.weight for x in pq.data] == [2, 4, 6, 13, 8, 10]
    assert pq.extractMin().weight == 2
    assert [x.weight for x in pq.data] == [4, 8, 6, 13, 10]
    assert pq.extractMin().weight == 4
    pq.print()
    print("Heap works")
|
||||||
|
|
||||||
|
def frequencyMap(string):
    """Count how often each symbol occurs in ``string``.

    Args:
        string: iterable of hashable symbols (a ``str`` in this file).

    Returns:
        A list with one ``Node`` per distinct symbol whose ``weight`` is the
        number of occurrences, sorted by ascending weight. Symbols with equal
        weight keep the order in which they first appeared, because
        ``list.sort`` is stable.
    """
    ret = []
    # Symbol -> its node, so each character costs one dict lookup instead of
    # a rescan of every node collected so far.
    seen = {}
    for symbol in string:
        node = seen.get(symbol)
        if node is None:
            node = Node(symbol, 1)
            seen[symbol] = node
            ret.append(node)
        else:
            node.weight += 1

    # Sort the nodes by their counts, lightest first.
    ret.sort(key=lambda x: x.weight)
    return ret
|
||||||
|
|
||||||
|
|
||||||
|
def buildMinHeap(freqs):
    """Merge the nodes in ``freqs`` into one tree and return the heap holding it.

    All nodes are placed in a ``MinHeap``. While at least two remain, the two
    lightest are removed and joined under a new ``'*'`` node whose weight is
    their sum; the first one removed becomes the left child and the second
    the right child. The new node goes back into the heap.

    Args:
        freqs: iterable of ``Node`` objects, e.g. the result of
            ``frequencyMap``.

    Returns:
        The ``MinHeap``. It contains a single root node when ``freqs`` was
        non-empty, and is empty otherwise.
    """
    heap = MinHeap()
    for node in freqs:
        heap.insert(node)

    while heap.size() >= 2:
        left = heap.extractMin()
        right = heap.extractMin()
        root = Node('*', left.weight + right.weight)
        root.left = left
        root.right = right
        # The merged subtree competes with the remaining nodes by its
        # combined weight.
        heap.insert(root)

    # At most one item is left: the root of the finished tree.
    return heap
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def printEncoding(text, heap):
    """Hand every element of ``text`` to ``heap.printChar``, then end the line.

    Args:
        text: iterable whose elements are passed to ``printChar`` one by one,
            in order.
        heap: object providing a ``printChar(symbol)`` method.
    """
    for symbol in text:
        heap.printChar(symbol)

    # Terminate the output with a single newline.
    print()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Two lines are read from standard input, in this order.
    text, binary = input(), input()

    # Per-symbol counts for the first line, echoed before any other output.
    frequencies = frequencyMap(text)
    print(frequencies)

    # The second line is handed to printEncoding together with the heap
    # built from those counts.
    heap = buildMinHeap(frequencies)
    printEncoding(binary, heap)
|
2
370/homework/input.dat
Normal file
2
370/homework/input.dat
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
i love computer science
|
||||||
|
1100101110101100110111111011100011010101101100100111
|
Loading…
Reference in New Issue
Block a user