Please explain how the frequen
Please explain how the frequencies of characters weregotten, how they were sorted, and how the codes were gotten for theprogram below.
#include <iostream>
#include <vector>
#include <queue>
#include <string>
using namespace std;
class Huffman_Code
{
struct New_Node
{
char value;
size_t frequencies;
New_Node* left;
New_Node* right;
New_Node(char value, size_t frequencies) : value(value),
frequencies(frequencies),
left(NULL),
right(NULL)
{}
~New_Node()
{
delete left;
delete right;
}
};
struct compare
{
bool operator()(New_Node* l, New_Node* r)
{
return (l->frequencies > r->frequencies);
}
};
New_Node* top;
void print_Code(New_Node* root, string str)
{
if(root == NULL)
return;
if(root->value == ‘$’)
{
print_Code(root->left, str + “0”);
print_Code(root->right, str + “1”);
}
if(root->value != ‘$’)
{
cout << root->value <<” : ” << str <<“\n”;
print_Code(root->left, str + “0”);
print_Code(root->right, str + “1”);
}
}
public:
Huffman_Code() {};
~Huffman_Code()
{
delete top;
}
void Huffman_tree(vector<char>& value,vector<size_t>& frequencies, size_t size)
{
New_Node* left;
New_Node* right;
priority_queue<New_Node*, vector<New_Node*>, compare> minHeap;
for(size_t i = 0; i < size; ++i)
{
minHeap.push(new New_Node(value[i], frequencies[i]));
}
while(minHeap.size() != 1)
{
left = minHeap.top();
minHeap.pop();
right = minHeap.top();
minHeap.pop();
top = new New_Node(‘$’, left->frequencies +right->frequencies);
top->left = left;
top->right = right;
minHeap.push(top);
}
print_Code(minHeap.top(), “”);
}
};
int main()
{
int number, freq;
char ch;
Huffman_Code set1;
vector<char> value;
vector<size_t> frequencies;
cout<<“Length of elements \n”;
cin>>number;
cout<<“Enter the elements \n”;
for (int i=0;i<number;i++)
{
cin>>ch;
value.insert(value.end(), ch);
}
cout<<“Enter the frequencies \n”;
for (int i=0;i<number;i++)
{
cin>>freq;
frequencies.insert(frequencies.end(), freq);
}
size_t size = value.size();
set1.Huffman_tree(value, frequencies, size);
return 0;
}
Answer:
1. The above given code is of Huffman coding algorithm, mostbasicly used for COMPRESSION.
Question 1: How the frequencies of characters were found?
Imagine you have a some text file which have data i.e "ABCD".Now usually these characters are stored in binary representation of their ASCII values. These ASCII values when represented in Binary takes 16bit each, so total size of file=64bit.Mr. Huffman came up with a technique saying that why not we represent A,B,C,D with binary numberless than 16 bit lets say 2bit but each character having distinct values. Ex.A:00,B:01,C:10,D:11So file size would be, 2x4=8bitSo here we saved 56bits.But practically there would be many times A would be there, many times B would be there right.So, what if A is represented 50 times, B is represented 60 times but c is 5 times. So again can we minimise the file size further. Huffman told may be yes, if most repeated characters have minimum bit representation rather than those with least repeated charactersThis technique further minimized the big file sizes.So how these frequencies are obtained is,we select a file which needs to be compressed, We go through the each distinct character of file and note down how many times they are repeated.Then we will find frequencies of characters.
2. What is Min heap?
3. Explanation of HuffmanAlgorithm, which is implemented by the program:
Outputfor the above example:
Code Explanation:
int main(){ int number, freq; char ch; Huffman_Code set1; vector<char> value;//Vector containing values a,b,c,d,e,f, vector is nothing but array vector<size_t> frequencies;//Frequencies of character cout<<"Length of elements \n"; cin>>number;//Number of characters cout<<"Enter the elements \n"; for (int i=0;i<number;i++) { cin>>ch; value.insert(value.end(), ch);//inserting from back of the vector }//taking input cout<<"Enter the frequencies \n"; for (int i=0;i<number;i++) { cin>>freq; frequencies.insert(frequencies.end(), freq); } size_t size = value.size(); set1.Huffman_tree(value, frequencies, size);//calling huffman_tree creation return 0;}
In the main function you are just providing the values for thevector of char, vector of frequencies. You can check comments oncein snippets.
struct New_Node { char value; size_t frequencies; New_Node* left; New_Node* right; New_Node(char value, size_t frequencies) : value(value),//when new node is created the Node creation calls the constructor with left and right pointer initialized as null frequencies(frequencies), //the char value and respective frequency is given, to the constructor to be assigned to respective field left(NULL), right(NULL) {} ~New_Node()//this is destructor after the program execution is over, memory is freed { delete left; delete right; } };//creation of new node of the tree
You create a new node by the above code for the tree.
void Huffman_tree(vector<char>& value, vector<size_t>& frequencies, size_t size) { New_Node* left; New_Node* right; priority_queue<New_Node*, vector<New_Node*>, compare > minHeap; //for creation of minHeap a STL library is used called PRIOROTY QUEUE, now here is //declaration but by default it creates a maxHeap(parent greater than child elements) //But try to understand mean heap is stored in an vector but is implemented as tree and //various operations are performed on tree. So we have to use the above implementation for //a minHeap. Where compare defines its going to be a minHeap. for(size_t i = 0; i < size; ++i) { //Initially all frequencies with their characters are pushed into minHeap as //told in algorithm minHeap.push(new New_Node(value[i], frequencies[i])); } while(minHeap.size() != 1) { left = minHeap.top();//minimum element from minHeap put it in new node left subtree minHeap.pop();//pop it right = minHeap.top();//2nd minimum element from minHeap put it in new node right subtree minHeap.pop(); top = new New_Node('$', left->frequencies + right->frequencies);//create new node, with $ sign top->left = left;//assign 1st min to left top->right = right;//assign 2nd min to right minHeap.push(top);//again push the new node to the minHeap //continue this for all elements of min heap } print_Code(minHeap.top(), "");}
The most important snippet of the code. Go through the commentonce
void print_Code(New_Node* root, string str){ if(root == NULL)//if we reach root node then we need to return return;//code printing part if(root->value == '$')//if we reach dollar sign then we need to go either left or right { print_Code(root->left, str + "0"); print_Code(root->right, str + "1"); } if(root->value != '$')//if its not $ sign then, we have to print the code of character. { cout << root->value <<" : " << str << "\n"; print_Code(root->left, str + "0"); print_Code(root->right, str + "1"); }}
The traversal for code part.
Note: If still there is any doubts remaining , please post themas comment of this question.