Scott's world.

Huffman-Codes

Word count: 1.2kReading time: 7 min
2019/08/02 Share

Huffman Codes

In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

1
c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

1
c[i] code[i]

where c[i] is the i-th character and code[i] is an non-empty string of no more than 63 ‘0’s and ‘1’s.

Output Specification:

For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

1
2
3
4
Yes
Yes
No
No

题意分析

  • 题目大意

    判断输入的前缀码是否为最优前缀码,也判断是否为哈夫曼树

  • 主要思路

    首先通过最小堆存储数据

    然后通过最小堆来构建哈夫曼树

    通过哈夫曼树来计算WPL

    最后判断WPL和输入的前缀码是否为标准

答案

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#include<iostream>
#include <string>
using namespace std;
#define MaxNum 64

struct TreeNode//树的结点
{
int Weight=0;
TreeNode *Left=nullptr;
TreeNode *Right=nullptr;
};

struct HeapNode//堆
{
TreeNode Data[MaxNum];
int Size=0;
};

HeapNode *CreateHeap(int N)//创建一个新的小根堆
{
HeapNode *H=new(HeapNode);
H->Data[0].Weight=-1;
return H;
}

TreeNode *DeleteMin(HeapNode *H)//从堆中删除一个结点
{
int Parent=0,Child=0;
TreeNode temp;
TreeNode *MinItem=new(TreeNode);
*MinItem=H->Data[1];

temp=(H->Data[(H->Size)--]);

for (Parent = 1; Parent*2 <= H->Size ; Parent=Child)//寻找删除结点前堆中最后一个结点在新堆中的插入位置
{
Child=Parent*2;
if ((Child!=H->Size)&&((H->Data[Child].Weight)>(H->Data[Child+1].Weight)))
{
Child++;
}
if ((temp.Weight)<=(H->Data[Child].Weight))
{
break;
}else
{
H->Data[Parent]=H->Data[Child];
}

}
H->Data[Parent]=temp;
return MinItem;

}

void Insert(HeapNode *H,TreeNode *item)//插入新结点到堆中
{
int i=0;
i=++(H->Size);
for (;H->Data[i/2].Weight>item->Weight; i/=2)
{
H->Data[i]=H->Data[i/2];

}
H->Data[i]=*item;

}

HeapNode *ReadData(int N,HeapNode *H,int A[])//读取各个节点的权值输入数据
{
char s='\0';
int value=0;
for (int i=0; i<N; ++i)
{
cin>>s;
cin>>value;
A[i]=value;
TreeNode *T=new(TreeNode);
T->Weight=value;
Insert(H, T);
}
return H;
}

TreeNode *Huffman(HeapNode *H)//构建Huffman树
{
TreeNode *T=nullptr;
int num=H->Size;
for (int i=0; i<num-1; ++i)
{
T=new(TreeNode);
T->Left=DeleteMin(H);

T->Right=DeleteMin(H);

T->Weight=T->Left->Weight+T->Right->Weight;
Insert(H, T);

}
T=DeleteMin(H);
return T;
}

int WPL(TreeNode *T,int Depth)//计算Huffman树的编码长度
{
if ((T->Left==nullptr)&&(T->Right==nullptr))
{
return Depth*(T->Weight);
}else
{
return (WPL(T->Left,Depth+1)+WPL(T->Right,Depth+1));
}
}
struct JNode
{
int Flag=0;
JNode *Left=nullptr;
JNode *Right=nullptr;

};
bool Judge(string S,JNode *J)//判断该次编码能否符合前缀编码的要求
{
int i=0;
for (; i<S.length(); ++i)
{
if (S[i]=='0')
{
if (J->Left==nullptr)
{
JNode *J_1=new(JNode);
J->Left=J_1;
}else
{
if (J->Left->Flag==1)
{
return false;
}
}
J=J->Left;
}else
{
if (J->Right==nullptr)
{
JNode *J_1=new(JNode);
J->Right=J_1;
}else
{
if (J->Right->Flag==1)
{
return false;
}
}
J=J->Right;
}
}
J->Flag=1;
if (J->Left==nullptr&&J->Right==nullptr)
{
return true;
}else
{
return false;
}
}

int main(int argc, char const *argv[])
{
int N=0,n=0;
cin>>N;
HeapNode *H=CreateHeap(N);
int Value[MaxNum]={};
H=ReadData(N,H,Value);

TreeNode *T=Huffman(H);

int CodeLen=WPL(T,0);

cin>>n;
string temp="\0";
char c='\0';
bool result=false;


for (int i=0; i<n; ++i)
{
int count=0,flag=0;
JNode *J=new(JNode);
for (int k=0; k<N; ++k)
{
cin>>c>>temp;
count+=temp.length()*Value[k];
if (!flag)
{
result=Judge(temp,J);
if (!result)
{
flag=1;
}
}


}
delete J;
if (result&&(count==CodeLen))//前缀编码且编码长度之和与Huffman编码相同
{
cout<<"Yes"<<endl;
}else{
cout<<"No"<<endl;
}
}

return 0;
}
CATALOG
  1. 1. Huffman Codes
    1. 1.0.1. Input Specification:
    2. 1.0.2. Output Specification:
    3. 1.0.3. Sample Input:
    4. 1.0.4. Sample Output:
  2. 1.1. 题意分析
  3. 1.2. 答案