-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathBZip2HuffmanStageDecoder.java
More file actions
194 lines (156 loc) · 6.74 KB
/
BZip2HuffmanStageDecoder.java
File metadata and controls
194 lines (156 loc) · 6.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
/*
* Copyright (c) 2011 Matthew Francis
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.itadaki.bzip2;
import java.io.IOException;
/**
* A decoder for the BZip2 Huffman coding stage
*/
public class BZip2HuffmanStageDecoder {
/**
* The BZip2BitInputStream from which Huffman codes are read
*/
private final BZip2BitInputStream bitInputStream;
/**
* The Huffman table number to use for each group of 50 symbols
*/
private final byte[] selectors;
/**
* The minimum code length for each Huffman table
*/
private final int[] minimumLengths = new int[BZip2Constants.HUFFMAN_MAXIMUM_TABLES];
/**
* An array of values for each Huffman table that must be subtracted from the numerical value of
* a Huffman code of a given bit length to give its canonical code index
*/
private final int[][] codeBases = new int[BZip2Constants.HUFFMAN_MAXIMUM_TABLES][BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH + 2];
/**
* An array of values for each Huffman table that gives the highest numerical value of a Huffman
* code of a given bit length
*/
private final int[][] codeLimits = new int[BZip2Constants.HUFFMAN_MAXIMUM_TABLES][BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH + 1];
/**
* A mapping for each Huffman table from canonical code index to output symbol
*/
private final int[][] codeSymbols = new int[BZip2Constants.HUFFMAN_MAXIMUM_TABLES][BZip2Constants.HUFFMAN_MAXIMUM_ALPHABET_SIZE];
/**
* The Huffman table for the current group
*/
private int currentTable;
/**
* The index of the current group within the selectors array
*/
private int groupIndex = -1;
/**
* The byte position within the current group. A new group is selected every 50 decoded bytes
*/
private int groupPosition = -1;
/**
* Constructs Huffman decoding tables from lists of Canonical Huffman code lengths
* @param alphabetSize The total number of codes (uniform for each table)
* @param tableCodeLengths The Canonical Huffman code lengths for each table
*/
private void createHuffmanDecodingTables (final int alphabetSize, final byte[][] tableCodeLengths) {
for (int table = 0; table < tableCodeLengths.length; table++) {
final int[] tableBases = this.codeBases[table];
final int[] tableLimits = this.codeLimits[table];
final int[] tableSymbols = this.codeSymbols[table];
final byte[] codeLengths = tableCodeLengths[table];
int minimumLength = BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH;
int maximumLength = 0;
// Find the minimum and maximum code length for the table
for (int i = 0; i < alphabetSize; i++) {
maximumLength = Math.max (codeLengths[i], maximumLength);
minimumLength = Math.min (codeLengths[i], minimumLength);
}
this.minimumLengths[table] = minimumLength;
// Calculate the first output symbol for each code length
for (int i = 0; i < alphabetSize; i++) {
tableBases[codeLengths[i] + 1]++;
}
for (int i = 1; i < BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH + 2; i++) {
tableBases[i] += tableBases[i - 1];
}
// Calculate the first and last Huffman code for each code length (codes at a given
// length are sequential in value)
int code = 0;
for (int i = minimumLength; i <= maximumLength; i++) {
int base = code;
code += tableBases[i + 1] - tableBases[i];
tableBases[i] = base - tableBases[i];
tableLimits[i] = code - 1;
code <<= 1;
}
// Populate the mapping from canonical code index to output symbol
int codeIndex = 0;
for (int bitLength = minimumLength; bitLength <= maximumLength; bitLength++) {
for (int symbol = 0; symbol < alphabetSize; symbol++) {
if (codeLengths[symbol] == bitLength) {
tableSymbols[codeIndex++] = symbol;
}
}
}
}
}
/**
* Decodes and returns the next symbol
* @return The decoded symbol
* @throws IOException if the end of the input stream is reached while decoding
*/
public int nextSymbol() throws IOException {
final BZip2BitInputStream bitInputStream = this.bitInputStream;
// Move to next group selector if required
if (((++this.groupPosition % BZip2Constants.HUFFMAN_GROUP_RUN_LENGTH) == 0)) {
this.groupIndex++;
if (this.groupIndex == this.selectors.length) {
throw new BZip2Exception ("Error decoding BZip2 block");
}
this.currentTable = this.selectors[this.groupIndex] & 0xff;
}
final int currentTable = this.currentTable;
final int[] tableLimits = this.codeLimits[currentTable];
int codeLength = this.minimumLengths[currentTable];
// Starting with the minimum bit length for the table, read additional bits one at a time
// until a complete code is recognised
int codeBits = bitInputStream.readBits (codeLength);
for (; codeLength <= BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH; codeLength++) {
if (codeBits <= tableLimits[codeLength]) {
// Convert the code to a symbol index and return
return this.codeSymbols[currentTable][codeBits - this.codeBases[currentTable][codeLength]];
}
codeBits = (codeBits << 1) | bitInputStream.readBits (1);
}
// A valid code was not recognised
throw new BZip2Exception ("Error decoding BZip2 block");
}
/**
* @param bitInputStream The BZip2BitInputStream from which Huffman codes are read
* @param alphabetSize The total number of codes (uniform for each table)
* @param tableCodeLengths The Canonical Huffman code lengths for each table
* @param selectors The Huffman table number to use for each group of 50 symbols
*/
public BZip2HuffmanStageDecoder (final BZip2BitInputStream bitInputStream, final int alphabetSize, final byte[][] tableCodeLengths, final byte[] selectors) {
this.bitInputStream = bitInputStream;
this.selectors = selectors;
this.currentTable = this.selectors[0];
createHuffmanDecodingTables (alphabetSize, tableCodeLengths);
}
}