// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "puffin/src/huffman_table.h"
#include <algorithm>
#include <vector>
#include "puffin/src/logging.h"
using std::string;
using std::vector;
namespace puffin {
// Permutations of input Huffman code lengths (used only to read code lengths
// necessary for reading Huffman table.)
const uint8_t kPermutations[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
11, 4, 12, 3, 13, 2, 14, 1, 15};
// The bases of each alphabet which is added to the integer value of extra
// bits that comes after the Huffman code in the input to create the given
// length value. The last element is a guard.
const uint16_t kLengthBases[30] = {
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF};
// Number of extra bits that comes after the associating Huffman code.
const uint8_t kLengthExtraBits[29] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
4, 4, 4, 4, 5, 5, 5, 5, 0};
// Same as |kLengthBases| but for the distances instead of lengths. The last
// element is a guard.
const uint16_t kDistanceBases[31] = {
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33,
49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF};
// Same as |kLengthExtraBits| but for distances instead of lengths.
const uint8_t kDistanceExtraBits[30] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
// 288 is the maximum number of needed huffman codes for an alphabet. Fixed
// huffman table needs 288 and dynamic huffman table needs maximum 286.
// 286 = 256 (coding a byte) +
// 1 (coding the end of block symbole) +
// 29 (coding the lengths)
HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {}
bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) {
// Temporary buffers used in |InitHuffmanCodes|.
uint16_t len_count_[kMaxHuffmanBits + 1] = {0};
uint16_t next_code_[kMaxHuffmanBits + 1] = {0};
// 1. Count the number of codes for each length;
for (auto len : lens) {
len_count_[len]++;
}
for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) {
if (len_count_[*max_bits] != 0) {
break;
}
}
// Check for oversubscribed code lengths. (A code with length 'L' cannot have
// more than 2^L items.
for (size_t idx = 1; idx <= *max_bits; idx++) {
if (len_count_[idx] > (1 << idx)) {
LOG(ERROR) << "Oversubscribed code lengths error!";
return false;
}
}
// 2. Compute the coding of the first element for each length.
uint16_t code = 0;
len_count_[0] = 0;
for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) {
code = (code + len_count_[bits - 1]) << 1;
next_code_[bits] = code;
}
codeindexpairs_.clear();
// 3. Calculate all the code values.
for (size_t idx = 0; idx < lens.size(); idx++) {
auto len = lens[idx];
if (len == 0) {
continue;
}
// Reverse the code
CodeIndexPair cip;
cip.code = 0;
auto tmp_code = next_code_[len];
for (size_t r = 0; r < len; r++) {
cip.code <<= 1;
cip.code |= tmp_code & 1U;
tmp_code >>= 1;
}
cip.index = idx;
codeindexpairs_.push_back(cip);
next_code_[len]++;
}
return true;
}
bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens,
vector<uint16_t>* hcodes,
size_t* max_bits) {
TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
// Sort descending based on the bit-length of the code.
std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
[&lens](const CodeIndexPair& a, const CodeIndexPair& b) {
return lens[a.index] > lens[b.index];
});
// Only zero out the part of hcodes which is valuable.
memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t));
for (const auto& cip : codeindexpairs_) {
// The MSB bit of the code in hcodes is set if it is a valid code and its
// code exists in the input Huffman table.
(*hcodes)[cip.code] = cip.index | 0x8000;
auto fill_bits = *max_bits - lens[cip.index];
for (auto idx = 1; idx < (1 << fill_bits); idx++) {
auto location = (idx << lens[cip.index]) | cip.code;
if (!((*hcodes)[location] & 0x8000)) {
(*hcodes)[location] = cip.index | 0x8000;
}
}
}
return true;
}
bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens,
vector<uint16_t>* rcodes,
size_t* max_bits) {
TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
// Sort ascending based on the index.
std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
[](const CodeIndexPair& a, const CodeIndexPair& b) -> bool {
return a.index < b.index;
});
size_t index = 0;
for (size_t idx = 0; idx < rcodes->size(); idx++) {
if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) {
(*rcodes)[idx] = codeindexpairs_[index].code;
index++;
} else {
(*rcodes)[idx] = 0;
}
}
return true;
}
bool HuffmanTable::BuildFixedHuffmanTable() {
if (!initialized_) {
// For all the vectors used in this class, we set the size in the
// constructor and we do not change the size later. This is for optimization
// purposes. The total size of data in this class is approximately
// 2KB. Because it is a constructor return values cannot be checked.
lit_len_lens_.resize(288);
lit_len_rcodes_.resize(288);
lit_len_hcodes_.resize(1 << 9);
distance_lens_.resize(30);
distance_rcodes_.resize(30);
distance_hcodes_.resize(1 << 5);
size_t i = 0;
while (i < 144) {
lit_len_lens_[i++] = 8;
}
while (i < 256) {
lit_len_lens_[i++] = 9;
}
while (i < 280) {
lit_len_lens_[i++] = 7;
}
while (i < 288) {
lit_len_lens_[i++] = 8;
}
i = 0;
while (i < 30) {
distance_lens_[i++] = 5;
}
TEST_AND_RETURN_FALSE(
BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
&distance_max_bits_));
TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
distance_lens_, &distance_rcodes_, &distance_max_bits_));
initialized_ = true;
}
return true;
}
bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br,
uint8_t* buffer,
size_t* length) {
// Initilize only once and reuse.
if (!initialized_) {
// Only resizing the arrays needed.
code_lens_.resize(19);
code_hcodes_.resize(1 << 7);
lit_len_lens_.resize(286);
lit_len_hcodes_.resize(1 << 15);
distance_lens_.resize(30);
distance_hcodes_.resize(1 << 15);
// 286: Maximum number of literal/lengths symbols.
// 30: Maximum number of distance symbols.
// The reason we reserve this to the sum of both maximum sizes is that we
// need to calculate both huffman codes contiguously. See b/72815313.
tmp_lens_.resize(286 + 30);
initialized_ = true;
}
// Read the header. Reads the first portion of the Huffman data from input and
// writes it into the puff |buffer|. The first portion includes the size
// (|num_lit_len|) of the literals/lengths Huffman code length array
// (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman
// code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of
// Huffman code length array (dynamic_code_lens_) for reading
// |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by
// reading |dynamic_code_lens_|.
TEST_AND_RETURN_FALSE(*length >= 3);
size_t index = 0;
TEST_AND_RETURN_FALSE(br->CacheBits(14));
buffer[index++] = br->ReadBits(5); // HLIST
auto num_lit_len = br->ReadBits(5) + 257;
br->DropBits(5);
buffer[index++] = br->ReadBits(5); // HDIST
auto num_distance = br->ReadBits(5) + 1;
br->DropBits(5);
buffer[index++] = br->ReadBits(4); // HCLEN
auto num_codes = br->ReadBits(4) + 4;
br->DropBits(4);
TEST_AND_RETURN_FALSE(
CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
bool checked = false;
size_t idx = 0;
TEST_AND_RETURN_FALSE(*length - index >= (num_codes + 1) / 2);
// Two codes per byte
for (; idx < num_codes; idx++) {
TEST_AND_RETURN_FALSE(br->CacheBits(3));
code_lens_[kPermutations[idx]] = br->ReadBits(3);
if (checked) {
buffer[index++] |= br->ReadBits(3);
} else {
buffer[index] = br->ReadBits(3) << 4;
}
checked = !checked;
br->DropBits(3);
}
// Pad the last byte if odd number of codes.
if (checked) {
index++;
}
for (; idx < 19; idx++) {
code_lens_[kPermutations[idx]] = 0;
}
TEST_AND_RETURN_FALSE(
BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_));
// Build literals/lengths and distance Huffman code length arrays.
auto bytes_available = (*length - index);
tmp_lens_.clear();
TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths(
br, buffer + index, &bytes_available, code_max_bits_,
num_lit_len + num_distance, &tmp_lens_));
index += bytes_available;
// TODO(ahassani): Optimize this so the memcpy is not needed anymore.
lit_len_lens_.clear();
lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
tmp_lens_.begin() + num_lit_len);
distance_lens_.clear();
distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
tmp_lens_.end());
TEST_AND_RETURN_FALSE(
BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
// Build distance Huffman codes.
TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
&distance_max_bits_));
*length = index;
return true;
}
bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br,
uint8_t* buffer,
size_t* length,
size_t max_bits,
size_t num_codes,
Buffer* lens) {
size_t index = 0;
lens->clear();
for (size_t idx = 0; idx < num_codes;) {
TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
auto bits = br->ReadBits(max_bits);
uint16_t code;
size_t nbits;
TEST_AND_RETURN_FALSE(CodeAlphabet(bits, &code, &nbits));
TEST_AND_RETURN_FALSE(index < *length);
br->DropBits(nbits);
if (code < 16) {
buffer[index++] = code;
lens->push_back(code);
idx++;
} else {
TEST_AND_RETURN_FALSE(code < 19);
size_t copy_num = 0;
uint8_t copy_val;
switch (code) {
case 16:
TEST_AND_RETURN_FALSE(idx != 0);
TEST_AND_RETURN_FALSE(br->CacheBits(2));
copy_num = 3 + br->ReadBits(2);
buffer[index++] = 16 + br->ReadBits(2); // 3 - 6 times
copy_val = (*lens)[idx - 1];
br->DropBits(2);
break;
case 17:
TEST_AND_RETURN_FALSE(br->CacheBits(3));
copy_num = 3 + br->ReadBits(3);
buffer[index++] = 20 + br->ReadBits(3); // 3 - 10 times
copy_val = 0;
br->DropBits(3);
break;
case 18:
TEST_AND_RETURN_FALSE(br->CacheBits(7));
copy_num = 11 + br->ReadBits(7);
buffer[index++] = 28 + br->ReadBits(7); // 11 - 138 times
copy_val = 0;
br->DropBits(7);
break;
default:
LOG(ERROR) << "Invalid code!";
return false;
}
idx += copy_num;
while (copy_num--) {
lens->push_back(copy_val);
}
}
}
TEST_AND_RETURN_FALSE(lens->size() == num_codes);
*length = index;
return true;
}
bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer,
size_t length,
BitWriterInterface* bw) {
if (!initialized_) {
// Only resizing the arrays needed.
code_lens_.resize(19);
code_rcodes_.resize(19);
lit_len_lens_.resize(286);
lit_len_rcodes_.resize(286);
distance_lens_.resize(30);
distance_rcodes_.resize(30);
tmp_lens_.resize(286 + 30);
initialized_ = true;
}
TEST_AND_RETURN_FALSE(length >= 3);
size_t index = 0;
// Write the header.
size_t num_lit_len = buffer[index] + 257;
TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
size_t num_distance = buffer[index] + 1;
TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
size_t num_codes = buffer[index] + 4;
TEST_AND_RETURN_FALSE(bw->WriteBits(4, buffer[index++]));
TEST_AND_RETURN_FALSE(
CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
TEST_AND_RETURN_FALSE(length - index >= (num_codes + 1) / 2);
bool checked = false;
size_t idx = 0;
for (; idx < num_codes; idx++) {
uint8_t len;
if (checked) {
len = buffer[index++] & 0x0F;
} else {
len = buffer[index] >> 4;
}
checked = !checked;
code_lens_[kPermutations[idx]] = len;
TEST_AND_RETURN_FALSE(bw->WriteBits(3, len));
}
if (checked) {
index++;
}
for (; idx < 19; idx++) {
code_lens_[kPermutations[idx]] = 0;
}
TEST_AND_RETURN_FALSE(
BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_));
// Build literal/lengths and distance Huffman code length arrays.
auto bytes_available = length - index;
TEST_AND_RETURN_FALSE(
BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw,
num_lit_len + num_distance, &tmp_lens_));
index += bytes_available;
lit_len_lens_.clear();
lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
tmp_lens_.begin() + num_lit_len);
distance_lens_.clear();
distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
tmp_lens_.end());
// Build literal/lengths Huffman reverse codes.
TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
// Build distance Huffman reverse codes.
TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
distance_lens_, &distance_rcodes_, &distance_max_bits_));
TEST_AND_RETURN_FALSE(length == index);
return true;
}
bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer,
size_t* length,
BitWriterInterface* bw,
size_t num_codes,
Buffer* lens) {
lens->clear();
uint16_t hcode;
size_t nbits;
size_t index = 0;
for (size_t idx = 0; idx < num_codes;) {
TEST_AND_RETURN_FALSE(index < *length);
auto pcode = buffer[index++];
TEST_AND_RETURN_FALSE(pcode <= 155);
auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18;
TEST_AND_RETURN_FALSE(CodeHuffman(code, &hcode, &nbits));
TEST_AND_RETURN_FALSE(bw->WriteBits(nbits, hcode));
if (code < 16) {
lens->push_back(code);
idx++;
} else {
size_t copy_num = 0;
uint8_t copy_val;
switch (code) {
case 16:
// Cannot repeat a non-existent last code if idx == 0.
TEST_AND_RETURN_FALSE(idx != 0);
TEST_AND_RETURN_FALSE(bw->WriteBits(2, pcode - 16));
copy_num = 3 + pcode - 16;
copy_val = (*lens)[idx - 1];
break;
case 17:
TEST_AND_RETURN_FALSE(bw->WriteBits(3, pcode - 20));
copy_num = 3 + pcode - 20;
copy_val = 0;
break;
case 18:
TEST_AND_RETURN_FALSE(bw->WriteBits(7, pcode - 28));
copy_num = 11 + pcode - 28;
copy_val = 0;
break;
default:
break;
}
idx += copy_num;
while (copy_num--) {
lens->push_back(copy_val);
}
}
}
TEST_AND_RETURN_FALSE(lens->size() == num_codes);
*length = index;
return true;
}
string BlockTypeToString(BlockType type) {
switch (type) {
case BlockType::kUncompressed:
return "Uncompressed";
case BlockType::kFixed:
return "Fixed";
case BlockType::kDynamic:
return "Dynamic";
default:
return "Unknown";
}
}
} // namespace puffin