// Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef SRC_HUFFMAN_TABLE_H_ #define SRC_HUFFMAN_TABLE_H_ #include <cstddef> #include <cstdint> #include <string> #include <vector> #include "puffin/src/bit_reader.h" #include "puffin/src/bit_writer.h" #include "puffin/src/include/puffin/common.h" #include "puffin/src/logging.h" namespace puffin { // Maximum Huffman code length based on RFC1951. constexpr size_t kMaxHuffmanBits = 15; // Permutations of input Huffman code lengths (used only to read // |dynamic_code_lens_|). extern const uint8_t kPermutations[]; // The bases of each alphabet which is added to the integer value of extra // bits that comes after the Huffman code in the input to create the given // length value. The last element is a guard. extern const uint16_t kLengthBases[]; // Number of extra bits that comes after the associating Huffman code. extern const uint8_t kLengthExtraBits[]; // same as |kLengthBases| except for the the distances instead of lengths. // The last element is a guard. extern const uint16_t kDistanceBases[]; // Same as |kLengthExtraBits| except for distances instead of lengths. extern const uint8_t kDistanceExtraBits[]; class HuffmanTable { public: HuffmanTable(); virtual ~HuffmanTable() = default; // Checks the lengths of Huffman length arrays for correctness // // |num_lit_len| IN The number of literal/lengths code lengths // |num_distance| IN The number of distance code lengths // |num_codes| IN The number of code lengths for reading Huffman table. inline bool CheckHuffmanArrayLengths(size_t num_lit_len, size_t num_distance, size_t num_codes) { if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) { LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: " << "num_lit_len(" << num_lit_len << ") " << "num_distance(" << num_distance << ") " << "num_codes(" << num_codes << ")"; return false; } return true; } // Returns the maximum number of bits used in the current literal/length // Huffman codes. inline size_t LitLenMaxBits() { return lit_len_max_bits_; } // Returns the maximum number of bits used in the current distance Huffman // codes. inline size_t DistanceMaxBits() { return distance_max_bits_; } // Returns the alphabet associated with the set of input bits for the code // length array. // // |bits| IN The input Huffman bits read from the deflate stream. // |alphabet| OUT The alphabet associated with the given |bits|. // |nbits| OUT The number of bits in the Huffman code of alphabet. // Returns true if there is an alphabet associated with |bits|. inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { auto hc = code_hcodes_[bits]; TEST_AND_RETURN_FALSE(hc & 0x8000); *alphabet = hc & 0x7FFF; *nbits = code_lens_[*alphabet]; return true; } // Returns the alphabet associated with the set of input bits for the // literal/length code length array. // // |bits| IN The input Huffman bits read from the deflate stream. // |alphabet| OUT The alphabet associated with the given |bits|. // |nbits| OUT The number of bits in the Huffman code of the |alphabet|. // Returns true if there is an alphabet associated with |bits|. inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { auto hc = lit_len_hcodes_[bits]; TEST_AND_RETURN_FALSE(hc & 0x8000); *alphabet = hc & 0x7FFF; *nbits = lit_len_lens_[*alphabet]; return true; } // Returns the alphabet associated with the set of input bits for the // distance code length array. // // |bits| IN The input Huffman bits read from the deflate stream. // |alphabet| OUT The alphabet associated with the given |bits|. // |nbits| OUT The number of bits in the Huffman code of the |alphabet|. // Returns true if there is an alphabet associated with |bits|. inline bool DistanceAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { auto hc = distance_hcodes_[bits]; TEST_AND_RETURN_FALSE(hc & 0x8000); *alphabet = hc & 0x7FFF; *nbits = distance_lens_[*alphabet]; return true; } // Returns the Huffman code of a give alphabet for Huffman table codes. // // |alphabet| IN The alphabet. // |huffman| OUT The Huffman code for |alphabet|. // |nbits| OUT The maximum number of bits in the Huffman code of the // |alphabet|. inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) { TEST_AND_RETURN_FALSE(alphabet < code_lens_.size()); *huffman = code_rcodes_[alphabet]; *nbits = code_lens_[alphabet]; return true; } // Returns the Huffman code of a give alphabet for literal/length codes. // // |alphabet| IN The alphabet. // |huffman| OUT The Huffman code for |alphabet|. // |nbits| OUT The maximum number of bits in the Huffman code of the // |alphabet|. inline bool LitLenHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) { TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size()); *huffman = lit_len_rcodes_[alphabet]; *nbits = lit_len_lens_[alphabet]; return true; } inline bool EndOfBlockBitLength(size_t* nbits) { TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size()); *nbits = lit_len_lens_[256]; return true; } // Returns the Huffman code of a give alphabet for distance codes. // // |alphabet| IN The alphabet. // |huffman| OUT The Huffman code for |alphabet|. // |nbits| OUT The maximum number of bits in the Huffman code of the // |alphabet|. inline bool DistanceHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) { TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size()); *huffman = distance_rcodes_[alphabet]; *nbits = distance_lens_[alphabet]; return true; } // This populates the object with fixed huffman table parameters. // TODO(ahassani): Revamp the use of this function to be initiliazed once in // the lifetime of the program and only one instance needed. bool BuildFixedHuffmanTable(); // This functions first reads the Huffman code length arrays from the input // deflate stream, then builds both literal/length and distance Huffman // code arrays. It also writes the Huffman table into the puffed stream. // // |br| IN The |BitReaderInterface| reading the deflate stream. // |buffer| OUT The object to write the Huffman table. // |length| IN/OUT The length available in the |buffer| and in return it // will be the length of Huffman table data written into // the |buffer|. bool BuildDynamicHuffmanTable(BitReaderInterface* br, uint8_t* buffer, size_t* length); // This functions first reads the Huffman code length arrays from the input // puffed |buffer|, then builds both literal/length and distance Huffman code // arrays. It also writes the coded Huffman table arrays into the deflate // stream. // // |buffer| IN The array to read the Huffman table from. // |length| IN The length available in the |buffer|. // |bw| IN/OUT The |BitWriterInterface| for writing into the deflate // stream. bool BuildDynamicHuffmanTable(const uint8_t* buffer, size_t length, BitWriterInterface* bw); protected: // Initializes the Huffman codes from an array of lengths. // // |lens| IN The input array of code lengths. // |max_bits| OUT The maximum number of bits used for the Huffman codes. bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits); // Creates the Huffman code to alphabet array. // |lens| IN The input array of code lengths. // |hcodes| OUT The Huffman to alphabet array. // |max_bits| OUT The maximum number of bits used for the Huffman codes. bool BuildHuffmanCodes(const Buffer& lens, std::vector<uint16_t>* hcodes, size_t* max_bits); // Creates the alphabet to Huffman code array. // |lens| IN The input array of code lengths. // |rcodes| OUT The Huffman to Huffman array. // |max_bits| OUT The maximum number of bits used for the Huffman codes. bool BuildHuffmanReverseCodes(const Buffer& lens, std::vector<uint16_t>* rcodes, size_t* max_bits); // Reads a specific Huffman code length array from input. At the same time // writes the array into the puffed stream. The Huffman code length array is // either the literal/lengths or distance codes. // // |br| IN The |BitReaderInterface| for reading the deflate // stream. // |buffer| OUT The array to write the Huffman table. // |length| IN/OUT The length available in the |buffer| and in return it // will be the length of data written into the |buffer|. // |max_bits| IN The maximum number of bits in the Huffman codes. // |num_codes| IN The size of the Huffman code length array in the input. // |lens| OUT The resulting Huffman code length array. bool BuildHuffmanCodeLengths(BitReaderInterface* br, uint8_t* buffer, size_t* length, size_t max_bits, size_t num_codes, Buffer* lens); // Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and // writing into deflate stream. // // |buffer| IN The array to read the Huffman table from. // |length| IN The length available in the |buffer|. // |bw| IN/OUT The |BitWriterInterface| for writing into the deflate // stream. // |num_codes| IN Number of Huffman code lengths to read from the // |buffer|. // |lens| OUT The Huffman code lengths array. bool BuildHuffmanCodeLengths(const uint8_t* buffer, size_t* length, BitWriterInterface* bw, size_t num_codes, Buffer* lens); private: // A utility struct used to create Huffman codes. struct CodeIndexPair { uint16_t code; // The Huffman code uint16_t index; // The alphabet }; // A vector with maximum size of 286 that is only uses as temporary space for // building Huffman codes. std::vector<CodeIndexPair> codeindexpairs_; // Used in building Huffman codes for literals/lengths and distances. std::vector<uint8_t> lit_len_lens_; std::vector<uint16_t> lit_len_hcodes_; std::vector<uint16_t> lit_len_rcodes_; size_t lit_len_max_bits_; std::vector<uint8_t> distance_lens_; std::vector<uint16_t> distance_hcodes_; std::vector<uint16_t> distance_rcodes_; size_t distance_max_bits_; // The reason for keeping a temporary buffer here is to avoid reallocing each // time. std::vector<uint8_t> tmp_lens_; // Used in building Huffman codes for reading and decoding literal/length and // distance Huffman code length arrays. std::vector<uint8_t> code_lens_; std::vector<uint16_t> code_hcodes_; std::vector<uint16_t> code_rcodes_; size_t code_max_bits_; bool initialized_; DISALLOW_COPY_AND_ASSIGN(HuffmanTable); }; // The type of a block in a deflate stream. enum class BlockType : uint8_t { kUncompressed = 0x00, kFixed = 0x01, kDynamic = 0x02, }; std::string BlockTypeToString(BlockType type); } // namespace puffin #endif // SRC_HUFFMAN_TABLE_H_