C++程序  |  306行  |  11.93 KB

// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef SRC_HUFFMAN_TABLE_H_
#define SRC_HUFFMAN_TABLE_H_

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "puffin/src/bit_reader.h"
#include "puffin/src/bit_writer.h"
#include "puffin/src/include/puffin/common.h"
#include "puffin/src/logging.h"

namespace puffin {

// Maximum Huffman code length based on RFC1951.
constexpr size_t kMaxHuffmanBits = 15;

// Permutations of input Huffman code lengths (used only to read
// |dynamic_code_lens_|).
extern const uint8_t kPermutations[];

// The bases of each alphabet which is added to the integer value of extra
// bits that comes after the Huffman code in the input to create the given
// length value. The last element is a guard.
extern const uint16_t kLengthBases[];

// Number of extra bits that comes after the associating Huffman code.
extern const uint8_t kLengthExtraBits[];

// same as |kLengthBases| except for the the distances instead of lengths.
// The last element is a guard.
extern const uint16_t kDistanceBases[];

// Same as |kLengthExtraBits| except for distances instead of lengths.
extern const uint8_t kDistanceExtraBits[];

class HuffmanTable {
 public:
  HuffmanTable();
  virtual ~HuffmanTable() = default;

  // Checks the lengths of Huffman length arrays for correctness
  //
  // |num_lit_len|  IN  The number of literal/lengths code lengths
  // |num_distance| IN  The number of distance code lengths
  // |num_codes|    IN  The number of code lengths for reading Huffman table.
  inline bool CheckHuffmanArrayLengths(size_t num_lit_len,
                                       size_t num_distance,
                                       size_t num_codes) {
    if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) {
      LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: "
                 << "num_lit_len(" << num_lit_len << ") "
                 << "num_distance(" << num_distance << ") "
                 << "num_codes(" << num_codes << ")";
      return false;
    }
    return true;
  }

  // Returns the maximum number of bits used in the current literal/length
  // Huffman codes.
  inline size_t LitLenMaxBits() { return lit_len_max_bits_; }

  // Returns the maximum number of bits used in the current distance Huffman
  // codes.
  inline size_t DistanceMaxBits() { return distance_max_bits_; }

  // Returns the alphabet associated with the set of input bits for the code
  // length array.
  //
  // |bits|     IN   The input Huffman bits read from the deflate stream.
  // |alphabet| OUT  The alphabet associated with the given |bits|.
  // |nbits|    OUT  The number of bits in the Huffman code of alphabet.
  // Returns true if there is an alphabet associated with |bits|.
  inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
    auto hc = code_hcodes_[bits];
    TEST_AND_RETURN_FALSE(hc & 0x8000);
    *alphabet = hc & 0x7FFF;
    *nbits = code_lens_[*alphabet];
    return true;
  }

  // Returns the alphabet associated with the set of input bits for the
  // literal/length code length array.
  //
  // |bits|     IN   The input Huffman bits read from the deflate stream.
  // |alphabet| OUT  The alphabet associated with the given |bits|.
  // |nbits|    OUT  The number of bits in the Huffman code of the |alphabet|.
  // Returns true if there is an alphabet associated with |bits|.
  inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
    auto hc = lit_len_hcodes_[bits];
    TEST_AND_RETURN_FALSE(hc & 0x8000);
    *alphabet = hc & 0x7FFF;
    *nbits = lit_len_lens_[*alphabet];
    return true;
  }

  // Returns the alphabet associated with the set of input bits for the
  // distance code length array.
  //
  // |bits|     IN   The input Huffman bits read from the deflate stream.
  // |alphabet| OUT  The alphabet associated with the given |bits|.
  // |nbits|    OUT  The number of bits in the Huffman code of the |alphabet|.
  // Returns true if there is an alphabet associated with |bits|.
  inline bool DistanceAlphabet(uint32_t bits,
                               uint16_t* alphabet,
                               size_t* nbits) {
    auto hc = distance_hcodes_[bits];
    TEST_AND_RETURN_FALSE(hc & 0x8000);
    *alphabet = hc & 0x7FFF;
    *nbits = distance_lens_[*alphabet];
    return true;
  }

  // Returns the Huffman code of a give alphabet for Huffman table codes.
  //
  // |alphabet| IN   The alphabet.
  // |huffman|  OUT  The Huffman code for |alphabet|.
  // |nbits|    OUT  The maximum number of bits in the Huffman code of the
  //                 |alphabet|.
  inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) {
    TEST_AND_RETURN_FALSE(alphabet < code_lens_.size());
    *huffman = code_rcodes_[alphabet];
    *nbits = code_lens_[alphabet];
    return true;
  }

  // Returns the Huffman code of a give alphabet for literal/length codes.
  //
  // |alphabet| IN   The alphabet.
  // |huffman|  OUT  The Huffman code for |alphabet|.
  // |nbits|    OUT  The maximum number of bits in the Huffman code of the
  //                 |alphabet|.
  inline bool LitLenHuffman(uint16_t alphabet,
                            uint16_t* huffman,
                            size_t* nbits) {
    TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size());
    *huffman = lit_len_rcodes_[alphabet];
    *nbits = lit_len_lens_[alphabet];
    return true;
  }

  inline bool EndOfBlockBitLength(size_t* nbits) {
    TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size());
    *nbits = lit_len_lens_[256];
    return true;
  }

  // Returns the Huffman code of a give alphabet for distance codes.
  //
  // |alphabet| IN   The alphabet.
  // |huffman|  OUT  The Huffman code for |alphabet|.
  // |nbits|    OUT  The maximum number of bits in the Huffman code of the
  //                 |alphabet|.
  inline bool DistanceHuffman(uint16_t alphabet,
                              uint16_t* huffman,
                              size_t* nbits) {
    TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size());
    *huffman = distance_rcodes_[alphabet];
    *nbits = distance_lens_[alphabet];
    return true;
  }

  // This populates the object with fixed huffman table parameters.
  // TODO(ahassani): Revamp the use of this function to be initiliazed once in
  // the lifetime of the program and only one instance needed.
  bool BuildFixedHuffmanTable();

  // This functions first reads the Huffman code length arrays from the input
  // deflate stream, then builds both literal/length and distance Huffman
  // code arrays. It also writes the Huffman table into the puffed stream.
  //
  // |br|      IN      The |BitReaderInterface| reading the deflate stream.
  // |buffer|  OUT     The object to write the Huffman table.
  // |length|  IN/OUT  The length available in the |buffer| and in return it
  //                   will be the length of Huffman table data written into
  //                   the |buffer|.
  bool BuildDynamicHuffmanTable(BitReaderInterface* br,
                                uint8_t* buffer,
                                size_t* length);

  // This functions first reads the Huffman code length arrays from the input
  // puffed |buffer|, then builds both literal/length and distance Huffman code
  // arrays. It also writes the coded Huffman table arrays into the deflate
  // stream.
  //
  // |buffer| IN      The array to read the Huffman table from.
  // |length| IN      The length available in the |buffer|.
  // |bw|     IN/OUT  The |BitWriterInterface| for writing into the deflate
  //                  stream.
  bool BuildDynamicHuffmanTable(const uint8_t* buffer,
                                size_t length,
                                BitWriterInterface* bw);

 protected:
  // Initializes the Huffman codes from an array of lengths.
  //
  // |lens|     IN   The input array of code lengths.
  // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
  bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits);

  // Creates the Huffman code to alphabet array.
  // |lens|     IN   The input array of code lengths.
  // |hcodes|   OUT  The Huffman to alphabet array.
  // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
  bool BuildHuffmanCodes(const Buffer& lens,
                         std::vector<uint16_t>* hcodes,
                         size_t* max_bits);

  // Creates the alphabet to Huffman code array.
  // |lens|     IN   The input array of code lengths.
  // |rcodes|   OUT  The Huffman to Huffman array.
  // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
  bool BuildHuffmanReverseCodes(const Buffer& lens,
                                std::vector<uint16_t>* rcodes,
                                size_t* max_bits);

  // Reads a specific Huffman code length array from input. At the same time
  // writes the array into the puffed stream. The Huffman code length array is
  // either the literal/lengths or distance codes.
  //
  // |br|        IN      The |BitReaderInterface| for reading the deflate
  //                      stream.
  // |buffer|    OUT     The array to write the Huffman table.
  // |length|    IN/OUT  The length available in the |buffer| and in return it
  //                     will be the length of data written into the |buffer|.
  // |max_bits|  IN      The maximum number of bits in the Huffman codes.
  // |num_codes| IN      The size of the Huffman code length array in the input.
  // |lens|      OUT     The resulting Huffman code length array.
  bool BuildHuffmanCodeLengths(BitReaderInterface* br,
                               uint8_t* buffer,
                               size_t* length,
                               size_t max_bits,
                               size_t num_codes,
                               Buffer* lens);

  // Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and
  // writing into deflate stream.
  //
  // |buffer|    IN      The array to read the Huffman table from.
  // |length|    IN      The length available in the |buffer|.
  // |bw|        IN/OUT  The |BitWriterInterface| for writing into the deflate
  //                     stream.
  // |num_codes| IN      Number of Huffman code lengths to read from the
  //                     |buffer|.
  // |lens|      OUT     The Huffman code lengths array.
  bool BuildHuffmanCodeLengths(const uint8_t* buffer,
                               size_t* length,
                               BitWriterInterface* bw,
                               size_t num_codes,
                               Buffer* lens);

 private:
  // A utility struct used to create Huffman codes.
  struct CodeIndexPair {
    uint16_t code;   // The Huffman code
    uint16_t index;  // The alphabet
  };
  // A vector with maximum size of 286 that is only uses as temporary space for
  // building Huffman codes.
  std::vector<CodeIndexPair> codeindexpairs_;

  // Used in building Huffman codes for literals/lengths and distances.
  std::vector<uint8_t> lit_len_lens_;
  std::vector<uint16_t> lit_len_hcodes_;
  std::vector<uint16_t> lit_len_rcodes_;
  size_t lit_len_max_bits_;
  std::vector<uint8_t> distance_lens_;
  std::vector<uint16_t> distance_hcodes_;
  std::vector<uint16_t> distance_rcodes_;
  size_t distance_max_bits_;

  // The reason for keeping a temporary buffer here is to avoid reallocing each
  // time.
  std::vector<uint8_t> tmp_lens_;

  // Used in building Huffman codes for reading and decoding literal/length and
  // distance Huffman code length arrays.
  std::vector<uint8_t> code_lens_;
  std::vector<uint16_t> code_hcodes_;
  std::vector<uint16_t> code_rcodes_;
  size_t code_max_bits_;

  bool initialized_;

  DISALLOW_COPY_AND_ASSIGN(HuffmanTable);
};

// The type of a block in a deflate stream.
enum class BlockType : uint8_t {
  kUncompressed = 0x00,
  kFixed = 0x01,
  kDynamic = 0x02,
};

std::string BlockTypeToString(BlockType type);

}  // namespace puffin

#endif  // SRC_HUFFMAN_TABLE_H_