普通文本  |  197行  |  6.87 KB

// Copyright 2007 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Classes to implement an Encoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.

#include <config.h>
#include <memory>  // auto_ptr
#include "checksum.h"
#include "encodetable.h"
#include "google/output_string.h"
#include "google/vcencoder.h"
#include "jsonwriter.h"
#include "logging.h"
#include "vcdiffengine.h"

namespace open_vcdiff {

HashedDictionary::HashedDictionary(const char* dictionary_contents,
                                   size_t dictionary_size)
    : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }

HashedDictionary::~HashedDictionary() { delete engine_; }

bool HashedDictionary::Init() {
  return const_cast<VCDiffEngine*>(engine_)->Init();
}

class VCDiffStreamingEncoderImpl {
 public:
  VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
                             VCDiffFormatExtensionFlags format_extensions,
                             bool look_for_target_matches);

  // These functions are identical to their counterparts
  // in VCDiffStreamingEncoder.
  bool StartEncoding(OutputStringInterface* out);

  bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);

  bool FinishEncoding(OutputStringInterface* out);

 private:
  const VCDiffEngine* engine_;

  std::auto_ptr<CodeTableWriterInterface> coder_;

  const VCDiffFormatExtensionFlags format_extensions_;

  // Determines whether to look for matches within the previously encoded
  // target data, or just within the source (dictionary) data.  Please see
  // vcencoder.h for a full explanation of this parameter.
  const bool look_for_target_matches_;

  // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
  // and FinishEncoding() are called in the correct order.  It will be true
  // if StartEncoding() has been called, followed by zero or more calls to
  // EncodeChunk(), but FinishEncoding() has not yet been called.  It will
  // be false initially, and also after FinishEncoding() has been called.
  bool encode_chunk_allowed_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
  void operator=(const VCDiffStreamingEncoderImpl&);
};

inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
    const HashedDictionary* dictionary,
    VCDiffFormatExtensionFlags format_extensions,
    bool look_for_target_matches)
    : engine_(dictionary->engine()),
      format_extensions_(format_extensions),
      look_for_target_matches_(look_for_target_matches),
      encode_chunk_allowed_(false) {
  if (format_extensions & VCD_FORMAT_JSON) {
    coder_.reset(new JSONCodeTableWriter());
  } else {
    // This implementation of the encoder uses the default
    // code table.  A VCDiffCodeTableWriter could also be constructed
    // using a custom code table.
    coder_.reset(new VCDiffCodeTableWriter(
        (format_extensions & VCD_FORMAT_INTERLEAVED) != 0));
  }
}

inline bool VCDiffStreamingEncoderImpl::StartEncoding(
    OutputStringInterface* out) {
  if (!coder_->Init(engine_->dictionary_size())) {
    VCD_DFATAL << "Internal error: "
                  "Initialization of code table writer failed" << VCD_ENDL;
    return false;
  }
  coder_->WriteHeader(out, format_extensions_);
  encode_chunk_allowed_ = true;
  return true;
}

inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
    const char* data,
    size_t len,
    OutputStringInterface* out) {
  if (!encode_chunk_allowed_) {
    VCD_ERROR << "EncodeChunk called before StartEncoding" << VCD_ENDL;
    return false;
  }
  if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
    coder_->AddChecksum(ComputeAdler32(data, len));
  }
  engine_->Encode(data, len, look_for_target_matches_, out, coder_.get());
  return true;
}

inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
    OutputStringInterface* out) {
  if (!encode_chunk_allowed_) {
    VCD_ERROR << "FinishEncoding called before StartEncoding" << VCD_ENDL;
    return false;
  }
  encode_chunk_allowed_ = false;
  coder_->FinishEncoding(out);
  return true;
}

VCDiffStreamingEncoder::VCDiffStreamingEncoder(
    const HashedDictionary* dictionary,
    VCDiffFormatExtensionFlags format_extensions,
    bool look_for_target_matches)
    : impl_(new VCDiffStreamingEncoderImpl(dictionary,
                                           format_extensions,
                                           look_for_target_matches)) { }

VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }

bool VCDiffStreamingEncoder::StartEncodingToInterface(
    OutputStringInterface* out) {
  return impl_->StartEncoding(out);
}

bool VCDiffStreamingEncoder::EncodeChunkToInterface(
    const char* data,
    size_t len,
    OutputStringInterface* out) {
  return impl_->EncodeChunk(data, len, out);
}

bool VCDiffStreamingEncoder::FinishEncodingToInterface(
    OutputStringInterface* out) {
  return impl_->FinishEncoding(out);
}

bool VCDiffEncoder::EncodeToInterface(const char* target_data,
                                      size_t target_len,
                                      OutputStringInterface* out) {
  out->clear();
  if (!encoder_) {
    if (!dictionary_.Init()) {
      VCD_ERROR << "Error initializing HashedDictionary" << VCD_ENDL;
      return false;
    }
    encoder_ = new VCDiffStreamingEncoder(&dictionary_,
                                          flags_,
                                          look_for_target_matches_);
  }
  if (!encoder_->StartEncodingToInterface(out)) {
    return false;
  }
  if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
    return false;
  }
  return encoder_->FinishEncodingToInterface(out);
}

}  // namespace open_vcdiff