// Copyright 2006 Google Inc. // Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef OPEN_VCDIFF_VCDIFFENGINE_H_ #define OPEN_VCDIFF_VCDIFFENGINE_H_ #include <config.h> #include <stddef.h> // size_t #include <stdint.h> // uint32_t namespace open_vcdiff { class BlockHash; class OutputStringInterface; class CodeTableWriterInterface; // The VCDiffEngine class is used to find the optimal encoding (in terms of COPY // and ADD instructions) for a given dictionary and target window. To write the // instructions for this encoding, it calls the Copy() and Add() methods of the // code table writer object which is passed as an argument to Encode(). class VCDiffEngine { public: // The minimum size of a string match that is worth putting into a COPY // instruction. Since this value is more than twice the block size, the // encoder will always discover a match of this size, no matter whether it is // aligned on block boundaries in the dictionary text. static const size_t kMinimumMatchSize = 32; VCDiffEngine(const char* dictionary, size_t dictionary_size); ~VCDiffEngine(); // Initializes the object before use. // This method must be called after constructing a VCDiffEngine object, // and before any other method may be called. It should not be called // twice on the same object. // Returns true if initialization succeeded, or false if an error occurred, // in which case no other method except the destructor may then be used // on the object. // The Init() method is the only one allowed to treat hashed_dictionary_ // as non-const. bool Init(); size_t dictionary_size() const { return dictionary_size_; } // Main worker function. Finds the best matches between the dictionary // (source) and target data, and uses the coder to write a // delta file window into *diff. // Because it is a const function, many threads // can call Encode() at once for the same VCDiffEngine object. // All thread-specific data will be stored in the coder and diff arguments. // The coder object must have been fully initialized (by calling its Init() // method, if any) before calling this function. // // look_for_target_matches determines whether to look for matches // within the previously encoded target data, or just within the source // (dictionary) data. Please see vcencoder.h for a full explanation // of this parameter. void Encode(const char* target_data, size_t target_size, bool look_for_target_matches, OutputStringInterface* diff, CodeTableWriterInterface* coder) const; private: static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) { return size >= kMinimumMatchSize; } // The following two functions use templates to produce two different // versions of the code depending on the value of the option // look_for_target_matches. This approach saves a test-and-branch instruction // within the inner loop of EncodeCopyForBestMatch. template<bool look_for_target_matches> void EncodeInternal(const char* target_data, size_t target_size, OutputStringInterface* diff, CodeTableWriterInterface* coder) const; // If look_for_target_matches is true, then target_hash must point to a valid // BlockHash object, and cannot be NULL. If look_for_target_matches is // false, then the value of target_hash is ignored. template<bool look_for_target_matches> size_t EncodeCopyForBestMatch(uint32_t hash_value, const char* target_candidate_start, const char* unencoded_target_start, size_t unencoded_target_size, const BlockHash* target_hash, CodeTableWriterInterface* coder) const; void AddUnmatchedRemainder(const char* unencoded_target_start, size_t unencoded_target_size, CodeTableWriterInterface* coder) const; void FinishEncoding(size_t target_size, OutputStringInterface* diff, CodeTableWriterInterface* coder) const; const char* dictionary_; // A copy of the dictionary contents const size_t dictionary_size_; // A hash that contains one element for every kBlockSize bytes of dictionary_. // This can be reused to encode many different target strings using the // same dictionary, without the need to compute the hash values each time. const BlockHash* hashed_dictionary_; // Making these private avoids implicit copy constructor & assignment operator VCDiffEngine(const VCDiffEngine&); void operator=(const VCDiffEngine&); }; } // namespace open_vcdiff #endif // OPEN_VCDIFF_VCDIFFENGINE_H_