普通文本  |  948行  |  33.61 KB

// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <config.h>
#include "vcdiffengine.h"
#include <string.h>  // memset, strlen
#include <algorithm>
#include <string>
#include <vector>
#include "addrcache.h"
#include "blockhash.h"
#include "encodetable.h"
#include "google/output_string.h"
#include "rolling_hash.h"
#include "testing.h"
#include "varint_bigendian.h"
#include "vcdiff_defs.h"

namespace open_vcdiff {

namespace {

class VCDiffEngineTestBase : public testing::Test {
 protected:
  typedef std::string string;

  // Some common definitions and helper functions used in the various tests
  // for VCDiffEngine.
  static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;

  VCDiffEngineTestBase() : interleaved_(false),
                           diff_output_string_(&diff_),
                           verify_position_(0),
                           saved_total_size_position_(0),
                           saved_delta_encoding_position_(0),
                           saved_section_sizes_position_(0),
                           data_bytes_(0),
                           instruction_bytes_(0),
                           address_bytes_(0) { }

  virtual ~VCDiffEngineTestBase() { }

  virtual void TearDown() {
    VerifyMatchCounts();
  }

  // Copy string_without_spaces into newly allocated result buffer,
  // but pad its contents with space characters so that every character
  // in string_without_spaces corresponds to (block_size - 1)
  // spaces in the result, followed by that character.
  // For example:
  // If string_without_spaces begins "The only thing"... and block_size is 4,
  // then 3 space characters will be inserted
  // between each letter in the result, as follows:
  // "   T   h   e       o   n   l   y       t   h   i   n   g"...
  // This makes testing simpler, because finding a block_size-byte match
  // between the dictionary and target only depends on the
  // trailing letter in each block.
  // If no_initial_padding is true, then the first letter will not have
  // spaces added in front of it.
  static void MakeEachLetterABlock(const char* string_without_spaces,
                                   const char** result,
                                   int block_size,
                                   bool no_initial_padding) {
    const size_t length_without_spaces = strlen(string_without_spaces);
    char* padded_text = new char[(block_size * length_without_spaces) + 1];
    memset(padded_text, ' ', block_size * length_without_spaces);
    char* padded_text_ptr = padded_text;
    if (!no_initial_padding) {
      padded_text_ptr += block_size - 1;
    }
    for (size_t i = 0; i < length_without_spaces; ++i) {
      *padded_text_ptr = string_without_spaces[i];
      padded_text_ptr += block_size;
    }
    *(padded_text_ptr - block_size + 1)  = '\0';
    *result = padded_text;
  }

  // These functions iterate through the decoded output and expect
  // simple elements: bytes or variable-length integers.
  void ExpectByte(char byte) {
    EXPECT_GT(diff_.size(), verify_position_);
    EXPECT_EQ(byte, diff_[verify_position_]);
    ++verify_position_;
  }

  size_t ExpectVarint(int32_t expected_value) {
    EXPECT_GT(diff_.size(), verify_position_);
    const char* const original_position = &diff_[verify_position_];
    const char* new_position = original_position;
    const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
    int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
                                                    &new_position);
    EXPECT_LE(0, parsed_value);
    size_t parsed_length = new_position - original_position;
    EXPECT_EQ(expected_value, parsed_value);
    EXPECT_EQ(expected_length, parsed_length);
    verify_position_ += parsed_length;
    return parsed_length;
  }

  size_t ExpectSize(size_t size) {
    return ExpectVarint(static_cast<int32_t>(size));
  }

  size_t ExpectStringLength(const char* s) {
    return ExpectSize(strlen(s));
  }

  void SkipVarint() {
    EXPECT_GT(diff_.size(), verify_position_);
    const char* const original_position = &diff_[verify_position_];
    const char* new_position = original_position;
    VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
    size_t parsed_length = new_position - original_position;
    verify_position_ += parsed_length;
  }

  void ExpectDataByte(char byte) {
    ExpectByte(byte);
    if (interleaved_) {
      ++instruction_bytes_;
    } else {
      ++data_bytes_;
    }
  }

  void ExpectDataString(const char *expected_string) {
    const char* ptr = expected_string;
    while (*ptr) {
      ExpectDataByte(*ptr);
      ++ptr;
    }
  }

  void ExpectDataStringWithBlockSpacing(const char *expected_string,
                                        bool trailing_spaces) {
    const char* ptr = expected_string;
    while (*ptr) {
      for (int i = 0; i < (kBlockSize - 1); ++i) {
        ExpectDataByte(' ');
      }
      ExpectDataByte(*ptr);
      ++ptr;
    }
    if (trailing_spaces) {
      for (int i = 0; i < (kBlockSize - 1); ++i) {
        ExpectDataByte(' ');
      }
    }
  }

  void ExpectInstructionByte(char byte) {
    ExpectByte(byte);
    ++instruction_bytes_;
  }

  void ExpectInstructionVarint(int32_t value) {
    instruction_bytes_ += ExpectVarint(value);
  }

  void ExpectAddressByte(char byte) {
    ExpectByte(byte);
    if (interleaved_) {
      ++instruction_bytes_;
    } else {
      ++address_bytes_;
    }
  }

  void ExpectAddressVarint(int32_t value) {
    if (interleaved_) {
      instruction_bytes_ += ExpectVarint(value);
    } else {
      address_bytes_ += ExpectVarint(value);
    }
  }

  // The following functions leverage the fact that the encoder uses
  // the default code table and cache sizes.  They are able to search for
  // instructions of a particular size.  The logic for mapping from
  // instruction type, mode, and size to opcode value is very different here
  // from the logic used in encodetable.{h,cc}, so hopefully
  // this version will help validate that the other is correct.
  // This version uses conditional statements, while encodetable.h
  // looks up values in a mapping table.
  void ExpectAddress(int32_t address, int copy_mode) {
    if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
      ExpectAddressVarint(address);
    } else {
      ExpectAddressByte(address);
    }
  }

  void ExpectAddInstruction(int size) {
    if (size <= 18) {
      ExpectInstructionByte(0x01 + size);
    } else {
      ExpectInstructionByte(0x01);
      ExpectInstructionVarint(size);
    }
  }

  void ExpectCopyInstruction(int size, int mode) {
    if ((size >= 4) && (size <= 16)) {
      ExpectInstructionByte(0x10 + (0x10 * mode) + size);
    } else {
      ExpectInstructionByte(0x13 + (0x10 * mode));
      ExpectInstructionVarint(size);
    }
    ExpectMatch(size);
  }

  bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
    if (!default_cache_.IsSameMode(copy_mode) &&
        (add_size <= 4) &&
        (copy_size >= 4) &&
        (copy_size <= 6)) {
      ExpectInstructionByte(0x9C +
                            (0x0C * copy_mode) +
                            (0x03 * add_size) +
                            copy_size);
      ExpectMatch(copy_size);
      return true;
    } else if (default_cache_.IsSameMode(copy_mode) &&
               (add_size <= 4) &&
               (copy_size == 4)) {
      ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
      ExpectMatch(copy_size);
      return true;
    } else {
      ExpectAddInstruction(add_size);
      return false;
    }
  }

  void ExpectAddInstructionForStringLength(const char* s) {
    ExpectAddInstruction(static_cast<int>(strlen(s)));
  }

  // Call this function before beginning to iterate through the diff string
  // using the Expect... functions.
  // text must be NULL-terminated.
  void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
                                              const char* target_text) {
    ExpectByte(0x01);  // Win_Indicator: VCD_SOURCE (dictionary)
    ExpectStringLength(dictionary);
    ExpectByte(0x00);  // Source segment position: start of dictionary
    saved_total_size_position_ = verify_position_;
    SkipVarint();  // Length of the delta encoding
    saved_delta_encoding_position_ = verify_position_;
    ExpectStringLength(target_text);
    ExpectByte(0x00);  // Delta_indicator (no compression)
    saved_section_sizes_position_ = verify_position_;
    SkipVarint();  // length of data for ADDs and RUNs
    SkipVarint();  // length of instructions section
    SkipVarint();  // length of addresses for COPYs
  }

  // Call this function before beginning to iterating through the entire
  // diff string using the Expect... functions.  It makes sure that the
  // size totals in the window header match the number of bytes that
  // were parsed.
  void VerifySizes() {
    EXPECT_EQ(verify_position_, diff_.size());
    const size_t delta_encoding_size = verify_position_ -
                                       saved_delta_encoding_position_;
    verify_position_ = saved_total_size_position_;
    ExpectSize(delta_encoding_size);
    verify_position_ = saved_section_sizes_position_;
    ExpectSize(data_bytes_);
    ExpectSize(instruction_bytes_);
    ExpectSize(address_bytes_);
  }

  void ExpectMatch(size_t match_size) {
    if (match_size >= expected_match_counts_.size()) {
      // Be generous to avoid resizing again
      expected_match_counts_.resize(match_size * 2, 0);
    }
    ++expected_match_counts_[match_size];
  }

  void VerifyMatchCounts() {
    EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
                           expected_match_counts_.end(),
                           actual_match_counts_.begin()));
  }

  bool interleaved_;
  string diff_;
  OutputString<string> diff_output_string_;
  size_t verify_position_;
  size_t saved_total_size_position_;
  size_t saved_delta_encoding_position_;
  size_t saved_section_sizes_position_;
  size_t data_bytes_;
  size_t instruction_bytes_;
  size_t address_bytes_;
  VCDiffAddressCache default_cache_;  // Used for finding mode values
  std::vector<int> expected_match_counts_;
  std::vector<int> actual_match_counts_;
};

class VCDiffEngineTest : public VCDiffEngineTestBase {
 protected:
  VCDiffEngineTest() :
      engine_(dictionary_, strlen(dictionary_)) {
    EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
  }

  virtual ~VCDiffEngineTest() { }


  static void SetUpTestCase() {
    MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
                         kBlockSize, false);
    MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
  }

  static void TearDownTestCase() {
    delete[] dictionary_;
    delete[] target_;
  }

  void EncodeNothing(bool interleaved, bool target_matching) {
    interleaved_ = interleaved;
    VCDiffCodeTableWriter coder(interleaved);
    coder.Init(engine_.dictionary_size());
    engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
    EXPECT_TRUE(diff_.empty());
    actual_match_counts_ = coder.match_counts();
  }

  // text must be NULL-terminated
  void EncodeText(const char* text, bool interleaved, bool target_matching) {
    interleaved_ = interleaved;
    VCDiffCodeTableWriter coder(interleaved);
    coder.Init(engine_.dictionary_size());
    engine_.Encode(text,
                   strlen(text),
                   target_matching,
                   &diff_output_string_,
                   &coder);
    actual_match_counts_ = coder.match_counts();
  }

  void Encode(bool interleaved, bool target_matching) {
    EncodeText(target_, interleaved, target_matching);
    VerifyHeader();
  }

  void VerifyHeader() {
    VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
  }

  static const char dictionary_without_spaces_[];
  static const char target_without_spaces_[];

  static const char* dictionary_;
  static const char* target_;

  const VCDiffEngine engine_;
};

#ifdef GTEST_HAS_DEATH_TEST
typedef VCDiffEngineTest VCDiffEngineDeathTest;
#endif  // GTEST_HAS_DEATH_TEST

const char VCDiffEngineTest::dictionary_without_spaces_[] =
    "The only thing we have to fear is fear itself";

const char VCDiffEngineTest::target_without_spaces_[] =
    "What we hear is fearsome";

const char* VCDiffEngineTest::dictionary_ = NULL;
const char* VCDiffEngineTest::target_ = NULL;

#ifdef GTEST_HAS_DEATH_TEST
TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
  EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
                     "twice");
}
#endif  // GTEST_HAS_DEATH_TEST

TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
  EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
}

TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
  EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
}

TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
  EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
}

TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
  EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
}

TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
  const char* small_text = "  ";
  EncodeText(small_text,
             /* interleaved = */ false,
             /* target matching = */ false);
  VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
  // Data for ADDs
  ExpectDataString(small_text);
  // Instructions and sizes
  ExpectAddInstructionForStringLength(small_text);
}

TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
  const char* small_text = "  ";
  EncodeText(small_text,
             /* interleaved = */ true,
             /* target matching = */ false);
  VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
  // Interleaved section
  ExpectAddInstructionForStringLength(small_text);
  ExpectDataString(small_text);
}

TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
  Encode(/* interleaved = */ false, /* target matching = */ false);
  // Data for ADDs
  ExpectDataStringWithBlockSpacing("W", false);
  ExpectDataByte('t');
  ExpectDataByte('s');
  ExpectDataByte('m');
  // Instructions and sizes
  if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
                                VCD_SELF_MODE)) {
    ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
  }
  ExpectAddInstruction(1);
  ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
  ExpectCopyInstruction(11 * kBlockSize,
                        default_cache_.FirstNearMode());
  if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
  }
  if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
  }
  // Addresses for COPY
  ExpectAddressVarint(18 * kBlockSize);  // "ha"
  ExpectAddressVarint(14 * kBlockSize);  // " we h"
  ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
  ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
  ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
  VerifySizes();
}

TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
  Encode(/* interleaved = */ true, /* target matching = */ false);
  // Interleaved section
  if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
                                VCD_SELF_MODE)) {
    ExpectDataStringWithBlockSpacing("W", false);
    ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
  } else {
    ExpectDataStringWithBlockSpacing("W", false);
  }
  ExpectAddressVarint(18 * kBlockSize);  // "ha"
  ExpectAddInstruction(1);
  ExpectDataByte('t');
  ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
  ExpectAddressVarint(14 * kBlockSize);  // " we h"
  ExpectCopyInstruction(11 * kBlockSize,
                        default_cache_.FirstNearMode());
  ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
  if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    ExpectDataByte('s');
    ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
  } else {
    ExpectDataByte('s');
  }
  ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
  if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    ExpectDataByte('m');
    ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
  } else {
    ExpectDataByte('m');
  }
  ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
  VerifySizes();
}

TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
  Encode(/* interleaved = */ false, /* target matching = */ true);
  // Data for ADDs
  ExpectDataStringWithBlockSpacing("W", false);
  ExpectDataByte('t');
  ExpectDataByte('s');
  ExpectDataByte('m');
  // Instructions and sizes
  if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
                                VCD_SELF_MODE)) {
    ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
  }
  ExpectAddInstruction(1);
  ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
  ExpectCopyInstruction(11 * kBlockSize,
                        default_cache_.FirstNearMode());
  if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
  }
  if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
  }
  // Addresses for COPY
  ExpectAddressVarint(18 * kBlockSize);  // "ha"
  ExpectAddressVarint(14 * kBlockSize);  // " we h"
  ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
  ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
  ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
  VerifySizes();
}

TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
  Encode(/* interleaved = */ true, /* target matching = */ false);
  // Interleaved section
  if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
                                VCD_SELF_MODE)) {
    ExpectDataStringWithBlockSpacing("W", false);
    ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
  } else {
    ExpectDataStringWithBlockSpacing("W", false);
  }
  ExpectAddressVarint(18 * kBlockSize);  // "ha"
  ExpectAddInstruction(1);
  ExpectDataByte('t');
  ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
  ExpectAddressVarint(14 * kBlockSize);  // " we h"
  ExpectCopyInstruction(11 * kBlockSize,
                        default_cache_.FirstNearMode());
  ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
  if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    ExpectDataByte('s');
    ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
  } else {
    ExpectDataByte('s');
  }
  ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
  if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    ExpectDataByte('m');
    ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
  } else {
    ExpectDataByte('m');
  }
  ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
  VerifySizes();
}

// This test case takes a dictionary containing several instances of the string
// "weasel", and a target string which is identical to the dictionary
// except that all instances of "weasel" have been replaced with the string
// "moon-pie".  It tests that COPY instructions are generated for all
// boilerplate text (that is, the text between the "moon-pie" instances in
// the target) and, if target matching is enabled, that each instance of
// "moon-pie" (except the first one) is encoded using a COPY instruction
// rather than an ADD.
class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
 protected:
  // kCompressibleTestBlockSize:
  // The size of the block to create for each letter in the
  // dictionary and search string for the "compressible text" test.
  // See MakeEachLetterABlock, below.
  // If we use kCompressibleTestBlockSize = kBlockSize, then the
  // encoder will find one match per unique letter in the HTML text.
  // There are too many examples of "<" in the text for the encoder
  // to iterate through them all, and some matches are not found.
  // If we use kCompressibleTextBlockSize = 1, then the boilerplate
  // text between "weasel" strings in the dictionary and "moon-pie"
  // strings in the target may not be long enough to be found by
  // the encoder's block-hash algorithm.  A good value, that will give
  // reproducible results across all block sizes, will be somewhere
  // in between these extremes.
  static const int kCompressibleTestBlockSize = kBlockSize / 4;
  static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;

  WeaselsToMoonpiesTest() :
      engine_(dictionary_, strlen(dictionary_)),
      match_index_(0),
      search_dictionary_(dictionary_, strlen(dictionary_)),
      copied_moonpie_address_(0) {
    EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
    weasel_positions_[0] = 0;
    after_weasel_[0] = 0;
    moonpie_positions_[0] = 0;
    after_moonpie_[0] = 0;
  }

  virtual ~WeaselsToMoonpiesTest() { }

  static void SetUpTestCase() {
    MakeEachLetterABlock(dictionary_without_spaces_,
                         &dictionary_,
                         kCompressibleTestBlockSize,
                         false);
    MakeEachLetterABlock(target_without_spaces_,
                         &target_,
                         kCompressibleTestBlockSize,
                         false);
    MakeEachLetterABlock(weasel_text_without_spaces_,
                         &weasel_text_,
                         kCompressibleTestBlockSize,
                         true);
    MakeEachLetterABlock(moonpie_text_without_spaces_,
                         &moonpie_text_,
                         kCompressibleTestBlockSize,
                         true);
  }

  static void TearDownTestCase() {
    delete[] dictionary_;
    delete[] target_;
    delete[] weasel_text_;
    delete[] moonpie_text_;
  }

  // text must be NULL-terminated
  void EncodeText(const char* text, bool interleaved, bool target_matching) {
    interleaved_ = interleaved;
    VCDiffCodeTableWriter coder(interleaved);
    coder.Init(engine_.dictionary_size());
    engine_.Encode(text,
                   strlen(text),
                   target_matching,
                   &diff_output_string_,
                   &coder);
    actual_match_counts_ = coder.match_counts();
  }

  void Encode(bool interleaved, bool target_matching) {
    EncodeText(target_, interleaved, target_matching);
    VerifyHeader();
  }

  void VerifyHeader() {
    VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
  }

  void ExpectCopyForSize(size_t size, int mode) {
    ExpectCopyInstruction(static_cast<int>(size), mode);
  }

  void ExpectAddForSize(size_t size) {
    ExpectAddInstruction(static_cast<int>(size));
  }

  void ExpectAddressVarintForSize(size_t value) {
    ExpectAddressVarint(static_cast<int32_t>(value));
  }

  void FindNextMoonpie(bool include_trailing_spaces) {
    ++match_index_;
    SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
                                                     AfterLastWeasel()));
    if (CurrentWeaselPosition() == string::npos) {
      SetCurrentMoonpiePosition(string::npos);
    } else {
      SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
                                        + strlen(weasel_text_)
                                        + (include_trailing_spaces ?
                                            kTrailingSpaces : 0));
      SetCurrentMoonpiePosition(AfterLastMoonpie()
                                    + CurrentBoilerplateLength());
      SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
                                        + strlen(moonpie_text_)
                                        + (include_trailing_spaces ?
                                            kTrailingSpaces : 0));
    }
  }
  bool NoMoreMoonpies() const {
    return CurrentMoonpiePosition() == string::npos;
  }
  size_t CurrentWeaselPosition() const {
    return weasel_positions_[match_index_];
  }
  size_t LastWeaselPosition() const {
    return weasel_positions_[match_index_ - 1];
  }
  size_t CurrentMoonpiePosition() const {
    return moonpie_positions_[match_index_];
  }
  size_t LastMoonpiePosition() const {
    return moonpie_positions_[match_index_ - 1];
  }
  size_t AfterLastWeasel() const {
    CHECK_GE(match_index_, 1);
    return after_weasel_[match_index_ - 1];
  }
  size_t AfterPreviousWeasel() const {
    CHECK_GE(match_index_, 2);
    return after_weasel_[match_index_ - 2];
  }
  size_t AfterLastMoonpie() const {
    CHECK_GE(match_index_, 1);
    return after_moonpie_[match_index_ - 1];
  }
  size_t AfterPreviousMoonpie() const {
    CHECK_GE(match_index_, 2);
    return after_moonpie_[match_index_ - 2];
  }

  void SetCurrentWeaselPosition(size_t value) {
    weasel_positions_[match_index_] = value;
  }
  void SetCurrentAfterWeaselPosition(size_t value) {
    after_weasel_[match_index_] = value;
  }
  void SetCurrentMoonpiePosition(size_t value) {
    moonpie_positions_[match_index_] = value;
  }
  void SetCurrentAfterMoonpiePosition(size_t value) {
    after_moonpie_[match_index_] = value;
  }

  // Find the length of the text in between the "weasel" strings in the
  // compressible dictionary, which is the same as the text between
  // the "moon-pie" strings in the compressible target.
  size_t CurrentBoilerplateLength() const {
    CHECK_GE(match_index_, 1);
    return CurrentWeaselPosition() - AfterLastWeasel();
  }
  size_t DistanceFromLastWeasel() const {
    CHECK_GE(match_index_, 1);
    return CurrentWeaselPosition() - LastWeaselPosition();
  }
  size_t DistanceFromLastMoonpie() const {
    CHECK_GE(match_index_, 1);
    return CurrentMoonpiePosition() - LastMoonpiePosition();
  }
  size_t DistanceBetweenLastTwoWeasels() const {
    CHECK_GE(match_index_, 2);
    return AfterLastWeasel() - AfterPreviousWeasel();
  }
  size_t DistanceBetweenLastTwoMoonpies() const {
    CHECK_GE(match_index_, 2);
    return AfterLastMoonpie() - AfterPreviousMoonpie();
  }

  int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
  int UpdateCopyModeForMoonpie(int copy_mode) const;
  int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;

  void CopyBoilerplateAndAddMoonpie(int copy_mode);
  void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);

  static const char dictionary_without_spaces_[];
  static const char target_without_spaces_[];
  static const char weasel_text_without_spaces_[];
  static const char moonpie_text_without_spaces_[];

  static const char* dictionary_;
  static const char* target_;
  static const char* weasel_text_;
  static const char* moonpie_text_;

  const VCDiffEngine engine_;
  size_t weasel_positions_[128];
  size_t after_weasel_[128];
  size_t moonpie_positions_[128];
  size_t after_moonpie_[128];
  int match_index_;
  string search_dictionary_;
  size_t copied_moonpie_address_;
};

// Care is taken in the formulation of the dictionary
// to ensure that the surrounding letters do not match; for example,
// there are not two instances of the string "weasels".  Otherwise,
// the matching behavior would not be as predictable.
const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
  "<html>\n"
  "<head>\n"
  "<meta content=\"text/html; charset=ISO-8859-1\"\n"
  "http-equiv=\"content-type\">\n"
  "<title>All about weasels</title>\n"
  "</head>\n"
  "<!-- You will notice that the word \"weasel\" may be replaced"
  " with something else -->\n"
  "<body>\n"
  "<h1>All about the weasel: highly compressible HTML text</h1>"
  "<ul>\n"
  "<li>Don\'t look a gift weasel in its mouth.</li>\n"
  "<li>This item makes sure the next occurrence is found.</li>\n"
  "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
  "</ul>\n"
  "<br>\n"
  "</body>\n"
  "</html>\n";

const char WeaselsToMoonpiesTest::target_without_spaces_[] =
  "<html>\n"
  "<head>\n"
  "<meta content=\"text/html; charset=ISO-8859-1\"\n"
  "http-equiv=\"content-type\">\n"
  "<title>All about moon-pies</title>\n"
  "</head>\n"
  "<!-- You will notice that the word \"moon-pie\" may be replaced"
  " with something else -->\n"
  "<body>\n"
  "<h1>All about the moon-pie: highly compressible HTML text</h1>"
  "<ul>\n"
  "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
  "<li>This item makes sure the next occurrence is found.</li>\n"
  "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
  "</ul>\n"
  "<br>\n"
  "</body>\n"
  "</html>\n";

const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";

const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
const char* WeaselsToMoonpiesTest::target_ = NULL;
const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;

int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
    int copy_mode) const {
  size_t copy_address = 0;
  if (default_cache_.IsSelfMode(copy_mode)) {
    copy_address = AfterLastWeasel();
  } else if (default_cache_.IsNearMode(copy_mode)) {
    copy_address = DistanceBetweenLastTwoWeasels();
  } else if (default_cache_.IsSameMode(copy_mode)) {
    copy_address = AfterLastWeasel() % 256;
  }
  return static_cast<int32_t>(copy_address);
}

int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
  if (copy_mode == default_cache_.FirstSameMode()) {
    return default_cache_.FirstSameMode()
        + static_cast<int>((copied_moonpie_address_ / 256) % 3);
  } else {
    return copy_mode;
  }
}

int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
    int copy_mode) const {
  size_t copy_address = 0;
  if (default_cache_.IsHereMode(copy_mode)) {
    copy_address = DistanceFromLastMoonpie();
  } else if (default_cache_.IsNearMode(copy_mode)) {
    copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
  } else if (default_cache_.IsSameMode(copy_mode)) {
    copy_address = copied_moonpie_address_ % 256;
  }
  return static_cast<int32_t>(copy_address);
}

// Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
// in the encoding.
void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
  EXPECT_FALSE(NoMoreMoonpies());
  ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
  ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
  ExpectAddInstructionForStringLength(moonpie_text_);
  ExpectDataString(moonpie_text_);
}

// Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
// in the encoding.  The "moon-pie" text will be copied from the previously
// encoded target.
void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
    int copy_mode,
    int moonpie_copy_mode) {
  EXPECT_FALSE(NoMoreMoonpies());
  ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
  ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
  moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
  ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
  ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
                moonpie_copy_mode);
  copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
}

TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
  Encode(/* interleaved = */ true, /* target matching = */ false);
  FindNextMoonpie(false);
  // Expect all five "weasel"s to be replaced with "moon-pie"s
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
  FindNextMoonpie(false);
  EXPECT_TRUE(NoMoreMoonpies());
  ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
                    default_cache_.FirstNearMode());
  ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
  VerifySizes();
}

TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
  Encode(/* interleaved = */ true, /* target matching = */ true);
  // Expect all five "weasel"s to be replaced with "moon-pie"s.
  // Every "moon-pie" after the first one should be copied from the
  // previously encoded target text.
  FindNextMoonpie(false);
  CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
  FindNextMoonpie(true);
  CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
  FindNextMoonpie(true);
  CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
                                default_cache_.FirstSameMode());
  FindNextMoonpie(true);
  CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
                                VCD_HERE_MODE);
  FindNextMoonpie(true);
  CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
                                default_cache_.FirstSameMode());
  FindNextMoonpie(true);
  EXPECT_TRUE(NoMoreMoonpies());
  ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
                    default_cache_.FirstNearMode() + 3);
  ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
  VerifySizes();
}

}  //  anonymous namespace
}  //  namespace open-vcdiff