// Copyright 2008 Google Inc. // Author: Lincoln Smith // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include <config.h> #include "vcdiffengine.h" #include <string.h> // memset, strlen #include <algorithm> #include <string> #include <vector> #include "addrcache.h" #include "blockhash.h" #include "encodetable.h" #include "google/output_string.h" #include "rolling_hash.h" #include "testing.h" #include "varint_bigendian.h" #include "vcdiff_defs.h" namespace open_vcdiff { namespace { class VCDiffEngineTestBase : public testing::Test { protected: typedef std::string string; // Some common definitions and helper functions used in the various tests // for VCDiffEngine. static const int kBlockSize = VCDiffEngine::kMinimumMatchSize; VCDiffEngineTestBase() : interleaved_(false), diff_output_string_(&diff_), verify_position_(0), saved_total_size_position_(0), saved_delta_encoding_position_(0), saved_section_sizes_position_(0), data_bytes_(0), instruction_bytes_(0), address_bytes_(0) { } virtual ~VCDiffEngineTestBase() { } virtual void TearDown() { VerifyMatchCounts(); } // Copy string_without_spaces into newly allocated result buffer, // but pad its contents with space characters so that every character // in string_without_spaces corresponds to (block_size - 1) // spaces in the result, followed by that character. // For example: // If string_without_spaces begins "The only thing"... and block_size is 4, // then 3 space characters will be inserted // between each letter in the result, as follows: // " T h e o n l y t h i n g"... // This makes testing simpler, because finding a block_size-byte match // between the dictionary and target only depends on the // trailing letter in each block. // If no_initial_padding is true, then the first letter will not have // spaces added in front of it. static void MakeEachLetterABlock(const char* string_without_spaces, const char** result, int block_size, bool no_initial_padding) { const size_t length_without_spaces = strlen(string_without_spaces); char* padded_text = new char[(block_size * length_without_spaces) + 1]; memset(padded_text, ' ', block_size * length_without_spaces); char* padded_text_ptr = padded_text; if (!no_initial_padding) { padded_text_ptr += block_size - 1; } for (size_t i = 0; i < length_without_spaces; ++i) { *padded_text_ptr = string_without_spaces[i]; padded_text_ptr += block_size; } *(padded_text_ptr - block_size + 1) = '\0'; *result = padded_text; } // These functions iterate through the decoded output and expect // simple elements: bytes or variable-length integers. void ExpectByte(char byte) { EXPECT_GT(diff_.size(), verify_position_); EXPECT_EQ(byte, diff_[verify_position_]); ++verify_position_; } size_t ExpectVarint(int32_t expected_value) { EXPECT_GT(diff_.size(), verify_position_); const char* const original_position = &diff_[verify_position_]; const char* new_position = original_position; const size_t expected_length = VarintBE<int32_t>::Length(expected_value); int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position); EXPECT_LE(0, parsed_value); size_t parsed_length = new_position - original_position; EXPECT_EQ(expected_value, parsed_value); EXPECT_EQ(expected_length, parsed_length); verify_position_ += parsed_length; return parsed_length; } size_t ExpectSize(size_t size) { return ExpectVarint(static_cast<int32_t>(size)); } size_t ExpectStringLength(const char* s) { return ExpectSize(strlen(s)); } void SkipVarint() { EXPECT_GT(diff_.size(), verify_position_); const char* const original_position = &diff_[verify_position_]; const char* new_position = original_position; VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position); size_t parsed_length = new_position - original_position; verify_position_ += parsed_length; } void ExpectDataByte(char byte) { ExpectByte(byte); if (interleaved_) { ++instruction_bytes_; } else { ++data_bytes_; } } void ExpectDataString(const char *expected_string) { const char* ptr = expected_string; while (*ptr) { ExpectDataByte(*ptr); ++ptr; } } void ExpectDataStringWithBlockSpacing(const char *expected_string, bool trailing_spaces) { const char* ptr = expected_string; while (*ptr) { for (int i = 0; i < (kBlockSize - 1); ++i) { ExpectDataByte(' '); } ExpectDataByte(*ptr); ++ptr; } if (trailing_spaces) { for (int i = 0; i < (kBlockSize - 1); ++i) { ExpectDataByte(' '); } } } void ExpectInstructionByte(char byte) { ExpectByte(byte); ++instruction_bytes_; } void ExpectInstructionVarint(int32_t value) { instruction_bytes_ += ExpectVarint(value); } void ExpectAddressByte(char byte) { ExpectByte(byte); if (interleaved_) { ++instruction_bytes_; } else { ++address_bytes_; } } void ExpectAddressVarint(int32_t value) { if (interleaved_) { instruction_bytes_ += ExpectVarint(value); } else { address_bytes_ += ExpectVarint(value); } } // The following functions leverage the fact that the encoder uses // the default code table and cache sizes. They are able to search for // instructions of a particular size. The logic for mapping from // instruction type, mode, and size to opcode value is very different here // from the logic used in encodetable.{h,cc}, so hopefully // this version will help validate that the other is correct. // This version uses conditional statements, while encodetable.h // looks up values in a mapping table. void ExpectAddress(int32_t address, int copy_mode) { if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) { ExpectAddressVarint(address); } else { ExpectAddressByte(address); } } void ExpectAddInstruction(int size) { if (size <= 18) { ExpectInstructionByte(0x01 + size); } else { ExpectInstructionByte(0x01); ExpectInstructionVarint(size); } } void ExpectCopyInstruction(int size, int mode) { if ((size >= 4) && (size <= 16)) { ExpectInstructionByte(0x10 + (0x10 * mode) + size); } else { ExpectInstructionByte(0x13 + (0x10 * mode)); ExpectInstructionVarint(size); } ExpectMatch(size); } bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) { if (!default_cache_.IsSameMode(copy_mode) && (add_size <= 4) && (copy_size >= 4) && (copy_size <= 6)) { ExpectInstructionByte(0x9C + (0x0C * copy_mode) + (0x03 * add_size) + copy_size); ExpectMatch(copy_size); return true; } else if (default_cache_.IsSameMode(copy_mode) && (add_size <= 4) && (copy_size == 4)) { ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size); ExpectMatch(copy_size); return true; } else { ExpectAddInstruction(add_size); return false; } } void ExpectAddInstructionForStringLength(const char* s) { ExpectAddInstruction(static_cast<int>(strlen(s))); } // Call this function before beginning to iterate through the diff string // using the Expect... functions. // text must be NULL-terminated. void VerifyHeaderForDictionaryAndTargetText(const char* dictionary, const char* target_text) { ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary) ExpectStringLength(dictionary); ExpectByte(0x00); // Source segment position: start of dictionary saved_total_size_position_ = verify_position_; SkipVarint(); // Length of the delta encoding saved_delta_encoding_position_ = verify_position_; ExpectStringLength(target_text); ExpectByte(0x00); // Delta_indicator (no compression) saved_section_sizes_position_ = verify_position_; SkipVarint(); // length of data for ADDs and RUNs SkipVarint(); // length of instructions section SkipVarint(); // length of addresses for COPYs } // Call this function before beginning to iterating through the entire // diff string using the Expect... functions. It makes sure that the // size totals in the window header match the number of bytes that // were parsed. void VerifySizes() { EXPECT_EQ(verify_position_, diff_.size()); const size_t delta_encoding_size = verify_position_ - saved_delta_encoding_position_; verify_position_ = saved_total_size_position_; ExpectSize(delta_encoding_size); verify_position_ = saved_section_sizes_position_; ExpectSize(data_bytes_); ExpectSize(instruction_bytes_); ExpectSize(address_bytes_); } void ExpectMatch(size_t match_size) { if (match_size >= expected_match_counts_.size()) { // Be generous to avoid resizing again expected_match_counts_.resize(match_size * 2, 0); } ++expected_match_counts_[match_size]; } void VerifyMatchCounts() { EXPECT_TRUE(std::equal(expected_match_counts_.begin(), expected_match_counts_.end(), actual_match_counts_.begin())); } bool interleaved_; string diff_; OutputString<string> diff_output_string_; size_t verify_position_; size_t saved_total_size_position_; size_t saved_delta_encoding_position_; size_t saved_section_sizes_position_; size_t data_bytes_; size_t instruction_bytes_; size_t address_bytes_; VCDiffAddressCache default_cache_; // Used for finding mode values std::vector<int> expected_match_counts_; std::vector<int> actual_match_counts_; }; class VCDiffEngineTest : public VCDiffEngineTestBase { protected: VCDiffEngineTest() : engine_(dictionary_, strlen(dictionary_)) { EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); } virtual ~VCDiffEngineTest() { } static void SetUpTestCase() { MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_, kBlockSize, false); MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false); } static void TearDownTestCase() { delete[] dictionary_; delete[] target_; } void EncodeNothing(bool interleaved, bool target_matching) { interleaved_ = interleaved; VCDiffCodeTableWriter coder(interleaved); coder.Init(engine_.dictionary_size()); engine_.Encode("", 0, target_matching, &diff_output_string_, &coder); EXPECT_TRUE(diff_.empty()); actual_match_counts_ = coder.match_counts(); } // text must be NULL-terminated void EncodeText(const char* text, bool interleaved, bool target_matching) { interleaved_ = interleaved; VCDiffCodeTableWriter coder(interleaved); coder.Init(engine_.dictionary_size()); engine_.Encode(text, strlen(text), target_matching, &diff_output_string_, &coder); actual_match_counts_ = coder.match_counts(); } void Encode(bool interleaved, bool target_matching) { EncodeText(target_, interleaved, target_matching); VerifyHeader(); } void VerifyHeader() { VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); } static const char dictionary_without_spaces_[]; static const char target_without_spaces_[]; static const char* dictionary_; static const char* target_; const VCDiffEngine engine_; }; #ifdef GTEST_HAS_DEATH_TEST typedef VCDiffEngineTest VCDiffEngineDeathTest; #endif // GTEST_HAS_DEATH_TEST const char VCDiffEngineTest::dictionary_without_spaces_[] = "The only thing we have to fear is fear itself"; const char VCDiffEngineTest::target_without_spaces_[] = "What we hear is fearsome"; const char* VCDiffEngineTest::dictionary_ = NULL; const char* VCDiffEngineTest::target_ = NULL; #ifdef GTEST_HAS_DEATH_TEST TEST_F(VCDiffEngineDeathTest, InitCalledTwice) { EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()), "twice"); } #endif // GTEST_HAS_DEATH_TEST TEST_F(VCDiffEngineTest, EngineEncodeNothing) { EncodeNothing(/* interleaved = */ false, /* target matching = */ false); } TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) { EncodeNothing(/* interleaved = */ true, /* target matching = */ false); } TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) { EncodeNothing(/* interleaved = */ false, /* target matching = */ true); } TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) { EncodeNothing(/* interleaved = */ true, /* target matching = */ true); } TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) { const char* small_text = " "; EncodeText(small_text, /* interleaved = */ false, /* target matching = */ false); VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); // Data for ADDs ExpectDataString(small_text); // Instructions and sizes ExpectAddInstructionForStringLength(small_text); } TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) { const char* small_text = " "; EncodeText(small_text, /* interleaved = */ true, /* target matching = */ false); VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); // Interleaved section ExpectAddInstructionForStringLength(small_text); ExpectDataString(small_text); } TEST_F(VCDiffEngineTest, EngineEncodeSampleText) { Encode(/* interleaved = */ false, /* target matching = */ false); // Data for ADDs ExpectDataStringWithBlockSpacing("W", false); ExpectDataByte('t'); ExpectDataByte('s'); ExpectDataByte('m'); // Instructions and sizes if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, VCD_SELF_MODE)) { ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); } ExpectAddInstruction(1); ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); ExpectCopyInstruction(11 * kBlockSize, default_cache_.FirstNearMode()); if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); } if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); } // Addresses for COPY ExpectAddressVarint(18 * kBlockSize); // "ha" ExpectAddressVarint(14 * kBlockSize); // " we h" ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" VerifySizes(); } TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) { Encode(/* interleaved = */ true, /* target matching = */ false); // Interleaved section if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, VCD_SELF_MODE)) { ExpectDataStringWithBlockSpacing("W", false); ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); } else { ExpectDataStringWithBlockSpacing("W", false); } ExpectAddressVarint(18 * kBlockSize); // "ha" ExpectAddInstruction(1); ExpectDataByte('t'); ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); ExpectAddressVarint(14 * kBlockSize); // " we h" ExpectCopyInstruction(11 * kBlockSize, default_cache_.FirstNearMode()); ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { ExpectDataByte('s'); ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); } else { ExpectDataByte('s'); } ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { ExpectDataByte('m'); ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); } else { ExpectDataByte('m'); } ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" VerifySizes(); } TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) { Encode(/* interleaved = */ false, /* target matching = */ true); // Data for ADDs ExpectDataStringWithBlockSpacing("W", false); ExpectDataByte('t'); ExpectDataByte('s'); ExpectDataByte('m'); // Instructions and sizes if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, VCD_SELF_MODE)) { ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); } ExpectAddInstruction(1); ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); ExpectCopyInstruction(11 * kBlockSize, default_cache_.FirstNearMode()); if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); } if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); } // Addresses for COPY ExpectAddressVarint(18 * kBlockSize); // "ha" ExpectAddressVarint(14 * kBlockSize); // " we h" ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" VerifySizes(); } TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) { Encode(/* interleaved = */ true, /* target matching = */ false); // Interleaved section if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, VCD_SELF_MODE)) { ExpectDataStringWithBlockSpacing("W", false); ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); } else { ExpectDataStringWithBlockSpacing("W", false); } ExpectAddressVarint(18 * kBlockSize); // "ha" ExpectAddInstruction(1); ExpectDataByte('t'); ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); ExpectAddressVarint(14 * kBlockSize); // " we h" ExpectCopyInstruction(11 * kBlockSize, default_cache_.FirstNearMode()); ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { ExpectDataByte('s'); ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); } else { ExpectDataByte('s'); } ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { ExpectDataByte('m'); ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); } else { ExpectDataByte('m'); } ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" VerifySizes(); } // This test case takes a dictionary containing several instances of the string // "weasel", and a target string which is identical to the dictionary // except that all instances of "weasel" have been replaced with the string // "moon-pie". It tests that COPY instructions are generated for all // boilerplate text (that is, the text between the "moon-pie" instances in // the target) and, if target matching is enabled, that each instance of // "moon-pie" (except the first one) is encoded using a COPY instruction // rather than an ADD. class WeaselsToMoonpiesTest : public VCDiffEngineTestBase { protected: // kCompressibleTestBlockSize: // The size of the block to create for each letter in the // dictionary and search string for the "compressible text" test. // See MakeEachLetterABlock, below. // If we use kCompressibleTestBlockSize = kBlockSize, then the // encoder will find one match per unique letter in the HTML text. // There are too many examples of "<" in the text for the encoder // to iterate through them all, and some matches are not found. // If we use kCompressibleTextBlockSize = 1, then the boilerplate // text between "weasel" strings in the dictionary and "moon-pie" // strings in the target may not be long enough to be found by // the encoder's block-hash algorithm. A good value, that will give // reproducible results across all block sizes, will be somewhere // in between these extremes. static const int kCompressibleTestBlockSize = kBlockSize / 4; static const int kTrailingSpaces = kCompressibleTestBlockSize - 1; WeaselsToMoonpiesTest() : engine_(dictionary_, strlen(dictionary_)), match_index_(0), search_dictionary_(dictionary_, strlen(dictionary_)), copied_moonpie_address_(0) { EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); weasel_positions_[0] = 0; after_weasel_[0] = 0; moonpie_positions_[0] = 0; after_moonpie_[0] = 0; } virtual ~WeaselsToMoonpiesTest() { } static void SetUpTestCase() { MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_, kCompressibleTestBlockSize, false); MakeEachLetterABlock(target_without_spaces_, &target_, kCompressibleTestBlockSize, false); MakeEachLetterABlock(weasel_text_without_spaces_, &weasel_text_, kCompressibleTestBlockSize, true); MakeEachLetterABlock(moonpie_text_without_spaces_, &moonpie_text_, kCompressibleTestBlockSize, true); } static void TearDownTestCase() { delete[] dictionary_; delete[] target_; delete[] weasel_text_; delete[] moonpie_text_; } // text must be NULL-terminated void EncodeText(const char* text, bool interleaved, bool target_matching) { interleaved_ = interleaved; VCDiffCodeTableWriter coder(interleaved); coder.Init(engine_.dictionary_size()); engine_.Encode(text, strlen(text), target_matching, &diff_output_string_, &coder); actual_match_counts_ = coder.match_counts(); } void Encode(bool interleaved, bool target_matching) { EncodeText(target_, interleaved, target_matching); VerifyHeader(); } void VerifyHeader() { VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); } void ExpectCopyForSize(size_t size, int mode) { ExpectCopyInstruction(static_cast<int>(size), mode); } void ExpectAddForSize(size_t size) { ExpectAddInstruction(static_cast<int>(size)); } void ExpectAddressVarintForSize(size_t value) { ExpectAddressVarint(static_cast<int32_t>(value)); } void FindNextMoonpie(bool include_trailing_spaces) { ++match_index_; SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_, AfterLastWeasel())); if (CurrentWeaselPosition() == string::npos) { SetCurrentMoonpiePosition(string::npos); } else { SetCurrentAfterWeaselPosition(CurrentWeaselPosition() + strlen(weasel_text_) + (include_trailing_spaces ? kTrailingSpaces : 0)); SetCurrentMoonpiePosition(AfterLastMoonpie() + CurrentBoilerplateLength()); SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition() + strlen(moonpie_text_) + (include_trailing_spaces ? kTrailingSpaces : 0)); } } bool NoMoreMoonpies() const { return CurrentMoonpiePosition() == string::npos; } size_t CurrentWeaselPosition() const { return weasel_positions_[match_index_]; } size_t LastWeaselPosition() const { return weasel_positions_[match_index_ - 1]; } size_t CurrentMoonpiePosition() const { return moonpie_positions_[match_index_]; } size_t LastMoonpiePosition() const { return moonpie_positions_[match_index_ - 1]; } size_t AfterLastWeasel() const { CHECK_GE(match_index_, 1); return after_weasel_[match_index_ - 1]; } size_t AfterPreviousWeasel() const { CHECK_GE(match_index_, 2); return after_weasel_[match_index_ - 2]; } size_t AfterLastMoonpie() const { CHECK_GE(match_index_, 1); return after_moonpie_[match_index_ - 1]; } size_t AfterPreviousMoonpie() const { CHECK_GE(match_index_, 2); return after_moonpie_[match_index_ - 2]; } void SetCurrentWeaselPosition(size_t value) { weasel_positions_[match_index_] = value; } void SetCurrentAfterWeaselPosition(size_t value) { after_weasel_[match_index_] = value; } void SetCurrentMoonpiePosition(size_t value) { moonpie_positions_[match_index_] = value; } void SetCurrentAfterMoonpiePosition(size_t value) { after_moonpie_[match_index_] = value; } // Find the length of the text in between the "weasel" strings in the // compressible dictionary, which is the same as the text between // the "moon-pie" strings in the compressible target. size_t CurrentBoilerplateLength() const { CHECK_GE(match_index_, 1); return CurrentWeaselPosition() - AfterLastWeasel(); } size_t DistanceFromLastWeasel() const { CHECK_GE(match_index_, 1); return CurrentWeaselPosition() - LastWeaselPosition(); } size_t DistanceFromLastMoonpie() const { CHECK_GE(match_index_, 1); return CurrentMoonpiePosition() - LastMoonpiePosition(); } size_t DistanceBetweenLastTwoWeasels() const { CHECK_GE(match_index_, 2); return AfterLastWeasel() - AfterPreviousWeasel(); } size_t DistanceBetweenLastTwoMoonpies() const { CHECK_GE(match_index_, 2); return AfterLastMoonpie() - AfterPreviousMoonpie(); } int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const; int UpdateCopyModeForMoonpie(int copy_mode) const; int32_t FindMoonpieAddressForCopyMode(int copy_mode) const; void CopyBoilerplateAndAddMoonpie(int copy_mode); void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode); static const char dictionary_without_spaces_[]; static const char target_without_spaces_[]; static const char weasel_text_without_spaces_[]; static const char moonpie_text_without_spaces_[]; static const char* dictionary_; static const char* target_; static const char* weasel_text_; static const char* moonpie_text_; const VCDiffEngine engine_; size_t weasel_positions_[128]; size_t after_weasel_[128]; size_t moonpie_positions_[128]; size_t after_moonpie_[128]; int match_index_; string search_dictionary_; size_t copied_moonpie_address_; }; // Care is taken in the formulation of the dictionary // to ensure that the surrounding letters do not match; for example, // there are not two instances of the string "weasels". Otherwise, // the matching behavior would not be as predictable. const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] = "<html>\n" "<head>\n" "<meta content=\"text/html; charset=ISO-8859-1\"\n" "http-equiv=\"content-type\">\n" "<title>All about weasels</title>\n" "</head>\n" "<!-- You will notice that the word \"weasel\" may be replaced" " with something else -->\n" "<body>\n" "<h1>All about the weasel: highly compressible HTML text</h1>" "<ul>\n" "<li>Don\'t look a gift weasel in its mouth.</li>\n" "<li>This item makes sure the next occurrence is found.</li>\n" "<li>Don\'t count your weasel, before it\'s hatched.</li>\n" "</ul>\n" "<br>\n" "</body>\n" "</html>\n"; const char WeaselsToMoonpiesTest::target_without_spaces_[] = "<html>\n" "<head>\n" "<meta content=\"text/html; charset=ISO-8859-1\"\n" "http-equiv=\"content-type\">\n" "<title>All about moon-pies</title>\n" "</head>\n" "<!-- You will notice that the word \"moon-pie\" may be replaced" " with something else -->\n" "<body>\n" "<h1>All about the moon-pie: highly compressible HTML text</h1>" "<ul>\n" "<li>Don\'t look a gift moon-pie in its mouth.</li>\n" "<li>This item makes sure the next occurrence is found.</li>\n" "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n" "</ul>\n" "<br>\n" "</body>\n" "</html>\n"; const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel"; const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie"; const char* WeaselsToMoonpiesTest::dictionary_ = NULL; const char* WeaselsToMoonpiesTest::target_ = NULL; const char* WeaselsToMoonpiesTest::weasel_text_ = NULL; const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL; int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode( int copy_mode) const { size_t copy_address = 0; if (default_cache_.IsSelfMode(copy_mode)) { copy_address = AfterLastWeasel(); } else if (default_cache_.IsNearMode(copy_mode)) { copy_address = DistanceBetweenLastTwoWeasels(); } else if (default_cache_.IsSameMode(copy_mode)) { copy_address = AfterLastWeasel() % 256; } return static_cast<int32_t>(copy_address); } int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const { if (copy_mode == default_cache_.FirstSameMode()) { return default_cache_.FirstSameMode() + static_cast<int>((copied_moonpie_address_ / 256) % 3); } else { return copy_mode; } } int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode( int copy_mode) const { size_t copy_address = 0; if (default_cache_.IsHereMode(copy_mode)) { copy_address = DistanceFromLastMoonpie(); } else if (default_cache_.IsNearMode(copy_mode)) { copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces; } else if (default_cache_.IsSameMode(copy_mode)) { copy_address = copied_moonpie_address_ % 256; } return static_cast<int32_t>(copy_address); } // Expect one dictionary instance of "weasel" to be replaced with "moon-pie" // in the encoding. void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) { EXPECT_FALSE(NoMoreMoonpies()); ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); ExpectAddInstructionForStringLength(moonpie_text_); ExpectDataString(moonpie_text_); } // Expect one dictionary instance of "weasel" to be replaced with "moon-pie" // in the encoding. The "moon-pie" text will be copied from the previously // encoded target. void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie( int copy_mode, int moonpie_copy_mode) { EXPECT_FALSE(NoMoreMoonpies()); ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode); ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode); ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode), moonpie_copy_mode); copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition(); } TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) { Encode(/* interleaved = */ true, /* target matching = */ false); FindNextMoonpie(false); // Expect all five "weasel"s to be replaced with "moon-pie"s CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); FindNextMoonpie(false); CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE); FindNextMoonpie(false); CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1); FindNextMoonpie(false); CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2); FindNextMoonpie(false); CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3); FindNextMoonpie(false); EXPECT_TRUE(NoMoreMoonpies()); ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), default_cache_.FirstNearMode()); ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); VerifySizes(); } TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) { Encode(/* interleaved = */ true, /* target matching = */ true); // Expect all five "weasel"s to be replaced with "moon-pie"s. // Every "moon-pie" after the first one should be copied from the // previously encoded target text. FindNextMoonpie(false); CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); FindNextMoonpie(true); CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE); FindNextMoonpie(true); CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, default_cache_.FirstSameMode()); FindNextMoonpie(true); CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3, VCD_HERE_MODE); FindNextMoonpie(true); CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, default_cache_.FirstSameMode()); FindNextMoonpie(true); EXPECT_TRUE(NoMoreMoonpies()); ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), default_cache_.FirstNearMode() + 3); ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); VerifySizes(); } } // anonymous namespace } // namespace open-vcdiff