// Copyright 2008 Google Inc. // Author: Lincoln Smith // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include <config.h> #include "blockhash.h" #include <limits.h> // INT_MIN #include <string.h> // memcpy, memcmp, strlen #include <iostream> #include <memory> // auto_ptr #include "encodetable.h" #include "rolling_hash.h" #include "testing.h" namespace open_vcdiff { const int kBlockSize = BlockHash::kBlockSize; class BlockHashTest : public testing::Test { protected: static const int kTimingTestSize = 1 << 21; // 2M static const int kTimingTestIterations = 32; BlockHashTest() { dh_.reset(BlockHash::CreateDictionaryHash(sample_text, strlen(sample_text))); th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0)); EXPECT_TRUE(dh_.get() != NULL); EXPECT_TRUE(th_.get() != NULL); } // BlockHashTest is a friend to BlockHash. Expose the protected functions // that will be tested by the children of BlockHashTest. static bool BlockContentsMatch(const char* block1, const char* block2) { return BlockHash::BlockContentsMatch(block1, block2); } int FirstMatchingBlock(const BlockHash& block_hash, uint32_t hash_value, const char* block_ptr) const { return block_hash.FirstMatchingBlock(hash_value, block_ptr); } int NextMatchingBlock(const BlockHash& block_hash, int block_number, const char* block_ptr) const { return block_hash.NextMatchingBlock(block_number, block_ptr); } static int MatchingBytesToLeft(const char* source_match_start, const char* target_match_start, int max_bytes) { return BlockHash::MatchingBytesToLeft(source_match_start, target_match_start, max_bytes); } static int MatchingBytesToRight(const char* source_match_end, const char* target_match_end, int max_bytes) { return BlockHash::MatchingBytesToRight(source_match_end, target_match_end, max_bytes); } static int StringLengthAsInt(const char* s) { return static_cast<int>(strlen(s)); } void InitBlocksToDifferAtNthByte(int n) { CHECK(n < kBlockSize); memset(compare_buffer_1_, 0xBE, kTimingTestSize); memset(compare_buffer_2_, 0xBE, kTimingTestSize); for (int index = n; index < kTimingTestSize; index += kBlockSize) { compare_buffer_1_[index] = 0x00; compare_buffer_2_[index] = 0x01; } } void TestAndPrintTimesForCompareFunctions(bool should_be_identical); void TimingTestForBlocksThatDifferAtByte(int n) { InitBlocksToDifferAtNthByte(n); std::cout << "Comparing blocks that differ at byte " << n << std::endl; TestAndPrintTimesForCompareFunctions(false); } // Copy sample_text_without_spaces and search_string_without_spaces // into newly allocated sample_text and search_string buffers, // but pad them with space characters so that every character // in sample_text_without_spaces matches (kBlockSize - 1) // space characters in sample_text, followed by that character. // For example: // Since sample_text_without_spaces begins "The only thing"..., // if kBlockSize is 4, then 3 space characters will be inserted // between each letter of sample_text, as follows: // " T h e o n l y t h i n g"... // This makes testing simpler, because finding a kBlockSize-byte match // between the sample text and search string only depends on the // trailing letter in each block. static void MakeEachLetterABlock(const char* string_without_spaces, const char** result) { const size_t length_without_spaces = strlen(string_without_spaces); char* padded_text = new char[(kBlockSize * length_without_spaces) + 1]; memset(padded_text, ' ', kBlockSize * length_without_spaces); char* padded_text_ptr = padded_text + (kBlockSize - 1); for (size_t i = 0; i < length_without_spaces; ++i) { *padded_text_ptr = string_without_spaces[i]; padded_text_ptr += kBlockSize; } padded_text[kBlockSize * length_without_spaces] = '\0'; *result = padded_text; } static void SetUpTestCase() { MakeEachLetterABlock(sample_text_without_spaces, &sample_text); MakeEachLetterABlock(search_string_without_spaces, &search_string); MakeEachLetterABlock(search_string_altered_without_spaces, &search_string_altered); MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string); MakeEachLetterABlock(search_to_beginning_without_spaces, &search_to_beginning_string); MakeEachLetterABlock(sample_text_many_matches_without_spaces, &sample_text_many_matches); MakeEachLetterABlock(search_string_many_matches_without_spaces, &search_string_many_matches); MakeEachLetterABlock("y", &test_string_y); MakeEachLetterABlock("e", &test_string_e); char* new_test_string_unaligned_e = new char[kBlockSize]; memset(new_test_string_unaligned_e, ' ', kBlockSize); new_test_string_unaligned_e[kBlockSize - 2] = 'e'; test_string_unaligned_e = new_test_string_unaligned_e; char* new_test_string_all_Qs = new char[kBlockSize]; memset(new_test_string_all_Qs, 'Q', kBlockSize); test_string_all_Qs = new_test_string_all_Qs; hashed_y = RollingHash<kBlockSize>::Hash(test_string_y); hashed_e = RollingHash<kBlockSize>::Hash(test_string_e); hashed_f = RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]); hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e); hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs); } static void TearDownTestCase() { delete[] sample_text; delete[] search_string; delete[] search_string_altered; delete[] search_to_end_string; delete[] search_to_beginning_string; delete[] sample_text_many_matches; delete[] search_string_many_matches; delete[] test_string_y; delete[] test_string_e; delete[] test_string_unaligned_e; delete[] test_string_all_Qs; } // Each block in the sample text and search string is kBlockSize bytes long, // and consists of (kBlockSize - 1) space characters // followed by a single letter of text. // Block numbers of certain characters within the sample text: // All six occurrences of "e", in order. static const int block_of_first_e = 2; static const int block_of_second_e = 16; static const int block_of_third_e = 21; static const int block_of_fourth_e = 27; static const int block_of_fifth_e = 35; static const int block_of_sixth_e = 42; static const int block_of_y_in_only = 7; // The block number is multiplied by kBlockSize to arrive at the // index, which points to the (kBlockSize - 1) space characters before // the letter specified. // Indices of certain characters within the sample text. static const int index_of_first_e = block_of_first_e * kBlockSize; static const int index_of_fourth_e = block_of_fourth_e * kBlockSize; static const int index_of_sixth_e = block_of_sixth_e * kBlockSize; static const int index_of_y_in_only = block_of_y_in_only * kBlockSize; static const int index_of_space_before_fear_is_fear = 25 * kBlockSize; static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize; static const int index_of_i_in_fear_is_fear = 31 * kBlockSize; static const int index_of_space_before_fear_itself = 33 * kBlockSize; static const int index_of_space_before_itself = 38 * kBlockSize; static const int index_of_ababc = 4 * kBlockSize; // Indices of certain characters within the search strings. static const int index_of_second_w_in_what_we = 5 * kBlockSize; static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize; static const int index_of_f_in_fearsome = 16 * kBlockSize; static const int index_of_space_in_eat_itself = 12 * kBlockSize; static const int index_of_i_in_itself = 13 * kBlockSize; static const int index_of_t_in_use_the = 4 * kBlockSize; static const int index_of_o_in_online = 8 * kBlockSize; static const char sample_text_without_spaces[]; static const char search_string_without_spaces[]; static const char search_string_altered_without_spaces[]; static const char search_to_end_without_spaces[]; static const char search_to_beginning_without_spaces[]; static const char sample_text_many_matches_without_spaces[]; static const char search_string_many_matches_without_spaces[]; static const char* sample_text; static const char* search_string; static const char* search_string_altered; static const char* search_to_end_string; static const char* search_to_beginning_string; static const char* sample_text_many_matches; static const char* search_string_many_matches; static const char* test_string_y; static const char* test_string_e; static const char* test_string_all_Qs; static const char* test_string_unaligned_e; static uint32_t hashed_y; static uint32_t hashed_e; static uint32_t hashed_f; static uint32_t hashed_unaligned_e; static uint32_t hashed_all_Qs; // Boost scoped_ptr, if available, could be used instead of std::auto_ptr. std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup std::auto_ptr<BlockHash> th_; // hash table not populated; // used to test incremental adds BlockHash::Match best_match_; char* compare_buffer_1_; char* compare_buffer_2_; int prime_result_; }; #ifdef GTEST_HAS_DEATH_TEST typedef BlockHashTest BlockHashDeathTest; #endif // GTEST_HAS_DEATH_TEST // The C++ standard requires a separate definition of these static const values, // even though their initializers are given within the class definition. const int BlockHashTest::block_of_first_e; const int BlockHashTest::block_of_second_e; const int BlockHashTest::block_of_third_e; const int BlockHashTest::block_of_fourth_e; const int BlockHashTest::block_of_fifth_e; const int BlockHashTest::block_of_sixth_e; const int BlockHashTest::block_of_y_in_only; const int BlockHashTest::index_of_first_e; const int BlockHashTest::index_of_fourth_e; const int BlockHashTest::index_of_sixth_e; const int BlockHashTest::index_of_y_in_only; const int BlockHashTest::index_of_space_before_fear_is_fear; const int BlockHashTest::index_of_longest_match_ear_is_fear; const int BlockHashTest::index_of_i_in_fear_is_fear; const int BlockHashTest::index_of_space_before_fear_itself; const int BlockHashTest::index_of_space_before_itself; const int BlockHashTest::index_of_ababc; const int BlockHashTest::index_of_second_w_in_what_we; const int BlockHashTest::index_of_second_e_in_what_we_hear; const int BlockHashTest::index_of_f_in_fearsome; const int BlockHashTest::index_of_space_in_eat_itself; const int BlockHashTest::index_of_i_in_itself; const int BlockHashTest::index_of_t_in_use_the; const int BlockHashTest::index_of_o_in_online; const char BlockHashTest::sample_text_without_spaces[] = "The only thing we have to fear is fear itself"; const char BlockHashTest::search_string_without_spaces[] = "What we hear is fearsome"; const char BlockHashTest::search_string_altered_without_spaces[] = "Vhat ve hear is fearsomm"; const char BlockHashTest::search_to_end_without_spaces[] = "Pop will eat itself, eventually"; const char BlockHashTest::search_to_beginning_without_spaces[] = "Use The online dictionary"; const char BlockHashTest::sample_text_many_matches_without_spaces[] = "ababababcab"; const char BlockHashTest::search_string_many_matches_without_spaces[] = "ababc"; const char* BlockHashTest::sample_text = NULL; const char* BlockHashTest::search_string = NULL; const char* BlockHashTest::search_string_altered = NULL; const char* BlockHashTest::search_to_end_string = NULL; const char* BlockHashTest::search_to_beginning_string = NULL; const char* BlockHashTest::sample_text_many_matches = NULL; const char* BlockHashTest::search_string_many_matches = NULL; const char* BlockHashTest::test_string_y = NULL; const char* BlockHashTest::test_string_e = NULL; const char* BlockHashTest::test_string_unaligned_e = NULL; const char* BlockHashTest::test_string_all_Qs = NULL; uint32_t BlockHashTest::hashed_y = 0; uint32_t BlockHashTest::hashed_e = 0; uint32_t BlockHashTest::hashed_f = 0; uint32_t BlockHashTest::hashed_unaligned_e = 0; uint32_t BlockHashTest::hashed_all_Qs = 0; void BlockHashTest::TestAndPrintTimesForCompareFunctions( bool should_be_identical) { CHECK(compare_buffer_1_ != NULL); CHECK(compare_buffer_2_ != NULL); // Prime the memory cache. prime_result_ = memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize); const char* const block1_limit = &compare_buffer_1_[kTimingTestSize - kBlockSize]; int block_compare_words_result = 0; CycleTimer block_compare_words_timer; block_compare_words_timer.Start(); for (int i = 0; i < kTimingTestIterations; ++i) { const char* block1 = compare_buffer_1_; const char* block2 = compare_buffer_2_; while (block1 < block1_limit) { if (!BlockHash::BlockCompareWords(block1, block2)) { ++block_compare_words_result; } block1 += kBlockSize; block2 += kBlockSize; } } block_compare_words_timer.Stop(); double time_for_block_compare_words = static_cast<double>(block_compare_words_timer.GetInUsec()) / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); int block_contents_match_result = 0; CycleTimer block_contents_match_timer; block_contents_match_timer.Start(); for (int i = 0; i < kTimingTestIterations; ++i) { const char* block1 = compare_buffer_1_; const char* block2 = compare_buffer_2_; while (block1 < block1_limit) { if (!BlockHash::BlockContentsMatch(block1, block2)) { ++block_contents_match_result; } block1 += kBlockSize; block2 += kBlockSize; } } block_contents_match_timer.Stop(); double time_for_block_contents_match = static_cast<double>(block_contents_match_timer.GetInUsec()) / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); EXPECT_EQ(block_contents_match_result, block_compare_words_result); if (should_be_identical) { CHECK_EQ(0, block_compare_words_result); } else { CHECK_GT(block_compare_words_result, 0); } std::cout << "BlockHash::BlockCompareWords: " << time_for_block_compare_words << " us per operation" << std::endl; std::cout << "BlockHash::BlockContentsMatch: " << time_for_block_contents_match << " us per operation" << std::endl; if (time_for_block_compare_words > 0) { double percent_change = (((time_for_block_contents_match - time_for_block_compare_words) / time_for_block_compare_words) * 100.0); if (percent_change >= 0.0) { std::cout << "BlockContentsMatch is " << percent_change << "%" << " SLOWER than BlockCompareWords" << std::endl; } else { std::cout << "BlockContentsMatch is " << (-percent_change) << "%" << " FASTER than BlockCompareWords" << std::endl; } } #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS) // Only check timings for optimized build. There's plenty of margin: this // check will fail only if BlockContentsMatch is at least twice as slow as // BlockCompareWords. EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match); #endif // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS } // The two strings passed to BlockHash::MatchingBytesToLeft do have matching // characters -- in fact, they're the same string -- but since max_bytes is zero // or negative, BlockHash::MatchingBytesToLeft should not read from the strings // and should return 0. TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) { EXPECT_EQ(0, MatchingBytesToLeft( &search_string[index_of_f_in_fearsome], &search_string[index_of_f_in_fearsome], 0)); EXPECT_EQ(0, MatchingBytesToRight( &search_string[index_of_f_in_fearsome], &search_string[index_of_f_in_fearsome], 0)); } TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) { EXPECT_EQ(0, MatchingBytesToLeft( &search_string[index_of_f_in_fearsome], &search_string[index_of_f_in_fearsome], -1)); EXPECT_EQ(0, MatchingBytesToLeft( &search_string[index_of_f_in_fearsome], &search_string[index_of_f_in_fearsome], INT_MIN)); EXPECT_EQ(0, MatchingBytesToRight( &search_string[index_of_f_in_fearsome], &search_string[index_of_f_in_fearsome], -1)); EXPECT_EQ(0, MatchingBytesToRight( &search_string[index_of_f_in_fearsome], &search_string[index_of_f_in_fearsome], INT_MIN)); } TEST_F(BlockHashTest, MaxBytesOneMatch) { EXPECT_EQ(1, MatchingBytesToLeft( &search_string[index_of_f_in_fearsome], &search_string[index_of_f_in_fearsome], 1)); EXPECT_EQ(1, MatchingBytesToRight( &search_string[index_of_f_in_fearsome], &search_string[index_of_f_in_fearsome], 1)); } TEST_F(BlockHashTest, MaxBytesOneNoMatch) { EXPECT_EQ(0, MatchingBytesToLeft( &search_string[index_of_f_in_fearsome], &search_string[index_of_second_e_in_what_we_hear], 1)); EXPECT_EQ(0, MatchingBytesToRight( &search_string[index_of_f_in_fearsome], &search_string[index_of_second_e_in_what_we_hear - 1], 1)); } TEST_F(BlockHashTest, LeftLimitedByMaxBytes) { // The number of bytes that match between the original "we hear is fearsome" // and the altered "ve hear is fearsome". const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); const int max_bytes = expected_length - 1; EXPECT_EQ(max_bytes, MatchingBytesToLeft( &search_string[index_of_f_in_fearsome], &search_string_altered[index_of_f_in_fearsome], max_bytes)); } TEST_F(BlockHashTest, LeftNotLimited) { // The number of bytes that match between the original "we hear is fearsome" // and the altered "ve hear is fearsome". const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); const int max_bytes = expected_length + 1; EXPECT_EQ(expected_length, MatchingBytesToLeft( &search_string[index_of_f_in_fearsome], &search_string_altered[index_of_f_in_fearsome], max_bytes)); EXPECT_EQ(expected_length, MatchingBytesToLeft( &search_string[index_of_f_in_fearsome], &search_string_altered[index_of_f_in_fearsome], INT_MAX)); } TEST_F(BlockHashTest, RightLimitedByMaxBytes) { // The number of bytes that match between the original "fearsome" // and the altered "fearsomm". const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) + (kBlockSize - 1); // spacing between letters const int max_bytes = expected_length - 1; EXPECT_EQ(max_bytes, MatchingBytesToRight( &search_string[index_of_f_in_fearsome], &search_string_altered[index_of_f_in_fearsome], max_bytes)); } TEST_F(BlockHashTest, RightNotLimited) { // The number of bytes that match between the original "we hear is fearsome" // and the altered "ve hear is fearsome". const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) + (kBlockSize - 1); // spacing between letters const int max_bytes = expected_length + 1; EXPECT_EQ(expected_length, MatchingBytesToRight( &search_string[index_of_f_in_fearsome], &search_string_altered[index_of_f_in_fearsome], max_bytes)); EXPECT_EQ(expected_length, MatchingBytesToRight( &search_string[index_of_f_in_fearsome], &search_string_altered[index_of_f_in_fearsome], INT_MAX)); } // If this test fails in a non-x86 or non-gcc environment, consider adding // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and // Makefile.in, and reconstructing the Makefile. That will cause blockhash.cc // to use a special implementation (BlockCompareWords) to compare blocks // rather than using standard memcmp. TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) { compare_buffer_1_ = new char[kTimingTestSize]; compare_buffer_2_ = new char[kTimingTestSize]; // The value 0xBE is arbitrarily chosen. First test with identical contents // in the buffers, so that the comparison functions cannot short-circuit // and will return true. memset(compare_buffer_1_, 0xBE, kTimingTestSize); memset(compare_buffer_2_, 0xBE, kTimingTestSize); std::cout << "Comparing " << (kTimingTestSize / kBlockSize) << " identical values:" << std::endl; TestAndPrintTimesForCompareFunctions(true); // Now change one value in the middle of one buffer, so that the contents // are no longer the same. compare_buffer_1_[kTimingTestSize / 2] = 0x00; std::cout << "Comparing " << ((kTimingTestSize / kBlockSize) - 1) << " identical values" << " and one mismatch:" << std::endl; TestAndPrintTimesForCompareFunctions(false); // Set one of the bytes of each block to differ so that // none of the compare operations will return true, and run timing tests. // In practice, BlockHash::BlockContentsMatch will only be called // for two blocks whose hash values match, and the two most important // cases are: (1) the blocks are identical, or (2) none of their bytes match. TimingTestForBlocksThatDifferAtByte(0); TimingTestForBlocksThatDifferAtByte(1); TimingTestForBlocksThatDifferAtByte(kBlockSize / 2); TimingTestForBlocksThatDifferAtByte(kBlockSize - 1); delete[] compare_buffer_1_; delete[] compare_buffer_2_; } TEST_F(BlockHashTest, FindFailsBeforeHashing) { EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); } TEST_F(BlockHashTest, HashOneFindOne) { for (int i = 0; i <= index_of_y_in_only; ++i) { th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i])); } EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y, test_string_y)); EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y)); } TEST_F(BlockHashTest, HashAllFindOne) { EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y, test_string_y)); EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y)); } TEST_F(BlockHashTest, NonMatchingTextNotFound) { EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs)); } // Search for unaligned text. The test string is contained in the // sample text (unlike the non-matching string in NonMatchingTextNotFound, // above), but it is not aligned on a block boundary. FindMatchingBlock // will only work if the test string is aligned on a block boundary. // // " T h e o n l y" // ^^^^ Here is the test string // TEST_F(BlockHashTest, UnalignedTextNotFound) { EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e, test_string_unaligned_e)); } TEST_F(BlockHashTest, FindSixMatches) { EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, test_string_e)); EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e, test_string_e)); EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e, test_string_e)); EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e, test_string_e)); EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e, test_string_e)); EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e, test_string_e)); EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e)); // Starting over gives same result EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, test_string_e)); } TEST_F(BlockHashTest, AddRangeFindThreeMatches) { // Add hash values only for those characters before the fourth instance // of "e" in the sample text. Tests that the ending index // of AddAllBlocksThroughIndex() is not inclusive: only three matches // for "e" should be found. th_->AddAllBlocksThroughIndex(index_of_fourth_e); EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, test_string_e)); EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); // Starting over gives same result EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); } // Try indices that are not even multiples of the block size. // Add three ranges and verify the results after each // call to AddAllBlocksThroughIndex(). TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) { th_->AddAllBlocksThroughIndex(index_of_first_e + 1); EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); // Starting over gives same result EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); // Add the second range to expand the result set th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3); EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, test_string_e)); EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); // Starting over gives same result EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); // Add the third range to expand the result set th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, test_string_e)); EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); // Starting over gives same result EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); } #ifdef GTEST_HAS_DEATH_TEST TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) { th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, test_string_e)); EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); // Starting over gives same result EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); // These calls will produce DFATAL error messages, and will do nothing, // since the ranges have already been added. EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3), "<"); EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1), "<"); EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, test_string_e)); EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); // Starting over gives same result EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); } #endif // GTEST_HAS_DEATH_TEST TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) { th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text)); EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, test_string_e)); EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e, test_string_e)); EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e)); // Starting over gives same result EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, test_string_e)); } TEST_F(BlockHashTest, ZeroSizeSourceAccepted) { BlockHash zero_sized_hash(sample_text, 0, 0); EXPECT_EQ(true, zero_sized_hash.Init(true)); EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); } #ifdef GTEST_HAS_DEATH_TEST TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) { EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")), "invalid"); } TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) { BlockHash bh(sample_text, strlen(sample_text), 0); EXPECT_TRUE(bh.Init(false)); EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice"); } TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) { BlockHash bh(sample_text, strlen(sample_text), 0); EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e), "called before"); } TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) { EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(strlen(sample_text) + 1), "higher than end"); } #endif // GTEST_HAS_DEATH_TEST TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) { EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA")); } TEST_F(BlockHashTest, FindBestMatch) { dh_->FindBestMatch(hashed_f, &search_string[index_of_f_in_fearsome], search_string, strlen(search_string), &best_match_); EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset()); EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); // The match includes the spaces after the final character, // which is why (kBlockSize - 1) is added to the expected best size. EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), best_match_.size()); } TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) { BlockHash th2(sample_text, strlen(sample_text), 0x10000); th2.Init(true); // hash all blocks th2.FindBestMatch(hashed_f, &search_string[index_of_f_in_fearsome], search_string, strlen(search_string), &best_match_); // Offset should begin with dictionary_size EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear), best_match_.source_offset()); EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); // The match includes the spaces after the final character, // which is why (kBlockSize - 1) is added to the expected best size. EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), best_match_.size()); } TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) { // Hash the "i" in "fear itself" uint32_t hash_value = RollingHash<kBlockSize>::Hash( &search_to_end_string[index_of_i_in_itself]); dh_->FindBestMatch(hash_value, &search_to_end_string[index_of_i_in_itself], search_to_end_string, strlen(search_to_end_string), &best_match_); EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset()); EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset()); EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size()); } TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) { // Hash the "i" in "fear itself" uint32_t hash_value = RollingHash<kBlockSize>::Hash( &search_to_beginning_string[index_of_o_in_online]); dh_->FindBestMatch(hash_value, &search_to_beginning_string[index_of_o_in_online], search_to_beginning_string, strlen(search_to_beginning_string), &best_match_); EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset()); // The match includes the spaces after the final character, // which is why (kBlockSize - 1) is added to the expected best size. EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1), best_match_.size()); } TEST_F(BlockHashTest, BestMatchWithManyMatches) { BlockHash many_matches_hash(sample_text_many_matches, strlen(sample_text_many_matches), 0); EXPECT_TRUE(many_matches_hash.Init(true)); // Hash the " a" at the beginning of the search string "ababc" uint32_t hash_value = RollingHash<kBlockSize>::Hash(search_string_many_matches); many_matches_hash.FindBestMatch(hash_value, search_string_many_matches, search_string_many_matches, strlen(search_string_many_matches), &best_match_); EXPECT_EQ(index_of_ababc, best_match_.source_offset()); EXPECT_EQ(0, best_match_.target_offset()); EXPECT_EQ(strlen(search_string_many_matches), best_match_.size()); } TEST_F(BlockHashTest, HashCollisionFindsNoMatch) { char* collision_search_string = new char[strlen(search_string) + 1]; memcpy(collision_search_string, search_string, strlen(search_string) + 1); char* fearsome_location = &collision_search_string[index_of_f_in_fearsome]; // Tweak the collision string so that it has the same hash value // but different text. The last four characters of the search string // should be " f", and the bytes given below have the same hash value // as those characters. CHECK_GE(kBlockSize, 4); fearsome_location[kBlockSize - 4] = 0x84; fearsome_location[kBlockSize - 3] = 0xF1; fearsome_location[kBlockSize - 2] = 0x51; fearsome_location[kBlockSize - 1] = 0x00; EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location)); EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome], fearsome_location, kBlockSize)); // No match should be found this time. dh_->FindBestMatch(hashed_f, fearsome_location, collision_search_string, strlen(search_string), // since collision_search_string has embedded \0 &best_match_); EXPECT_EQ(-1, best_match_.source_offset()); EXPECT_EQ(-1, best_match_.target_offset()); EXPECT_EQ(0U, best_match_.size()); delete[] collision_search_string; } // If the footprint passed to FindBestMatch does not actually match // the search string, it should not find any matches. TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) { dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"! &search_string[index_of_f_in_fearsome], search_string, strlen(search_string), &best_match_); EXPECT_EQ(-1, best_match_.source_offset()); EXPECT_EQ(-1, best_match_.target_offset()); EXPECT_EQ(0U, best_match_.size()); } // Use a dictionary containing 1M copies of the letter 'Q', // and target data that also contains 1M Qs. If FindBestMatch // is not throttled to find a maximum number of matches, this // will take a very long time -- several seconds at least. // If this test appears to hang, it is because the throttling code // (see BlockHash::kMaxMatchesToCheck for details) is not working. TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) { const int kTestSize = 1 << 20; // 1M char* huge_dictionary = new char[kTestSize]; memset(huge_dictionary, 'Q', kTestSize); BlockHash huge_bh(huge_dictionary, kTestSize, 0); EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true)); char* huge_target = new char[kTestSize]; memset(huge_target, 'Q', kTestSize); CycleTimer timer; timer.Start(); huge_bh.FindBestMatch(hashed_all_Qs, huge_target + (kTestSize / 2), // middle of target huge_target, kTestSize, &best_match_); timer.Stop(); double elapsed_time_in_us = static_cast<double>(timer.GetInUsec()); std::cout << "Time to search for best match with 1M matches: " << elapsed_time_in_us << " us" << std::endl; // All blocks match the candidate block. FindBestMatch should have checked // a certain number of matches before giving up. The best match // should include at least half the source and target, since the candidate // block was in the middle of the target data. EXPECT_GT((kTestSize / 2), best_match_.source_offset()); EXPECT_GT((kTestSize / 2), best_match_.target_offset()); EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size()); EXPECT_GT(5000000, elapsed_time_in_us); // < 5 seconds #ifdef NDEBUG EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second #endif // NDEBUG delete[] huge_target; delete[] huge_dictionary; } #ifdef GTEST_HAS_DEATH_TEST TEST_F(BlockHashDeathTest, AddTooManyBlocks) { for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) { th_->AddOneIndexHash(i * kBlockSize, hashed_e); } // Didn't expect another block to be added EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text), hashed_e), "AddBlock"); } #endif // GTEST_HAS_DEATH_TEST } // namespace open_vcdiff