普通文本  |  937行  |  39.95 KB

// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <config.h>
#include "blockhash.h"
#include <limits.h>  // INT_MIN
#include <string.h>  // memcpy, memcmp, strlen
#include <iostream>
#include <memory>  // auto_ptr
#include "encodetable.h"
#include "rolling_hash.h"
#include "testing.h"

namespace open_vcdiff {

const int kBlockSize = BlockHash::kBlockSize;

class BlockHashTest : public testing::Test {
 protected:
  static const int kTimingTestSize = 1 << 21;  // 2M
  static const int kTimingTestIterations = 32;

  BlockHashTest() {
    dh_.reset(BlockHash::CreateDictionaryHash(sample_text,
                                              strlen(sample_text)));
    th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));
    EXPECT_TRUE(dh_.get() != NULL);
    EXPECT_TRUE(th_.get() != NULL);
  }

  // BlockHashTest is a friend to BlockHash.  Expose the protected functions
  // that will be tested by the children of BlockHashTest.
  static bool BlockContentsMatch(const char* block1, const char* block2) {
    return BlockHash::BlockContentsMatch(block1, block2);
  }

  int FirstMatchingBlock(const BlockHash& block_hash,
                         uint32_t hash_value,
                         const char* block_ptr) const {
    return block_hash.FirstMatchingBlock(hash_value, block_ptr);
  }

  int NextMatchingBlock(const BlockHash& block_hash,
                        int block_number,
                        const char* block_ptr) const {
    return block_hash.NextMatchingBlock(block_number, block_ptr);
  }

  static int MatchingBytesToLeft(const char* source_match_start,
                                 const char* target_match_start,
                                 int max_bytes) {
    return BlockHash::MatchingBytesToLeft(source_match_start,
                                          target_match_start,
                                          max_bytes);
  }

  static int MatchingBytesToRight(const char* source_match_end,
                                  const char* target_match_end,
                                  int max_bytes) {
    return BlockHash::MatchingBytesToRight(source_match_end,
                                           target_match_end,
                                           max_bytes);
  }

  static int StringLengthAsInt(const char* s) {
    return static_cast<int>(strlen(s));
  }

  void InitBlocksToDifferAtNthByte(int n) {
    CHECK(n < kBlockSize);
    memset(compare_buffer_1_, 0xBE, kTimingTestSize);
    memset(compare_buffer_2_, 0xBE, kTimingTestSize);
    for (int index = n; index < kTimingTestSize; index += kBlockSize) {
      compare_buffer_1_[index] = 0x00;
      compare_buffer_2_[index] = 0x01;
    }
  }

  void TestAndPrintTimesForCompareFunctions(bool should_be_identical);

  void TimingTestForBlocksThatDifferAtByte(int n) {
    InitBlocksToDifferAtNthByte(n);
    std::cout << "Comparing blocks that differ at byte " << n << std::endl;
    TestAndPrintTimesForCompareFunctions(false);
  }

  // Copy sample_text_without_spaces and search_string_without_spaces
  // into newly allocated sample_text and search_string buffers,
  // but pad them with space characters so that every character
  // in sample_text_without_spaces matches (kBlockSize - 1)
  // space characters in sample_text, followed by that character.
  // For example:
  // Since sample_text_without_spaces begins "The only thing"...,
  // if kBlockSize is 4, then 3 space characters will be inserted
  // between each letter of sample_text, as follows:
  // "   T   h   e       o   n   l   y       t   h   i   n   g"...
  // This makes testing simpler, because finding a kBlockSize-byte match
  // between the sample text and search string only depends on the
  // trailing letter in each block.
  static void MakeEachLetterABlock(const char* string_without_spaces,
                                   const char** result) {
    const size_t length_without_spaces = strlen(string_without_spaces);
    char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];
    memset(padded_text, ' ', kBlockSize * length_without_spaces);
    char* padded_text_ptr = padded_text + (kBlockSize - 1);
    for (size_t i = 0; i < length_without_spaces; ++i) {
      *padded_text_ptr = string_without_spaces[i];
      padded_text_ptr += kBlockSize;
    }
    padded_text[kBlockSize * length_without_spaces] = '\0';
    *result = padded_text;
  }

  static void SetUpTestCase() {
    MakeEachLetterABlock(sample_text_without_spaces, &sample_text);
    MakeEachLetterABlock(search_string_without_spaces, &search_string);
    MakeEachLetterABlock(search_string_altered_without_spaces,
                         &search_string_altered);
    MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);
    MakeEachLetterABlock(search_to_beginning_without_spaces,
                         &search_to_beginning_string);
    MakeEachLetterABlock(sample_text_many_matches_without_spaces,
                         &sample_text_many_matches);
    MakeEachLetterABlock(search_string_many_matches_without_spaces,
                         &search_string_many_matches);
    MakeEachLetterABlock("y", &test_string_y);
    MakeEachLetterABlock("e", &test_string_e);
    char* new_test_string_unaligned_e = new char[kBlockSize];
    memset(new_test_string_unaligned_e, ' ', kBlockSize);
    new_test_string_unaligned_e[kBlockSize - 2] = 'e';
    test_string_unaligned_e = new_test_string_unaligned_e;
    char* new_test_string_all_Qs = new char[kBlockSize];
    memset(new_test_string_all_Qs, 'Q', kBlockSize);
    test_string_all_Qs = new_test_string_all_Qs;
    hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);
    hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);
    hashed_f =
        RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);
    hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);
    hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);
  }

  static void TearDownTestCase() {
    delete[] sample_text;
    delete[] search_string;
    delete[] search_string_altered;
    delete[] search_to_end_string;
    delete[] search_to_beginning_string;
    delete[] sample_text_many_matches;
    delete[] search_string_many_matches;
    delete[] test_string_y;
    delete[] test_string_e;
    delete[] test_string_unaligned_e;
    delete[] test_string_all_Qs;
  }

  // Each block in the sample text and search string is kBlockSize bytes long,
  // and consists of (kBlockSize - 1) space characters
  // followed by a single letter of text.

  // Block numbers of certain characters within the sample text:
  // All six occurrences of "e", in order.
  static const int block_of_first_e = 2;
  static const int block_of_second_e = 16;
  static const int block_of_third_e = 21;
  static const int block_of_fourth_e = 27;
  static const int block_of_fifth_e = 35;
  static const int block_of_sixth_e = 42;

  static const int block_of_y_in_only = 7;
  // The block number is multiplied by kBlockSize to arrive at the
  // index, which points to the (kBlockSize - 1) space characters before
  // the letter specified.
  // Indices of certain characters within the sample text.
  static const int index_of_first_e = block_of_first_e * kBlockSize;
  static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;
  static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;
  static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;
  static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;
  static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;
  static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;
  static const int index_of_space_before_fear_itself = 33 * kBlockSize;
  static const int index_of_space_before_itself = 38 * kBlockSize;
  static const int index_of_ababc = 4 * kBlockSize;

  // Indices of certain characters within the search strings.
  static const int index_of_second_w_in_what_we = 5 * kBlockSize;
  static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;
  static const int index_of_f_in_fearsome = 16 * kBlockSize;
  static const int index_of_space_in_eat_itself =  12 * kBlockSize;
  static const int index_of_i_in_itself = 13 * kBlockSize;
  static const int index_of_t_in_use_the = 4 * kBlockSize;
  static const int index_of_o_in_online = 8 * kBlockSize;

  static const char sample_text_without_spaces[];
  static const char search_string_without_spaces[];
  static const char search_string_altered_without_spaces[];
  static const char search_to_end_without_spaces[];
  static const char search_to_beginning_without_spaces[];
  static const char sample_text_many_matches_without_spaces[];
  static const char search_string_many_matches_without_spaces[];

  static const char* sample_text;
  static const char* search_string;
  static const char* search_string_altered;
  static const char* search_to_end_string;
  static const char* search_to_beginning_string;
  static const char* sample_text_many_matches;
  static const char* search_string_many_matches;

  static const char* test_string_y;
  static const char* test_string_e;
  static const char* test_string_all_Qs;
  static const char* test_string_unaligned_e;

  static uint32_t hashed_y;
  static uint32_t hashed_e;
  static uint32_t hashed_f;
  static uint32_t hashed_unaligned_e;
  static uint32_t hashed_all_Qs;

  // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.
  std::auto_ptr<const BlockHash> dh_;  // hash table is populated at startup
  std::auto_ptr<BlockHash> th_;  // hash table not populated;
                                // used to test incremental adds

  BlockHash::Match best_match_;
  char* compare_buffer_1_;
  char* compare_buffer_2_;
  int prime_result_;
};

#ifdef GTEST_HAS_DEATH_TEST
typedef BlockHashTest BlockHashDeathTest;
#endif  // GTEST_HAS_DEATH_TEST

// The C++ standard requires a separate definition of these static const values,
// even though their initializers are given within the class definition.
const int BlockHashTest::block_of_first_e;
const int BlockHashTest::block_of_second_e;
const int BlockHashTest::block_of_third_e;
const int BlockHashTest::block_of_fourth_e;
const int BlockHashTest::block_of_fifth_e;
const int BlockHashTest::block_of_sixth_e;
const int BlockHashTest::block_of_y_in_only;
const int BlockHashTest::index_of_first_e;
const int BlockHashTest::index_of_fourth_e;
const int BlockHashTest::index_of_sixth_e;
const int BlockHashTest::index_of_y_in_only;
const int BlockHashTest::index_of_space_before_fear_is_fear;
const int BlockHashTest::index_of_longest_match_ear_is_fear;
const int BlockHashTest::index_of_i_in_fear_is_fear;
const int BlockHashTest::index_of_space_before_fear_itself;
const int BlockHashTest::index_of_space_before_itself;
const int BlockHashTest::index_of_ababc;
const int BlockHashTest::index_of_second_w_in_what_we;
const int BlockHashTest::index_of_second_e_in_what_we_hear;
const int BlockHashTest::index_of_f_in_fearsome;
const int BlockHashTest::index_of_space_in_eat_itself;
const int BlockHashTest::index_of_i_in_itself;
const int BlockHashTest::index_of_t_in_use_the;
const int BlockHashTest::index_of_o_in_online;

const char BlockHashTest::sample_text_without_spaces[] =
    "The only thing we have to fear is fear itself";

const char BlockHashTest::search_string_without_spaces[] =
    "What we hear is fearsome";

const char BlockHashTest::search_string_altered_without_spaces[] =
    "Vhat ve hear is fearsomm";

const char BlockHashTest::search_to_end_without_spaces[] =
    "Pop will eat itself, eventually";

const char BlockHashTest::search_to_beginning_without_spaces[] =
    "Use The online dictionary";

const char BlockHashTest::sample_text_many_matches_without_spaces[] =
    "ababababcab";

const char BlockHashTest::search_string_many_matches_without_spaces[] =
    "ababc";

const char* BlockHashTest::sample_text = NULL;
const char* BlockHashTest::search_string = NULL;
const char* BlockHashTest::search_string_altered = NULL;
const char* BlockHashTest::search_to_end_string = NULL;
const char* BlockHashTest::search_to_beginning_string = NULL;
const char* BlockHashTest::sample_text_many_matches = NULL;
const char* BlockHashTest::search_string_many_matches = NULL;

const char* BlockHashTest::test_string_y = NULL;
const char* BlockHashTest::test_string_e = NULL;
const char* BlockHashTest::test_string_unaligned_e = NULL;
const char* BlockHashTest::test_string_all_Qs = NULL;

uint32_t BlockHashTest::hashed_y = 0;
uint32_t BlockHashTest::hashed_e = 0;
uint32_t BlockHashTest::hashed_f = 0;
uint32_t BlockHashTest::hashed_unaligned_e = 0;
uint32_t BlockHashTest::hashed_all_Qs = 0;

void BlockHashTest::TestAndPrintTimesForCompareFunctions(
    bool should_be_identical) {
  CHECK(compare_buffer_1_ != NULL);
  CHECK(compare_buffer_2_ != NULL);
  // Prime the memory cache.
  prime_result_ =
      memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);
  const char* const block1_limit =
      &compare_buffer_1_[kTimingTestSize - kBlockSize];
  int block_compare_words_result = 0;
  CycleTimer block_compare_words_timer;
  block_compare_words_timer.Start();
  for (int i = 0; i < kTimingTestIterations; ++i) {
    const char* block1 = compare_buffer_1_;
    const char* block2 = compare_buffer_2_;
    while (block1 < block1_limit) {
      if (!BlockHash::BlockCompareWords(block1, block2)) {
        ++block_compare_words_result;
      }
      block1 += kBlockSize;
      block2 += kBlockSize;
    }
  }
  block_compare_words_timer.Stop();
  double time_for_block_compare_words =
      static_cast<double>(block_compare_words_timer.GetInUsec())
      / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
  int block_contents_match_result = 0;
  CycleTimer block_contents_match_timer;
  block_contents_match_timer.Start();
  for (int i = 0; i < kTimingTestIterations; ++i) {
    const char* block1 = compare_buffer_1_;
    const char* block2 = compare_buffer_2_;
    while (block1 < block1_limit) {
      if (!BlockHash::BlockContentsMatch(block1, block2)) {
        ++block_contents_match_result;
      }
      block1 += kBlockSize;
      block2 += kBlockSize;
    }
  }
  block_contents_match_timer.Stop();
  double time_for_block_contents_match =
      static_cast<double>(block_contents_match_timer.GetInUsec())
      / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
  EXPECT_EQ(block_contents_match_result, block_compare_words_result);
  if (should_be_identical) {
    CHECK_EQ(0, block_compare_words_result);
  } else {
    CHECK_GT(block_compare_words_result, 0);
  }
  std::cout << "BlockHash::BlockCompareWords: "
            << time_for_block_compare_words << " us per operation" << std::endl;
  std::cout << "BlockHash::BlockContentsMatch: "
            << time_for_block_contents_match << " us per operation"
            << std::endl;
  if (time_for_block_compare_words > 0) {
    double percent_change =
        (((time_for_block_contents_match - time_for_block_compare_words)
          / time_for_block_compare_words) * 100.0);
    if (percent_change >= 0.0) {
      std::cout << "BlockContentsMatch is " << percent_change << "%"
                << " SLOWER than BlockCompareWords" << std::endl;
    } else {
      std::cout << "BlockContentsMatch is " << (-percent_change) << "%"
                << " FASTER than BlockCompareWords" << std::endl;
    }
  }
#if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS)
  // Only check timings for optimized build.  There's plenty of margin: this
  // check will fail only if BlockContentsMatch is at least twice as slow as
  // BlockCompareWords.
  EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match);
#endif  // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS
}

// The two strings passed to BlockHash::MatchingBytesToLeft do have matching
// characters -- in fact, they're the same string -- but since max_bytes is zero
// or negative, BlockHash::MatchingBytesToLeft should not read from the strings
// and should return 0.
TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {
  EXPECT_EQ(0, MatchingBytesToLeft(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_f_in_fearsome],
      0));
  EXPECT_EQ(0, MatchingBytesToRight(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_f_in_fearsome],
      0));
}

TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {
  EXPECT_EQ(0, MatchingBytesToLeft(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_f_in_fearsome],
      -1));
  EXPECT_EQ(0, MatchingBytesToLeft(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_f_in_fearsome],
      INT_MIN));
  EXPECT_EQ(0, MatchingBytesToRight(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_f_in_fearsome],
      -1));
  EXPECT_EQ(0, MatchingBytesToRight(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_f_in_fearsome],
      INT_MIN));
}

TEST_F(BlockHashTest, MaxBytesOneMatch) {
  EXPECT_EQ(1, MatchingBytesToLeft(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_f_in_fearsome],
      1));
  EXPECT_EQ(1, MatchingBytesToRight(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_f_in_fearsome],
      1));
}

TEST_F(BlockHashTest, MaxBytesOneNoMatch) {
  EXPECT_EQ(0, MatchingBytesToLeft(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_second_e_in_what_we_hear],
      1));
  EXPECT_EQ(0, MatchingBytesToRight(
      &search_string[index_of_f_in_fearsome],
      &search_string[index_of_second_e_in_what_we_hear - 1],
      1));
}

TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {
  // The number of bytes that match between the original "we hear is fearsome"
  // and the altered "ve hear is fearsome".
  const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
  const int max_bytes = expected_length - 1;
  EXPECT_EQ(max_bytes, MatchingBytesToLeft(
      &search_string[index_of_f_in_fearsome],
      &search_string_altered[index_of_f_in_fearsome],
      max_bytes));
}

TEST_F(BlockHashTest, LeftNotLimited) {
  // The number of bytes that match between the original "we hear is fearsome"
  // and the altered "ve hear is fearsome".
  const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
  const int max_bytes = expected_length + 1;
  EXPECT_EQ(expected_length, MatchingBytesToLeft(
      &search_string[index_of_f_in_fearsome],
      &search_string_altered[index_of_f_in_fearsome],
      max_bytes));
  EXPECT_EQ(expected_length, MatchingBytesToLeft(
      &search_string[index_of_f_in_fearsome],
      &search_string_altered[index_of_f_in_fearsome],
      INT_MAX));
}

TEST_F(BlockHashTest, RightLimitedByMaxBytes) {
  // The number of bytes that match between the original "fearsome"
  // and the altered "fearsomm".
  const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
                              + (kBlockSize - 1);  // spacing between letters
  const int max_bytes = expected_length - 1;
  EXPECT_EQ(max_bytes, MatchingBytesToRight(
      &search_string[index_of_f_in_fearsome],
      &search_string_altered[index_of_f_in_fearsome],
      max_bytes));
}

TEST_F(BlockHashTest, RightNotLimited) {
  // The number of bytes that match between the original "we hear is fearsome"
  // and the altered "ve hear is fearsome".
  const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
                              + (kBlockSize - 1);  // spacing between letters
  const int max_bytes = expected_length + 1;
  EXPECT_EQ(expected_length, MatchingBytesToRight(
      &search_string[index_of_f_in_fearsome],
      &search_string_altered[index_of_f_in_fearsome],
      max_bytes));
  EXPECT_EQ(expected_length, MatchingBytesToRight(
      &search_string[index_of_f_in_fearsome],
      &search_string_altered[index_of_f_in_fearsome],
      INT_MAX));
}

// If this test fails in a non-x86 or non-gcc environment, consider adding
// -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and
// Makefile.in, and reconstructing the Makefile.  That will cause blockhash.cc
// to use a special implementation (BlockCompareWords) to compare blocks
// rather than using standard memcmp.
TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) {
  compare_buffer_1_ = new char[kTimingTestSize];
  compare_buffer_2_ = new char[kTimingTestSize];

  // The value 0xBE is arbitrarily chosen.  First test with identical contents
  // in the buffers, so that the comparison functions cannot short-circuit
  // and will return true.
  memset(compare_buffer_1_, 0xBE, kTimingTestSize);
  memset(compare_buffer_2_, 0xBE, kTimingTestSize);
  std::cout << "Comparing "
            << (kTimingTestSize / kBlockSize) << " identical values:"
            << std::endl;
  TestAndPrintTimesForCompareFunctions(true);

  // Now change one value in the middle of one buffer, so that the contents
  // are no longer the same.
  compare_buffer_1_[kTimingTestSize / 2] = 0x00;
  std::cout << "Comparing "
            << ((kTimingTestSize / kBlockSize) - 1) << " identical values"
            << " and one mismatch:" << std::endl;
  TestAndPrintTimesForCompareFunctions(false);

  // Set one of the bytes of each block to differ so that
  // none of the compare operations will return true, and run timing tests.
  // In practice, BlockHash::BlockContentsMatch will only be called
  // for two blocks whose hash values match, and the two most important
  // cases are: (1) the blocks are identical, or (2) none of their bytes match.
  TimingTestForBlocksThatDifferAtByte(0);
  TimingTestForBlocksThatDifferAtByte(1);
  TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);
  TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);

  delete[] compare_buffer_1_;
  delete[] compare_buffer_2_;
}

TEST_F(BlockHashTest, FindFailsBeforeHashing) {
  EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
}

TEST_F(BlockHashTest, HashOneFindOne) {
  for (int i = 0; i <= index_of_y_in_only; ++i) {
    th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));
  }
  EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,
                                                   test_string_y));
  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));
}

TEST_F(BlockHashTest, HashAllFindOne) {
  EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,
                                                   test_string_y));
  EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));
}

TEST_F(BlockHashTest, NonMatchingTextNotFound) {
  EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));
}

// Search for unaligned text.  The test string is contained in the
// sample text (unlike the non-matching string in NonMatchingTextNotFound,
// above), but it is not aligned on a block boundary.  FindMatchingBlock
// will only work if the test string is aligned on a block boundary.
//
//    "   T   h   e       o   n   l   y"
//              ^^^^ Here is the test string
//
TEST_F(BlockHashTest, UnalignedTextNotFound) {
  EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,
                                   test_string_unaligned_e));
}

TEST_F(BlockHashTest, FindSixMatches) {
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,
                                                test_string_e));
  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,
                                                test_string_e));
  EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,
                                                test_string_e));
  EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));

  // Starting over gives same result
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
                                                 test_string_e));
}

TEST_F(BlockHashTest, AddRangeFindThreeMatches) {
  // Add hash values only for those characters before the fourth instance
  // of "e" in the sample text.  Tests that the ending index
  // of AddAllBlocksThroughIndex() is not inclusive: only three matches
  // for "e" should be found.
  th_->AddAllBlocksThroughIndex(index_of_fourth_e);
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                test_string_e));
  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));

  // Starting over gives same result
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
}

// Try indices that are not even multiples of the block size.
// Add three ranges and verify the results after each
// call to AddAllBlocksThroughIndex().
TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {
  th_->AddAllBlocksThroughIndex(index_of_first_e + 1);
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));

  // Starting over gives same result
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));

  // Add the second range to expand the result set
  th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                test_string_e));
  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));

  // Starting over gives same result
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));

  // Add the third range to expand the result set
  th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);

  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                test_string_e));
  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
                                                 test_string_e));
  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

  // Starting over gives same result
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
}

#ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
  th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);

  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                test_string_e));
  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
                                                 test_string_e));
  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

  // Starting over gives same result
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));

  // These calls will produce DFATAL error messages, and will do nothing,
  // since the ranges have already been added.
  EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
                     "<");
  EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
                     "<");

  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                test_string_e));
  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
                                                 test_string_e));
  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

  // Starting over gives same result
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
}
#endif  // GTEST_HAS_DEATH_TEST

TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {
  th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                test_string_e));
  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
                                                 test_string_e));
  EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,
                                                test_string_e));
  EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,
                                                test_string_e));
  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));

  // Starting over gives same result
  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                 test_string_e));
}

TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {
  BlockHash zero_sized_hash(sample_text, 0, 0);
  EXPECT_EQ(true, zero_sized_hash.Init(true));
  EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
}

#ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {
  EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, "    ")),
                     "invalid");
}

TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) {
  BlockHash bh(sample_text, strlen(sample_text), 0);
  EXPECT_TRUE(bh.Init(false));
  EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice");
}

TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) {
  BlockHash bh(sample_text, strlen(sample_text), 0);
  EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e),
                     "called before");
}

TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) {
  EXPECT_DEBUG_DEATH(
      th_->AddAllBlocksThroughIndex(static_cast<int>(strlen(sample_text) + 1)),
      "higher than end");
}
#endif  // GTEST_HAS_DEATH_TEST

TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {
  EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));
}

TEST_F(BlockHashTest, FindBestMatch) {
  dh_->FindBestMatch(hashed_f,
                     &search_string[index_of_f_in_fearsome],
                     search_string,
                     strlen(search_string),
                     &best_match_);
  EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());
  EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
  // The match includes the spaces after the final character,
  // which is why (kBlockSize - 1) is added to the expected best size.
  EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
            best_match_.size());
}

TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {
  BlockHash th2(sample_text, strlen(sample_text), 0x10000);
  th2.Init(true);  // hash all blocks
  th2.FindBestMatch(hashed_f,
                    &search_string[index_of_f_in_fearsome],
                    search_string,
                    strlen(search_string),
                    &best_match_);
  // Offset should begin with dictionary_size
  EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),
            best_match_.source_offset());
  EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
  // The match includes the spaces after the final character,
  // which is why (kBlockSize - 1) is added to the expected best size.
  EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
            best_match_.size());
}

TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {
  // Hash the "i" in "fear itself"
  uint32_t hash_value = RollingHash<kBlockSize>::Hash(
      &search_to_end_string[index_of_i_in_itself]);
  dh_->FindBestMatch(hash_value,
                     &search_to_end_string[index_of_i_in_itself],
                     search_to_end_string,
                     strlen(search_to_end_string),
                     &best_match_);
  EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());
  EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());
  EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());
}

TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {
  // Hash the "i" in "fear itself"
  uint32_t hash_value = RollingHash<kBlockSize>::Hash(
      &search_to_beginning_string[index_of_o_in_online]);
  dh_->FindBestMatch(hash_value,
                     &search_to_beginning_string[index_of_o_in_online],
                     search_to_beginning_string,
                     strlen(search_to_beginning_string),
                     &best_match_);
  EXPECT_EQ(0, best_match_.source_offset());  // beginning of dictionary
  EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());
  // The match includes the spaces after the final character,
  // which is why (kBlockSize - 1) is added to the expected best size.
  EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),
            best_match_.size());
}

TEST_F(BlockHashTest, BestMatchWithManyMatches) {
  BlockHash many_matches_hash(sample_text_many_matches,
                              strlen(sample_text_many_matches),
                              0);
  EXPECT_TRUE(many_matches_hash.Init(true));
  // Hash the "   a" at the beginning of the search string "ababc"
  uint32_t hash_value =
      RollingHash<kBlockSize>::Hash(search_string_many_matches);
  many_matches_hash.FindBestMatch(hash_value,
                                  search_string_many_matches,
                                  search_string_many_matches,
                                  strlen(search_string_many_matches),
                                  &best_match_);
  EXPECT_EQ(index_of_ababc, best_match_.source_offset());
  EXPECT_EQ(0, best_match_.target_offset());
  EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());
}

TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {
  char* collision_search_string = new char[strlen(search_string) + 1];
  memcpy(collision_search_string, search_string, strlen(search_string) + 1);
  char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];

  // Tweak the collision string so that it has the same hash value
  // but different text.  The last four characters of the search string
  // should be "   f", and the bytes given below have the same hash value
  // as those characters.
  CHECK_GE(kBlockSize, 4);
  fearsome_location[kBlockSize - 4] = 0x84;
  fearsome_location[kBlockSize - 3] = 0xF1;
  fearsome_location[kBlockSize - 2] = 0x51;
  fearsome_location[kBlockSize - 1] = 0x00;
  EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));
  EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],
                      fearsome_location,
                      kBlockSize));
  // No match should be found this time.
  dh_->FindBestMatch(hashed_f,
      fearsome_location,
      collision_search_string,
      strlen(search_string),  // since collision_search_string has embedded \0
      &best_match_);
  EXPECT_EQ(-1, best_match_.source_offset());
  EXPECT_EQ(-1, best_match_.target_offset());
  EXPECT_EQ(0U, best_match_.size());
  delete[] collision_search_string;
}

// If the footprint passed to FindBestMatch does not actually match
// the search string, it should not find any matches.
TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {
  dh_->FindBestMatch(hashed_e,  // Using hashed value of "e" instead of "f"!
                     &search_string[index_of_f_in_fearsome],
                     search_string,
                     strlen(search_string),
                     &best_match_);
  EXPECT_EQ(-1, best_match_.source_offset());
  EXPECT_EQ(-1, best_match_.target_offset());
  EXPECT_EQ(0U, best_match_.size());
}

// Use a dictionary containing 1M copies of the letter 'Q',
// and target data that also contains 1M Qs.  If FindBestMatch
// is not throttled to find a maximum number of matches, this
// will take a very long time -- several seconds at least.
// If this test appears to hang, it is because the throttling code
// (see BlockHash::kMaxMatchesToCheck for details) is not working.
TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {
  const int kTestSize = 1 << 20;  // 1M
  char* huge_dictionary = new char[kTestSize];
  memset(huge_dictionary, 'Q', kTestSize);
  BlockHash huge_bh(huge_dictionary, kTestSize, 0);
  EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));
  char* huge_target = new char[kTestSize];
  memset(huge_target, 'Q', kTestSize);
  CycleTimer timer;
  timer.Start();
  huge_bh.FindBestMatch(hashed_all_Qs,
                        huge_target + (kTestSize / 2),  // middle of target
                        huge_target,
                        kTestSize,
                        &best_match_);
  timer.Stop();
  double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());
  std::cout << "Time to search for best match with 1M matches: "
            << elapsed_time_in_us << " us" << std::endl;
  // All blocks match the candidate block.  FindBestMatch should have checked
  // a certain number of matches before giving up.  The best match
  // should include at least half the source and target, since the candidate
  // block was in the middle of the target data.
  EXPECT_GT((kTestSize / 2), best_match_.source_offset());
  EXPECT_GT((kTestSize / 2), best_match_.target_offset());
  EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());
  EXPECT_GT(5000000, elapsed_time_in_us);  // < 5 seconds
#ifdef NDEBUG
  EXPECT_GT(1000000, elapsed_time_in_us);  // < 1 second
#endif  // NDEBUG
  delete[] huge_target;
  delete[] huge_dictionary;
}

#ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest, AddTooManyBlocks) {
  for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {
    th_->AddOneIndexHash(i * kBlockSize, hashed_e);
  }
  // Didn't expect another block to be added
  EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),
                                          hashed_e),
                     "AddBlock");
}
#endif  // GTEST_HAS_DEATH_TEST

}  //  namespace open_vcdiff