// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/variations/entropy_provider.h"

#include <cmath>
#include <limits>
#include <numeric>

#include "base/basictypes.h"
#include "base/guid.h"
#include "base/memory/scoped_ptr.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "components/variations/metrics_util.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace metrics {

namespace {

// Size of the low entropy source to use for the permuted entropy provider
// in tests.
const size_t kMaxLowEntropySize = 8000;

// Field trial names used in unit tests.
const char* const kTestTrialNames[] = { "TestTrial", "AnotherTestTrial",
                                        "NewTabButton" };

// Computes the Chi-Square statistic for |values| assuming they follow a uniform
// distribution, where each entry has expected value |expected_value|.
//
// The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
// value and E is the expected value.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double sum = 0;
  for (size_t i = 0; i < values.size(); ++i) {
    const double delta = values[i] - expected_value;
    sum += (delta * delta) / expected_value;
  }
  return sum;
}

// Computes SHA1-based entropy for the given |trial_name| based on
// |entropy_source|
double GenerateSHA1Entropy(const std::string& entropy_source,
                           const std::string& trial_name) {
  SHA1EntropyProvider sha1_provider(entropy_source);
  return sha1_provider.GetEntropyForTrial(trial_name, 0);
}

// Generates permutation-based entropy for the given |trial_name| based on
// |entropy_source| which must be in the range [0, entropy_max).
double GeneratePermutedEntropy(uint16 entropy_source,
                               size_t entropy_max,
                               const std::string& trial_name) {
  PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
  return permuted_provider.GetEntropyForTrial(trial_name, 0);
}

// Helper interface for testing used to generate entropy values for a given
// field trial. Unlike EntropyProvider, which keeps the low/high entropy source
// value constant and generates entropy for different trial names, instances
// of TrialEntropyGenerator keep the trial name constant and generate low/high
// entropy source values internally to produce each output entropy value.
class TrialEntropyGenerator {
 public:
  virtual ~TrialEntropyGenerator() {}
  virtual double GenerateEntropyValue() const = 0;
};

// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
// entropy corresponding to a low entropy source).
class SHA1EntropyGenerator : public TrialEntropyGenerator {
 public:
  explicit SHA1EntropyGenerator(const std::string& trial_name)
      : trial_name_(trial_name) {
  }

  virtual ~SHA1EntropyGenerator() {
  }

  virtual double GenerateEntropyValue() const OVERRIDE {
    // Use a random GUID + 13 additional bits of entropy to match how the
    // SHA1EntropyProvider is used in metrics_service.cc.
    const int low_entropy_source =
        static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
    const std::string high_entropy_source =
        base::GenerateGUID() + base::IntToString(low_entropy_source);
    return GenerateSHA1Entropy(high_entropy_source, trial_name_);
  }

 private:
  std::string trial_name_;

  DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
};

// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
// using 13-bit low entropy source values.
class PermutedEntropyGenerator : public TrialEntropyGenerator {
 public:
  explicit PermutedEntropyGenerator(const std::string& trial_name)
      : mapping_(kMaxLowEntropySize) {
    // Note: Given a trial name, the computed mapping will be the same.
    // As a performance optimization, pre-compute the mapping once per trial
    // name and index into it for each entropy value.
    const uint32 randomization_seed = HashName(trial_name);
    internal::PermuteMappingUsingRandomizationSeed(randomization_seed,
                                                   &mapping_);
  }

  virtual ~PermutedEntropyGenerator() {
  }

  virtual double GenerateEntropyValue() const OVERRIDE {
    const int low_entropy_source =
        static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
    return mapping_[low_entropy_source] /
           static_cast<double>(kMaxLowEntropySize);
  }

 private:
  std::vector<uint16> mapping_;

  DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
};

// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
// of Fit Test.
void PerformEntropyUniformityTest(
    const std::string& trial_name,
    const TrialEntropyGenerator& entropy_generator) {
  // Number of buckets in the simulated field trials.
  const size_t kBucketCount = 20;
  // Max number of iterations to perform before giving up and failing.
  const size_t kMaxIterationCount = 100000;
  // The number of iterations to perform before each time the statistical
  // significance of the results is checked.
  const size_t kCheckIterationCount = 10000;
  // This is the Chi-Square threshold from the Chi-Square statistic table for
  // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
  // level. See: http://www.medcalc.org/manual/chi-square-table.php
  const double kChiSquareThreshold = 43.82;

  std::vector<int> distribution(kBucketCount);

  for (size_t i = 1; i <= kMaxIterationCount; ++i) {
    const double entropy_value = entropy_generator.GenerateEntropyValue();
    const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
    ASSERT_LT(bucket, kBucketCount);
    distribution[bucket] += 1;

    // After |kCheckIterationCount| iterations, compute the Chi-Square
    // statistic of the distribution. If the resulting statistic is greater
    // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
    // that the observed samples do not follow a uniform distribution.
    //
    // However, since 99.9% would still result in a false negative every
    // 1000 runs of the test, do not treat it as a failure (else the test
    // will be flaky). Instead, perform additional iterations to determine
    // if the distribution will converge, up to |kMaxIterationCount|.
    if ((i % kCheckIterationCount) == 0) {
      const double expected_value_per_bucket =
          static_cast<double>(i) / kBucketCount;
      const double chi_square =
          ComputeChiSquare(distribution, expected_value_per_bucket);
      if (chi_square < kChiSquareThreshold)
        break;

      // If |i == kMaxIterationCount|, the Chi-Square statistic did not
      // converge after |kMaxIterationCount|.
      EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
          trial_name << " with chi_square = " << chi_square <<
          " after " << kMaxIterationCount << " iterations.";
    }
  }
}

}  // namespace

TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
  // Simply asserts that two trials using one-time randomization
  // that have different names, normally generate different results.
  //
  // Note that depending on the one-time random initialization, they
  // _might_ actually give the same result, but we know that given
  // the particular client_id we use for unit tests they won't.
  base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
  const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrial(
          "one", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
      base::FieldTrialList::FactoryGetFieldTrial(
          "two", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
  };

  for (size_t i = 0; i < arraysize(trials); ++i) {
    for (int j = 0; j < 100; ++j)
      trials[i]->AppendGroup(std::string(), 1);
  }

  // The trials are most likely to give different results since they have
  // different names.
  EXPECT_NE(trials[0]->group(), trials[1]->group());
  EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
}

TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
  // Simply asserts that two trials using one-time randomization
  // that have different names, normally generate different results.
  //
  // Note that depending on the one-time random initialization, they
  // _might_ actually give the same result, but we know that given
  // the particular client_id we use for unit tests they won't.
  base::FieldTrialList field_trial_list(
      new PermutedEntropyProvider(1234, kMaxLowEntropySize));
  const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrial(
          "one", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
      base::FieldTrialList::FactoryGetFieldTrial(
          "two", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
  };

  for (size_t i = 0; i < arraysize(trials); ++i) {
    for (int j = 0; j < 100; ++j)
      trials[i]->AppendGroup(std::string(), 1);
  }

  // The trials are most likely to give different results since they have
  // different names.
  EXPECT_NE(trials[0]->group(), trials[1]->group());
  EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
}

TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
  // Ensures that two trials with different names but the same custom seed used
  // for one time randomization produce the same group assignments.
  base::FieldTrialList field_trial_list(
      new PermutedEntropyProvider(1234, kMaxLowEntropySize));
  const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
  const uint32 kCustomSeed = 9001;
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
          "one", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
      base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
          "two", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
  };

  for (size_t i = 0; i < arraysize(trials); ++i) {
    for (int j = 0; j < 100; ++j)
      trials[i]->AppendGroup(std::string(), 1);
  }

  // Normally, these trials should produce different groups, but if the same
  // custom seed is used, they should produce the same group assignment.
  EXPECT_EQ(trials[0]->group(), trials[1]->group());
  EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
}

TEST(EntropyProviderTest, SHA1Entropy) {
  const double results[] = { GenerateSHA1Entropy("hi", "1"),
                             GenerateSHA1Entropy("there", "1") };

  EXPECT_NE(results[0], results[1]);
  for (size_t i = 0; i < arraysize(results); ++i) {
    EXPECT_LE(0.0, results[i]);
    EXPECT_GT(1.0, results[i]);
  }

  EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
            GenerateSHA1Entropy("yo", "1"));
  EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
            GenerateSHA1Entropy("yo", "else"));
}

TEST(EntropyProviderTest, PermutedEntropy) {
  const double results[] = {
      GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
      GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };

  EXPECT_NE(results[0], results[1]);
  for (size_t i = 0; i < arraysize(results); ++i) {
    EXPECT_LE(0.0, results[i]);
    EXPECT_GT(1.0, results[i]);
  }

  EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
            GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
  EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
            GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
}

TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
  // Verifies that PermutedEntropyProvider produces expected results. This
  // ensures that the results are the same between platforms and ensures that
  // changes to the implementation do not regress this accidentally.

  EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
  EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
  EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
}

TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
  for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
    PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
  }
}

TEST(EntropyProviderTest, PermutedEntropyIsUniform) {
  for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
    PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
  }
}

TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) {
  // Verifies that SeededRandGenerator has a uniform distribution.
  //
  // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.

  const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
  const uint32 kExpectedAverage = kTopOfRange / 2ULL;
  const uint32 kAllowedVariance = kExpectedAverage / 50ULL;  // +/- 2%
  const int kMinAttempts = 1000;
  const int kMaxAttempts = 1000000;

  for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    const uint32 seed = HashName(kTestTrialNames[i]);
    internal::SeededRandGenerator rand_generator(seed);

    double cumulative_average = 0.0;
    int count = 0;
    while (count < kMaxAttempts) {
      uint32 value = rand_generator(kTopOfRange);
      cumulative_average = (count * cumulative_average + value) / (count + 1);

      // Don't quit too quickly for things to start converging, or we may have
      // a false positive.
      if (count > kMinAttempts &&
          kExpectedAverage - kAllowedVariance < cumulative_average &&
          cumulative_average < kExpectedAverage + kAllowedVariance) {
        break;
      }

      ++count;
    }

    ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
        kExpectedAverage << ", average ended at " << cumulative_average <<
        ", for trial " << kTestTrialNames[i];
  }
}

}  // namespace metrics