C++程序  |  199行  |  6.83 KB

/*
 * Copyright 2014 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SEMISTATIC_MAP_TEMPLATES_H
#define SEMISTATIC_MAP_TEMPLATES_H

#if !IN_FRUIT_CPP_FILE
#error "Fruit .template.h file included in non-cpp file."
#endif

#include <algorithm>
#include <cassert>
#include <chrono>
#include <random>
#include <utility>
// This include is not necessary for GCC/Clang, but it's necessary for MSVC.
#include <numeric>

#include <fruit/impl/data_structures/semistatic_map.h>

#include <fruit/impl/data_structures/arena_allocator.h>
#include <fruit/impl/data_structures/fixed_size_vector.templates.h>
#include <fruit/impl/fruit_assert.h>

namespace fruit {
namespace impl {

template <typename Key, typename Value>
template <typename Iter>
SemistaticMap<Key, Value>::SemistaticMap(
    Iter values_begin, Iter values_end, std::size_t num_values, MemoryPool& memory_pool) {
  NumBits num_bits = pickNumBits(num_values);
  std::size_t num_buckets = size_t(1) << num_bits;

  FixedSizeVector<Unsigned, ArenaAllocator<Unsigned>> count(num_buckets, 0, ArenaAllocator<Unsigned>(memory_pool));

  hash_function.shift = (sizeof(Unsigned) * CHAR_BIT - num_bits);

  // The cast is a no-op in some systems (e.g. GCC and Clang under Linux 64bit) but it's needed in other systems (e.g.
  // MSVC).
  unsigned seed = (unsigned)std::chrono::system_clock::now().time_since_epoch().count();
  std::default_random_engine random_generator(seed);
  std::uniform_int_distribution<Unsigned> random_distribution;

  while (1) {
    hash_function.a = random_distribution(random_generator);

    for (Iter itr = values_begin; !(itr == values_end); ++itr) {
      Unsigned& this_count = count[hash((*itr).first)];
      ++this_count;
      if (this_count == beta) {
        goto pick_another;
      }
    }
    break;

  pick_another:
    std::memset(count.data(), 0, num_buckets * sizeof(Unsigned));
  }

  values = FixedSizeVector<value_type>(num_values, value_type());

  std::partial_sum(count.begin(), count.end(), count.begin());
  lookup_table = FixedSizeVector<CandidateValuesRange>(count.size());
  for (Unsigned n : count) {
    lookup_table.push_back(CandidateValuesRange{values.data() + n, values.data() + n});
  }

  // At this point lookup_table[h] is the number of keys in [first, last) that have a hash <=h.
  // Note that even though we ensure this after construction, it is not maintained by insert() so it's not an invariant.

  Iter itr = values_begin;
  for (std::size_t i = 0; i < num_values; ++i, ++itr) {
    value_type*& first_value_ptr = lookup_table[hash((*itr).first)].begin;
    --first_value_ptr;
    FruitAssert(values.data() <= first_value_ptr);
    FruitAssert(first_value_ptr < values.data() + values.size());
    *first_value_ptr = *itr;
  }
}

template <typename Key, typename Value>
SemistaticMap<Key, Value>::SemistaticMap(const SemistaticMap<Key, Value>& map,
                                         std::vector<value_type, ArenaAllocator<value_type>>&& new_elements)
    : hash_function(map.hash_function), lookup_table(map.lookup_table, map.lookup_table.size()) {

  // Sort by hash.
  std::sort(new_elements.begin(), new_elements.end(),
            [this](const value_type& x, const value_type& y) { return hash(x.first) < hash(y.first); });

  std::size_t num_additional_values = new_elements.size();
  // Add the space needed to store copies of the old buckets.
  for (auto itr = new_elements.begin(), itr_end = new_elements.end(); itr != itr_end; /* no increment */) {
    Unsigned h = hash(itr->first);
    auto p = map.lookup_table[h];
    num_additional_values += (p.end - p.begin);
    for (; itr != itr_end && hash(itr->first) == h; ++itr) {
    }
  }

  values = FixedSizeVector<value_type>(num_additional_values);

  // Now actually perform the insertions.

  if (new_elements.empty()) {
    // This is to workaround a bug in the STL shipped with GCC <4.8.2, where calling data() on an
    // empty vector causes undefined behavior (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59829).
    return;
  }
  for (value_type *itr = new_elements.data(), *itr_end = new_elements.data() + new_elements.size(); itr != itr_end;
       /* no increment */) {
    Unsigned h = hash(itr->first);
    auto p = map.lookup_table[h];
    num_additional_values += (p.end - p.begin);
    value_type* first = itr;
    for (; itr != itr_end && hash(itr->first) == h; ++itr) {
    }
    value_type* last = itr;
    insert(h, first, last);
  }
}

template <typename Key, typename Value>
void SemistaticMap<Key, Value>::insert(std::size_t h, const value_type* elems_begin, const value_type* elems_end) {

  value_type* old_bucket_begin = lookup_table[h].begin;
  value_type* old_bucket_end = lookup_table[h].end;

  lookup_table[h].begin = values.data() + values.size();

  // Step 1: re-insert all keys with the same hash at the end (if any).
  for (value_type* p = old_bucket_begin; p != old_bucket_end; ++p) {
    values.push_back(*p);
  }

  // Step 2: also insert the new keys and values
  for (auto itr = elems_begin; itr != elems_end; ++itr) {
    values.push_back(*itr);
  }

  lookup_table[h].end = values.data() + values.size();

  // The old sequence is no longer pointed to by any index in the lookup table, but recompacting the vectors would be
  // too slow.
}

template <typename Key, typename Value>
const Value& SemistaticMap<Key, Value>::at(Key key) const {
  Unsigned h = hash(key);
  for (const value_type* p = lookup_table[h].begin; /* p!=lookup_table[h].end but no need to check */; ++p) {
    FruitAssert(p != lookup_table[h].end);
    if (p->first == key) {
      return p->second;
    }
  }
}

template <typename Key, typename Value>
const Value* SemistaticMap<Key, Value>::find(Key key) const {
  Unsigned h = hash(key);
  for (const value_type *p = lookup_table[h].begin, *p_end = lookup_table[h].end; p != p_end; ++p) {
    if (p->first == key) {
      return &(p->second);
    }
  }
  return nullptr;
}

template <typename Key, typename Value>
typename SemistaticMap<Key, Value>::NumBits SemistaticMap<Key, Value>::pickNumBits(std::size_t n) {
  NumBits result = 1;
  while ((std::size_t(1) << result) < n) {
    ++result;
  }
  return result + 1;
}

// This is here so that we don't have to include fixed_size_vector.templates.h in fruit.h.
template <typename Key, typename Value>
SemistaticMap<Key, Value>::~SemistaticMap() {}

} // namespace impl
} // namespace fruit

#endif // SEMISTATIC_MAP_TEMPLATES_H