/* * Copyright 2014 Google Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef SEMISTATIC_MAP_TEMPLATES_H #define SEMISTATIC_MAP_TEMPLATES_H #if !IN_FRUIT_CPP_FILE #error "Fruit .template.h file included in non-cpp file." #endif #include <algorithm> #include <cassert> #include <chrono> #include <random> #include <utility> // This include is not necessary for GCC/Clang, but it's necessary for MSVC. #include <numeric> #include <fruit/impl/data_structures/semistatic_map.h> #include <fruit/impl/data_structures/arena_allocator.h> #include <fruit/impl/data_structures/fixed_size_vector.templates.h> #include <fruit/impl/fruit_assert.h> namespace fruit { namespace impl { template <typename Key, typename Value> template <typename Iter> SemistaticMap<Key, Value>::SemistaticMap( Iter values_begin, Iter values_end, std::size_t num_values, MemoryPool& memory_pool) { NumBits num_bits = pickNumBits(num_values); std::size_t num_buckets = size_t(1) << num_bits; FixedSizeVector<Unsigned, ArenaAllocator<Unsigned>> count(num_buckets, 0, ArenaAllocator<Unsigned>(memory_pool)); hash_function.shift = (sizeof(Unsigned) * CHAR_BIT - num_bits); // The cast is a no-op in some systems (e.g. GCC and Clang under Linux 64bit) but it's needed in other systems (e.g. // MSVC). unsigned seed = (unsigned)std::chrono::system_clock::now().time_since_epoch().count(); std::default_random_engine random_generator(seed); std::uniform_int_distribution<Unsigned> random_distribution; while (1) { hash_function.a = random_distribution(random_generator); for (Iter itr = values_begin; !(itr == values_end); ++itr) { Unsigned& this_count = count[hash((*itr).first)]; ++this_count; if (this_count == beta) { goto pick_another; } } break; pick_another: std::memset(count.data(), 0, num_buckets * sizeof(Unsigned)); } values = FixedSizeVector<value_type>(num_values, value_type()); std::partial_sum(count.begin(), count.end(), count.begin()); lookup_table = FixedSizeVector<CandidateValuesRange>(count.size()); for (Unsigned n : count) { lookup_table.push_back(CandidateValuesRange{values.data() + n, values.data() + n}); } // At this point lookup_table[h] is the number of keys in [first, last) that have a hash <=h. // Note that even though we ensure this after construction, it is not maintained by insert() so it's not an invariant. Iter itr = values_begin; for (std::size_t i = 0; i < num_values; ++i, ++itr) { value_type*& first_value_ptr = lookup_table[hash((*itr).first)].begin; --first_value_ptr; FruitAssert(values.data() <= first_value_ptr); FruitAssert(first_value_ptr < values.data() + values.size()); *first_value_ptr = *itr; } } template <typename Key, typename Value> SemistaticMap<Key, Value>::SemistaticMap(const SemistaticMap<Key, Value>& map, std::vector<value_type, ArenaAllocator<value_type>>&& new_elements) : hash_function(map.hash_function), lookup_table(map.lookup_table, map.lookup_table.size()) { // Sort by hash. std::sort(new_elements.begin(), new_elements.end(), [this](const value_type& x, const value_type& y) { return hash(x.first) < hash(y.first); }); std::size_t num_additional_values = new_elements.size(); // Add the space needed to store copies of the old buckets. for (auto itr = new_elements.begin(), itr_end = new_elements.end(); itr != itr_end; /* no increment */) { Unsigned h = hash(itr->first); auto p = map.lookup_table[h]; num_additional_values += (p.end - p.begin); for (; itr != itr_end && hash(itr->first) == h; ++itr) { } } values = FixedSizeVector<value_type>(num_additional_values); // Now actually perform the insertions. if (new_elements.empty()) { // This is to workaround a bug in the STL shipped with GCC <4.8.2, where calling data() on an // empty vector causes undefined behavior (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59829). return; } for (value_type *itr = new_elements.data(), *itr_end = new_elements.data() + new_elements.size(); itr != itr_end; /* no increment */) { Unsigned h = hash(itr->first); auto p = map.lookup_table[h]; num_additional_values += (p.end - p.begin); value_type* first = itr; for (; itr != itr_end && hash(itr->first) == h; ++itr) { } value_type* last = itr; insert(h, first, last); } } template <typename Key, typename Value> void SemistaticMap<Key, Value>::insert(std::size_t h, const value_type* elems_begin, const value_type* elems_end) { value_type* old_bucket_begin = lookup_table[h].begin; value_type* old_bucket_end = lookup_table[h].end; lookup_table[h].begin = values.data() + values.size(); // Step 1: re-insert all keys with the same hash at the end (if any). for (value_type* p = old_bucket_begin; p != old_bucket_end; ++p) { values.push_back(*p); } // Step 2: also insert the new keys and values for (auto itr = elems_begin; itr != elems_end; ++itr) { values.push_back(*itr); } lookup_table[h].end = values.data() + values.size(); // The old sequence is no longer pointed to by any index in the lookup table, but recompacting the vectors would be // too slow. } template <typename Key, typename Value> const Value& SemistaticMap<Key, Value>::at(Key key) const { Unsigned h = hash(key); for (const value_type* p = lookup_table[h].begin; /* p!=lookup_table[h].end but no need to check */; ++p) { FruitAssert(p != lookup_table[h].end); if (p->first == key) { return p->second; } } } template <typename Key, typename Value> const Value* SemistaticMap<Key, Value>::find(Key key) const { Unsigned h = hash(key); for (const value_type *p = lookup_table[h].begin, *p_end = lookup_table[h].end; p != p_end; ++p) { if (p->first == key) { return &(p->second); } } return nullptr; } template <typename Key, typename Value> typename SemistaticMap<Key, Value>::NumBits SemistaticMap<Key, Value>::pickNumBits(std::size_t n) { NumBits result = 1; while ((std::size_t(1) << result) < n) { ++result; } return result + 1; } // This is here so that we don't have to include fixed_size_vector.templates.h in fruit.h. template <typename Key, typename Value> SemistaticMap<Key, Value>::~SemistaticMap() {} } // namespace impl } // namespace fruit #endif // SEMISTATIC_MAP_TEMPLATES_H