/* * This file is part of ltrace. * Copyright (C) 2012, 2013 Petr Machata, Red Hat Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA */ #include <string.h> #include <stdlib.h> #include <stdio.h> #include "dict.h" struct status_bits { unsigned char taken : 1; unsigned char erased : 1; }; static struct status_bits * bitp(struct dict *dict, size_t n) { return VECT_ELEMENT(&dict->status, struct status_bits, n); } void dict_init(struct dict *dict, size_t key_size, size_t value_size, size_t (*hash1)(const void *), int (*eq)(const void *, const void *), size_t (*hash2)(size_t)) { assert(hash1 != NULL); assert(eq != NULL); vect_init(&dict->keys, key_size); vect_init(&dict->values, value_size); VECT_INIT(&dict->status, struct status_bits); dict->size = 0; dict->hash1 = hash1; dict->hash2 = hash2; dict->eq = eq; } struct clone_data { struct dict *target; int (*clone_key)(void *tgt, const void *src, void *data); int (*clone_value)(void *tgt, const void *src, void *data); void (*dtor_key)(void *tgt, void *data); void (*dtor_value)(void *tgt, void *data); void *data; }; static enum callback_status clone_cb(void *key, void *value, void *data) { struct clone_data *clone_data = data; char nkey[clone_data->target->keys.elt_size]; if (clone_data->clone_key == NULL) memmove(nkey, key, sizeof(nkey)); else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0) return CBS_STOP; char nvalue[clone_data->target->values.elt_size]; if (clone_data->clone_value == NULL) { memmove(nvalue, value, sizeof(nvalue)); } else if (clone_data->clone_value(&nvalue, value, clone_data->data) < 0) { fail: if (clone_data->clone_key != NULL) clone_data->dtor_key(&nkey, clone_data->data); return CBS_STOP; } if (dict_insert(clone_data->target, nkey, nvalue) < 0) { if (clone_data->clone_value != NULL) clone_data->dtor_value(&nvalue, clone_data->data); goto fail; } return CBS_CONT; } int dict_clone(struct dict *target, const struct dict *source, int (*clone_key)(void *tgt, const void *src, void *data), void (*dtor_key)(void *tgt, void *data), int (*clone_value)(void *tgt, const void *src, void *data), void (*dtor_value)(void *tgt, void *data), void *data) { assert((clone_key != NULL) == (dtor_key != NULL)); assert((clone_value != NULL) == (dtor_value != NULL)); dict_init(target, source->keys.elt_size, source->values.elt_size, source->hash1, source->eq, source->hash2); struct clone_data clone_data = { target, clone_key, clone_value, dtor_key, dtor_value, data }; if (dict_each((struct dict *)source, NULL, clone_cb, &clone_data) != NULL) { dict_destroy(target, dtor_key, dtor_value, data); return -1; } return 0; } size_t dict_size(const struct dict *dict) { return dict->size; } int dict_empty(const struct dict *dict) { return dict->size == 0; } struct destroy_data { void (*dtor_key)(void *tgt, void *data); void (*dtor_value)(void *tgt, void *data); void *data; }; static enum callback_status destroy_cb(void *key, void *value, void *data) { struct destroy_data *destroy_data = data; if (destroy_data->dtor_key) destroy_data->dtor_key(key, destroy_data->data); if (destroy_data->dtor_value) destroy_data->dtor_value(value, destroy_data->data); return CBS_CONT; } void dict_destroy(struct dict *dict, void (*dtor_key)(void *tgt, void *data), void (*dtor_value)(void *tgt, void *data), void *data) { /* Some slots are unused (the corresponding keys and values * are uninitialized), so we can't call dtors for them. * Iterate DICT instead. */ if (dtor_key != NULL || dtor_value != NULL) { struct destroy_data destroy_data = { dtor_key, dtor_value, data }; dict_each(dict, NULL, destroy_cb, &destroy_data); } vect_destroy(&dict->keys, NULL, NULL); vect_destroy(&dict->values, NULL, NULL); vect_destroy(&dict->status, NULL, NULL); } static size_t default_secondary_hash(size_t pos) { return pos % 97 + 1; } static size_t small_secondary_hash(size_t pos) { return 1; } static inline size_t n(struct dict *dict) { return vect_size(&dict->keys); } static inline size_t (* hash2(struct dict *dict))(size_t) { if (dict->hash2 != NULL) return dict->hash2; else if (n(dict) < 100) return small_secondary_hash; else return default_secondary_hash; } static void * getkey(struct dict *dict, size_t pos) { return ((unsigned char *)dict->keys.data) + dict->keys.elt_size * pos; } static void * getvalue(struct dict *dict, size_t pos) { return ((unsigned char *)dict->values.data) + dict->values.elt_size * pos; } static size_t find_slot(struct dict *dict, const void *key, int *foundp, int *should_rehash, size_t *pi) { assert(n(dict) > 0); size_t pos = dict->hash1(key) % n(dict); size_t pos0 = -1; size_t d = hash2(dict)(pos); size_t i = 0; *foundp = 0; /* We skip over any taken or erased slots. But we remember * the first erased that we find, and if we don't find the key * later, we return that position. */ for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased; pos = (pos + d) % n(dict)) { if (pos0 == (size_t)-1 && bitp(dict, pos)->erased) pos0 = pos; /* If there is a loop, but we've seen an erased * element, take that one. Otherwise give up. */ if (++i > dict->size) { if (pos0 != (size_t)-1) break; return (size_t)-1; } if (bitp(dict, pos)->taken && dict->eq(getkey(dict, pos), key)) { *foundp = 1; break; } } if (!*foundp && pos0 != (size_t)-1) pos = pos0; /* If the hash table degraded into a linked list, request a * rehash. */ if (should_rehash != NULL) *should_rehash = i > 10 && i > n(dict) / 10; if (pi != NULL) *pi = i; return pos; } static enum callback_status rehash_move(void *key, void *value, void *data) { if (dict_insert(data, key, value) < 0) return CBS_STOP; else return CBS_CONT; } static int rehash(struct dict *dict, size_t nn) { assert(nn != n(dict)); int ret = -1; struct dict tmp; dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size, dict->hash1, dict->eq, dict->hash2); /* To honor all invariants (so that we can safely call * dict_destroy), we first make a request to _reserve_ enough * room in all vectors. This has no observable effect on * contents of vectors. */ if (vect_reserve(&tmp.keys, nn) < 0 || vect_reserve(&tmp.values, nn) < 0 || vect_reserve(&tmp.status, nn) < 0) goto done; /* Now that we know that there is enough size in vectors, we * simply bump the size. */ tmp.keys.size = nn; tmp.values.size = nn; size_t old_size = tmp.status.size; tmp.status.size = nn; memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size), 0, (tmp.status.size - old_size) * tmp.status.elt_size); /* At this point, TMP is once more an empty dictionary with NN * slots. Now move stuff from DICT to TMP. */ if (dict_each(dict, NULL, rehash_move, &tmp) != NULL) goto done; /* And now swap contents of DICT and TMP, and we are done. */ { struct dict tmp2 = *dict; *dict = tmp; tmp = tmp2; } ret = 0; done: /* We only want to release the containers, not the actual data * that they hold, so it's fine if we don't pass any dtor. */ dict_destroy(&tmp, NULL, NULL, NULL); return ret; } static const size_t primes[] = { 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 130981, 0 }; static size_t larger_size(size_t current) { if (current == 0) return primes[0]; if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) { size_t i; for (i = 0; primes[i] != 0; ++i) if (primes[i] > current) return primes[i]; abort(); } /* We ran out of primes, so invent a new one. The following * gives primes until about 17M elements (and then some more * later). */ return 2 * current + 6585; } static size_t smaller_size(size_t current) { if (current <= primes[0]) return primes[0]; if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) { size_t i; size_t prev = 0; for (i = 0; primes[i] != 0; ++i) { if (primes[i] >= current) return prev; prev = primes[i]; } abort(); } return (current - 6585) / 2; } int dict_insert(struct dict *dict, void *key, void *value) { if (n(dict) == 0 || dict->size > 0.7 * n(dict)) rehash: if (rehash(dict, larger_size(n(dict))) < 0) return -1; int found; int should_rehash; size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL); if (slot_n == (size_t)-1) return -1; if (found) return 1; assert(!bitp(dict, slot_n)->taken); /* If rehash was requested, do that, and retry. But just live * with it for apparently sparse tables. No resizing can fix * a rubbish hash. */ if (should_rehash && dict->size > 0.3 * n(dict)) goto rehash; memmove(getkey(dict, slot_n), key, dict->keys.elt_size); memmove(getvalue(dict, slot_n), value, dict->values.elt_size); bitp(dict, slot_n)->taken = 1; bitp(dict, slot_n)->erased = 0; ++dict->size; return 0; } void * dict_find(struct dict *dict, const void *key) { if (dict->size == 0) return NULL; assert(n(dict) > 0); int found; size_t slot_n = find_slot(dict, key, &found, NULL, NULL); if (found) return getvalue(dict, slot_n); else return NULL; } int dict_erase(struct dict *dict, const void *key, void (*dtor_key)(void *tgt, void *data), void (*dtor_value)(void *tgt, void *data), void *data) { int found; size_t i; size_t slot_n = find_slot(dict, key, &found, NULL, &i); if (!found) return -1; if (dtor_key != NULL) dtor_key(getkey(dict, slot_n), data); if (dtor_value != NULL) dtor_value(getvalue(dict, slot_n), data); bitp(dict, slot_n)->taken = 0; bitp(dict, slot_n)->erased = 1; --dict->size; if (dict->size < 0.3 * n(dict)) { size_t smaller = smaller_size(n(dict)); if (smaller != n(dict)) /* Don't mind if it fails when shrinking. */ rehash(dict, smaller); } return 0; } void * dict_each(struct dict *dict, void *start_after, enum callback_status (*cb)(void *, void *, void *), void *data) { size_t i; if (start_after != NULL) i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1; else i = 0; for (; i < dict->keys.size; ++i) if (bitp(dict, i)->taken && !bitp(dict, i)->erased) { void *key = getkey(dict, i); if (cb(key, getvalue(dict, i), data) != CBS_CONT) return key; } return NULL; } size_t dict_hash_int(const int *key) { return (size_t)(*key * 2654435761U); } int dict_eq_int(const int *key1, const int *key2) { return *key1 == *key2; } size_t dict_hash_string(const char **key) { size_t h = 5381; const char *str = *key; while (*str != 0) h = h * 33 ^ *str++; return h; } int dict_eq_string(const char **key1, const char **key2) { return strcmp(*key1, *key2) == 0; } void dict_dtor_string(const char **key, void *data) { free((char *)*key); } int dict_clone_string(const char **tgt, const char **src, void *data) { *tgt = strdup(*src); return *tgt != NULL ? 0 : -1; } #ifdef TEST static enum callback_status dump(int *key, int *value, void *data) { char *seen = data; assert(seen[*key] == 0); seen[*key] = 1; assert(*value == *key * 2 + 1); return CBS_STOP; } static size_t dict_hash_int_silly(const int *key) { return *key % 10; } static void verify(struct dict *di, size_t len, char *seen) { size_t ct = 0; int *it; for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;) ct++; assert(ct == len); memset(seen, 0, len); } static enum callback_status fill_keys(int *key, int *value, void *data) { int *array = data; array[++array[0]] = *key; return CBS_CONT; } static void test1(void) { struct dict di; DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL); char seen[100000] = {}; size_t i; for (i = 0; i < sizeof(seen); ++i) { int key = i; int value = 2 * i + 1; DICT_INSERT(&di, &key, &value); int *valp = DICT_FIND_REF(&di, &key, int); assert(valp != NULL); assert(*valp == value); assert(dict_size(&di) == i + 1); } verify(&di, sizeof(seen), seen); struct dict d2; DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL); DICT_DESTROY(&di, int, int, NULL, NULL, NULL); verify(&d2, sizeof(seen), seen); /* Now we try to gradually erase all elements. We can't erase * inside a DICT_EACH call, so copy first keys to a separate * memory area first. */ int keys[d2.size + 1]; size_t ct = 0; keys[0] = 0; DICT_EACH(&d2, int, int, NULL, fill_keys, keys); for (i = 0; i < (size_t)keys[0]; ++i) { assert(DICT_ERASE(&d2, &keys[i + 1], int, NULL, NULL, NULL) == 0); ++ct; } assert(ct == sizeof(seen)); DICT_DESTROY(&d2, int, int, NULL, NULL, NULL); } static void test_erase(void) { int i; /* To test erase, we need a relatively bad hash function, so * that there are some overlapping chains in the table. */ struct dict d2; DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL); const int limit = 500; for (i = 0; i < limit; ++i) { int key = 2 * i + 1; int value = 2 * key + 1; DICT_INSERT(&d2, &key, &value); } /* Now we try to delete each of the keys, and verify that none * of the chains was broken. */ for (i = 0; i < limit; ++i) { struct dict copy; DICT_CLONE(©, &d2, int, int, NULL, NULL, NULL, NULL, NULL); int key = 2 * i + 1; DICT_ERASE(©, &key, int, NULL, NULL, NULL); assert(dict_size(©) == dict_size(&d2) - 1); int j; for (j = 0; j < limit; ++j) { key = 2 * j + 1; int *valp = DICT_FIND_REF(©, &key, int); if (i != j) { assert(valp != NULL); assert(*valp == 2 * key + 1); } else { assert(valp == NULL); } } DICT_DESTROY(©, int, int, NULL, NULL, NULL); } DICT_DESTROY(&d2, int, int, NULL, NULL, NULL); } int main(int argc, char *argv[]) { test1(); test_erase(); return 0; } #endif