/* hash.c -- * * ********************************************************************* * * Copyright (C) 1985, 1990 Regents of the University of California. * * * Permission to use, copy, modify, and distribute this * * * software and its documentation for any purpose and without * * * fee is hereby granted, provided that the above copyright * * * notice appear in all copies. The University of California * * * makes no representations about the suitability of this * * * software for any purpose. It is provided "as is" without * * * express or implied warranty. Export of this software outside * * * of the United States of America may require an export license. * * ********************************************************************* * * This module contains routines to manipulate a hash table. * See hash.h for a definition of the structure of the hash * table. Hash tables grow automatically as the amount of * information increases. */ #ifndef lint static char rcsid[] __attribute__ ((unused)) = "$Header: /usr/cvsroot/magic-8.0/utils/hash.c,v 1.2 2009/05/13 15:03:18 tim Exp $"; #endif /* not lint */ #include #include #include "utils/magic.h" #include "utils/hash.h" #include "utils/malloc.h" /* Used before it's defined: */ void rebuild(); /* * The following defines the ratio of # entries to # buckets * at which we rebuild the table to make it larger. */ static int rebuildLimit = 3; /* * An invalid pointer, guaranteed to cause a coredump if * we try to indirect through it. This should help catch * attempts to indirect through stale pointers. */ #define NIL ((HashEntry *) (1<<29)) /*--------------------------------------------------------- * * HashInit -- * HashInitClient -- * * These procedures simply set up the hash table. The standard * way of initializing the hash table is to use HashInit(), but * if it's desired to provide the hash module with procedures to * use for comparing and copying hash table keys, use HashInitClient(). * * The number of buckets in the table at the start is 'nBuckets', * which is automatically rounded up to a power of two. This isn't * a limit on the number of buckets the table will eventually contain, * though, since more buckets are automatically created if the table * gets too full (the number of buckets increases by 4x). * * Results: * None. * * Side Effects: * Memory is allocated for the initial bucket area. * * Table Organization: * Tables can be organized in either of four ways, depending * on the type of comparison keys as specified by ptrKeys. * * HT_STRINGKEYS: * Keys are NULL-terminated; their address is passed to * HashFind as a (char *). * * HT_WORDKEYS: * These are any 32-bit word, passed to HashFind as a (char *). * * HT_STRUCTKEYS: * Actually, any value of ptrKeys >= HT_STRUCTKEYS means * that keys are ptrKeys-word values whose ADDRESS is * passed to HashFind as a (char *). * * HT_CLIENTKEYS: * Like HT_WORDKEYS, these are also 32-bit values, passed * to HashFind as a (char *). However, they are compared * and copied using user-supplied procedures passed to * HashInitClient() when the hash table was created. * (Note that hash tables with keys of type HT_CLIENTKEYS * can ONLY be created using HashInitClient()). * * Single-word values, a la HT_WORDKEYS, are fastest but most * restrictive. * * Client procedures: * Four client procedures are provided to HashInitClient() * for use in dealing with HT_CLIENTKEYS data. They should * be of the following form: * * Compare two hash keys; return 0 if equal, 1 if not. If this * procedure is NULL, comparison is just 32-bit comparison of * k1 and k2. * * int * (*compareFn)(k1, k2) * char *k1, *k2; * { * } * * Create a copy of a hash key for storing in a newly created * hash entry. If this procedure is NULL, the key is stored * without being copied. * * char * * (*copyFn)(key) * char *key; * { * } * * Produce a single 32-bit integer for a key value that will * then be randomized by the hashing function. If NULL, then * the key itself is used as the 32-bit integer. * * int * (*hashFn)(key) * char *key; * { * } * * Free a key that had been allocated with (*copyFn)(). * If NULL, then nothing is done. * * int * (*killFn)(key) * char *key; * { * } * *--------------------------------------------------------- */ void HashInit(table, nBuckets, ptrKeys) HashTable *table; /* Table to be initialized */ int nBuckets; /* How many buckets to create for starters */ int ptrKeys; /* See comments above */ { ASSERT(ptrKeys != HT_CLIENTKEYS, "HashInit: should use HashInitClient"); HashInitClient(table, nBuckets, ptrKeys, (int (*)()) NULL, (char *(*)()) NULL, (int (*)()) NULL, (int (*)()) NULL); } void HashInitClient(table, nBuckets, ptrKeys, compareFn, copyFn, hashFn, killFn) HashTable *table; /* Table to be initialized */ int nBuckets; /* How many buckets to create for starters */ int ptrKeys; /* See comments above */ int (*compareFn)(); /* Function to compare two keys */ char *(*copyFn)(); /* Function to copy a key */ int (*hashFn)(); /* For hashing */ int (*killFn)(); /* For hashing */ { HashEntry ** ptr; int i; table->ht_nEntries = 0; table->ht_ptrKeys = ptrKeys; table->ht_compareFn = compareFn; table->ht_copyFn = copyFn; table->ht_hashFn = hashFn; table->ht_killFn = killFn; /* Round up the size to a power of two */ if (nBuckets < 0) nBuckets = -nBuckets; table->ht_size = 2; table->ht_mask = 1; table->ht_downShift = 29; while (table->ht_size < nBuckets) { table->ht_size <<= 1; table->ht_mask = (table->ht_mask<<1) + 1; table->ht_downShift--; } /* Allocate and initialize the buckets */ table->ht_table = (HashEntry **) mallocMagic( (unsigned) (sizeof (HashEntry *) * table->ht_size)); ptr = table->ht_table; for (i = 0; i < table->ht_size; i++) *ptr++ = NIL; } /*--------------------------------------------------------- * * hash -- * * This is a local procedure to compute a hash table * bucket address based on a key value. * * Results: * The return value is an integer between 0 and size-1. * * Side Effects: * None. * * Design: * The randomizing code is stolen straight from the rand() * library routine. * *--------------------------------------------------------- */ int hash(table, key) HashTable *table; char *key; { unsigned *up; unsigned long i; int j; i = 0; switch (table->ht_ptrKeys) { /* Add up the characters as though this were a number */ case HT_STRINGKEYS: while (*key != 0) i = (*key++) + (i << 6) + (i << 16) - i; break; /* Map the key into another 32-bit value if necessary */ case HT_CLIENTKEYS: if (table->ht_hashFn) { i = (*(table->ht_hashFn))(key); break; } /* Fall through to ... */ /* Just use the 32-bit key value */ case HT_WORDKEYS: i = (spointertype) key; break; /* Special case for two-word structs */ case HT_STRUCTKEYS: i = ((unsigned *) key)[0] + ((unsigned *) key)[1]; break; /* General case of multi-word structs */ default: j = table->ht_ptrKeys; up = (unsigned *) key; do { i += *up++; } while (--j); break; } /* Randomize! */ return ((i*1103515245 + 12345) >> table->ht_downShift) & table->ht_mask; } /*--------------------------------------------------------- * * HashLookOnly -- * * Searches a hash table for an entry corresponding to key. * * Results: * The return value is a pointer to the entry for key, * if key was present in the table. If key was not * present, NULL is returned. * * Side Effects: * None. * *--------------------------------------------------------- */ HashEntry * HashLookOnly(table, key) HashTable *table; /* Hash table to search. */ const char *key; /* Interpreted according to table->ht_ptrKeys * as described in HashInit()'s comments. */ { HashEntry *h; unsigned *up, *kp; int n; int bucket; bucket = hash(table, key); h = *(table->ht_table + bucket); while (h != NIL) { switch (table->ht_ptrKeys) { case HT_STRINGKEYS: if (strcmp(h->h_key.h_name, key) == 0) return h; break; case HT_CLIENTKEYS: if (table->ht_compareFn) { if ((*table->ht_compareFn)(h->h_key.h_ptr, key) == 0) return h; break; } /* Fall through to ... */ case HT_WORDKEYS: if (h->h_key.h_ptr == key) return h; break; case HT_STRUCTKEYS: up = h->h_key.h_words; kp = (unsigned *) key; if (*up++ == *kp++ && *up == *kp) return h; break; default: n = table->ht_ptrKeys; up = h->h_key.h_words; kp = (unsigned *) key; do { if (*up++ != *kp++) goto next; } while (--n); return h; } next: h = h->h_next; } /* The desired entry isn't there */ return ((HashEntry *) NULL); } /*--------------------------------------------------------- * * HashFind -- * * Searches a hash table for an entry corresponding to * key. If no entry is found, then one is created. * * Results: * The return value is a pointer to the entry for key. * If the entry is a new one, then the h_pointer field * of the entry we return is zero. * * Side Effects: * Memory is allocated, and the hash buckets may be modified. * *--------------------------------------------------------- */ HashEntry * HashFind(table, key) HashTable *table; /* Hash table to search. */ const char *key; /* Interpreted according to table->ht_ptrKeys * as described in HashInit()'s comments. */ { unsigned *up, *kp; HashEntry *h; int n; int bucket; bucket = hash(table, key); h = *(table->ht_table + bucket); while (h != NIL) { switch (table->ht_ptrKeys) { case HT_STRINGKEYS: if (strcmp(h->h_key.h_name, key) == 0) return h; break; case HT_CLIENTKEYS: if (table->ht_compareFn) { if ((*table->ht_compareFn)(h->h_key.h_ptr, key) == 0) return h; break; } /* Fall through to ... */ case HT_WORDKEYS: if (h->h_key.h_ptr == key) return h; break; case HT_STRUCTKEYS: up = h->h_key.h_words; kp = (unsigned *) key; if (*up++ == *kp++ && *up == *kp) return h; break; default: n = table->ht_ptrKeys; up = h->h_key.h_words; kp = (unsigned *) key; do { if (*up++ != *kp++) goto next; } while (--n); return h; } next: h = h->h_next; } /* * The desired entry isn't there. Before allocating a new entry, * see if we're overloading the buckets. If so, then make a * bigger table (4x as big). */ if (table->ht_nEntries >= rebuildLimit*table->ht_size) { rebuild(table); bucket = hash(table, key); } table->ht_nEntries += 1; /* * Now allocate a new entry. The size of the HashEntry allocated * depends on the size of the key: for multi-word keys or string * keys longer than 3 bytes, there will be extra space at the end * of the HashEntry to hold the key. */ switch (table->ht_ptrKeys) { case HT_STRINGKEYS: h = (HashEntry *) mallocMagic((unsigned) (sizeof(HashEntry)+strlen(key)-3)); (void) strcpy(h->h_key.h_name, key); break; case HT_CLIENTKEYS: if (table->ht_copyFn) { h = (HashEntry *) mallocMagic((unsigned) (sizeof (HashEntry))); h->h_key.h_ptr = (*table->ht_copyFn)(key); break; } /* Fall through to ... */ case HT_WORDKEYS: h = (HashEntry *) mallocMagic((unsigned) (sizeof (HashEntry))); h->h_key.h_ptr = key; break; case HT_STRUCTKEYS: h = (HashEntry *) mallocMagic( (unsigned) (sizeof (HashEntry) + sizeof (unsigned))); up = h->h_key.h_words; kp = (unsigned *) key; *up++ = *kp++; *up = *kp; break; default: n = table->ht_ptrKeys; h = (HashEntry *) mallocMagic( (unsigned) (sizeof(HashEntry) + (n-1) * sizeof (unsigned))); up = h->h_key.h_words; kp = (unsigned *) key; do { *up++ = *kp++; } while (--n); break; } h->h_pointer = 0; h->h_next = *(table->ht_table + bucket); *(table->ht_table + bucket) = h; return h; } /*--------------------------------------------------------- * * rebuild -- * * This local routine makes a new hash table that * is 4x larger than the old one. * * Results: * None. * * Side Effects: * The entire hash table is moved, so any bucket numbers * from the old table are invalid. * *--------------------------------------------------------- */ void rebuild(table) HashTable *table; /* Table to be enlarged. */ { HashEntry **oldTable, **old2, *h, *next; int oldSize, bucket; oldTable = table->ht_table; old2 = oldTable; oldSize = table->ht_size; /* Build a new table 4 times as large as the old one. */ HashInitClient(table, table->ht_size*4, table->ht_ptrKeys, table->ht_compareFn, table->ht_copyFn, table->ht_hashFn, table->ht_killFn); for ( ; oldSize > 0; oldSize--) { h = *old2++; while (h != NIL) { next = h->h_next; switch (table->ht_ptrKeys) { case HT_STRINGKEYS: bucket = hash(table, h->h_key.h_name); break; case HT_WORDKEYS: case HT_CLIENTKEYS: bucket = hash(table, h->h_key.h_ptr); break; default: bucket = hash(table, (char *) h->h_key.h_words); break; } h->h_next = *(table->ht_table + bucket); *(table->ht_table + bucket) = h; table->ht_nEntries += 1; h = next; } } freeMagic((char *) oldTable); } /*--------------------------------------------------------- * * HashStats -- * * This routine merely prints statistics about the * current bucket situation. * * Results: * None. * * Side Effects: * Junk gets printed. * *--------------------------------------------------------- */ #define MAXCOUNT 15 void HashStats(table) HashTable *table; { int count[MAXCOUNT], overflow, i, j; HashEntry *h; overflow = 0; for (i = 0; i < MAXCOUNT; i++) count[i] = 0; for (i = 0; i < table->ht_size; i++) { j = 0; for (h = *(table->ht_table+i); h != NIL; h = h->h_next) j++; if (j < MAXCOUNT) count[j]++; else overflow++; } for (i = 0; i < MAXCOUNT; i++) printf("# of buckets with %d entries: %d.\n", i, count[i]); printf("# of buckets with >%d entries: %d.\n", MAXCOUNT-1, overflow); } /*--------------------------------------------------------- * * HashRemove -- * * Remove an entry from the hash table. The entry * is assumed to have no data (NULL value). Only * use this routine on STRINGKEY type hash tables. * If the entry does not exist, then the routine * returns without doing anything. * * Results: * None *--------------------------------------------------------- */ void HashRemove(table, key) HashTable *table; /* Hash table to search. */ const char *key; /* Interpreted according to table->ht_ptrKeys * as described in HashInit()'s comments. */ { HashEntry *h, *hlast; int bucket; bucket = hash(table, key); h = *(table->ht_table + bucket); hlast = NULL; while (h != NIL) { if (strcmp(h->h_key.h_name, key) == 0) { freeMagic((char *)h); if (hlast != NULL) hlast->h_next = h->h_next; else *(table->ht_table + bucket) = h->h_next; break; } hlast = h; h = h->h_next; } } /*--------------------------------------------------------- * * HashStartSearch -- * * This procedure sets things up for a complete search * of all entries recorded in the hash table. * * Results: * None. * * Side Effects: * The information in hs is initialized so that successive * calls to HashNext will return successive HashEntry's * from the table. *--------------------------------------------------------- */ void HashStartSearch(hs) HashSearch *hs; /* Area in which to keep state about search.*/ { hs->hs_nextIndex = 0; hs->hs_h = NIL; } /*--------------------------------------------------------- * * HashNext -- * * This procedure returns successive entries in the * hash table. * * Results: * The return value is a pointer to the next HashEntry * in the table, or NULL when the end of the table is * reached. * * Side Effects: * The information in hs is modified to advance to the * next entry. * *--------------------------------------------------------- */ HashEntry * HashNext(table, hs) HashTable *table; /* Table to be searched. */ HashSearch *hs; /* Area used to keep state about search. */ { HashEntry *h; while (hs->hs_h == NIL) { if (hs->hs_nextIndex >= table->ht_size) return NULL; hs->hs_h = *(table->ht_table + hs->hs_nextIndex); hs->hs_nextIndex += 1; } h = hs->hs_h; hs->hs_h = h->h_next; return h; } /*--------------------------------------------------------- * * HashKill -- * * This routine removes everything from a hash table * and frees up the memory space it occupied. * * Results: * None. * * Side Effects: * Lots of memory is freed up. *--------------------------------------------------------- */ void HashKill(table) HashTable *table; /* Hash table whose space is to be freed */ { HashEntry *h, **hp, **hend; int (*killFn)() = (int (*)()) NULL; if (table->ht_ptrKeys == HT_CLIENTKEYS) killFn = table->ht_killFn; for (hp = table->ht_table, hend = &hp[table->ht_size]; hp < hend; hp++) for (h = *hp; h != NIL; h = h->h_next) { const void *p; freeMagic((char *) h); if (killFn) { p = h->h_key.h_ptr; (*killFn)((void *)p); } } freeMagic((char *) table->ht_table); /* * Set up the hash table to cause memory faults on any future * access attempts until re-initialization. */ table->ht_table = (HashEntry **) (1<<29); } /*--------------------------------------------------------- * * HashFreeKill --- * * This routine removes everything from a hash table * and frees up the memory space it occupied along with * the stuff pointed by h_pointer * * Results: * None. * * Side Effects: * Lots of memory is freed up. *--------------------------------------------------------- */ void HashFreeKill(table) HashTable *table; { HashSearch hs; HashEntry *he; void *p; HashStartSearch(&hs); while ((he = HashNext(table, &hs))) { p = HashGetValue(he); freeMagic(p); } HashKill(table); }