/* hash.c --
 *
 *     *********************************************************************
 *     * Copyright (C) 1985, 1990 Regents of the University of California. *
 *     * Permission to use, copy, modify, and distribute this              *
 *     * software and its documentation for any purpose and without        *
 *     * fee is hereby granted, provided that the above copyright          *
 *     * notice appear in all copies.  The University of California        *
 *     * makes no representations about the suitability of this            *
 *     * software for any purpose.  It is provided "as is" without         *
 *     * express or implied warranty.  Export of this software outside     *
 *     * of the United States of America may require an export license.    *
 *     *********************************************************************
 *
 * This module contains routines to manipulate a hash table.
 * See hash.h for a definition of the structure of the hash
 * table.  Hash tables grow automatically as the amount of
 * information increases.
 */

#ifndef lint
/*
 * Revision-control identification string.  It is marked "unused" so
 * that compilers do not warn about a static object nobody references.
 */
static char rcsid[] __attribute__ ((unused)) =
    "$Header: /usr/cvsroot/magic-8.0/utils/hash.c,v 1.2 2009/05/13 15:03:18 tim Exp $";
#endif /* not lint */

#include <stdio.h>
#include <string.h>

#include "utils/magic.h"
#include "utils/hash.h"
#include "utils/malloc.h"

/* Forward declaration: rebuild() is called before the point where it is defined. */
void rebuild();

/*
 * Ratio of # entries to # buckets at which the table is rebuilt
 * to make it larger.
 */
static int rebuildLimit = 3;

/*
 * NIL terminates every bucket chain in this module.  It is a
 * deliberately invalid, non-NULL address: indirecting through it is
 * meant to fault, which should help catch attempts to follow stale
 * pointers.
 */
#define NIL ((HashEntry *) (1<<29))


/*---------------------------------------------------------
|
||
*
|
||
* HashInit --
|
||
* HashInitClient --
|
||
*
|
||
* These procedures simply set up the hash table. The standard
|
||
* way of initializing the hash table is to use HashInit(), but
|
||
* if it's desired to provide the hash module with procedures to
|
||
* use for comparing and copying hash table keys, use HashInitClient().
|
||
*
|
||
* The number of buckets in the table at the start is 'nBuckets',
|
||
* which is automatically rounded up to a power of two. This isn't
|
||
* a limit on the number of buckets the table will eventually contain,
|
||
* though, since more buckets are automatically created if the table
|
||
* gets too full (the number of buckets increases by 4x).
|
||
*
|
||
* Results:
|
||
* None.
|
||
*
|
||
* Side Effects:
|
||
* Memory is allocated for the initial bucket area.
|
||
*
|
||
* Table Organization:
|
||
* Tables can be organized in either of four ways, depending
|
||
* on the type of comparison keys as specified by ptrKeys.
|
||
*
|
||
* HT_STRINGKEYS:
|
||
* Keys are NULL-terminated; their address is passed to
|
||
* HashFind as a (char *).
|
||
*
|
||
* HT_WORDKEYS:
|
||
* These are any 32-bit word, passed to HashFind as a (char *).
|
||
*
|
||
* HT_STRUCTKEYS:
|
||
* Actually, any value of ptrKeys >= HT_STRUCTKEYS means
|
||
* that keys are ptrKeys-word values whose ADDRESS is
|
||
* passed to HashFind as a (char *).
|
||
*
|
||
* HT_CLIENTKEYS:
|
||
* Like HT_WORDKEYS, these are also 32-bit values, passed
|
||
* to HashFind as a (char *). However, they are compared
|
||
* and copied using user-supplied procedures passed to
|
||
* HashInitClient() when the hash table was created.
|
||
* (Note that hash tables with keys of type HT_CLIENTKEYS
|
||
* can ONLY be created using HashInitClient()).
|
||
*
|
||
* Single-word values, a la HT_WORDKEYS, are fastest but most
|
||
* restrictive.
|
||
*
|
||
* Client procedures:
|
||
* Four client procedures are provided to HashInitClient()
|
||
* for use in dealing with HT_CLIENTKEYS data. They should
|
||
* be of the following form:
|
||
*
|
||
* Compare two hash keys; return 0 if equal, 1 if not. If this
|
||
* procedure is NULL, comparison is just 32-bit comparison of
|
||
* k1 and k2.
|
||
*
|
||
* int
|
||
* (*compareFn)(k1, k2)
|
||
* char *k1, *k2;
|
||
* {
|
||
* }
|
||
*
|
||
* Create a copy of a hash key for storing in a newly created
|
||
* hash entry. If this procedure is NULL, the key is stored
|
||
* without being copied.
|
||
*
|
||
* char *
|
||
* (*copyFn)(key)
|
||
* char *key;
|
||
* {
|
||
* }
|
||
*
|
||
* Produce a single 32-bit integer for a key value that will
|
||
* then be randomized by the hashing function. If NULL, then
|
||
* the key itself is used as the 32-bit integer.
|
||
*
|
||
* int
|
||
* (*hashFn)(key)
|
||
* char *key;
|
||
* {
|
||
* }
|
||
*
|
||
* Free a key that had been allocated with (*copyFn)().
|
||
* If NULL, then nothing is done.
|
||
*
|
||
* int
|
||
* (*killFn)(key)
|
||
* char *key;
|
||
* {
|
||
* }
|
||
*
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
void
|
||
HashInit(table, nBuckets, ptrKeys)
|
||
HashTable *table; /* Table to be initialized */
|
||
int nBuckets; /* How many buckets to create for starters */
|
||
int ptrKeys; /* See comments above */
|
||
{
|
||
ASSERT(ptrKeys != HT_CLIENTKEYS, "HashInit: should use HashInitClient");
|
||
HashInitClient(table, nBuckets, ptrKeys,
|
||
(int (*)()) NULL, (char *(*)()) NULL,
|
||
(int (*)()) NULL, (int (*)()) NULL);
|
||
}
|
||
|
||
void
|
||
HashInitClient(table, nBuckets, ptrKeys, compareFn, copyFn, hashFn, killFn)
|
||
HashTable *table; /* Table to be initialized */
|
||
int nBuckets; /* How many buckets to create for starters */
|
||
int ptrKeys; /* See comments above */
|
||
int (*compareFn)(); /* Function to compare two keys */
|
||
char *(*copyFn)(); /* Function to copy a key */
|
||
int (*hashFn)(); /* For hashing */
|
||
int (*killFn)(); /* For hashing */
|
||
{
|
||
HashEntry ** ptr;
|
||
int i;
|
||
|
||
table->ht_nEntries = 0;
|
||
table->ht_ptrKeys = ptrKeys;
|
||
table->ht_compareFn = compareFn;
|
||
table->ht_copyFn = copyFn;
|
||
table->ht_hashFn = hashFn;
|
||
table->ht_killFn = killFn;
|
||
|
||
/* Round up the size to a power of two */
|
||
if (nBuckets < 0) nBuckets = -nBuckets;
|
||
table->ht_size = 2;
|
||
table->ht_mask = 1;
|
||
table->ht_downShift = 29;
|
||
while (table->ht_size < nBuckets)
|
||
{
|
||
table->ht_size <<= 1;
|
||
table->ht_mask = (table->ht_mask<<1) + 1;
|
||
table->ht_downShift--;
|
||
}
|
||
|
||
/* Allocate and initialize the buckets */
|
||
table->ht_table = (HashEntry **) mallocMagic(
|
||
(unsigned) (sizeof (HashEntry *) * table->ht_size));
|
||
ptr = table->ht_table;
|
||
for (i = 0; i < table->ht_size; i++)
|
||
*ptr++ = NIL;
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* hash --
|
||
*
|
||
* This is a local procedure to compute a hash table
|
||
* bucket address based on a key value.
|
||
*
|
||
* Results:
|
||
* The return value is an integer between 0 and size-1.
|
||
*
|
||
* Side Effects:
|
||
* None.
|
||
*
|
||
* Design:
|
||
* The randomizing code is stolen straight from the rand()
|
||
* library routine.
|
||
*
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
int
|
||
hash(table, key)
|
||
HashTable *table;
|
||
char *key;
|
||
{
|
||
unsigned *up;
|
||
int i, j;
|
||
|
||
i = 0;
|
||
switch (table->ht_ptrKeys)
|
||
{
|
||
/* Add up the characters as though this were a number */
|
||
case HT_STRINGKEYS:
|
||
while (*key != 0) i = (i*10) + (*key++ - '0');
|
||
break;
|
||
|
||
/* Map the key into another 32-bit value if necessary */
|
||
case HT_CLIENTKEYS:
|
||
if (table->ht_hashFn)
|
||
{
|
||
i = (*(table->ht_hashFn))(key);
|
||
break;
|
||
}
|
||
/* Fall through to ... */
|
||
|
||
/* Just use the 32-bit key value */
|
||
case HT_WORDKEYS:
|
||
i = (spointertype) key;
|
||
break;
|
||
|
||
/* Special case for two-word structs */
|
||
case HT_STRUCTKEYS:
|
||
i = ((unsigned *) key)[0] + ((unsigned *) key)[1];
|
||
break;
|
||
|
||
/* General case of multi-word structs */
|
||
default:
|
||
j = table->ht_ptrKeys;
|
||
up = (unsigned *) key;
|
||
do { i += *up++; } while (--j);
|
||
break;
|
||
}
|
||
|
||
/* Randomize! */
|
||
return ((i*1103515245 + 12345) >> table->ht_downShift) & table->ht_mask;
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* HashLookOnly --
|
||
*
|
||
* Searches a hash table for an entry corresponding to key.
|
||
*
|
||
* Results:
|
||
* The return value is a pointer to the entry for key,
|
||
* if key was present in the table. If key was not
|
||
* present, NULL is returned.
|
||
*
|
||
* Side Effects:
|
||
* None.
|
||
*
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
HashEntry *
|
||
HashLookOnly(table, key)
|
||
HashTable *table; /* Hash table to search. */
|
||
char *key; /* Interpreted according to table->ht_ptrKeys
|
||
* as described in HashInit()'s comments.
|
||
*/
|
||
{
|
||
HashEntry *h;
|
||
unsigned *up, *kp;
|
||
int n;
|
||
int bucket;
|
||
|
||
bucket = hash(table, key);
|
||
h = *(table->ht_table + bucket);
|
||
while (h != NIL)
|
||
{
|
||
switch (table->ht_ptrKeys)
|
||
{
|
||
case HT_STRINGKEYS:
|
||
if (strcmp(h->h_key.h_name, key) == 0) return h;
|
||
break;
|
||
case HT_CLIENTKEYS:
|
||
if (table->ht_compareFn)
|
||
{
|
||
if ((*table->ht_compareFn)(h->h_key.h_ptr, key) == 0)
|
||
return h;
|
||
break;
|
||
}
|
||
/* Fall through to ... */
|
||
case HT_WORDKEYS:
|
||
if (h->h_key.h_ptr == key) return h;
|
||
break;
|
||
case HT_STRUCTKEYS:
|
||
up = h->h_key.h_words;
|
||
kp = (unsigned *) key;
|
||
if (*up++ == *kp++ && *up == *kp) return h;
|
||
break;
|
||
default:
|
||
n = table->ht_ptrKeys;
|
||
up = h->h_key.h_words;
|
||
kp = (unsigned *) key;
|
||
do { if (*up++ != *kp++) goto next; } while (--n);
|
||
return h;
|
||
}
|
||
next:
|
||
h = h->h_next;
|
||
}
|
||
|
||
/* The desired entry isn't there */
|
||
return ((HashEntry *) NULL);
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* HashFind --
|
||
*
|
||
* Searches a hash table for an entry corresponding to
|
||
* key. If no entry is found, then one is created.
|
||
*
|
||
* Results:
|
||
* The return value is a pointer to the entry for key.
|
||
* If the entry is a new one, then the h_pointer field
|
||
* of the entry we return is zero.
|
||
*
|
||
* Side Effects:
|
||
* Memory is allocated, and the hash buckets may be modified.
|
||
*
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
HashEntry *
|
||
HashFind(table, key)
|
||
HashTable *table; /* Hash table to search. */
|
||
char *key; /* Interpreted according to table->ht_ptrKeys
|
||
* as described in HashInit()'s comments.
|
||
*/
|
||
{
|
||
unsigned *up, *kp;
|
||
HashEntry *h;
|
||
int n;
|
||
int bucket;
|
||
|
||
bucket = hash(table, key);
|
||
h = *(table->ht_table + bucket);
|
||
while (h != NIL)
|
||
{
|
||
switch (table->ht_ptrKeys)
|
||
{
|
||
case HT_STRINGKEYS:
|
||
if (strcmp(h->h_key.h_name, key) == 0) return h;
|
||
break;
|
||
case HT_CLIENTKEYS:
|
||
if (table->ht_compareFn)
|
||
{
|
||
if ((*table->ht_compareFn)(h->h_key.h_ptr, key) == 0)
|
||
return h;
|
||
break;
|
||
}
|
||
/* Fall through to ... */
|
||
case HT_WORDKEYS:
|
||
if (h->h_key.h_ptr == key) return h;
|
||
break;
|
||
case HT_STRUCTKEYS:
|
||
up = h->h_key.h_words;
|
||
kp = (unsigned *) key;
|
||
if (*up++ == *kp++ && *up == *kp) return h;
|
||
break;
|
||
default:
|
||
n = table->ht_ptrKeys;
|
||
up = h->h_key.h_words;
|
||
kp = (unsigned *) key;
|
||
do { if (*up++ != *kp++) goto next; } while (--n);
|
||
return h;
|
||
}
|
||
next:
|
||
h = h->h_next;
|
||
}
|
||
|
||
/*
|
||
* The desired entry isn't there. Before allocating a new entry,
|
||
* see if we're overloading the buckets. If so, then make a
|
||
* bigger table (4x as big).
|
||
*/
|
||
if (table->ht_nEntries >= rebuildLimit*table->ht_size)
|
||
{
|
||
rebuild(table);
|
||
bucket = hash(table, key);
|
||
}
|
||
table->ht_nEntries += 1;
|
||
|
||
/*
|
||
* Now allocate a new entry. The size of the HashEntry allocated
|
||
* depends on the size of the key: for multi-word keys or string
|
||
* keys longer than 3 bytes, there will be extra space at the end
|
||
* of the HashEntry to hold the key.
|
||
*/
|
||
switch (table->ht_ptrKeys)
|
||
{
|
||
case HT_STRINGKEYS:
|
||
h = (HashEntry *) mallocMagic((unsigned) (sizeof(HashEntry)+strlen(key)-3));
|
||
(void) strcpy(h->h_key.h_name, key);
|
||
break;
|
||
case HT_CLIENTKEYS:
|
||
if (table->ht_copyFn)
|
||
{
|
||
h = (HashEntry *) mallocMagic((unsigned) (sizeof (HashEntry)));
|
||
h->h_key.h_ptr = (*table->ht_copyFn)(key);
|
||
break;
|
||
}
|
||
/* Fall through to ... */
|
||
case HT_WORDKEYS:
|
||
h = (HashEntry *) mallocMagic((unsigned) (sizeof (HashEntry)));
|
||
h->h_key.h_ptr = key;
|
||
break;
|
||
case HT_STRUCTKEYS:
|
||
h = (HashEntry *) mallocMagic(
|
||
(unsigned) (sizeof (HashEntry) + sizeof (unsigned)));
|
||
up = h->h_key.h_words;
|
||
kp = (unsigned *) key;
|
||
*up++ = *kp++;
|
||
*up = *kp;
|
||
break;
|
||
default:
|
||
n = table->ht_ptrKeys;
|
||
h = (HashEntry *) mallocMagic(
|
||
(unsigned) (sizeof(HashEntry) + (n-1) * sizeof (unsigned)));
|
||
up = h->h_key.h_words;
|
||
kp = (unsigned *) key;
|
||
do { *up++ = *kp++; } while (--n);
|
||
break;
|
||
}
|
||
|
||
h->h_pointer = 0;
|
||
h->h_next = *(table->ht_table + bucket);
|
||
*(table->ht_table + bucket) = h;
|
||
return h;
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* rebuild --
|
||
*
|
||
* This local routine makes a new hash table that
|
||
* is 4x larger than the old one.
|
||
*
|
||
* Results:
|
||
* None.
|
||
*
|
||
* Side Effects:
|
||
* The entire hash table is moved, so any bucket numbers
|
||
* from the old table are invalid.
|
||
*
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
void
|
||
rebuild(table)
|
||
HashTable *table; /* Table to be enlarged. */
|
||
{
|
||
HashEntry **oldTable, **old2, *h, *next;
|
||
int oldSize, bucket;
|
||
|
||
oldTable = table->ht_table;
|
||
old2 = oldTable;
|
||
oldSize = table->ht_size;
|
||
|
||
/* Build a new table 4 times as large as the old one. */
|
||
HashInitClient(table, table->ht_size*4, table->ht_ptrKeys,
|
||
table->ht_compareFn, table->ht_copyFn,
|
||
table->ht_hashFn, table->ht_killFn);
|
||
for ( ; oldSize > 0; oldSize--)
|
||
{
|
||
h = *old2++;
|
||
while (h != NIL)
|
||
{
|
||
next = h->h_next;
|
||
switch (table->ht_ptrKeys)
|
||
{
|
||
case HT_STRINGKEYS:
|
||
bucket = hash(table, h->h_key.h_name);
|
||
break;
|
||
case HT_WORDKEYS:
|
||
case HT_CLIENTKEYS:
|
||
bucket = hash(table, h->h_key.h_ptr);
|
||
break;
|
||
default:
|
||
bucket = hash(table, (char *) h->h_key.h_words);
|
||
break;
|
||
}
|
||
h->h_next = *(table->ht_table + bucket);
|
||
*(table->ht_table + bucket) = h;
|
||
table->ht_nEntries += 1;
|
||
h = next;
|
||
}
|
||
}
|
||
|
||
freeMagic((char *) oldTable);
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* HashStats --
|
||
*
|
||
* This routine merely prints statistics about the
|
||
* current bucket situation.
|
||
*
|
||
* Results:
|
||
* None.
|
||
*
|
||
* Side Effects:
|
||
* Junk gets printed.
|
||
*
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
#define MAXCOUNT 15
|
||
|
||
void
|
||
HashStats(table)
|
||
HashTable *table;
|
||
{
|
||
int count[MAXCOUNT], overflow, i, j;
|
||
HashEntry *h;
|
||
|
||
overflow = 0;
|
||
for (i = 0; i < MAXCOUNT; i++) count[i] = 0;
|
||
for (i = 0; i < table->ht_size; i++)
|
||
{
|
||
j = 0;
|
||
for (h = *(table->ht_table+i); h != NIL; h = h->h_next)
|
||
j++;
|
||
if (j < MAXCOUNT) count[j]++;
|
||
else overflow++;
|
||
}
|
||
|
||
for (i = 0; i < MAXCOUNT; i++)
|
||
printf("# of buckets with %d entries: %d.\n", i, count[i]);
|
||
printf("# of buckets with >%d entries: %d.\n", MAXCOUNT-1, overflow);
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* HashStartSearch --
|
||
*
|
||
* This procedure sets things up for a complete search
|
||
* of all entries recorded in the hash table.
|
||
*
|
||
* Results:
|
||
* None.
|
||
*
|
||
* Side Effects:
|
||
* The information in hs is initialized so that successive
|
||
* calls to HashNext will return successive HashEntry's
|
||
* from the table.
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
void
|
||
HashStartSearch(hs)
|
||
HashSearch *hs; /* Area in which to keep state about search.*/
|
||
{
|
||
hs->hs_nextIndex = 0;
|
||
hs->hs_h = NIL;
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* HashNext --
|
||
*
|
||
* This procedure returns successive entries in the
|
||
* hash table.
|
||
*
|
||
* Results:
|
||
* The return value is a pointer to the next HashEntry
|
||
* in the table, or NULL when the end of the table is
|
||
* reached.
|
||
*
|
||
* Side Effects:
|
||
* The information in hs is modified to advance to the
|
||
* next entry.
|
||
*
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
HashEntry *
|
||
HashNext(table, hs)
|
||
HashTable *table; /* Table to be searched. */
|
||
HashSearch *hs; /* Area used to keep state about search. */
|
||
{
|
||
HashEntry *h;
|
||
|
||
while (hs->hs_h == NIL)
|
||
{
|
||
if (hs->hs_nextIndex >= table->ht_size) return NULL;
|
||
hs->hs_h = *(table->ht_table + hs->hs_nextIndex);
|
||
hs->hs_nextIndex += 1;
|
||
}
|
||
h = hs->hs_h;
|
||
hs->hs_h = h->h_next;
|
||
return h;
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* HashKill --
|
||
*
|
||
* This routine removes everything from a hash table
|
||
* and frees up the memory space it occupied.
|
||
*
|
||
* Results:
|
||
* None.
|
||
*
|
||
* Side Effects:
|
||
* Lots of memory is freed up.
|
||
*---------------------------------------------------------
|
||
*/
|
||
|
||
void
|
||
HashKill(table)
|
||
HashTable *table; /* Hash table whose space is to be freed */
|
||
{
|
||
HashEntry *h, **hp, **hend;
|
||
int (*killFn)() = (int (*)()) NULL;
|
||
|
||
if (table->ht_ptrKeys == HT_CLIENTKEYS) killFn = table->ht_killFn;
|
||
for (hp = table->ht_table, hend = &hp[table->ht_size]; hp < hend; hp++)
|
||
for (h = *hp; h != NIL; h = h->h_next)
|
||
{
|
||
freeMagic((char *) h);
|
||
if (killFn)
|
||
(*killFn)(h->h_key.h_ptr);
|
||
}
|
||
freeMagic((char *) table->ht_table);
|
||
|
||
/*
|
||
* Set up the hash table to cause memory faults on any future
|
||
* access attempts until re-initialization.
|
||
*/
|
||
table->ht_table = (HashEntry **) (1<<29);
|
||
}
|
||
|
||
/*---------------------------------------------------------
|
||
*
|
||
* HashFreeKill ---
|
||
*
|
||
* This routine removes everything from a hash table
|
||
* and frees up the memory space it occupied along with
|
||
* the stuff pointed by h_pointer
|
||
*
|
||
* Results:
|
||
* None.
|
||
*
|
||
* Side Effects:
|
||
* Lots of memory is freed up.
|
||
*---------------------------------------------------------
|
||
*/
|
||
void
|
||
HashFreeKill(table)
|
||
HashTable *table;
|
||
{
|
||
HashSearch hs;
|
||
HashEntry *he;
|
||
void *p;
|
||
|
||
HashStartSearch(&hs);
|
||
while (he = HashNext(table, &hs)) {
|
||
p = HashGetValue(he);
|
||
freeMagic(p);
|
||
}
|
||
HashKill(table);
|
||
}
|