Main Page | Modules | Class List | Directories | File List | Class Members | File Members | Related Pages

sfghash.c

Go to the documentation of this file.
00001 /*
00002 *
00003 *  sfghash.c
00004 *
00005 *  Generic hash table library.
00006 *
00007 *  This hash table maps unique keys to void data pointers.
00008 *
00009 *  Features:
00010 *    1) Keys may be ascii strings of variable size, or
00011 *       fixed length (per table) binary byte sequences.  This
00012 *       allows use as a Mapping for String+Data pairs, or a 
00013 *       generic hashing.
00014 *    2) User can allocate keys, or pass copies and we can 
00015 *       allocate space and save keys.
00016 *    3) User can pass a free function to free up user data
00017 *       when the table is deleted.
00018 *    4) Table rows sizes can be automatically adjusted to
00019 *       the nearest prime number size.
00020 *
00021 *  6/10/03 - man - Upgraded the hash function to a Hardened hash function,
00022 *      it has no predictable cycles, and each hash table gets a different
00023 *      randomized hashing function. So even with the source code, you cannot predict 
00024 *      anything with this function.  If an  attacker can can setup a feedback
00025 *      loop he might gain some knowledge of how to muck with us, buit even in that case
00026 *      his odds are astronomically skinny.  This is actually the same problem as solved
00027 *      early on with hashing functions where degenerate data with close keys could
00028 *      produce very long bucket chains.
00029 *
00030 *  Copyright (C) 2001 Marc A Norton
00031 *  Copyright (C) 2003 Sourcefire,Inc.
00032 *
00033 */
00034 #include <stdio.h>
00035 #include <stdlib.h>
00036 #include <string.h>
00037 #include <time.h>
00038 
00039 #include "sfghash.h"
00040 
00041 /*
00042 *  uncomment this include to use xmalloc functions
00043 */
00044 
00045 /*
00046 *  Private Malloc
00047 */
00048 static 
00049 void * s_malloc( int n )
00050 {
00051      return malloc( n );
00052 }
00053 
00054 /*
00055 *  Private Free
00056 */
00057 static 
00058 void s_free( void * p )
00059 {
00060    if( p )free( p );
00061 }
00062 
00063 /*
00064 *  A classic hash routine using prime numbers
00065 *
00066 *  Constants for the Prime No. based hash routines
00067 */
00068 
00069 
00070 /*
00071 *   Primiitive Prime number test, not very fast nor efficient, but should be ok for
00072 *   hash table sizes of typical size.
00073 */
00074 static 
00075 int isPrime(int num )
00076 {
00077    int i;
00078    for(i=2;i<num;i++)
00079    {
00080       if( (num % i) == 0 ) break;//oops not prime, should have a remainder
00081    }
00082    if( i == num ) return 1;
00083    return 0;
00084 }
00085 /*
00086 *  Iterate number till we find a prime.
00087 */
00088 static
00089 int calcNextPrime(int num )
00090 {
00091         while( !isPrime( num ) ) num++;
00092         return num;
00093 }
00094 
00095 /*
00096 *
00097 *    Create a new hash table
00098 *
00099 *    nrows    : number of rows in hash table, primes are best.
00100 *               > 0  => we calc the nearest prime .ge. nrows internally
00101 *               < 0  => we use the magnitude as nrows.
00102 *    keysize  : > 0 => bytes in each key, keys are binary bytes,
00103 *               all keys are the same size.
00104 *               ==0 => keys are strings and are null terminated, 
00105 *               allowing random key lengths. 
00106 *    userkeys : > 0 => indicates user owns the key data
00107 *               and we should not allocate or free space for it,
00108 *               nor should we attempt to free the user key. We just
00109 *               save the pointer to the key. 
00110 *               ==0 => we should copy the keys and manage them internally
00111 *    userfree : routine to free users data, null if we should not 
00112 *               free user data in sfghash_delete(). The routine
00113 *               should be of the form 'void userfree(void * userdata)',
00114 *               'free' works for simple allocations.
00115 */
00116 SFGHASH * sfghash_new( int nrows, int keysize, int userkeys, void (*userfree)(void*p) )
00117 {
00118    int    i;
00119    SFGHASH * h;
00120 
00121    if( nrows > 0 ) /* make sure we have a prime number */
00122    {
00123       nrows = calcNextPrime( nrows );
00124    }
00125    else   /* use the magnitude or nrows as is */
00126    { 
00127       nrows = -nrows;
00128    }
00129 
00130 
00131    h = (SFGHASH*)s_malloc( sizeof(SFGHASH) );
00132    if( !h ) return 0;
00133 
00134    memset( h, 0, sizeof(SFGHASH) );
00135 
00136    h->sfhashfcn = sfhashfcn_new( nrows );
00137    if( !h->sfhashfcn ) return 0;
00138 
00139    h->table = (SFGHASH_NODE**) s_malloc( sizeof(SFGHASH_NODE*) * nrows );
00140    if( !h->table ) return 0;
00141 
00142    for( i=0; i<nrows; i++ )
00143    {
00144       h->table[i] = 0;
00145    }
00146 
00147    h->userkey = userkeys;
00148 
00149    h->keysize = keysize;
00150 
00151    h->nrows = nrows;
00152 
00153    h->count = 0;
00154 
00155    h->userfree = userfree;
00156 
00157    h->crow = 0; // findfirst/next current row
00158 
00159    h->cnode = 0; // findfirst/next current node ptr
00160 
00161    return h;
00162 }
00163 
00164 /*
00165 *  Set Splay mode : Splays nodes to front of list on each access
00166 */
00167 void sfghash_splaymode( SFGHASH * t, int n )
00168 {
00169    t->splay = n;
00170 }
00171 
00172 
00173 SFDICT * sfdict_new( int nitems )
00174 {
00175    return sfghash_new( nitems, 0, GH_COPYKEYS, NULL );
00176 }
00177 
00178 void sfdict_delete( SFDICT * h )
00179 {
00180     sfghash_delete( h );
00181 }
00182 
00183 /*
00184 *  Delete the hash Table 
00185 *
00186 *  free key's, free node's, and free the users data.
00187 */
00188 void sfghash_delete( SFGHASH * h )
00189 {
00190   int            i;
00191   SFGHASH_NODE * node, * onode;
00192 
00193   if( !h ) return;
00194  
00195   sfhashfcn_free( h->sfhashfcn );
00196 
00197   if( h->table )
00198   {  
00199     for(i=0;i<h->nrows;i++)
00200     {
00201       for( node=h->table[i]; node;  )
00202       {
00203         onode = node;
00204         node  = node->next;
00205 
00206         if( !h->userkey && onode->key ) 
00207             s_free( onode->key );
00208 
00209         if( h->userfree && onode->data )
00210             h->userfree( onode->data ); /* free users data, with users function */
00211 
00212         s_free( onode );
00213       }
00214     }
00215     s_free( h->table );
00216     h->table = 0;
00217   }
00218 
00219   s_free( h );
00220 }
00221 
00222 /*
00223 *  Get the # of Nodes in HASH the table
00224 */
00225 int sfghash_count( SFGHASH * t )
00226 {
00227   return t->count;
00228 }
00229 
00230 int sfdict_count( SFDICT * t )
00231 {
00232   return t->count;
00233 }
00234 
00235 
00236 /*
00237 *  Add a key + data pair
00238 *  ---------------------
00239 *
00240 *  key + data should both be non-zero, although data can be zero
00241 *
00242 *  t    - hash table
00243 *  key  - users key data (should be unique in this table)
00244 *         may be ascii strings or fixed size binary keys
00245 *  data - users data pointer
00246 *
00247 *  returns  SF_HASH_NOMEM: malloc error
00248 *           SF_HASH_INTABLE : key already in table (t->cnode points to the node)
00249 *           SF_OK: added a node for this key + data pair
00250 *
00251 *  Notes:
00252 *  If the key node already exists, then t->cnode points to it on return,
00253 *  this allows you to do something with the node - like add the data to a 
00254 *  linked list of data items held by the node, or track a counter, or whatever.
00255 *
00256 */
00257 int sfghash_add( SFGHASH * t, void * key, void * data )
00258 {
00259     unsigned    hashkey;
00260         int         klen;
00261     int         index;
00262     SFGHASH_NODE  *hnode;
00263 
00264     /*
00265     *   Get proper Key Size
00266     */  
00267     if( t->keysize > 0  )
00268     {
00269         klen = t->keysize;
00270     }
00271     else
00272     {
00273         /* need the nul byte for strcmp() in sfghash_find() */
00274         klen = strlen( (char*)key ) + 1;
00275     }
00276     
00277     hashkey = t->sfhashfcn->hash_fcn(  t->sfhashfcn, (unsigned char*) key, klen );
00278     
00279     index = hashkey % t->nrows;
00280 
00281     /*
00282     *  Uniqueness: 
00283     *  Check 1st to see if the key is already in the table
00284     *  Just bail if it is.
00285     */
00286     for( hnode=t->table[index]; hnode; hnode=hnode->next )
00287     {
00288        if( t->keysize > 0 )
00289        {
00290           if( !t->sfhashfcn->keycmp_fcn(hnode->key,key,klen) )
00291           {
00292               t->cnode = hnode; /* save pointer to the node */
00293               return SFGHASH_INTABLE; /* found it */
00294           }
00295        }
00296        else
00297        {
00298          if( !strcmp((const char *)hnode->key,(const char*)key) )
00299          {
00300              t->cnode = hnode; /* save pointer to the node */
00301              return SFGHASH_INTABLE; /* found it */
00302          }
00303        }
00304     }
00305 
00306     /* 
00307     *  Create new node 
00308     */
00309     hnode = (SFGHASH_NODE*)s_malloc(sizeof(SFGHASH_NODE));
00310     if( !hnode )
00311          return SFGHASH_NOMEM;
00312     
00313     /* Add the Key */
00314     if( t->userkey )
00315     {
00316       /* Use the Users key */
00317       hnode->key = key;
00318     }
00319     else
00320     {
00321       /* Create new key */
00322       hnode->key = s_malloc( klen );
00323       if( !hnode->key )
00324            return SFGHASH_NOMEM;
00325 
00326       /* Copy key  */
00327       memcpy(hnode->key,key,klen);
00328     }
00329     
00330     /* Add The Node */
00331     if( t->table[index] ) /* add the node to the existing list */
00332     {
00333         hnode->prev = 0;  // insert node as head node
00334         hnode->next=t->table[index];
00335         hnode->data=data;
00336         t->table[index]->prev = hnode;
00337         t->table[index] = hnode;
00338     }
00339     else /* 1st node in this list */
00340     {
00341         hnode->prev=0;
00342         hnode->next=0;
00343         hnode->data=data;
00344         t->table[index] = hnode;
00345     }
00346 
00347     t->count++;
00348 
00349     return SFGHASH_OK;
00350 }
00351 
00352 /*
00353 *
00354 */
00355 int sfdict_add( SFGHASH * t, char * key, void * data )
00356 {
00357    return sfghash_add( t, key, data );
00358 }
00359 /*
00360 *  move a node to the front of the list
00361 */
00362 static void movetofront( SFGHASH *t , int index, SFGHASH_NODE * n )
00363 {
00364     if( t->table[index] != n ) // if not at fron of list already...
00365     {
00366       /* Unlink the node */
00367       if( n->prev ) n->prev->next = n->next;
00368       if( n->next ) n->next->prev = n->prev;
00369       
00370       /* Link at front of list */
00371       n->prev=0;
00372       n->next=t->table[index];
00373       t->table[index]->prev=n;
00374     }
00375 }
00376 
00377 /*
00378 *  Find a Node based on the key, return users data.
00379 */
00380 static SFGHASH_NODE * sfghash_find_node( SFGHASH * t, void * key)
00381 {
00382     unsigned    hashkey;
00383     int         index, klen;
00384     SFGHASH_NODE  *hnode;
00385 
00386     if( t->keysize  )
00387     {
00388         klen = t->keysize;
00389     }
00390     else
00391     {
00392         klen = strlen( (char*) key ) + 1;
00393     }
00394 
00395     hashkey = t->sfhashfcn->hash_fcn(  t->sfhashfcn, (unsigned char*) key, klen );
00396     
00397     index = hashkey % t->nrows;
00398    
00399     for( hnode=t->table[index]; hnode; hnode=hnode->next )
00400     {
00401         if( t->keysize == 0 )
00402         {
00403            if( !strcmp((char*)hnode->key,(char*)key) )
00404            {
00405                if( t->splay  > 0 )
00406                    movetofront(t,index,hnode);
00407 
00408                return hnode;
00409            }
00410         }
00411         else
00412         {
00413            if( !t->sfhashfcn->keycmp_fcn(hnode->key,key,t->keysize) )
00414            {
00415                if( t->splay  > 0 )
00416                    movetofront(t,index,hnode);
00417 
00418                return hnode;
00419            }
00420         }
00421     }
00422 
00423    return NULL;
00424 }
00425 
00426 /*
00427 *  Find a Node based on the key, return users data.
00428 */
00429 void * sfghash_find( SFGHASH * t, void * key)
00430 {
00431     SFGHASH_NODE * hnode;
00432 
00433     hnode = sfghash_find_node( t, key );
00434 
00435     if( hnode ) return hnode->data;
00436 
00437     return NULL;
00438 }
00439 
00440 /*
00441 *  Unlink and free the node
00442 */
00443 static int sfghash_free_node( SFGHASH * t, unsigned index, SFGHASH_NODE * hnode )
00444 {
00445     if( !t->userkey && hnode->key ) 
00446         s_free( hnode->key );
00447     hnode->key = 0;
00448 
00449     if( t->userfree && hnode->data )
00450         t->userfree( hnode->data ); /* free users data, with users function */
00451 
00452     if( hnode->prev )  // not the 1st node
00453     {
00454           hnode->prev->next = hnode->next;
00455           if( hnode->next ) hnode->next->prev = hnode->prev;
00456     }
00457     else if( t->table[index] )  // 1st node
00458     {
00459            t->table[index] = t->table[index]->next;
00460            if( t->table[index] )t->table[index]->prev = 0;
00461     }
00462 
00463     s_free( hnode );
00464 
00465     t->count--;
00466 
00467     return SFGHASH_OK;
00468 }
00469 
00470 /*
00471 *  Remove a Key/Data Pair from the table - find it, unlink it, and free the memory for it.
00472 *
00473 *  returns : 0 - OK
00474 *           -1 - node not found
00475 */
00476 int sfghash_remove( SFGHASH * t, void * key)
00477 {
00478     SFGHASH_NODE * hnode;
00479     int klen;
00480     unsigned hashkey, index;
00481 
00482     if( t->keysize > 0 )
00483     {
00484        klen = t->keysize;
00485     }
00486     else
00487     {
00488        klen = strlen((char*)key) + 1;
00489     }
00490 
00491     hashkey = t->sfhashfcn->hash_fcn(  t->sfhashfcn, (unsigned char*) key, klen );
00492     
00493     index = hashkey % t->nrows;
00494 
00495     for( hnode=t->table[index]; hnode; hnode=hnode->next )
00496     {
00497        if( t->keysize > 0 )
00498        {
00499          if( !t->sfhashfcn->keycmp_fcn(hnode->key,key,klen) )
00500          {
00501              return sfghash_free_node( t, index, hnode );
00502          }
00503        }
00504        else
00505        {
00506          if( !strcmp((const char *)hnode->key,(const char*)key) )
00507          {
00508              return sfghash_free_node( t, index, hnode );
00509          }
00510        }
00511     }
00512 
00513    return SFGHASH_ERR;  
00514 }
00515 
00516 /*
00517 *
00518 */
00519 int sfdict_remove( SFGHASH * t, char * key)
00520 {
00521    return sfghash_remove( t, key);
00522 }
00523 
00524 /*
00525 *   Get First Hash Table Node
00526 */
00527 SFGHASH_NODE * sfghash_findfirst1( SFGHASH * t )
00528 {
00529     /* Start with 1st row */
00530     for( t->crow=0; t->crow < t->nrows; t->crow++ )
00531     {    
00532        /* Get 1st Non-Null node in row list */
00533        t->cnode = t->table[t->crow];
00534 
00535        if( t->cnode ) return t->cnode;
00536     }
00537   return NULL;
00538 }
00539 
00540 /*
00541 *   Get Next Hash Table Node
00542 */
00543 SFGHASH_NODE * sfghash_findnext1( SFGHASH * t )
00544 {
00545     if( t->cnode ) /* get next in this list */
00546     {
00547        /* Next node in current node list */
00548        t->cnode = t->cnode->next;
00549        if( t->cnode )
00550        {
00551            return t->cnode;
00552        }
00553     }
00554 
00555     /* Get 1st node in next non-emtoy row/node list */
00556     for( t->crow++; t->crow < t->nrows; t->crow++ )
00557     {    
00558        t->cnode = t->table[ t->crow ];
00559        if( t->cnode ) 
00560        {
00561            return t->cnode;
00562        }
00563     }
00564 
00565     return  NULL;
00566 }
00567 
00568 /* Internal use only */
00569 static void sfghash_next( SFGHASH * t )
00570 {
00571     if( !t->cnode )
00572         return ;
00573  
00574     /* Next node in current node list */
00575     t->cnode = t->cnode->next;
00576     if( t->cnode )
00577     {
00578         return;
00579     }
00580 
00581     /* Next row */ 
00582     /* Get 1st node in next non-emtoy row/node list */
00583     for( t->crow++; t->crow < t->nrows; t->crow++ )
00584     {    
00585        t->cnode = t->table[ t->crow ];
00586        if( t->cnode ) 
00587        {
00588            return;
00589        }
00590     }
00591 }
00592 /*
00593 *   Get First Hash Table Node
00594 */
00595 SFGHASH_NODE * sfghash_findfirst( SFGHASH * t )
00596 {
00597     SFGHASH_NODE * n;
00598 
00599     /* Start with 1st row */
00600     for( t->crow=0; t->crow < t->nrows; t->crow++ )
00601     {    
00602        /* Get 1st Non-Null node in row list */
00603        t->cnode = t->table[ t->crow ];
00604 
00605        if( t->cnode )
00606        {
00607          n = t->cnode;
00608 
00609          sfghash_next( t ); // load t->cnode with the next entry
00610 
00611          return n;
00612        }
00613     }
00614   return NULL;
00615 }
00616 
00617 /*
00618 *   Get Next Hash Table Node
00619 */
00620 SFGHASH_NODE * sfghash_findnext( SFGHASH * t )
00621 {
00622     SFGHASH_NODE * n;
00623 
00624     n = t->cnode;
00625 
00626     if( !n ) /* Done, no more entries */
00627     {
00628         return NULL;
00629     }
00630 
00631     /*
00632        Preload next node into current node 
00633     */
00634     sfghash_next( t ); 
00635 
00636     return  n;
00637 }
00638 
00639 /*
00640 *
00641 *
00642 *   ATOM SUPPORT  - A Global String+DataPtr Hash Table
00643 *
00644 *   Data Pointers are not free'd automatically, the user
00645 *   must do this.
00646 */
00647 /*
00648 *   
00649 */
00650 static SFGHASH * g_atom=0;       /* atom hash table */
00651 static int       atom_first=1; /* supports auto init on 1st add_atom call */
00652 static int       natoms=1000;  /* # rows in hash table - more makes it faster access */
00653 
00654 /*
00655 *   set size of atom hash table
00656 */
00657 int sfatom_setsize( int n )
00658 {
00659     natoms = n;
00660     return 0;
00661 }
00662 /*
00663 *   
00664 */
00665 int sfatom_init()
00666 {
00667    if( !atom_first ) return 0;
00668 
00669    /* Create a Hash Table */
00670    g_atom = sfghash_new( natoms, 0 /* string keys */, GH_COPYKEYS, NULL /* User frees data */ );
00671 
00672    if( !g_atom  )
00673    {
00674        return SFGHASH_ERR;
00675    }
00676 
00677    atom_first = 0;
00678 
00679    return SFGHASH_OK;
00680 }
00681 /*
00682 *
00683 */
00684 int sfatom_reset()
00685 {
00686     atom_first = 1;
00687 
00688     sfghash_delete( g_atom );
00689 
00690     if( sfatom_init() )
00691     {
00692       return SFGHASH_ERR;
00693     }
00694 
00695     return SFGHASH_OK;
00696 }
00697 /*
00698 *
00699 */
00700 int sfatom_add(char * str, void * data)
00701 {
00702    if( atom_first )
00703    { 
00704       if( sfatom_init() )
00705       {
00706          return SFGHASH_ERR;
00707       }
00708     }
00709 
00710     if( !g_atom ) 
00711     {
00712         return SFGHASH_ERR;
00713     }
00714 
00715     sfghash_add( g_atom, strdup(str), data );
00716 
00717     return SFGHASH_OK;
00718 }
00719 /*
00720 *
00721 */
00722 int sfatom_remove(char * str)
00723 {
00724     return sfghash_remove( g_atom, str );
00725 }
00726 /*
00727 *
00728 */
00729 void * sfatom_find(char * str)
00730 {
00731     return (void*) sfghash_find( g_atom, str );
00732 }
00733 /*
00734 *
00735 */
00736 int sfatom_count()
00737 {
00738     return g_atom->count;
00739 }
00740 /*
00741 *
00742 */
00743 SFGHASH_NODE * sfatom_findfirst()
00744 {
00745    SFGHASH_NODE * node = sfghash_findfirst( g_atom );
00746 
00747    if( node ) return node;
00748 
00749    return NULL;
00750 }
00751 /*
00752 *
00753 */
00754 SFGHASH_NODE * sfatom_findnext()
00755 {
00756    SFGHASH_NODE * node = sfghash_findnext( g_atom );
00757 
00758    if( node ) return node;
00759 
00760    return NULL;
00761 }
00762 
00763 
00764 /*
00765 *
00766 *   Test Driver for Hashing
00767 *  
00768 */
00769 
00770 #ifdef SFGHASH_MAIN 
00771 
00772 void myfree ( void * p )
00773 {
00774         printf("freeing '%s'\n",p);
00775         free(p);
00776 }
00777 
00778 /*
00779 *       Hash test program  
00780 */
00781 int main ( int argc, char ** argv )
00782 {
00783    int         i;
00784    SFGHASH      * t;
00785    SFGHASH_NODE * n, *m;
00786    char str[256],*p;
00787    int  num=100;
00788 
00789    if( argc > 1 )
00790        num = atoi(argv[1]);
00791 
00792    sfatom_init();
00793 
00794    /* Create a Hash Table */
00795    t = sfghash_new( 1000, 0 , GH_COPYKEYS , myfree  );
00796 
00797    /* Add Nodes to the Hash Table */
00798    for(i=0;i<num;i++) 
00799    {
00800        sprintf(str,"KeyWord%d",i+1);
00801        sfghash_add( t, str,  strupr(strdup(str)) );
00802 
00803        sfatom_add( str,  strupr(strdup(str)) );
00804    }  
00805 
00806    /* Find and Display Nodes in the Hash Table */
00807    printf("\n** FIND KEY TEST\n");
00808 
00809    for(i=0;i<num;i++) 
00810    {
00811       sprintf(str,"KeyWord%d",i+1);
00812 
00813       p = (char*) sfghash_find( t, str );
00814 
00815       printf("Hash-key=%*s, data=%*s\n", strlen(str),str, strlen(str), p );
00816 
00817       p = (char*) sfatom_find( str );
00818 
00819       printf("Atom-key=%*s, data=%*s\n", strlen(str),str, strlen(str), p );
00820    }  
00821 
00822    /* Display All Nodes in the Hash Table */
00823    printf("\n** FINDFIRST / FINDNEXT TEST\n");
00824 
00825    for( n = sfghash_findfirst(t); n; n = sfghash_findnext(t) )
00826    {
00827       printf("hash-findfirst/next: key=%s, data=%s\n", n->key, n->data );
00828 
00829       // hashing code frees user data using 'myfree' above ....
00830       if( sfghash_remove(t,n->key) ) 
00831             printf("Could not remove the key node\n");
00832       else  
00833             printf("key node removed\n");
00834    }
00835 
00836    for( n = sfatom_findfirst(); n; n = sfatom_findnext() )
00837    {
00838       printf("atom-findfirst/next: key=%s, data=%s\n", n->key, n->data );
00839 
00840       free( n->data );  //since atom data is not freed automatically
00841    }
00842 
00843    /* Free the table and it's user data */
00844    printf("****sfghash_delete\n");
00845    sfghash_delete( t );
00846 
00847    printf("****sfatom_reset\n");
00848    sfatom_reset();
00849 
00850    printf("\nnormal pgm finish\n\n");
00851 
00852    return 0;
00853 }
00854 
00855 
00856 
00857 #endif
00858 
00859 

Generated on Sun May 14 14:51:18 2006 by  doxygen 1.4.2