Main Page | Modules | Class List | Directories | File List | Class Members | File Members | Related Pages

acsmx2.c

Go to the documentation of this file.
00001 /*
00002 **   $Id$
00003 ** 
00004 **   acsmx2.c
00005 **
00006 **   Multi-Pattern Search Engine
00007 **
00008 **   Aho-Corasick State Machine - version 2.0
00009 **
00010 **   Supports both Non-Deterministic and Deterministic Finite Automata 
00011 **
00012 **
00013 **   Reference - Efficient String matching: An Aid to Bibliographic Search
00014 **               Alfred V Aho and Margaret J Corasick
00015 **               Bell Labratories 
00016 **               Copyright(C) 1975 Association for Computing Machinery,Inc
00017 **
00018 **   +++
00019 **   +++ Version 1.0 notes - Marc Norton:
00020 **   +++
00021 **
00022 **   Original implementation based on the 4 algorithms in the paper by Aho & Corasick,
00023 **   some implementation ideas from 'Practical Algorithms in C', and some
00024 **   of my own. 
00025 **
00026 **   1) Finds all occurrences of all patterns within a text.
00027 **
00028 **   +++
00029 **   +++ Version 2.0 Notes - Marc Norton/Dan Roelker:
00030 **   +++
00031 **  
00032 **   New implementation modifies the state table storage and access model to use
00033 **   compacted sparse vector storage. Dan Roelker and I hammered this strategy out
00034 **   amongst many others in order to reduce memory usage and improve caching performance.
00035 **   The memory usage is greatly reduced, we only use 1/4 of what we use to. The caching
00036 **   performance is better in pure benchmarking tests, but does not show overall improvement 
00037 **   in Snort.  Unfortunately, once a pattern match test has been performed Snort moves on to doing
00038 **   many other things before we get back to a patteren match test, so the cache is voided.
00039 **  
00040 **   This versions has better caching performance characteristics, reduced memory, 
00041 **   more state table storage options, and requires no a priori case conversions.  
00042 **   It does maintain the same public interface. (Snort only used banded storage).
00043 **
00044 **     1) Supports NFA and DFA state machines, and basic keyword state machines
00045 **     2) Initial transition table uses Linked Lists
00046 **     3) Improved state table memory options. NFA and DFA state 
00047 **        transition tables are converted to one of 4 formats during compilation.
00048 **        a) Full matrix 
00049 **        b) Sparse matrix
00050 **        c) Banded matrix (Default-this is the only one used in snort)
00051 **        d) Sparse-Banded matrix
00052 **     4) Added support for acstate_t in .h file so we can compile states as 
00053 **        16, or 32 bit state values for another reduction in memory consumption,
00054 **        smaller states allows more of the state table to be cached, and improves
00055 **        performance on x86-P4.  Your mileage may vary, especially on risc systems.
00056 **     5) Added a bool to each state transition list to indicate if there is a matching
00057 **        pattern in the state. This prevents us from accessing another data array
00058 **        and can improve caching/performance.
00059 **     6) The search functions are very sensitive, don't change them without extensive testing,
00060 **        or you'll just spoil the caching and prefetching opportunities.
00061 **  
00062 **   Extras for fellow pattern matchers:
00063 **    The table below explains the storage format used at each step.
00064 **    You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly.
00065 **    You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly.
00066 **    For applications where you have lots of data and a pattern set to search, this version was up to 3x faster
00067 **    than the previous verion, due to caching performance. This cannot be fully realized in Snort yet,
00068 **    but other applications may have better caching opportunities.
00069 **    Snort only needs to use the banded or full storage.
00070 **
00071 **  Transition table format at each processing stage.
00072 **  -------------------------------------------------
00073 **  Patterns -> Keyword State Table (List)
00074 **  Keyword State Table -> NFA (List)
00075 **  NFA -> DFA (List)
00076 **  DFA (List)-> Sparse Rows  O(m-avg # transitions per state)
00077 **            -> Banded Rows  O(1) 
00078 **            -> Sparse-Banded Rows O(nb-# bands)
00079 **            -> Full Matrix  O(1)
00080 **
00081 ** Copyright(C) 2002,2003,2004 Marc Norton
00082 ** Copyright(C) 2003,2004 Daniel Roelker 
00083 ** Copyright(C) 2002,2003,2004 Sourcefire,Inc.
00084 **
00085 ** This program is free software; you can redistribute it and/or modify
00086 ** it under the terms of the GNU General Public License as published by
00087 ** the Free Software Foundation; either version 2 of the License, or
00088 ** (at your option) any later version.
00089 **
00090 ** This program is distributed in the hope that it will be useful,
00091 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00092 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00093 ** GNU General Public License for more details.
00094 **
00095 ** You should have received a copy of the GNU General Public License
00096 ** along with this program; if not, write to the Free Software
00097 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00098 *
00099 */  
00100   
00101 #include <stdio.h>
00102 #include <stdlib.h>
00103 #include <string.h>
00104 #include <ctype.h>
00105   
00106 #include "acsmx2.h"
00107   
00108 /*
00109 *
00110 */ 
00111 #define MEMASSERT(p,s) if(!p){printf("ACSM-No Memory: %s!\n",s);exit(0);}
00112 
00113 /*
00114 *
00115 */ 
00116 static int max_memory = 0;
00117 
00118 /*
00119 *
00120 */ 
00121 static int s_verbose=0;
00122 
00123 /*
00124 *
00125 */ 
00126 typedef struct acsm_summary_s
00127 {
00128       unsigned    num_states;
00129       unsigned    num_transitions;
00130       ACSM_STRUCT2 acsm;
00131 
00132 }acsm_summary_t;
00133 
00134 /*
00135 *
00136 */ 
00137 static acsm_summary_t summary={0,0}; 
00138 
00139 /*
00140 ** Case Translation Table 
00141 */ 
00142 static unsigned char xlatcase[256];
00143 /*
00144 *
00145 */ 
00146 static
00147 void
00148 init_xlatcase() 
00149 {
00150   int i;
00151   for (i = 0; i < 256; i++)
00152     {
00153       xlatcase[i] = toupper(i);
00154     }
00155 }
00156 /*
00157 *    Case Conversion
00158 */ 
00159 static 
00160 inline 
00161 void
00162 ConvertCaseEx (unsigned char *d, unsigned char *s, int m) 
00163 {
00164   int i;
00165 #ifdef XXXX
00166   int n;
00167   n   = m & 3;
00168   m >>= 2;
00169 
00170   for (i = 0; i < m; i++ )
00171     {
00172       d[0] = xlatcase[ s[0] ];
00173       d[2] = xlatcase[ s[2] ];
00174       d[1] = xlatcase[ s[1] ];
00175       d[3] = xlatcase[ s[3] ];
00176       d+=4;
00177       s+=4;
00178     }
00179 
00180   for (i=0; i < n; i++)
00181     {
00182       d[i] = xlatcase[ s[i] ];
00183     }
00184 #else
00185   for (i=0; i < m; i++)
00186     {
00187       d[i] = xlatcase[ s[i] ];
00188     }
00189 
00190 #endif
00191 }
00192 
00193 
00194 /*
00195 *
00196 */ 
00197 void acsmSetVerbose2(int n)
00198 {
00199      s_verbose = 1;
00200 }
00201 
00202 /*
00203 *
00204 */ 
00205 static void *
00206 AC_MALLOC (int n) 
00207 {
00208   void *p;
00209   p = malloc (n);
00210   if (p)
00211     max_memory += n;
00212   return p;
00213 }
00214 
00215 
00216 /*
00217 *
00218 */ 
00219 static void
00220 AC_FREE (void *p) 
00221 {
00222   if (p)
00223     free (p);
00224 }
00225 
00226 
00227 /*
00228 *    Simple QUEUE NODE
00229 */ 
00230 typedef struct _qnode
00231 {
00232   int state;
00233    struct _qnode *next;
00234 }
00235 QNODE;
00236 
00237 /*
00238 *    Simple QUEUE Structure
00239 */ 
00240 typedef struct _queue
00241 {
00242   QNODE * head, *tail;
00243   int count;
00244 }
00245 QUEUE;
00246 
00247 /*
00248 *   Initialize the queue
00249 */ 
00250 static void
00251 queue_init (QUEUE * s) 
00252 {
00253   s->head = s->tail = 0;
00254   s->count= 0;
00255 }
00256 
00257 /*
00258 *  Find a State in the queue
00259 */ 
00260 static int
00261 queue_find (QUEUE * s, int state) 
00262 {
00263   QNODE * q;
00264   q = s->head;
00265   while( q )
00266   {
00267       if( q->state == state ) return 1;
00268       q = q->next;
00269   }
00270   return 0;
00271 }
00272 
00273 /*
00274 *  Add Tail Item to queue (FiFo/LiLo)
00275 */ 
00276 static void
00277 queue_add (QUEUE * s, int state) 
00278 {
00279   QNODE * q;
00280 
00281   if( queue_find( s, state ) ) return;  
00282 
00283   if (!s->head)
00284   {
00285       q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE));
00286       MEMASSERT (q, "queue_add");
00287       q->state = state;
00288       q->next = 0;
00289   }
00290   else
00291   {
00292       q = (QNODE *) AC_MALLOC (sizeof (QNODE));
00293       q->state = state;
00294       q->next = 0;
00295       s->tail->next = q;
00296       s->tail = q;
00297   }
00298   s->count++;
00299 }
00300 
00301 
00302 /*
00303 *  Remove Head Item from queue
00304 */ 
00305 static int
00306 queue_remove (QUEUE * s) 
00307 {
00308   int state = 0;
00309   QNODE * q;
00310   if (s->head)
00311   {
00312       q       = s->head;
00313       state   = q->state;
00314       s->head = s->head->next;
00315       s->count--;
00316 
00317       if( !s->head )
00318       {
00319           s->tail = 0;
00320           s->count = 0;
00321       }
00322       AC_FREE (q);
00323   }
00324   return state;
00325 }
00326 
00327 
00328 /*
00329 *   Return items in the queue
00330 */ 
00331 static int
00332 queue_count (QUEUE * s) 
00333 {
00334   return s->count;
00335 }
00336 
00337 
00338 /*
00339 *  Free the queue
00340 */ 
00341 static void
00342 queue_free (QUEUE * s) 
00343 {
00344   while (queue_count (s))
00345     {
00346       queue_remove (s);
00347     }
00348 }
00349 
00350 /*
00351 *  Get Next State-NFA
00352 */
00353 static 
00354 int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input )
00355 {
00356   trans_node_t * t = acsm->acsmTransTable[state];
00357 
00358   while( t )
00359   {
00360     if( t->key == input )
00361     {
00362         return t->next_state;
00363     }
00364     t=t->next;
00365   }
00366 
00367   if( state == 0 ) return 0;
00368   
00369   return ACSM_FAIL_STATE2; /* Fail state ??? */
00370 }
00371 
00372 /*
00373 *  Get Next State-DFA
00374 */
00375 static 
00376 int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input )
00377 {
00378   trans_node_t * t = acsm->acsmTransTable[state];
00379 
00380   while( t )
00381   {
00382     if( t->key == input )
00383     {
00384       return t->next_state;
00385     }
00386     t = t->next;
00387   }
00388 
00389   return 0; /* default state */
00390 }
00391 /*
00392 *  Put Next State - Head insertion, and transition updates
00393 */
00394 static 
00395 int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state )
00396 {
00397   trans_node_t * p;
00398   trans_node_t * tnew;
00399 
00400  // printf("   List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state);
00401 
00402 
00403   /* Check if the transition already exists, if so just update the next_state */
00404   p = acsm->acsmTransTable[state];
00405   while( p )
00406   {
00407     if( p->key == input )  /* transition already exists- reset the next state */
00408     {
00409         p->next_state = next_state;
00410         return 0;    
00411     }
00412     p=p->next;
00413   }
00414 
00415   /* Definitely not an existing transition - add it */
00416   tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t));
00417   if( !tnew ) return -1; 
00418 
00419   tnew->key        = input;
00420   tnew->next_state = next_state;
00421   tnew->next       = 0;
00422 
00423   tnew->next = acsm->acsmTransTable[state];
00424   acsm->acsmTransTable[state] = tnew; 
00425 
00426   acsm->acsmNumTrans++;
00427   
00428   return 0; 
00429 }
00430 /*
00431 *   Free the entire transition table 
00432 */
00433 static 
00434 int List_FreeTransTable( ACSM_STRUCT2 * acsm )
00435 {
00436   int i;
00437   trans_node_t * t, *p;
00438 
00439   if( !acsm->acsmTransTable ) return 0;
00440 
00441   for(i=0;i< acsm->acsmMaxStates;i++)
00442   {  
00443      t = acsm->acsmTransTable[i];
00444 
00445      while( t )
00446      {
00447        p = t->next;
00448        free(t);      
00449        t = p;
00450        max_memory -= sizeof(trans_node_t);
00451      }
00452    }
00453 
00454    free(acsm->acsmTransTable);
00455 
00456    max_memory -= sizeof(void*) * acsm->acsmMaxStates;
00457 
00458    acsm->acsmTransTable = 0;
00459 
00460    return 0;
00461 }
00462 
00463 /*
00464 *  
00465 */
00466 /*
00467 static 
00468 int List_FreeList( trans_node_t * t )
00469 {
00470   int tcnt=0;
00471 
00472   trans_node_t *p;
00473 
00474   while( t )
00475   {
00476        p = t->next;
00477        free(t);      
00478        t = p;
00479        max_memory -= sizeof(trans_node_t);
00480        tcnt++;
00481    }
00482 
00483    return tcnt;
00484 }
00485 */
00486 
00487 /*
00488 *    Print the trans table to stdout
00489 */
00490 static 
00491 int List_PrintTransTable( ACSM_STRUCT2 * acsm )
00492 {
00493   int i;
00494   trans_node_t * t;
00495   ACSM_PATTERN2 * patrn;
00496 
00497   if( !acsm->acsmTransTable ) return 0;
00498 
00499   printf("Print Transition Table- %d active states\n",acsm->acsmNumStates);
00500 
00501   for(i=0;i< acsm->acsmNumStates;i++)
00502   {  
00503      t = acsm->acsmTransTable[i];
00504 
00505      printf("state %3d: ",i);
00506 
00507      while( t )
00508      { 
00509        if( isprint(t->key) )
00510          printf("%3c->%-5d\t",t->key,t->next_state);
00511        else
00512          printf("%3d->%-5d\t",t->key,t->next_state);
00513 
00514        t = t->next;
00515      }
00516 
00517      patrn =acsm->acsmMatchList[i];
00518 
00519      while( patrn )
00520      {
00521          printf("%.*s ",patrn->n,patrn->patrn);
00522  
00523          patrn = patrn->next;
00524      }
00525 
00526      printf("\n");
00527    }
00528    return 0;
00529 }
00530 
00531 
00532 /*
00533 *   Converts row of states from list to a full vector format
00534 */ 
00535 static 
00536 int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full )
00537 {
00538     int tcnt = 0;
00539     trans_node_t * t = acsm->acsmTransTable[ state ];
00540 
00541     memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize);
00542    
00543     if( !t ) return 0;
00544 
00545     while(t)
00546     {
00547       full[ t->key ] = t->next_state;
00548       tcnt++;
00549       t = t->next;
00550     }
00551     return tcnt;
00552 }
00553 
00554 /*
00555 *   Copy a Match List Entry - don't dup the pattern data
00556 */ 
00557 static ACSM_PATTERN2*
00558 CopyMatchListEntry (ACSM_PATTERN2 * px) 
00559 {
00560   ACSM_PATTERN2 * p;
00561 
00562   p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
00563   MEMASSERT (p, "CopyMatchListEntry");
00564 
00565   memcpy (p, px, sizeof (ACSM_PATTERN2));
00566 
00567   p->next = 0;
00568 
00569   return p;
00570 }
00571 
00572 /*
00573 *  Check if a pattern is in the list already,
00574 *  validate it using the 'id' field. This must be unique
00575 *  for every pattern.
00576 */
00577 /*
00578 static
00579 int FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px) 
00580 {
00581   ACSM_PATTERN2 * p;
00582 
00583   p = acsm->acsmMatchList[state];
00584   while( p )
00585   {
00586     if( p->id == px->id ) return 1;
00587     p = p->next;
00588   }    
00589 
00590   return 0;
00591 }
00592 */
00593 
00594 
00595 /*
00596 *  Add a pattern to the list of patterns terminated at this state.
00597 *  Insert at front of list.
00598 */ 
00599 static void
00600 AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px) 
00601 {
00602   ACSM_PATTERN2 * p;
00603 
00604   p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
00605   
00606   MEMASSERT (p, "AddMatchListEntry");
00607 
00608   memcpy (p, px, sizeof (ACSM_PATTERN2));
00609 
00610   p->next = acsm->acsmMatchList[state];
00611 
00612   acsm->acsmMatchList[state] = p;
00613 }
00614 
00615 
00616 static void
00617 AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p) 
00618 {
00619   int            state, next, n;
00620   unsigned char *pattern;
00621 
00622   n       = p->n;
00623   pattern = p->patrn;
00624   state   = 0;
00625 
00626   if(s_verbose)printf(" Begin AddPatternStates: acsmNumStates=%d\n",acsm->acsmNumStates);
00627   if(s_verbose)printf("    adding '%.*s', nocase=%d\n", n,p->patrn, p->nocase );
00628   
00629   /* 
00630   *  Match up pattern with existing states
00631   */ 
00632   for (; n > 0; pattern++, n--)
00633   {
00634       if(s_verbose)printf(" find char='%c'\n", *pattern );
00635 
00636       next = List_GetNextState(acsm,state,*pattern);
00637       if (next == ACSM_FAIL_STATE2 || next == 0)
00638          {
00639              break;
00640          }
00641       state = next;
00642   }
00643   
00644   /*
00645   *   Add new states for the rest of the pattern bytes, 1 state per byte
00646   */ 
00647   for (; n > 0; pattern++, n--)
00648   {
00649       if(s_verbose)printf(" add char='%c' state=%d NumStates=%d\n", *pattern, state, acsm->acsmNumStates );
00650 
00651       acsm->acsmNumStates++; 
00652       List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates);
00653       state = acsm->acsmNumStates;
00654   }
00655 
00656   AddMatchListEntry (acsm, state, p );
00657 
00658   if(s_verbose)printf(" End AddPatternStates: acsmNumStates=%d\n",acsm->acsmNumStates);
00659 }
00660 
00661 /*
00662 *   Build A Non-Deterministic Finite Automata
00663 *   The keyword state table must already be built, via AddPatternStates().
00664 */ 
00665 static void
00666 Build_NFA (ACSM_STRUCT2 * acsm) 
00667 {
00668     int r, s, i;
00669     QUEUE q, *queue = &q;
00670     acstate_t     * FailState = acsm->acsmFailState;
00671     ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
00672     ACSM_PATTERN2  * mlist,* px;
00673   
00674     /* Init a Queue */ 
00675     queue_init (queue);
00676 
00677   
00678     /* Add the state 0 transitions 1st, the states at depth 1, fail to state 0 */ 
00679     for (i = 0; i < acsm->acsmAlphabetSize; i++)
00680     {
00681       s = List_GetNextState2(acsm,0,i);
00682       if( s )
00683       {
00684           queue_add (queue, s);
00685           FailState[s] = 0;
00686       }
00687     }
00688   
00689     /* Build the fail state successive layer of transitions */ 
00690     while (queue_count (queue) > 0)
00691     {
00692         r = queue_remove (queue);
00693       
00694         /* Find Final States for any Failure */ 
00695         for (i = 0; i < acsm->acsmAlphabetSize; i++)
00696         {
00697            int fs, next;
00698 
00699            s = List_GetNextState(acsm,r,i);
00700 
00701            if( s != ACSM_FAIL_STATE2 )
00702            { 
00703                 queue_add (queue, s);
00704  
00705                 fs = FailState[r];
00706 
00707                 /* 
00708                  *  Locate the next valid state for 'i' starting at fs 
00709                  */ 
00710                 while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 )
00711                 {
00712                   fs = FailState[fs];
00713                 }
00714               
00715                 /*
00716                  *  Update 's' state failure state to point to the next valid state
00717                  */ 
00718                 FailState[s] = next;
00719               
00720                 /*
00721                  *  Copy 'next'states MatchList to 's' states MatchList, 
00722                  *  we copy them so each list can be AC_FREE'd later,
00723                  *  else we could just manipulate pointers to fake the copy.
00724                  */ 
00725                 for( mlist = MatchList[next]; 
00726                      mlist;
00727                      mlist = mlist->next)
00728                 {
00729                     px = CopyMatchListEntry (mlist);
00730   
00731                     /* Insert at front of MatchList */ 
00732                     px->next = MatchList[s];
00733                     MatchList[s] = px;
00734                 }
00735             }
00736         }
00737     }
00738   
00739     /* Clean up the queue */ 
00740     queue_free (queue);
00741 
00742     if( s_verbose)printf("End Build_NFA: NumStates=%d\n",acsm->acsmNumStates);
00743 }
00744 
00745 /*
00746 *   Build Deterministic Finite Automata from the NFA
00747 */ 
00748 static void
00749 Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm) 
00750 {
00751     int i, r, s, cFailState;
00752     QUEUE  q, *queue = &q;
00753     acstate_t * FailState = acsm->acsmFailState;
00754   
00755     /* Init a Queue */
00756     queue_init (queue);
00757   
00758     /* Add the state 0 transitions 1st */
00759     for(i=0; i<acsm->acsmAlphabetSize; i++)
00760     {
00761       s = List_GetNextState(acsm,0,i);
00762       if ( s != 0 )
00763       {
00764           queue_add (queue, s);
00765       }
00766     }
00767   
00768     /* Start building the next layer of transitions */
00769     while( queue_count(queue) > 0 )
00770     {
00771         r = queue_remove(queue);
00772       
00773         /* Process this states layer */ 
00774         for (i = 0; i < acsm->acsmAlphabetSize; i++)
00775         {
00776           s = List_GetNextState(acsm,r,i);
00777 
00778           if( s != ACSM_FAIL_STATE2 && s!= 0)
00779           {
00780               queue_add (queue, s);
00781           }
00782           else
00783           {
00784               cFailState = List_GetNextState(acsm,FailState[r],i);
00785 
00786               if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 )
00787               {
00788                   List_PutNextState(acsm,r,i,cFailState);
00789               }
00790           }
00791         }
00792     }
00793   
00794     /* Clean up the queue */ 
00795     queue_free (queue);
00796 
00797     if(s_verbose)printf("End Convert_NFA_To_DFA: NumStates=%d\n",acsm->acsmNumStates);
00798 
00799 }
00800 
00801 /*
00802 *
00803 *  Convert a row lists for the state table to a full vector format
00804 *
00805 */
00806 static int 
00807 Conv_List_To_Full(ACSM_STRUCT2 * acsm) 
00808 {
00809   int         tcnt, k;
00810   acstate_t * p;
00811   acstate_t ** NextState = acsm->acsmNextState;
00812 
00813   for(k=0;k<acsm->acsmMaxStates;k++)
00814   {
00815     p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) );
00816     if(!p) return -1;
00817 
00818     tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 );
00819 
00820     p[0] = ACF_FULL;
00821     p[1] = 0; /* no matches yet */
00822 
00823     NextState[k] = p; /* now we have a full format row vector  */
00824   }
00825 
00826   return 0;
00827 }
00828 
00829 /*
00830 *   Convert DFA memory usage from list based storage to a sparse-row storage.  
00831 *
00832 *   The Sparse format allows each row to be either full or sparse formatted.  If the sparse row has
00833 *   too many transitions, performance or space may dictate that we use the standard full formatting 
00834 *   for the row.  More than 5 or 10 transitions per state ought to really whack performance. So the  
00835 *   user can specify the max state transitions per state allowed in the sparse format. 
00836 *
00837 *   Standard Full Matrix Format
00838 *   ---------------------------
00839 *   acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input)
00840 *
00841 *   example:   
00842 *  
00843 *        events -> a b c d e f g h i j k l m n o p
00844 *   states 
00845 *     N            1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0
00846 *        
00847 *   Sparse Format, each row : Words     Value
00848 *                            1-1       fmt(0-full,1-sparse,2-banded,3-sparsebands)
00849 *                            2-2       bool match flag (indicates this state has pattern matches)
00850 *                            3-3       sparse state count ( # of input/next-state pairs )
00851 *                            4-3+2*cnt 'input,next-state' pairs... each sizof(acstate_t)
00852 *     
00853 *   above example case yields:
00854 *     Full Format:    0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ...
00855 *     Sparse format:  1, 3, 'a',1,'b',7,'f',3  - uses 2+2*ntransitions (non-default transitions)
00856 */ 
00857 static int 
00858 Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm) 
00859 {
00860   int          cnt, m, k, i;
00861   acstate_t  * p, state, maxstates=0;
00862   acstate_t ** NextState = acsm->acsmNextState;
00863   acstate_t    full[MAX_ALPHABET_SIZE];
00864 
00865   for(k=0;k<acsm->acsmMaxStates;k++)
00866   {
00867     cnt=0;
00868 
00869     List_ConvToFull(acsm, (acstate_t)k, full );
00870 
00871     for (i = 0; i < acsm->acsmAlphabetSize; i++)
00872     {
00873        state = full[i];
00874        if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++;
00875     }
00876 
00877     if( cnt > 0 ) maxstates++;
00878 
00879     if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes )
00880     {
00881        p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) );
00882        if(!p) return -1;
00883 
00884        p[0] = ACF_FULL;
00885        p[1] = 0;
00886        memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t));       
00887     }
00888     else
00889     {
00890        p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt));
00891        if(!p) return -1;
00892 
00893        m      = 0;
00894        p[m++] = ACF_SPARSE;   
00895        p[m++] = 0;   /* no matches */
00896        p[m++] = cnt;
00897 
00898        for(i = 0; i < acsm->acsmAlphabetSize ; i++)
00899        {
00900          state = full[i];  
00901          if( state != 0 && state != ACSM_FAIL_STATE2 )
00902          {
00903            p[m++] = i;
00904            p[m++] = state;
00905          }
00906       }
00907     }
00908 
00909     NextState[k] = p; /* now we are a sparse formatted state transition array  */
00910   }
00911 
00912   return 0;
00913 }
00914 /*
00915     Convert Full matrix to Banded row format.
00916 
00917     Word     values
00918     1        2  -> banded
00919     2        n  number of values
00920     3        i  index of 1st value (0-256)
00921     4 - 3+n  next-state values at each index
00922 
00923 */
00924 static int 
00925 Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm) 
00926 {
00927   int first = -1, last;
00928   acstate_t * p, state, full[MAX_ALPHABET_SIZE];
00929   acstate_t ** NextState = acsm->acsmNextState;
00930   int       cnt,m,k,i;
00931 
00932   for(k=0;k<acsm->acsmMaxStates;k++)
00933   {
00934     cnt=0;
00935 
00936     List_ConvToFull(acsm, (acstate_t)k, full );
00937 
00938     first=-1;
00939     last =-2;
00940 
00941     for (i = 0; i < acsm->acsmAlphabetSize; i++)
00942     {
00943        state = full[i];
00944 
00945        if( state !=0 && state != ACSM_FAIL_STATE2 )
00946        {
00947            if( first < 0 ) first = i;
00948            last = i;
00949        }
00950     }
00951 
00952     /* calc band width */
00953     cnt= last - first + 1;
00954 
00955     p = AC_MALLOC(sizeof(acstate_t)*(4+cnt));
00956 
00957     if(!p) return -1;
00958 
00959     m      = 0;
00960     p[m++] = ACF_BANDED;   
00961     p[m++] = 0;   /* no matches */
00962     p[m++] = cnt;
00963     p[m++] = first;
00964 
00965     for(i = first; i <= last; i++)
00966     {
00967        p[m++] = full[i]; 
00968     }
00969 
00970     NextState[k] = p; /* now we are a banded formatted state transition array  */
00971   }
00972 
00973   return 0;
00974 }
00975 
00976 /*
00977 *   Convert full matrix to Sparse Band row format.
00978 *
00979 *   next  - Full formatted row of next states
00980 *   asize - size of alphabet
00981 *   zcnt - max number of zeros in a run of zeros in any given band.
00982 *
00983 *  Word Values
00984 *  1    ACF_SPARSEBANDS
00985 *  2    number of bands
00986 *  repeat 3 - 5+ ....once for each band in this row.
00987 *  3    number of items in this band*  4    start index of this band
00988 *  5-   next-state values in this band...
00989 */
00990 static 
00991 int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax )
00992 {
00993    int i, nbands,zcnt,last=0;
00994    acstate_t state;
00995 
00996    nbands=0;
00997     for( i=0; i<asize; i++ )
00998     {
00999        state = next[i];
01000   
01001        if( state !=0 && state != ACSM_FAIL_STATE2 )
01002        {
01003            begin[nbands] = i;
01004            zcnt=0;
01005 
01006            for( ; i< asize; i++ )
01007            {
01008               state = next[i];
01009               if( state ==0 || state == ACSM_FAIL_STATE2 ) 
01010               {
01011                   zcnt++;
01012                   if( zcnt > zmax ) break;
01013               }
01014               else 
01015               {
01016                   zcnt=0;
01017                   last = i;                  
01018               }
01019            }
01020 
01021            end[nbands++] = last;
01022 
01023        }
01024     }
01025 
01026     return nbands;
01027 }
01028 
01029 
01030 /*
01031 *   Sparse Bands
01032 *
01033 *   Row Format:
01034 *   Word
01035 *   1    SPARSEBANDS format indicator
01036 *   2    bool indicates a pattern match in this state
01037 *   3    number of sparse bands
01038 *   4    number of elements in this band
01039 *   5    start index of this band
01040 *   6-   list of next states
01041 *   
01042 *   m    number of elements in this band
01043 *   m+1  start index of this band
01044 *   m+2- list of next states
01045 */
01046 static int 
01047 Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm) 
01048 {
01049   acstate_t  * p;
01050   acstate_t ** NextState = acsm->acsmNextState;
01051   int          cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt;
01052 
01053   int       band_begin[MAX_ALPHABET_SIZE];
01054   int       band_end[MAX_ALPHABET_SIZE];
01055   int       nbands,j;
01056   acstate_t full[MAX_ALPHABET_SIZE];
01057 
01058   for(k=0;k<acsm->acsmMaxStates;k++)
01059   {
01060     cnt=0;
01061 
01062     List_ConvToFull(acsm, (acstate_t)k, full );
01063 
01064     nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt );
01065     
01066     /* calc band width space*/
01067     cnt = 3;
01068     for(i=0;i<nbands;i++)
01069     {
01070        cnt += 2;
01071        cnt += band_end[i] - band_begin[i] + 1;
01072 
01073        /*printf("state %d: sparseband %d,  first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */
01074     }
01075 
01076     p = AC_MALLOC(sizeof(acstate_t)*(cnt));
01077 
01078     if(!p) return -1;
01079 
01080     m      = 0;
01081     p[m++] = ACF_SPARSEBANDS;   
01082     p[m++] = 0; /* no matches */
01083     p[m++] = nbands;
01084 
01085     for( i=0;i<nbands;i++ )
01086     {
01087       p[m++] = band_end[i] - band_begin[i] + 1;  /* # states in this band */
01088       p[m++] = band_begin[i];   /* start index */
01089  
01090       for( j=band_begin[i]; j<=band_end[i]; j++ )
01091       {
01092          p[m++] = full[j];  /* some states may be state zero */
01093       }
01094     }
01095 
01096     NextState[k] = p; /* now we are a sparse-banded formatted state transition array  */
01097   }
01098 
01099   return 0;
01100 }
01101 
01102 void 
01103 Print_DFA_MatchList( ACSM_STRUCT2 * acsm, int state )
01104 {
01105      ACSM_PATTERN2 * mlist;
01106 
01107      for (mlist = acsm->acsmMatchList[state]; 
01108           mlist;
01109           mlist = mlist->next)
01110      {
01111         printf("%.*s ", mlist->n, mlist->patrn);
01112      }
01113 }
01114 /*
01115 *
01116 */
01117 static void
01118 Print_DFA(ACSM_STRUCT2 * acsm) 
01119 {
01120   int  k,i;
01121   acstate_t * p, state, n, fmt, index, nb, bmatch;
01122   acstate_t ** NextState = acsm->acsmNextState;
01123 
01124   printf("Print DFA - %d active states\n",acsm->acsmNumStates);
01125 
01126   for(k=0;k<acsm->acsmNumStates;k++)
01127   {
01128     p   = NextState[k];
01129 
01130     if( !p ) continue;
01131 
01132     fmt = *p++; 
01133 
01134     bmatch = *p++;
01135   
01136     printf("state %3d, fmt=%d: ",k,fmt);
01137 
01138     if( fmt ==ACF_SPARSE )
01139     {
01140        n = *p++; 
01141        for( ; n>0; n--, p+=2 )
01142        { 
01143          if( isprint(p[0]) )
01144          printf("%3c->%-5d\t",p[0],p[1]);
01145          else
01146          printf("%3d->%-5d\t",p[0],p[1]);
01147       }
01148     }
01149     else if( fmt ==ACF_BANDED )
01150     {
01151 
01152        n = *p++; 
01153        index = *p++;
01154 
01155        for( ; n>0; n--, p++ )
01156        { 
01157          if( isprint(p[0]) )
01158          printf("%3c->%-5d\t",index++,p[0]);
01159          else
01160          printf("%3d->%-5d\t",index++,p[0]);
01161       }
01162     }
01163     else if( fmt ==ACF_SPARSEBANDS )
01164     {
01165        nb    = *p++; 
01166        for(i=0;i<nb;i++)
01167        {
01168          n     = *p++;
01169          index = *p++;
01170          for( ; n>0; n--, p++ )
01171          { 
01172            if( isprint(index) )
01173            printf("%3c->%-5d\t",index++,p[0]);
01174            else
01175            printf("%3d->%-5d\t",index++,p[0]);
01176          }
01177        }
01178     }
01179     else if( fmt == ACF_FULL ) 
01180     {
01181 
01182       for( i=0; i<acsm->acsmAlphabetSize; i++ )
01183       {
01184          state = p[i];
01185 
01186          if( state != 0 && state != ACSM_FAIL_STATE2 )
01187          {
01188            if( isprint(i) )
01189              printf("%3c->%-5d\t",i,state);
01190            else
01191              printf("%3d->%-5d\t",i,state);
01192          }
01193       }
01194     }
01195 
01196     Print_DFA_MatchList( acsm, k);
01197 
01198     printf("\n");
01199   }
01200 }
01201 /*
01202 *  Write a state table to disk
01203 */
01204 /*
01205 static void
01206 Write_DFA(ACSM_STRUCT2 * acsm, char * f) 
01207 {
01208   int  k,i;
01209   acstate_t * p, n, fmt, index, nb, bmatch;
01210   acstate_t ** NextState = acsm->acsmNextState;
01211   FILE * fp;
01212 
01213   printf("Dump DFA - %d active states\n",acsm->acsmNumStates);
01214 
01215   fp = fopen(f,"wb");
01216   if(!fp)
01217    {
01218      printf("*** WARNING: could not write dfa to file - %s\n",f);
01219      return;
01220    }
01221 
01222   fwrite( &acsm->acsmNumStates, 4, 1, fp);
01223 
01224   for(k=0;k<acsm->acsmNumStates;k++)
01225   {
01226     p   = NextState[k];
01227 
01228     if( !p ) continue;
01229 
01230     fmt = *p++; 
01231 
01232     bmatch = *p++;
01233 
01234     fwrite( &fmt,    sizeof(acstate_t), 1, fp);
01235     fwrite( &bmatch, sizeof(acstate_t), 1, fp);
01236   
01237     if( fmt ==ACF_SPARSE )
01238     {
01239        n = *p++; 
01240        fwrite( &n,     sizeof(acstate_t), 1, fp);
01241        fwrite(  p, n*2*sizeof(acstate_t), 1, fp);
01242     }
01243     else if( fmt ==ACF_BANDED )
01244     {
01245        n = *p++; 
01246        fwrite( &n,     sizeof(acstate_t), 1, fp);
01247 
01248        index = *p++;
01249        fwrite( &index, sizeof(acstate_t), 1, fp);
01250 
01251        fwrite(  p, sizeof(acstate_t), n, fp);
01252     }
01253     else if( fmt ==ACF_SPARSEBANDS )
01254     {
01255        nb    = *p++; 
01256        fwrite( &nb,    sizeof(acstate_t), 1, fp);
01257        for(i=0;i<nb;i++)
01258        {
01259          n     = *p++;
01260          fwrite( &n,    sizeof(acstate_t), 1, fp);
01261 
01262          index = *p++;
01263          fwrite( &index,sizeof(acstate_t), 1, fp);
01264 
01265          fwrite( p,     sizeof(acstate_t), 1, fp);
01266        }
01267     }
01268     else if( fmt == ACF_FULL ) 
01269     {
01270       fwrite( p,  sizeof(acstate_t), acsm->acsmAlphabetSize,  fp);
01271     }
01272 
01273     //Print_DFA_MatchList( acsm, k);
01274 
01275   }
01276 
01277   fclose(fp);
01278 }
01279 */
01280 
01281 /*
01282 *
01283 *   Convert an NFA or DFA row from sparse to full format
01284 *   and store into the 'full'  buffer.
01285 *
01286 *   returns:
01287 *     0 - failed, no state transitions
01288 *    *p - pointer to 'full' buffer
01289 *
01290 */
01291 /*
01292 static 
01293 acstate_t * acsmConvToFull(ACSM_STRUCT2 * acsm, acstate_t k, acstate_t * full )
01294 {
01295     int i;
01296     acstate_t * p, n, fmt, index, nb, bmatch;
01297     acstate_t ** NextState = acsm->acsmNextState;
01298 
01299     p   = NextState[k];
01300 
01301     if( !p ) return 0;
01302 
01303     fmt = *p++; 
01304 
01305     bmatch = *p++;
01306 
01307     if( fmt ==ACF_SPARSE )
01308     {
01309        n = *p++; 
01310        for( ; n>0; n--, p+=2 )
01311        { 
01312          full[ p[0] ] = p[1];
01313       }
01314     }
01315     else if( fmt ==ACF_BANDED )
01316     {
01317 
01318        n = *p++; 
01319        index = *p++;
01320 
01321        for( ; n>0; n--, p++ )
01322        { 
01323          full[ index++ ] = p[0];
01324       }
01325     }
01326     else if( fmt ==ACF_SPARSEBANDS )
01327     {
01328        nb    = *p++; 
01329        for(i=0;i<nb;i++)
01330        {
01331          n     = *p++;
01332          index = *p++;
01333          for( ; n>0; n--, p++ )
01334          { 
01335            full[ index++ ] = p[0];
01336          }
01337        }
01338     }
01339     else if( fmt == ACF_FULL )
01340     {
01341       memcpy(full,p,acsm->acsmAlphabetSize*sizeof(acstate_t));
01342     }
01343     
01344     return full;   
01345 }
01346 */
01347 
01348 /*
01349 *   Select the desired storage mode
01350 */
01351 int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m )
01352 {
01353  switch( m )
01354  {
01355     case ACF_FULL:
01356     case ACF_SPARSE:
01357     case ACF_BANDED:
01358     case ACF_SPARSEBANDS:
01359       acsm->acsmFormat = m;
01360       break;
01361     default:
01362       return -1;
01363  }
01364 
01365  return 0;
01366 }
01367 /*
01368 *
01369 */
01370 void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n )
01371 {
01372     acsm->acsmSparseMaxZcnt = n;  
01373 }
01374 /*
01375 *
01376 */
01377 void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n )
01378 {
01379     acsm->acsmSparseMaxRowNodes = n;
01380 }
01381 /*
01382 *    
01383 */
01384 int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m )
01385 {
01386  switch( m )
01387  {
01388     case FSA_TRIE:
01389     case FSA_NFA:
01390     case FSA_DFA:
01391       acsm->acsmFSA = m;
01392     default:
01393       return -1;
01394  }
01395 }
01396 /*
01397 *    
01398 */
01399 int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n )
01400 {
01401    if( n <= MAX_ALPHABET_SIZE )
01402    {
01403      acsm->acsmAlphabetSize = n;
01404    }
01405    else
01406    {
01407       return -1;
01408    }
01409    return 0;
01410 }
01411 /*
01412 *  Create a new AC state machine
01413 */ 
01414 ACSM_STRUCT2 * acsmNew2 () 
01415 {
01416   ACSM_STRUCT2 * p;
01417 
01418   init_xlatcase ();
01419 
01420   p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2));
01421   MEMASSERT (p, "acsmNew");
01422 
01423   if (p)
01424   {
01425     memset (p, 0, sizeof (ACSM_STRUCT2));
01426 
01427     /* Some defaults */
01428     p->acsmFSA               = FSA_DFA;
01429     p->acsmFormat            = ACF_FULL;//ACF_BANDED;
01430     p->acsmAlphabetSize      = 256;
01431     p->acsmSparseMaxRowNodes = 256;
01432     p->acsmSparseMaxZcnt     = 10;  
01433   }
01434   
01435   return p;
01436 }
01437 /*
01438 *   Add a pattern to the list of patterns for this state machine
01439 *
01440 */ 
01441 int
01442 acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase,
01443                 int offset, int depth, void * id, int iid) 
01444 {
01445   ACSM_PATTERN2 * plist;
01446 
01447   plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
01448   MEMASSERT (plist, "acsmAddPattern");
01449 
01450   plist->patrn = (unsigned char *) AC_MALLOC ( n );
01451   MEMASSERT (plist->patrn, "acsmAddPattern");
01452 
01453   ConvertCaseEx(plist->patrn, pat, n);
01454   
01455   plist->casepatrn = (unsigned char *) AC_MALLOC ( n );
01456   MEMASSERT (plist->casepatrn, "acsmAddPattern");
01457   
01458   memcpy (plist->casepatrn, pat, n);
01459 
01460   plist->n      = n;
01461   plist->nocase = nocase;
01462   plist->offset = offset;
01463   plist->depth  = depth;
01464   plist->id     = id;
01465   plist->iid    = iid;
01466 
01467   plist->next     = p->acsmPatterns;
01468   p->acsmPatterns = plist;
01469 
01470   return 0;
01471 }
01472 /*
01473 *   Add a Key to the list of key+data pairs
01474 */ 
01475 int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data)
01476 {
01477   ACSM_PATTERN2 * plist;
01478 
01479   plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
01480   MEMASSERT (plist, "acsmAddPattern");
01481  
01482   plist->patrn = (unsigned char *) AC_MALLOC (klen);
01483   memcpy (plist->patrn, key, klen);
01484 
01485   plist->casepatrn = (unsigned char *) AC_MALLOC (klen);
01486   memcpy (plist->casepatrn, key, klen);
01487 
01488   plist->n      = klen;
01489   plist->nocase = nocase;
01490   plist->offset = 0;
01491   plist->depth  = 0;
01492   plist->id     = 0;
01493   plist->iid = 0;
01494 
01495   plist->next = p->acsmPatterns;
01496   p->acsmPatterns = plist;
01497 
01498   return 0;
01499 }
01500 
01501 /*
01502 *  Copy a boolean match flag int NextState table, for caching purposes.
01503 */
01504 static
01505 void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm )
01506 {
01507   acstate_t        state;
01508   acstate_t     ** NextState = acsm->acsmNextState;
01509   ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
01510 
01511   for( state=0; state<acsm->acsmNumStates; state++ )
01512   {
01513      if( MatchList[state] )
01514      {
01515          NextState[state][1] = 1;
01516      }
01517      else 
01518      {
01519          NextState[state][1] = 0;
01520      }
01521   }
01522 }
01523 
01524 /*
01525 *   Compile State Machine - NFA or DFA and Full or Banded or Sparse or SparseBands
01526 */ 
01527 int
01528 acsmCompile2 (ACSM_STRUCT2 * acsm) 
01529 {
01530     int               k;
01531     ACSM_PATTERN2    * plist;
01532   
01533     /* Count number of states */ 
01534     for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
01535     {
01536      acsm->acsmMaxStates += plist->n;
01537      /* acsm->acsmMaxStates += plist->n*2; if we handle case in the table */
01538     }
01539     acsm->acsmMaxStates++; /* one extra */
01540 
01541     /* Alloc a List based State Transition table */
01542     acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
01543     MEMASSERT (acsm->acsmTransTable, "acsmCompile");
01544 
01545     memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);
01546 
01547     if(s_verbose)printf ("ACSMX-Max Memory-TransTable Setup: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01548 
01549     /* Alloc a failure table - this has a failure state, and a match list for each state */
01550     acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
01551     MEMASSERT (acsm->acsmFailState, "acsmCompile");
01552 
01553     memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );
01554 
01555     /* Alloc a MatchList table - this has a lis tof pattern matches for each state, if any */
01556     acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
01557     MEMASSERT (acsm->acsmMatchList, "acsmCompile");
01558 
01559     memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
01560 
01561     if(s_verbose)printf ("ACSMX-Max Memory- MatchList Table Setup: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01562  
01563     /* Alloc a separate state transition table == in state 's' due to event 'k', transition to 'next' state */
01564     acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
01565     MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");
01566 
01567     for (k = 0; k < acsm->acsmMaxStates; k++)
01568     {
01569       acsm->acsmNextState[k]=(acstate_t*)0;
01570     }
01571 
01572     if(s_verbose)printf ("ACSMX-Max Memory-Table Setup: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01573      
01574     /* Initialize state zero as a branch */ 
01575     acsm->acsmNumStates = 0;
01576   
01577     /* Add the 0'th state,  */
01578     //acsm->acsmNumStates++; 
01579  
01580     /* Add each Pattern to the State Table - This forms a keywords state table  */ 
01581     for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
01582     {
01583         AddPatternStates (acsm, plist);
01584     }
01585 
01586     acsm->acsmNumStates++;
01587 
01588     if(s_verbose)printf ("ACSMX-Max Trie List Memory : %d bytes, %d states, %d active states\n", 
01589                  max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01590 
01591     if(s_verbose)List_PrintTransTable( acsm );
01592   
01593     if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
01594     {
01595       /* Build the NFA */
01596       if(s_verbose)printf("Build_NFA\n");
01597 
01598       Build_NFA (acsm);
01599 
01600       if(s_verbose)printf("NFA-Trans-Nodes: %d\n",acsm->acsmNumTrans);
01601       if(s_verbose)printf("ACSMX-Max NFA List Memory  : %d bytes, %d states / %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01602 
01603       if(s_verbose)List_PrintTransTable( acsm );
01604     }
01605   
01606     if( acsm->acsmFSA == FSA_DFA )
01607     {
01608        /* Convert the NFA to a DFA */ 
01609        if(s_verbose)printf("Convert_NFA_To_DFA\n");
01610 
01611        Convert_NFA_To_DFA (acsm);
01612   
01613        if(s_verbose)printf("DFA-Trans-Nodes: %d\n",acsm->acsmNumTrans);
01614        if(s_verbose)printf("ACSMX-Max NFA-DFA List Memory  : %d bytes, %d states / %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01615 
01616        if(s_verbose)List_PrintTransTable( acsm );
01617     }
01618 
01619     /*
01620     *
01621     *  Select Final Transition Table Storage Mode
01622     *
01623     */
01624 
01625     if(s_verbose)printf("Converting Transition Lists -> Transition table, fmt=%d\n",acsm->acsmFormat);
01626 
01627     if( acsm->acsmFormat == ACF_SPARSE )
01628     {
01629       /* Convert DFA Full matrix to a Sparse matrix */
01630       if( Conv_Full_DFA_To_Sparse(acsm) )
01631           return -1;
01632    
01633       if(s_verbose)printf ("ACSMX-Max Memory-Sparse: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01634       if(s_verbose)Print_DFA(acsm);
01635     }
01636 
01637     else if( acsm->acsmFormat == ACF_BANDED )
01638     {
01639       /* Convert DFA Full matrix to a Sparse matrix */
01640       if( Conv_Full_DFA_To_Banded(acsm) )
01641           return -1;
01642    
01643        if(s_verbose)printf ("ACSMX-Max Memory-banded: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01644        if(s_verbose)Print_DFA(acsm);
01645     }
01646 
01647     else if( acsm->acsmFormat == ACF_SPARSEBANDS )
01648     {
01649       /* Convert DFA Full matrix to a Sparse matrix */
01650       if( Conv_Full_DFA_To_SparseBands(acsm) )
01651           return -1;
01652    
01653        if(s_verbose)printf ("ACSMX-Max Memory-sparse-bands: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01654        if(s_verbose)Print_DFA(acsm);
01655     }
01656     else if( acsm->acsmFormat == ACF_FULL )
01657     {
01658       if( Conv_List_To_Full( acsm ) )
01659             return -1;
01660 
01661        if(s_verbose)printf ("ACSMX-Max Memory-Full: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01662        if(s_verbose)Print_DFA(acsm);
01663     }
01664 
01665     acsmUpdateMatchStates( acsm ); /* load boolean match flags into state table */
01666 
01667     /* Free up the Table Of Transition Lists */
01668     List_FreeTransTable( acsm ); 
01669 
01670     if(s_verbose)printf ("ACSMX-Max Memory-Final: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01671 
01672     /* For now -- show this info */
01673     /*
01674     *  acsmPrintInfo( acsm );
01675     */
01676 
01677 
01678     /* Accrue Summary State Stats */
01679     summary.num_states      += acsm->acsmNumStates;
01680     summary.num_transitions += acsm->acsmNumTrans;
01681 
01682     memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));
01683     
01684     return 0;
01685 }
01686 
01687 /*
01688 *   Get the NextState from the NFA, all NFA storage formats use this
01689 */
01690 inline 
01691 acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned  input)
01692 {
01693    acstate_t fmt;
01694    acstate_t n;
01695    int       index;
01696    int       nb;
01697 
01698    fmt = *ps++;
01699 
01700    ps++;  /* skip bMatchState */
01701    
01702    switch( fmt )
01703    {
01704     case  ACF_BANDED:
01705     {
01706      n     = ps[0];
01707      index = ps[1];
01708 
01709      if( input <  index     )  
01710      {
01711          if(state==0)
01712          {
01713            return 0; 
01714          }
01715          else
01716          { 
01717            return (acstate_t)ACSM_FAIL_STATE2;
01718          }
01719      }
01720      if( input >= index + n )
01721      {  
01722           if(state==0)
01723           {
01724               return 0; 
01725           }
01726           else 
01727           {
01728               return (acstate_t)ACSM_FAIL_STATE2;
01729           }
01730      }
01731      if( ps[input-index] == 0  ) 
01732      {
01733          if( state != 0 ) 
01734          {
01735            return ACSM_FAIL_STATE2;  
01736          }
01737      }
01738 
01739      return (acstate_t) ps[input-index];
01740     } 
01741 
01742     case ACF_SPARSE:
01743     {
01744      n = *ps++; /* number of sparse index-value entries */
01745 
01746      for( ; n>0 ; n-- )
01747      {
01748        if( ps[0] > input ) /* cannot match the input, already a higher value than the input  */
01749        {
01750            return (acstate_t)ACSM_FAIL_STATE2; /* default state */
01751        }
01752        else if( ps[0] == input )
01753        {
01754            return ps[1]; /* next state */
01755        }
01756        ps+=2;
01757      }
01758      if( state == 0 )
01759      {
01760          return 0;
01761      }
01762      return ACSM_FAIL_STATE2;
01763     } 
01764 
01765     case ACF_SPARSEBANDS:
01766     {
01767      nb  = *ps++;   /* number of bands */
01768 
01769      while( nb > 0 )  /* for each band */
01770      { 
01771        n     = *ps++;  /* number of elements */
01772        index = *ps++;  /* 1st element value */
01773 
01774        if( input <  index )
01775        {
01776            if( state != 0 ) 
01777            {
01778                return (acstate_t)ACSM_FAIL_STATE2;
01779            }
01780            return (acstate_t)0;
01781        }
01782        if( (input >=  index) && (input < (index + n)) )  
01783        {
01784            if( ps[input-index] == 0 )
01785            {
01786                if( state != 0 ) 
01787                {
01788                    return ACSM_FAIL_STATE2;
01789                }
01790            }
01791            return (acstate_t) ps[input-index];
01792        }
01793        nb--;
01794        ps += n;
01795      }
01796      if( state != 0 )
01797      {
01798        return (acstate_t)ACSM_FAIL_STATE2;
01799      }
01800      return (acstate_t)0;
01801     } 
01802 
01803     case ACF_FULL:
01804     {
01805       if( ps[input] == 0 )
01806       {
01807         if( state != 0 ) 
01808         {
01809             return ACSM_FAIL_STATE2; 
01810         }
01811       }
01812       return ps[input]; 
01813     }
01814   }
01815 
01816   return 0;
01817 }
01818 
01819 
01820 
01821 /*
01822 *   Get the NextState from the DFA Next State Transition table
01823 *   Full and banded are supported separately, this is for 
01824 *   sparse and sparse-bands
01825 */
01826 inline 
01827 acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned  input)
01828 {
01829    acstate_t  n, nb;
01830    int        index;
01831 
01832    switch( ps[0] )  
01833    {
01834     /*   BANDED   */
01835     case  ACF_BANDED:
01836     {
01837        /* n=ps[2] : number of entries in the band */
01838        /* index=ps[3] : index of the 1st entry, sequential thereafter */
01839 
01840        if( input  <  ps[3]        )  return 0; 
01841        if( input >= (ps[3]+ps[2]) )  return 0; 
01842 
01843        return  ps[4+input-ps[3]];
01844     } 
01845 
01846     /*   FULL   */
01847     case ACF_FULL:
01848     {
01849        return ps[2+input]; 
01850     }
01851 
01852     /*   SPARSE   */
01853     case ACF_SPARSE:
01854     {
01855        n = ps[2]; /* number of entries/ key+next pairs */
01856         
01857        ps += 3;
01858   
01859        for( ; n>0 ; n-- )  
01860        {
01861           if( input < ps[0]  ) /* cannot match the input, already a higher value than the input  */
01862           {
01863             return (acstate_t)0; /* default state */
01864           }
01865           else if( ps[0] == input )
01866           {
01867             return ps[1]; /* next state */
01868           }
01869           ps += 2;
01870        }
01871        return (acstate_t)0;
01872     } 
01873 
01874 
01875     /*   SPARSEBANDS   */
01876     case ACF_SPARSEBANDS:
01877     {
01878        nb  =  ps[2]; /* number of bands */
01879       
01880        ps += 3;
01881 
01882        while( nb > 0 )  /* for each band */
01883        { 
01884           n     = ps[0];  /* number of elements in this band */
01885           index = ps[1];  /* start index/char of this band */
01886           if( input <  index )
01887           {
01888             return (acstate_t)0;
01889           }
01890           if( (input < (index + n)) )  
01891           {
01892             return (acstate_t) ps[2+input-index];
01893           }
01894           nb--;
01895           ps += n;
01896        }
01897        return (acstate_t)0;
01898     } 
01899   }
01900 
01901   return 0;
01902 }
01903 /*
01904 *   Search Text or Binary Data for Pattern matches
01905 *
01906 *   Sparse & Sparse-Banded Matrix search
01907 */
01908 static
01909 inline
01910 int
01911 acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
01912             int (*Match) (void * id, int index, void *data), 
01913             void *data) 
01914 {
01915   acstate_t state;
01916   ACSM_PATTERN2   * mlist;
01917   unsigned char   * Tend;
01918   int               nfound = 0;
01919   unsigned char   * T, * Tc;
01920   int               index;
01921   acstate_t      ** NextState = acsm->acsmNextState; 
01922   ACSM_PATTERN2  ** MatchList = acsm->acsmMatchList;
01923 
01924   Tc   = Tx;
01925   T    = Tx;
01926   Tend = T + n;
01927  
01928   for( state = 0; T < Tend; T++ )
01929   {
01930       state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );
01931       
01932       /* test if this state has any matching patterns */
01933       if( NextState[state][1] ) 
01934       {   
01935             for( mlist = MatchList[state];
01936                  mlist!= NULL;
01937                  mlist = mlist->next )
01938             {
01939                  index = T - mlist->n - Tc; 
01940                  if( mlist->nocase )
01941                  {
01942                     nfound++;
01943                     if (Match (mlist->id, index, data))
01944                         return nfound;
01945                  }
01946                  else
01947                  {
01948                     if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
01949                     {
01950                       nfound++;
01951                       if (Match (mlist->id, index, data))
01952                           return nfound;
01953                     }
01954                  }
01955             }
01956       }
01957   }
01958   return nfound;
01959 }
01960 /*
01961 *   Full format DFA search
01962 *   Do not change anything here without testing, caching and prefetching 
01963 *   performance is very sensitive to any changes.
01964 *
01965 *   Perf-Notes: 
01966 *    1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10%
01967 *    2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed
01968 *    3) 
01969 */
01970 static 
01971 inline
01972 int
01973 acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
01974             int (*Match) (void * id, int index, void *data), 
01975             void *data) 
01976 {
01977   ACSM_PATTERN2   * mlist;
01978   unsigned char   * Tend;
01979   unsigned char   * T;
01980   int               index;
01981   acstate_t         state;
01982   acstate_t       * ps; 
01983   acstate_t         sindex;
01984   acstate_t      ** NextState = acsm->acsmNextState;
01985   ACSM_PATTERN2  ** MatchList = acsm->acsmMatchList;
01986   int               nfound    = 0;
01987 
01988   T    = Tx;
01989   Tend = Tx + n;
01990  
01991   for( state = 0; T < Tend; T++ )
01992   {
01993       ps     = NextState[ state ];
01994 
01995       sindex = xlatcase[ T[0] ];
01996 
01997       /* check the current state for a pattern match */
01998       if( ps[1] ) 
01999       {   
02000             for( mlist = MatchList[state];
02001                  mlist!= NULL;
02002                  mlist = mlist->next )
02003             {
02004                  index = T - mlist->n - Tx; 
02005 
02006                  
02007                  if( mlist->nocase )
02008                  {
02009                     nfound++;
02010                     if (Match (mlist->id, index, data))
02011                         return nfound;
02012                  }
02013                  else
02014                  {
02015                     if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
02016                     {
02017                       nfound++;
02018                       if (Match (mlist->id, index, data))
02019                           return nfound;
02020                     }
02021                  }
02022                 
02023             }
02024       }
02025       
02026       state = ps[ 2u + sindex ];
02027   }
02028 
02029   /* Check the last state for a pattern match */
02030   for( mlist = MatchList[state];
02031        mlist!= NULL;
02032        mlist = mlist->next )
02033   {
02034        index = T - mlist->n - Tx;
02035                  
02036        if( mlist->nocase )
02037        {
02038             nfound++;
02039             if (Match (mlist->id, index, data))
02040                 return nfound;
02041        }
02042        else
02043        {
02044             if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
02045             {
02046               nfound++;
02047               if (Match (mlist->id, index, data))
02048                   return nfound;
02049             }
02050        }
02051   }
02052 
02053   return nfound;
02054 }
02055 /*
02056 *   Banded-Row format DFA search
02057 *   Do not change anything here, caching and prefetching 
02058 *   performance is very sensitive to any changes.
02059 *
02060 *   ps[0] = storage fmt 
02061 *   ps[1] = bool match flag
02062 *   ps[2] = # elements in band 
02063 *   ps[3] = index of 1st element
02064 */
02065 static 
02066 inline
02067 int
02068 acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
02069             int (*Match) (void * id, int index, void *data), 
02070             void *data) 
02071 {
02072   acstate_t         state;
02073   unsigned char   * Tend;
02074   unsigned char   * T;
02075   int               sindex;
02076   int               index;
02077   acstate_t      ** NextState = acsm->acsmNextState;
02078   ACSM_PATTERN2  ** MatchList = acsm->acsmMatchList;
02079   ACSM_PATTERN2   * mlist;
02080   acstate_t       * ps; 
02081   int               nfound = 0;
02082 
02083   T    = Tx;
02084   Tend = T + n;
02085  
02086   for( state = 0; T < Tend; T++ )
02087   {
02088       ps     = NextState[state];
02089       
02090       sindex = xlatcase[ T[0] ];
02091             
02092       /* test if this state has any matching patterns */
02093       if( ps[1] ) 
02094       {   
02095             for( mlist = MatchList[state];
02096                  mlist!= NULL;
02097                  mlist = mlist->next )
02098             {
02099                  index = T - mlist->n - Tx; 
02100             
02101                  if( mlist->nocase )
02102                  {
02103                     nfound++;
02104                     if (Match (mlist->id, index, data))
02105                         return nfound;
02106                  }
02107                  else
02108                  {
02109                     if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
02110                     {
02111                       nfound++;
02112                       if (Match (mlist->id, index, data))
02113                           return nfound;
02114                     }
02115                  }
02116             }
02117       }
02118       
02119       if(      sindex <   ps[3]          )  state = 0;
02120       else if( sindex >= (ps[3] + ps[2]) )  state = 0; 
02121       else                                  state = ps[ 4u + sindex - ps[3] ];
02122   }
02123 
02124   /* Check the last state for a pattern match */
02125   for( mlist = MatchList[state];
02126        mlist!= NULL;
02127        mlist = mlist->next )
02128   {
02129        index = T - mlist->n - Tx; 
02130 
02131        if( mlist->nocase )
02132        {
02133             nfound++;
02134             if (Match (mlist->id, index, data))
02135                 return nfound;
02136        }
02137        else
02138        {
02139             if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
02140             {
02141               nfound++;
02142               if (Match (mlist->id, index, data))
02143                   return nfound;
02144             }
02145        }
02146   }
02147 
02148   return nfound;
02149 }
02150 
02151 
02152 
02153 /*
02154 *   Search Text or Binary Data for Pattern matches
02155 *
02156 *   Sparse Storage Version
02157 */
02158 static
02159 inline
02160 int
02161 acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
02162             int (*Match) (void * id, int index, void *data), 
02163             void *data) 
02164 {
02165   acstate_t         state;
02166   ACSM_PATTERN2   * mlist;
02167   unsigned char   * Tend;
02168   int               nfound = 0;
02169   unsigned char   * T, *Tc;
02170   int               index;
02171   acstate_t      ** NextState= acsm->acsmNextState;
02172   acstate_t       * FailState= acsm->acsmFailState;
02173   ACSM_PATTERN2  ** MatchList = acsm->acsmMatchList;
02174   unsigned char     Tchar;
02175 
02176   Tc   = Tx;
02177   T    = Tx;
02178   Tend = T + n;
02179  
02180   for( state = 0; T < Tend; T++ )
02181   {
02182       acstate_t nstate;
02183 
02184       Tchar = xlatcase[ *T ];
02185 
02186       while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
02187               state = FailState[state];
02188 
02189       state = nstate;
02190 
02191       for( mlist = MatchList[state];
02192            mlist!= NULL;
02193            mlist = mlist->next )
02194       {
02195            index = T - mlist->n - Tx; 
02196            if( mlist->nocase )
02197            {
02198               nfound++;
02199               if (Match (mlist->id, index, data))
02200                   return nfound;
02201            }
02202            else
02203            {
02204               if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
02205               {
02206                 nfound++;
02207                 if (Match (mlist->id, index, data))
02208                   return nfound;
02209               }
02210            }
02211       }
02212   }
02213 
02214   return nfound;
02215 }
02216 
02217 /*
02218 *   Search Function
02219 */
02220 int 
02221 acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
02222            int (*Match) (void * id, int index, void *data), 
02223            void *data) 
02224 {
02225 
02226    switch( acsm->acsmFSA )
02227    {
02228        case FSA_DFA:
02229 
02230        if( acsm->acsmFormat == ACF_FULL )
02231        {
02232          return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data );
02233        }
02234        else if( acsm->acsmFormat == ACF_BANDED )
02235        {
02236          return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data );
02237        }
02238        else
02239        {
02240          return acsmSearchSparseDFA( acsm, Tx, n, Match,data );
02241        }
02242 
02243        case FSA_NFA:
02244 
02245          return acsmSearchSparseNFA( acsm, Tx, n, Match,data );
02246 
02247        case FSA_TRIE:
02248 
02249          return 0;
02250    }
02251   return 0;
02252 }
02253 
02254 
02255 /*
02256 *   Free all memory
02257 */ 
02258   void
02259 acsmFree2 (ACSM_STRUCT2 * acsm) 
02260 {
02261   int i;
02262   ACSM_PATTERN2 * mlist, *ilist;
02263   for (i = 0; i < acsm->acsmMaxStates; i++)
02264   {
02265           mlist = acsm->acsmMatchList[i];
02266 
02267           while (mlist)
02268           {
02269               ilist = mlist;
02270               mlist = mlist->next;
02271               AC_FREE (ilist);
02272           }
02273           AC_FREE(acsm->acsmNextState[i]);
02274   }
02275   AC_FREE(acsm->acsmFailState);
02276   AC_FREE(acsm->acsmMatchList);
02277 }
02278 
02279 /*
02280 *
02281 */
02282 void acsmPrintInfo2( ACSM_STRUCT2 * p)
02283 {
02284     char * sf[]={
02285       "Full Matrix",
02286       "Sparse Matrix",
02287       "Banded Matrix",
02288       "Sparse Banded Matrix",
02289     };
02290     char * fsa[]={
02291       "TRIE",
02292       "NFA",
02293       "DFA",
02294     };
02295 
02296 
02297     printf("+--[Pattern Matcher:Aho-Corasick]-----------------------------\n");
02298     printf("| Alphabet Size    : %d Chars\n",p->acsmAlphabetSize);
02299     printf("| Sizeof State     : %d bytes\n",(int)(sizeof(acstate_t)));
02300     printf("| Storage Format   : %s \n",sf[ p->acsmFormat ]);
02301     printf("| Sparse Row Nodes : %d Max\n",p->acsmSparseMaxRowNodes);
02302     printf("| Sparse Band Zeros: %d Max\n",p->acsmSparseMaxZcnt);
02303     printf("| Num States       : %d\n",p->acsmNumStates);
02304     printf("| Num Transitions  : %d\n",p->acsmNumTrans);
02305     printf("| State Density    : %.1f%%\n",100.0*(double)p->acsmNumTrans/(p->acsmNumStates*p->acsmAlphabetSize));
02306     printf("| Finite Automatum : %s\n", fsa[p->acsmFSA]);
02307     if( max_memory < 1024*1024 )
02308     printf("| Memory           : %.2fKbytes\n", (float)max_memory/1024 );
02309     else
02310     printf("| Memory           : %.2fMbytes\n", (float)max_memory/(1024*1024) );
02311     printf("+-------------------------------------------------------------\n");
02312 
02313     /* Print_DFA(acsm); */
02314 
02315 }
02316 
02317 /*
02318  *
02319  */
02320 int acsmPrintDetailInfo2( ACSM_STRUCT2 * p )
02321 {
02322 
02323     return 0;
02324 }
02325 
02326 /*
02327  *   Global sumary of all info and all state machines built during this run
02328  *   This feeds off of the last pattern groupd built within snort,
02329  *   all groups use the same format, state size, etc..
02330  *   Combined with accrued stats, we get an average picture of things.
02331  */
02332 int acsmPrintSummaryInfo2()
02333 {
02334     char * sf[]={
02335       "Full",
02336       "Sparse",
02337       "Banded",
02338       "Sparse",
02339     };
02340 
02341     char * fsa[]={
02342       "TRIE",
02343       "NFA",
02344       "DFA",
02345     };
02346 
02347     ACSM_STRUCT2 * p = &summary.acsm;
02348 
02349     if( !summary.num_states )
02350             return 0;
02351     
02352     printf("+--[Pattern Matcher:Aho-Corasick Summary]----------------------\n");
02353     printf("| Alphabet Size    : %d Chars\n",p->acsmAlphabetSize);
02354     printf("| Sizeof State     : %d bytes\n",(int)(sizeof(acstate_t)));
02355     printf("| Storage Format   : %s \n",sf[ p->acsmFormat ]);
02356     printf("| Num States       : %d\n",summary.num_states);
02357     printf("| Num Transitions  : %d\n",summary.num_transitions);
02358     printf("| State Density    : %.1f%%\n",100.0*(double)summary.num_transitions/(summary.num_states*p->acsmAlphabetSize));
02359     printf("| Finite Automatum : %s\n", fsa[p->acsmFSA]);
02360     if( max_memory < 1024*1024 )
02361     printf("| Memory           : %.2fKbytes\n", (float)max_memory/1024 );
02362     else
02363     printf("| Memory           : %.2fMbytes\n", (float)max_memory/(1024*1024) );
02364     printf("+-------------------------------------------------------------\n");
02365 
02366 
02367     return 0;
02368 }
02369 
02370 
02371 
02372 
02373 #ifdef ACSMX2S_MAIN
02374   
02375 /*
02376 *  Text Data Buffer
02377 */ 
02378 unsigned char text[512];
02379 
02380 /* 
02381 *    A Match is found
02382 */ 
02383  int
02384 MatchFound (void* id, int index, void *data) 
02385 {
02386   fprintf (stdout, "%s\n", (char *) id);
02387   return 0;
02388 }
02389 
02390 /*
02391 *
02392 */ 
02393 int
02394 main (int argc, char **argv) 
02395 {
02396   int i, nc, nocase = 0;
02397   ACSM_STRUCT2 * acsm;
02398   char * p;
02399 
02400   if (argc < 3)
02401     
02402     {
02403       fprintf (stderr,"Usage: %s search-text pattern +pattern... [flags]\n",argv[0]);
02404       fprintf (stderr,"  flags: -nfa -nocase -full -sparse -bands -sparsebands -z zcnt (sparsebands) -sparsetree -v\n");
02405       exit (0);
02406     }
02407 
02408   acsm = acsmNew2 ();
02409   if( !acsm )
02410   {
02411      printf("acsm-no memory\n");
02412      exit(0);
02413   }
02414 
02415   strcpy (text, argv[1]);
02416 
02417   acsm->acsmFormat = ACF_FULL;
02418 
02419   for (i = 1; i < argc; i++)
02420   {
02421     if (strcmp (argv[i], "-nocase") == 0){
02422       nocase = 1;
02423     }
02424     if (strcmp (argv[i], "-v") == 0){
02425       s_verbose=1;
02426     }
02427 
02428     if (strcmp (argv[i], "-full") == 0){
02429        acsm->acsmFormat            = ACF_FULL;
02430     }
02431     if (strcmp (argv[i], "-sparse") == 0){
02432        acsm->acsmFormat            = ACF_SPARSE;
02433        acsm->acsmSparseMaxRowNodes = 10;
02434     }
02435     if (strcmp (argv[i], "-bands") == 0){
02436        acsm->acsmFormat            = ACF_BANDED;
02437     }
02438 
02439     if (strcmp (argv[i], "-sparsebands") == 0){
02440        acsm->acsmFormat            = ACF_SPARSEBANDS;
02441        acsm->acsmSparseMaxZcnt     = 10;  
02442     }
02443     if (strcmp (argv[i], "-z") == 0){
02444        acsm->acsmSparseMaxZcnt     = atoi(argv[++i]);  
02445     }
02446 
02447     if (strcmp (argv[i], "-nfa") == 0){
02448        acsm->acsmFSA     = FSA_NFA;
02449     }
02450     if (strcmp (argv[i], "-dfa") == 0){
02451        acsm->acsmFSA     = FSA_DFA;
02452     }
02453     if (strcmp (argv[i], "-trie") == 0){
02454        acsm->acsmFSA     = FSA_TRIE;
02455     }
02456   }
02457 
02458   for (i = 2; i < argc; i++)
02459   {
02460       if (argv[i][0] == '-')
02461           continue;
02462 
02463       p = argv[i];
02464 
02465       if ( *p == '+')
02466       {
02467           nc=1;
02468           p++;
02469       }
02470       else
02471       {
02472           nc = nocase;
02473       }
02474 
02475       acsmAddPattern2 (acsm, p, strlen(p), nc, 0, 0,(void*)p, i - 2);
02476   }
02477   
02478   if(s_verbose)printf("Patterns added\n");
02479 
02480   Print_DFA (acsm);
02481 
02482   acsmCompile2 (acsm);
02483 
02484   Write_DFA(acsm, "acsmx2-snort.dfa") ;
02485 
02486   if(s_verbose) printf("Patterns compiled--written to file.\n");
02487 
02488   acsmPrintInfo2 ( acsm );
02489 
02490   acsmSearch2 (acsm, text, strlen (text), MatchFound, (void *)0 );
02491 
02492   acsmFree2 (acsm);
02493 
02494   printf ("normal pgm end\n");
02495 
02496   return (0);
02497 }
02498 #endif /*  */
02499 

Generated on Sun May 14 14:51:18 2006 by  doxygen 1.4.2