Main Page | Modules | Class List | Directories | File List | Class Members | File Members | Related Pages

hi_util_hbm.c

Go to the documentation of this file.
00001 /**
00002 **  @file       hi_hbm.c
00003 **  
00004 **  @author     Marc Norton <mnorton@sourcefire.com>
00005 **  
00006 **  @brief      Implementation of a Horspool method of Boyer-Moore
00007 **  
00008 */
00009 
00010 #include <stdlib.h>
00011 
00012 #include "hi_util_hbm.h"
00013 
00014 /*
00015 *
00016 *  Boyer-Moore-Horspool for small pattern groups
00017 *    
00018 */
00019 #ifndef WIN32  /* To avoid naming conflict, Win32 will use the hbm_prepx() in mwm.c */
00020 HBM_STRUCT * hbm_prepx(HBM_STRUCT *p, unsigned char * pat, int m)
00021 {
00022      int     k;
00023 
00024      if( !m ) return 0;
00025      if( !p ) return 0;
00026 
00027 
00028      p->P = pat;
00029 
00030      p->M = m;
00031 
00032      /* Compute normal Boyer-Moore Bad Character Shift */
00033      for(k = 0; k < 256; k++) p->bcShift[k] = m;
00034      for(k = 0; k < m; k++)   p->bcShift[pat[k]] = m - k - 1;
00035 
00036      return p;
00037 }
00038 #endif
00039 
00040 /*
00041 *
00042 */
00043 #ifndef WIN32  /* To avoid naming conflict, Win32 will use the hbm_prep() in mwm.c */
00044 HBM_STRUCT * hbm_prep(unsigned char * pat, int m)
00045 {
00046      HBM_STRUCT    *p;
00047 
00048      p = (HBM_STRUCT*)malloc( sizeof(HBM_STRUCT) );
00049      if( !p ) return 0;
00050 
00051      return hbm_prepx( p, pat, m );
00052 }
00053 #endif
00054 
00055 /*
00056 *   Boyer-Moore Horspool
00057 *   Does NOT use Sentinel Byte(s)
00058 *   Scan and Match Loops are unrolled and separated
00059 *   Optimized for 1 byte patterns as well
00060 */
00061 unsigned char * hbm_match(HBM_STRUCT * px, unsigned char *text, int n)
00062 {
00063   unsigned char *pat, *t, *et, *q;
00064   int            m1, k;
00065   short    *bcShift;
00066 
00067   m1     = px->M-1;
00068   pat    = px->P;
00069   bcShift= px->bcShift;
00070 
00071   t  = text + m1;  
00072   et = text + n; 
00073 
00074   /* Handle 1 Byte patterns - it's a faster loop */
00075   /*
00076   if( !m1 )
00077   {
00078     for( ;t<et; t++ ) 
00079       if( *t == *pat ) return t;
00080     return 0;
00081   }
00082   */
00083  
00084   /* Handle MultiByte Patterns */
00085   while( t < et )
00086   {
00087     /* Scan Loop - Bad Character Shift */
00088     do 
00089     {
00090       t += bcShift[*t];
00091       if( t >= et )return 0;;
00092 
00093       t += (k=bcShift[*t]);      
00094       if( t >= et )return 0;
00095 
00096     } while( k );
00097 
00098     /* Unrolled Match Loop */
00099     k = m1;
00100     q = t - m1;
00101     while( k >= 4 )
00102     {
00103       if( pat[k] != q[k] )goto NoMatch;  k--;
00104       if( pat[k] != q[k] )goto NoMatch;  k--;
00105       if( pat[k] != q[k] )goto NoMatch;  k--;
00106       if( pat[k] != q[k] )goto NoMatch;  k--;
00107     }
00108     /* Finish Match Loop */
00109     while( k >= 0 )
00110     {
00111       if( pat[k] != q[k] )goto NoMatch;  k--;
00112     }
00113     /* If matched - return 1st char of pattern in text */
00114     return q;
00115 
00116 NoMatch:
00117     
00118     /* Shift by 1, this replaces the good suffix shift */
00119     t++; 
00120   }
00121 
00122   return 0;
00123 }
00124 

Generated on Sun May 14 14:51:15 2006 by  doxygen 1.4.2