Main Page | Modules | Class List | Directories | File List | Class Members | File Members | Related Pages

mwm.h

Go to the documentation of this file.
00001 /*
00002 ** $Id$
00003 **
00004 **  mwm.h
00005 **
00006 ** Copyright (C) 2002 Sourcefire,Inc
00007 ** Marc Norton
00008 **
00009 ** Modified Wu-Manber style Multi-Pattern Matcher
00010 **
00011 ** This program is free software; you can redistribute it and/or modify
00012 ** it under the terms of the GNU General Public License as published by
00013 ** the Free Software Foundation; either version 2 of the License, or
00014 ** (at your option) any later version.
00015 **
00016 ** This program is distributed in the hope that it will be useful,
00017 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00018 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019 ** GNU General Public License for more details.
00020 **
00021 ** You should have received a copy of the GNU General Public License
00022 ** along with this program; if not, write to the Free Software
00023 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00024 **
00025 **
00026 */
00027 
00028 #ifndef __MWM_H__
00029 #define __MWM_H__
00030 
00031 /*
00032 *   This macro enables use of the bitop test.
00033 */
00034 #define BITOP_TEST
00035 
00036 
00037 #ifdef HAVE_CONFIG_H
00038 #include "config.h"
00039 #endif
00040 
00041 #ifdef BITOP_TEST
00042 #include "bitop.h"
00043 #endif
00044 
00045 #ifndef WIN32
00046 #ifndef INLINE
00047 #define INLINE inline
00048 #endif
00049 
00050 #ifndef UINT64
00051 #define UINT64 unsigned long long
00052 #endif
00053 
00054 #else
00055 #ifndef INLINE
00056 #define INLINE __inline
00057 #endif
00058 
00059 #ifndef UINT64
00060 #define UINT64 __int64
00061 #endif
00062 
00063 #endif
00064 
00065 #ifndef CDECL 
00066 #define CDECL 
00067 #endif
00068 
00069 
00070 #define COPY_PATTERNS
00071 
00072 /*
00073 ** Enables display of pattern group stats
00074 */
00075 //#define SHOW_STATS
00076 
00077 
00078 #define MWM_FEATURES "MWM:BC/BW-SHIFT + 2-BYTE-HASH"  
00079 
00080 #define HASHTABLESIZE (64*1024)
00081 #define HASHBYTES16    2
00082 
00083 
00084 /* 
00085 ** Causes mwmAddPattern to check for and not allow duplicate patterns. 
00086 ** By default we allow multiple duplicate patterns, since the AND clause
00087 ** may cause the whole signature to be different. We trigger each pattern
00088 ** to be processed by default.
00089 */
00090 /*
00091   #define REQUIRE_UNIQUE_PATTERNS
00092 */
00093 
00094 
00095 /*
00096 *
00097 *  Boyer-Moore-Horspool for small pattern groups
00098 *    
00099 */
00100 
00101 typedef struct {  /* Boyer-Moore-Horspool state; referenced per pattern via MWM_PATTERN_STRUCT.psBmh */
00102 
00103  unsigned char *P;             /* presumably the pattern bytes being searched for -- TODO confirm in mwm.c */
00104  int            M;             /* presumably the length of P in bytes -- TODO confirm in mwm.c */
00105  short          bcShift[256];  /* 256 entries, one per possible byte value; by its name, the bad-character shift table */
00106 
00107 }HBM_STRUCT;
00108 
00109 
00110 /*
00111 **  This struct is used internally by mwm.c
00112 */
00113 typedef struct _mwm_pattern_struct  {  // one pattern record; records chain through 'next'
00114  
00115   struct _mwm_pattern_struct * next;  // next pattern record in the linked list
00116 
00117   unsigned char *psPat;   // pattern bytes for case-insensitive ("no case") matching; presumably case-folded -- confirm in mwm.c
00118   unsigned char *psPatCase;   // pattern bytes for exact (case-sensitive) matching
00119   unsigned       psLen;   // length of the pattern in bytes
00120   void          *psID;    // opaque ID pointer (original note: "OTNX"); presumably the ID given to mwmAddPatternEx -- confirm
00121   unsigned       psID2;    // secondary ID (original note: "PatMatchData"); meaning not visible in this header
00122   int            psIID;    // internal ID, used by the pattern matcher
00123   unsigned       psNoCase;// nonzero (true) => this pattern is matched case-insensitively
00124   int            psOffset;  // start searching this many bytes into the text
00125   unsigned       psDepth;   // number of bytes after psOffset to search
00126 
00127   HBM_STRUCT     * psBmh;  // Boyer-Moore-Horspool data (HBM_STRUCT) for this pattern; presumably set only for the MTH_BM method -- confirm
00128 
00129 } MWM_PATTERN_STRUCT;
00130 
00131 
00132 /*
00133 *  Pattern Matching Methods - Boyer-Moore-Horspool or Modified Wu Manber
00134 */
00135 #define MTH_MWM 0
00136 #define MTH_BM  1
00137 
00138 #define HASH_TYPE short      
00139 
00140 #define BWSHIFTABLESIZE (64*1024)
00141 
00142 /*
00143 ** Pattern GROUP Structure, this struct is used publicly, but by reference only
00144 */
00145 typedef struct _mwm_struct {  /* state for one pattern group: pattern storage, lookup tables, search routine and stats */
00146 
00147   int msMethod;  /* search method selector: MTH_BM or MTH_MWM */
00148 
00149   MWM_PATTERN_STRUCT * plist;  /* pattern list; presumably the head of the chain linked through ->next -- confirm in mwm.c */
00150 
00151   /*  Array of Patterns */
00152   int                 msMaxPatterns;  /* presumably the capacity of msPatArray -- confirm in mwm.c */
00153   MWM_PATTERN_STRUCT *msPatArray;
00154 
00155   /* Array of Group Counts, # of patterns in each hash group */
00156   unsigned short *msNumArray;
00157 
00158   /* One byte patterns */
00159   unsigned short  msNumArray1[256];  /* 256 entries, one per possible byte value */
00160  
00161   /* Number of Patterns loaded */
00162   int        msNumPatterns;
00163 
00164   /* Wu-Manber Hash Tables */
00165   unsigned   msNumHashEntries;
00166   HASH_TYPE *msHash;           // hash table for patterns of 2 or more characters ("big" table)
00167   HASH_TYPE  msHash1[256];     // hash table for one-character patterns, one entry per byte value
00168   
00169   /* Bad Character Shift Table */
00170   short    msShift[256];  /* one shift entry per possible byte value */
00171   unsigned msShiftLen;   
00172 
00173   /* Bad Word Shift Table */
00174   unsigned char* msShift2; 
00175   int msLargeShifts;  /* presumably the flag set through mwmLargeShifts() -- confirm in mwm.c */
00176 
00177 #ifdef BITOP_TEST    
00178   BITOP * RuleMask;  /* BITOP (from bitop.h) mask; presumably installed by mwmSetRuleMask() -- confirm */
00179 #endif
00180 
00181   /* Case insensitive search */
00182   int     msNoCase;  
00183 
00184   /* search function */
00185   int (*search)( struct _mwm_struct * ps,  /* ps: the pattern group being searched */
00186                  unsigned char * Tx, int n, unsigned char * Tc,  /* two text buffers plus a count n; NOTE(review): roles of Tx vs Tc are not shown here -- confirm in mwm.c */
00187                  int(*match)(void * id, int index, void * data ),  /* match callback, same shape as the 'action' argument of mwmSearch */
00188                  void * data );  /* opaque pointer; presumably handed back to match() unchanged -- confirm */
00189 
00190   /* Print Group Details */
00191   int msDetails;
00192   
00193   /* Pattern Group Stats  */
00194   int   msSmallest;  /* presumably the smallest pattern length in the group -- confirm */
00195   int   msLargest;   /* presumably the largest pattern length in the group -- confirm */
00196   int   msAvg;       /* presumably the average pattern length -- confirm */
00197   int   msTotal;     /* presumably the total of all pattern lengths -- confirm */
00198   int * msLengths;   /* per-length data; layout not visible in this header */
00199 
00200 } MWM_STRUCT;
00201 
00202 /*
00203 ** PROTOTYPES
00204 */
00205 void * mwmNew( void );           /* returns an opaque handle; presumably the 'pv' argument expected by the calls below -- confirm */
00206 void   mwmFree( void * pv );     /* releases a handle; presumably one obtained from mwmNew() -- confirm */
00207 
00208 int  mwmAddPatternEx  ( void * pv, unsigned char * P, int m,   /* P, m: presumably the pattern bytes and their length -- confirm */
00209              unsigned noCase,unsigned offset, unsigned depth,  void *  ID, int IID );  /* NOTE(review): offset is unsigned here, but MWM_PATTERN_STRUCT.psOffset is int */
00210 
00211 void mwmLargeShifts   ( void * pv, int flag );  /* presumably sets MWM_STRUCT.msLargeShifts -- confirm */
00212 int  mwmPrepPatterns  ( void * pv );            /* by its name, prepares the added patterns for searching; return convention not visible here */
00213 
00214 #ifdef BITOP_TEST
00215 void mwmSetRuleMask   ( void *pv, BITOP * rm );  /* only declared when BITOP_TEST is defined; presumably sets MWM_STRUCT.RuleMask */
00216 #endif
00217 
00218 int  mwmSearch( void *pv, unsigned char * T, int n,   /* T, n: presumably the text to search and its length -- confirm */
00219     int ( *action )(void * id, int index, void *data), void * data );   /* action: match callback; data: opaque pointer, presumably passed through to action */
00220 
00221 UINT64 mwmGetPatByteCount();  /* NOTE(review): empty () leaves the parameters unspecified in C (before C23); (void) would make this a real prototype */
00222 void mwmResetByteCount();     /* NOTE(review): same empty-parameter-list caveat as mwmGetPatByteCount */
00223 
00224                                 
00225 /* Not so useful, but not ready to be dumped  */
00226 int   mwmAddPattern( void * pv, unsigned char * P, int m, unsigned id );  /* simpler variant of mwmAddPatternEx: takes a single unsigned id and no case/offset/depth arguments */
00227 int   mwmGetNumPatterns( void * pv );  /* presumably returns MWM_STRUCT.msNumPatterns -- confirm */
00228 void  mwmFeatures( void );             /* presumably reports the MWM_FEATURES string -- confirm */
00229 
00230 
00231 #endif
00232 

Generated on Sun May 14 14:51:18 2006 by  doxygen 1.4.2