src/preprocessors/HttpInspect/normalization/hi

00001 /**
00002 **  @file       hi_norm.c
00003 **  
00004 **  @author     Daniel Roelker <droelker@sourcefire.com>
00005 **  
00006 **  @brief      Contains normalization skeleton for server and client
00007 **              normalization routines.
00008 **  
00009 **  This file contains the core routines to normalize the different fields
00010 **  within the HTTP protocol.  We currently only support client URI
00011 **  normalization, but the hooks are here to easily add other routines.
00012 **  
00013 **  NOTES:
00014 **      - Initial development.  DJR
00015 */
00016 #include <stdlib.h>
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <ctype.h>
00020 #include <sys/types.h>
00021 
00022 #include "hi_client_norm.h"
00023 #include "hi_eo.h"
00024 #include "hi_eo_events.h"
00025 #include "hi_eo_log.h"
00026 #include "hi_ui_iis_unicode_map.h"
00027 #include "hi_return_codes.h"
00028 #include "hi_si.h"
00029 #include "hi_util.h"
00030 #include "hi_util_xmalloc.h"
00031 
/* Capacity of the directory stack kept in URI_NORM_STATE.dir_track[]. */
#define MAX_DIRS        2048

/*
**  Classification values for characters met while hex decoding.
**  BASE36_VAL is compared against valid_lookup[] entries in this file;
**  NOTE(review): NO_HEX_VAL and HEX_VAL are presumably the other values
**  stored in that table -- confirm against the table initialisation.
**
**  Negative expansions are parenthesized so they stay a single operand
**  wherever the macro is used.
*/
#define NO_HEX_VAL      (-1)
#define BASE36_VAL      (-2)
#define HEX_VAL          1

/**
**  GET_ERR is a mask used as (rc & GET_ERR) to test for any negative
**  return code at once, instead of comparing against each error value
**  individually (assumes a 32-bit int, where bit 31 is the sign bit).
**
**  DOUBLE_ENCODING and DIR_TRAV intentionally share the value -2 in the
**  original source; NOTE(review): they appear to be returned by different
**  call chains, so they should never need to be told apart -- confirm.
*/
#define GET_ERR         0x80000000
#define END_OF_BUFFER   (-1)
#define DOUBLE_ENCODING (-2)
#define DIR_TRAV        (-2)

/* Placeholder returned for bytes that could not be decoded to ASCII. */
#define NON_ASCII_CHAR  0xff
00049 
00050 typedef struct s_URI_NORM_STATE
00051 {
00052     u_char *abs_uri;
00053     u_char *param;
00054 
00055     /*
00056     **  Directory tracking
00057     */
00058     u_char *dir_track[MAX_DIRS];
00059     u_int   dir_count;
00060 
00061 }  URI_NORM_STATE;
00062 
00063 typedef int (*DECODE_FUNC)(HI_SESSION *, u_char *,
00064                           u_char *, u_char **, URI_NORM_STATE *);
00065 
00066 static int hex_lookup[256];
00067 static int valid_lookup[256];
00068 
00069 /*
00070 **  NAME
00071 **    GetPtr::
00072 */
00073 /**
00074 **  This routine is for getting bytes in the U decode.
00075 **  
00076 **  This checks the current bounds and checking for the double decoding.
00077 **  This routine differs from the other Get routines because it returns
00078 **  other values than just END_OF_BUFFER and the char.
00079 **  
00080 **  We also return DOUBLE_ENCODING if there is a % and double decoding
00081 **  is turned on.
00082 **  
00083 **  When using this function it is important to note that it increments
00084 **  the buffer before checking the bounds.  So, if you call this function
00085 **  in a loop and don't check for END_OF_BUFFER being returned, then 
00086 **  you are going to overwrite the buffer.  If I put the check in, you
00087 **  would just be in an never-ending loop.  So just use this correctly.
00088 **  
00089 **  @param ServerConf  the server configuration
00090 **  @param start       the start of the URI
00091 **  @param end         the end of the URI
00092 **  @param ptr         the current pointer into the URI
00093 **  
00094 **  @return integer
00095 **  
00096 **  @retval END_OF_BUFFER    the end of the buffer has been reached.
00097 **  @retval DOUBLE_ENCODING  a percent was found and double decoding is on
00098 **  @retval <= 0xff          an ASCII char         
00099 */
00100 static int GetPtr(HI_SESSION *Session, u_char *start,
00101                    u_char *end, u_char **ptr, URI_NORM_STATE *norm_state)
00102 {
00103     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00104 
00105     (*ptr)++;
00106 
00107     if(!hi_util_in_bounds(start, end, *ptr))
00108         return END_OF_BUFFER;
00109 
00110     if(ServerConf->double_decoding.on && **ptr == '%')
00111         return DOUBLE_ENCODING;
00112 
00113     return (int)**ptr;
00114 }
00115 
00116 /*
00117 **  NAME
00118 **    UDecode::
00119 */
00120 /**
00121 **  Handles the single decode for %U encoding.
00122 **  
00123 **  This routine receives the ptr pointing to the u.  We check the bounds
00124 **  and continue with processing.  %u encoding works by specifying the
00125 **  exact codepoint to be used.  For example, %u002f would be /.  So this
00126 **  all seems fine.  BUT, the problem is that IIS maps multiple codepoints
00127 **  to ASCII characters.  So, %u2044 also maps to /.  So this is what we
00128 **  need to handle here.
00129 **  
00130 **  This routine only handles the single encoding.  For double decoding,
00131 **  %u is handled in DoubleDecode().  It's the same routine, with just
00132 **  the GetByte function different.
00133 **  
00134 **  We use a get_byte function to get the bytes, so we can use this
00135 **  routine for PercentDecode and for DoubleDecode.
00136 **
00137 **  @param ServerConf  the server configuration
00138 **  @param start       the start of the URI
00139 **  @param end         the end of the URI
00140 **  @param ptr         the current pointer into the URI
00141 **  @param get_byte    the function pointer to get bytes.
00142 **  
00143 **  @return integer
00144 **  
00145 **  @retval END_OF_BUFFER    we are at the end of the buffer
00146 **  @retval DOUBLE_ENCODING  this U encoding is possible double encoded
00147 **  @retval NON_ASCII_CHAR   return this char for non-ascii or bad decodes
00148 **  @retval iChar            this is the char that we decoded.
00149 */
00150 static int UDecode(HI_SESSION *Session, u_char *start,
00151                    u_char *end, u_char **ptr, DECODE_FUNC get_byte,
00152                    URI_NORM_STATE *norm_state)
00153 {
00154     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00155     int iByte;
00156     int iNorm;
00157     int iCtr;
00158 
00159     iNorm = 0;
00160 
00161     for(iCtr = 0; iCtr < 4; iCtr++)
00162     {
00163         iByte = get_byte(Session, start, end, ptr, norm_state);
00164         if(iByte & GET_ERR)
00165             return iByte;
00166 
00167         if(valid_lookup[(u_char)iByte] < 0)
00168             return NON_ASCII_CHAR;
00169 
00170         iNorm <<= 4;
00171         iNorm = (iNorm | (hex_lookup[(u_char)iByte]));
00172     }
00173 
00174     /*
00175     **  If the decoded codepoint is greater than a single byte value,
00176     **  then we return a NON_ASCII_CHAR.
00177     */
00178     if(iNorm > 0xff)
00179     {
00180         /*
00181         **  We check here for IIS codepoints that map to ASCII chars.
00182         */
00183         if(ServerConf->iis_unicode.on && iNorm <= 0xffff)
00184         {
00185             iNorm = ServerConf->iis_unicode_map[iNorm];
00186 
00187             if(iNorm == HI_UI_NON_ASCII_CODEPOINT)
00188             {
00189                 iNorm = NON_ASCII_CHAR;
00190             }
00191 
00192             if(hi_eo_generate_event(Session, ServerConf->iis_unicode.alert) &&
00193                !norm_state->param)
00194             {
00195                 hi_eo_client_event_log(Session, HI_EO_CLIENT_IIS_UNICODE,
00196                                        NULL, NULL);
00197             }
00198         }
00199         else
00200         {
00201             return NON_ASCII_CHAR;
00202         }
00203     }
00204 
00205     /*
00206     **  Check if we alert on this encoding
00207     */
00208     if(hi_eo_generate_event(Session, ServerConf->u_encoding.alert) &&
00209        !norm_state->param)
00210     {
00211         hi_eo_client_event_log(Session, HI_EO_CLIENT_U_ENCODE,
00212                                          NULL, NULL);
00213     }
00214 
00215     return iNorm;
00216 }
00217 
00218 /*
00219 **  NAME
00220 **    PercentDecode::
00221 */
00222 /**
00223 **  This is the first level of decoding, and deals with ASCII, U, and
00224 **  double decoding.
00225 **
00226 **  This function is the main decoding function.  It handles all the ASCII
00227 **  encoding and the U encoding, and tells us when there is a double
00228 **  encoding.
00229 **  
00230 **  We use the GetPtr() routine to get the bytes for us.  This routine
00231 **  checks for DOUBLE_ENCODING and tells us about it if it finds something,
00232 **  so we can reset the ptrs and run it through the double decoding
00233 **  routine.
00234 **  
00235 **  The philosophy behind this routine is that if we run out of buffer
00236 **  we return such, the only other thing we return besides the decodes
00237 **  char is a NON_ASCII_CHAR in the case that we try and decode something
00238 **  like %tt.  This is no good, so we return a place holder.
00239 **  
00240 **  @param ServerConf  the server configuration
00241 **  @param start       the start of the URI
00242 **  @param end         the end of the URI
00243 **  @param ptr         the current pointer into the URI
00244 **  
00245 **  @return integer
00246 **  
00247 **  @retval END_OF_BUFFER   We've hit the end of buffer while decoding.
00248 **  @retval NON_ASCII_CHAR  Invalid hex encoding, so we return a placeholder.
00249 **  @retval char            return the valid char
00250 **  
00251 **  @see GetPtr()
00252 */
00253 static int PercentDecode(HI_SESSION *Session, u_char *start, 
00254                          u_char *end, u_char **ptr, URI_NORM_STATE *norm_state)
00255 {
00256     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00257     int    iByte;
00258     u_char *orig_ptr;
00259     int    iNorm;
00260 
00261     orig_ptr = *ptr;
00262 
00263     iByte = GetPtr(Session, start, end, ptr, norm_state);
00264     if(iByte & GET_ERR)
00265     {
00266         if(iByte == END_OF_BUFFER)
00267             return END_OF_BUFFER;
00268 
00269         if(iByte == DOUBLE_ENCODING)
00270         {
00271             *ptr = orig_ptr;
00272             return (int)**ptr;
00273         }
00274     }
00275 
00276     /*
00277     **  Initialize the normalization byte
00278     */
00279     iNorm = 0;
00280 
00281     /*
00282     **  hex values
00283     */
00284     if(valid_lookup[(u_char)iByte] < 0)
00285     {
00286         /*
00287         **  Check for %u encoding.
00288         **
00289         **  The u-encoding loop always returns something.
00290         */
00291         if(ServerConf->u_encoding.on && (toupper(iByte) == 'U'))
00292         {
00293             iNorm = UDecode(Session, start, end, ptr, GetPtr, norm_state);
00294 
00295             /*
00296             **  We have to handle the double meaning of END_OF_BUFFER
00297             **  when using the GetPtr() function.
00298             */
00299             if(iNorm & GET_ERR)
00300             {
00301                 if(iNorm == END_OF_BUFFER)
00302                 {
00303                     /*
00304                     **  We have reached the end of the buffer while
00305                     **  processing a U encoding.
00306                     */
00307                     return END_OF_BUFFER;
00308                 }
00309 
00310                 if(iNorm == DOUBLE_ENCODING)
00311                 {
00312                     *ptr = orig_ptr;
00313                     return (int)**ptr;
00314                 }
00315             }
00316 
00317             return iNorm;
00318         }
00319         else if(!ServerConf->base36.on ||
00320                 valid_lookup[(u_char)iByte] != BASE36_VAL)
00321         {
00322             return NON_ASCII_CHAR;
00323         }
00324 
00325         /*
00326         **  The logic above dictates that if we get to this point, we
00327         **  have a valid base36 encoding, so let's log the event.
00328         */
00329         if(hi_eo_generate_event(Session, ServerConf->base36.alert) &&
00330            !norm_state->param)
00331         {
00332             hi_eo_client_event_log(Session, HI_EO_CLIENT_BASE36, NULL, NULL);
00333         }
00334     }
00335 
00336     iNorm = (hex_lookup[(u_char)iByte]<<4);
00337     iByte = GetPtr(Session, start, end, ptr, norm_state);
00338     if(iByte & GET_ERR)
00339     {
00340         if(iByte == END_OF_BUFFER)
00341             return END_OF_BUFFER;
00342 
00343         if(iByte == DOUBLE_ENCODING)
00344         {
00345             *ptr = orig_ptr;
00346             return (int)**ptr;
00347         }
00348     }
00349 
00350     if(valid_lookup[(u_char)iByte] < 0)
00351     {
00352         if(!ServerConf->base36.on || valid_lookup[(u_char)iByte] != BASE36_VAL)
00353         {
00354             return NON_ASCII_CHAR;
00355         }
00356 
00357         /*
00358         **  Once again, we know we have a valid base36 encoding, let's alert
00359         **  if possible.
00360         */
00361         if(hi_eo_generate_event(Session, ServerConf->base36.alert) &&
00362            !norm_state->param)
00363         {
00364             hi_eo_client_event_log(Session, HI_EO_CLIENT_BASE36, NULL, NULL);
00365         }
00366     }
00367 
00368     iNorm = (iNorm | (hex_lookup[(u_char)iByte])) & 0xff;
00369 
00370     if(hi_eo_generate_event(Session,ServerConf->ascii.alert) &&
00371        !norm_state->param)
00372     {
00373         hi_eo_client_event_log(Session, HI_EO_CLIENT_ASCII,
00374                                NULL, NULL);
00375     }
00376 
00377     return iNorm;
00378 }
00379 
00380 /*
00381 **  NAME
00382 **    GetChar::
00383 */
00384 /**
00385 **  Wrapper for PercentDecode() and handles the return values from
00386 **  PercentDecode().
00387 **  
00388 **  This really decodes the chars for UnicodeDecode().  If the char is
00389 **  a percent then we process stuff, otherwise we just increment the
00390 **  pointer and return.
00391 **  
00392 **  @param ServerConf  the server configuration
00393 **  @param start       the start of the URI
00394 **  @param end         the end of the URI
00395 **  @param ptr         the current pointer into the URI
00396 **  @param bare_byte   value for a non-ASCII char or a decoded non-ASCII char
00397 **  
00398 **  @return integer
00399 **  
00400 **  @retval END_OF_BUFFER   End of the buffer has been reached before decode.
00401 **  @retval NON_ASCII_CHAR  End of buffer during decoding, return decoded char.
00402 **  @retval char            return the valid decoded/undecoded char
00403 **  
00404 **  @see PercentDecode()
00405 **  @see GetByte()
00406 */
00407 static int GetChar(HI_SESSION *Session, u_char *start,
00408                    u_char *end, u_char **ptr, int *bare_byte,
00409                    URI_NORM_STATE *norm_state)
00410 {
00411     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00412     int iNorm;
00413 
00414     if(!hi_util_in_bounds(start, end, *ptr))
00415         return END_OF_BUFFER;
00416 
00417     iNorm = (int)(**ptr);
00418     
00419     if(**ptr == '%' && ServerConf->ascii.on)
00420     {
00421         /*
00422         **  We go into percent encoding.
00423         */
00424         iNorm = PercentDecode(Session, start, end, ptr, norm_state);
00425 
00426         /*
00427         **  If during the course of PercentDecode() we run into the end
00428         **  of the buffer, then we return early (WITHOUT INCREMENTING ptr)
00429         **  with a NON_ASCII_CHAR.
00430         */
00431         if(iNorm == END_OF_BUFFER)
00432             return NON_ASCII_CHAR;
00433 
00434         *bare_byte = 0;
00435     }
00436     else
00437     {
00438         if(ServerConf->bare_byte.on && (u_char)iNorm > 0x7f)
00439         {
00440             if(hi_eo_generate_event(Session, ServerConf->bare_byte.alert) &&
00441                !norm_state->param)
00442             {
00443                 hi_eo_client_event_log(Session, HI_EO_CLIENT_BARE_BYTE,
00444                                        NULL, NULL);
00445             }
00446 
00447             /*
00448             **  Set the bare_byte flag
00449             */
00450             *bare_byte = 0;
00451         }
00452         else
00453         {
00454             /*
00455             **  Set the bare_byte flag negative.
00456             */
00457             *bare_byte = 1;
00458         }
00459     }
00460 
00461     /*
00462     **  Increment the buffer.
00463     */
00464     (*ptr)++;
00465 
00466     return iNorm;
00467 }
00468 
00469 /*
00470 **  NAME
00471 **    UTF8Decode::
00472 */
00473 /**
00474 **  Decode the UTF-8 sequences and check for valid codepoints via the
00475 **  Unicode standard and the IIS standard.
00476 **  
00477 **  We decode up to 3 bytes of UTF-8 because that's all I've been able to
00478 **  get to work on various servers, so let's reduce some false positives.
00479 **  So we decode valid UTF-8 sequences and then check the value.  If the
00480 **  value is ASCII, then it's decoded to that.  Otherwise, if iis_unicode
00481 **  is turned on, we will check the unicode codemap for valid IIS mappings.
00482 **  If a mapping turns up, then we return the mapped ASCII.
00483 **  
00484 **  @param ServerConf  the server configuration
00485 **  @param start       the start of the URI
00486 **  @param end         the end of the URI
00487 **  @param ptr         the current pointer into the URI
00488 **  
00489 **  @return integer
00490 **  
00491 **  @retval NON_ASCII_CHAR  Reached end of buffer while decoding
00492 **  @retval char            return the decoded or badly decoded char
00493 **  
00494 **  @see GetByte()
00495 **  @see UnicodeDecode()
00496 */
00497 static int UTF8Decode(HI_SESSION *Session, u_char *start,
00498                       u_char *end, u_char **ptr, int iFirst,
00499                       URI_NORM_STATE *norm_state)
00500 {
00501     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00502     int iBareByte;
00503     int iNorm;
00504     int iNumBytes;
00505     int iCtr;
00506     int iByte;
00507 
00508     /*
00509     **  Right now we support up to 3 byte unicode sequences.  We can add 
00510     **  more if any of the HTTP servers support more.
00511     */
00512     if((iFirst & 0xe0) == 0xc0)
00513     {
00514         iNumBytes = 1;
00515         iNorm = iFirst & 0x1f;
00516     }
00517     else if((iFirst & 0xf0) == 0xe0)
00518     {
00519         iNumBytes = 2;
00520         iNorm = iFirst & 0x0f;
00521     }
00522     else
00523     {
00524         /*
00525         **  This means that we have an invalid first sequence byte for
00526         **  a unicode sequence.  So we just return the byte and move on.
00527         */
00528         return iFirst;
00529     }
00530 
00531     /*
00532     **  This is the main loop for UTF-8 decoding.  We check for the only
00533     **  valid sequence after the first byte whish is 0x80.  Otherwise,
00534     **  it was invalid and we setnd a NON_ASCII_CHAR and continue on
00535     **  with our processing.
00536     */
00537     for(iCtr = 0; iCtr < iNumBytes; iCtr++)
00538     {
00539         iByte = GetChar(Session, start, end, ptr, &iBareByte, norm_state);
00540         if(iByte == END_OF_BUFFER || iBareByte)
00541             return NON_ASCII_CHAR;
00542 
00543         if((iByte & 0xc0) == 0x80)
00544         {
00545             iNorm <<= 6;
00546             iNorm |= (iByte & 0x3f);
00547         }
00548         else
00549         {
00550             /*
00551             **  This means that we don't have a valid unicode sequence, so
00552             **  we just bail.
00553             */
00554             return NON_ASCII_CHAR;
00555         }
00556     }
00557 
00558     /*
00559     **  Check for unicode as ASCII and if there is not an ASCII char then
00560     **  we return the space holder char.
00561     */
00562     if(iNorm > 0x7f)
00563     {
00564         if(ServerConf->iis_unicode.on)
00565         {
00566             iNorm = ServerConf->iis_unicode_map[iNorm];
00567 
00568             if(iNorm == HI_UI_NON_ASCII_CODEPOINT)
00569             {
00570                 iNorm = NON_ASCII_CHAR;
00571             }
00572 
00573             if(hi_eo_generate_event(Session, ServerConf->iis_unicode.alert) &&
00574                !norm_state->param)
00575             {
00576                 hi_eo_client_event_log(Session, HI_EO_CLIENT_IIS_UNICODE,
00577                                        NULL, NULL);
00578             }
00579 
00580             return iNorm;
00581         }
00582         else
00583         {
00584             iNorm = NON_ASCII_CHAR;
00585         }
00586     }
00587 
00588     if(hi_eo_generate_event(Session, ServerConf->utf_8.alert) &&
00589        !norm_state->param)
00590     {
00591         hi_eo_client_event_log(Session, HI_EO_CLIENT_UTF_8,
00592                                NULL, NULL);
00593     }
00594 
00595     return iNorm;
00596 }
00597 
00598 /*
00599 **  NAME
00600 **    UnicodeDecode::
00601 */
00602 /**
00603 **  Checks for the ServerConf values before we actually decode.
00604 **  
00605 **  This function is really a ServerConf wrapper for UTF8Decode.
00606 **
00607 **  @param ServerConf  the server configuration
00608 **  @param start       the start of the URI
00609 **  @param end         the end of the URI
00610 **  @param ptr         the current pointer into the URI
00611 **  
00612 **  @return integer
00613 **  
00614 **  @retval char       the decode/undecoded byte.
00615 **  
00616 **  @see GetByte()
00617 */
00618 static int UnicodeDecode(HI_SESSION *Session, u_char *start, 
00619                          u_char *end, u_char **ptr, int iFirst,
00620                          URI_NORM_STATE *norm_state)
00621 {
00622     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00623     int iNorm = iFirst;
00624 
00625     if(ServerConf->iis_unicode.on || ServerConf->utf_8.on)
00626     {
00627         iNorm = UTF8Decode(Session, start, end, ptr, iFirst, norm_state);
00628     }
00629 
00630     return iNorm;
00631 }
00632 
00633 /*
00634 **  NAME
00635 **    GetByte::
00636 */
00637 /**
00638 **  Handles the first stage of URI decoding for the case of IIS double
00639 **  decoding.
00640 **  
00641 **  The first stage consists of ASCII decoding and unicode decoding.  %U
00642 **  decoding is handled in the ASCII decoding.
00643 **  
00644 **  @param ServerConf  the server configuration
00645 **  @param start       the start of the URI
00646 **  @param end         the end of the URI
00647 **  @param ptr         the current pointer into the URI
00648 **  
00649 **  @return integer
00650 **  
00651 **  @retval END_OF_BUFFER means that we've reached the end of buffer in
00652 **                        GetChar.
00653 **  @retval iChar         this is the character that was decoded.
00654 */
00655 static int GetByte(HI_SESSION *Session, u_char *start, u_char *end,
00656                    u_char **ptr, URI_NORM_STATE *norm_state)
00657 {
00658     int iChar;
00659     int iBareByte;
00660 
00661     iChar = GetChar(Session, start, end, ptr, &iBareByte, norm_state);
00662     if(iChar == END_OF_BUFFER)
00663         return END_OF_BUFFER;
00664 
00665     /*
00666     **  We now check for unicode bytes
00667     */
00668     if((iChar & 0x80) && (iChar != NON_ASCII_CHAR) && !iBareByte)
00669     {
00670         iChar = UnicodeDecode(Session, start, end, ptr, iChar, norm_state);
00671     }
00672 
00673     return iChar;
00674 }
00675 
00676 /*
00677 **  NAME
00678 **    DoubleDecode::
00679 */
00680 /**
00681 **  The double decoding routine for IIS good times.
00682 **  
00683 **  Coming into this function means that we just decoded a % or that
00684 **  we just saw two percents in a row.  We know which state we are
00685 **  in depending if the first char is a '%' or not.
00686 **
00687 **  In the IIS world, there are two decodes, but only some of the decode
00688 **  options are valid.  All options are valid in the first decode
00689 **  stage, but the second decode stage only supports:
00690 **  -  %u encoding
00691 **  -  ascii
00692 **
00693 **  Knowing this, we can decode appropriately.
00694 **  
00695 **  @param ServerConf  the server configuration
00696 **  @param start       the start of the URI
00697 **  @param end         the end of the URI
00698 **  @param ptr         the current pointer into the URI
00699 **  @param norm_state  the ptr to the URI norm state
00700 **  
00701 **  @return integer
00702 **  
00703 **  @retval NON_ASCII_CHAR  End of buffer reached while decoding
00704 **  @retval char            The decoded char
00705 */
00706 static int DoubleDecode(HI_SESSION *Session, u_char *start,
00707                         u_char *end, u_char **ptr,
00708                         URI_NORM_STATE *norm_state)
00709 {
00710     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00711     int iByte;
00712     int iNorm;
00713     u_char *orig_ptr;
00714 
00715     orig_ptr = *ptr;
00716 
00717     /*
00718     **  We now know that we have seen a previous % and that we need to
00719     **  decode the remaining bytes.  We are in one of multiple cases:
00720     **
00721     **  -  %25xxxx
00722     **  -  %%xx%xx
00723     **  -  %u0025xxxx
00724     **  -  etc.
00725     **
00726     **  But, the one common factor is that they each started out with a
00727     **  % encoding of some type.
00728     **
00729     **  So now we just get the remaining bytes and do the processing
00730     **  ourselves in this routine.
00731     */
00732     iByte = GetByte(Session, start, end, ptr, norm_state);
00733     if(iByte == END_OF_BUFFER)
00734         return NON_ASCII_CHAR;
00735 
00736     if(valid_lookup[(u_char)iByte] < 0)
00737     {
00738         if(ServerConf->u_encoding.on && (toupper(iByte) == 'U'))
00739         {
00740             iNorm = UDecode(Session, start, end, ptr, GetByte, norm_state);
00741             
00742             if(iNorm == END_OF_BUFFER)
00743             {
00744                 /*
00745                 **  We have reached the end of the buffer while
00746                 **  processing a U encoding.  We keep the current
00747                 **  pointer and return a NON_ASCII char for the
00748                 **  bad encoding.
00749                 */
00750                 return NON_ASCII_CHAR;
00751             }
00752 
00753             return iNorm;
00754         }
00755 
00756         return iByte;
00757     }
00758 
00759     iNorm = (hex_lookup[(u_char)iByte]<<4);
00760 
00761     iByte = GetByte(Session, start, end, ptr, norm_state);
00762     if(iByte == END_OF_BUFFER)
00763         return NON_ASCII_CHAR;
00764 
00765     if(valid_lookup[(u_char)iByte] < 0)
00766     {
00767         return iByte;
00768     }
00769 
00770     iNorm = (iNorm | (hex_lookup[(u_char)iByte])) & 0xff;
00771 
00772     if(hi_eo_generate_event(Session, ServerConf->double_decoding.alert) &&
00773        (norm_state->param == NULL))
00774     {
00775         hi_eo_client_event_log(Session, HI_EO_CLIENT_DOUBLE_DECODE,
00776                                NULL, NULL);
00777     }
00778 
00779     return iNorm;
00780 }
00781 
00782 /*
00783 **  NAME
00784 **    GetDecodedByte::
00785 */
00786 /**
00787 **  This is the final GetByte routine.  The value that is returned from this
00788 **  routine is the final decoded byte, and normalization can begin.  This
00789 **  routine handles the double phase of decoding that IIS is fond of.
00790 **  
00791 **  So to recap all the decoding up until this point.
00792 **  
00793 **  The first phase is to call GetByte().  GetByte() returns the first stage
00794 **  of decoding, which handles the UTF-8 decoding.  If we have decoded a
00795 **  % of some type, then we head into DoubleDecode() if the ServerConf
00796 **  allows it.
00797 **  
00798 **  What returns from DoubleDecode is the final result.
00799 **  
00800 **  @param ServerConf  the server configuration
00801 **  @param start       the start of the URI
00802 **  @param end         the end of the URI
00803 **  @param ptr         the current pointer into the URI
00804 **  @param norm_state  the pointer to the URI norm state
00805 **  
00806 **  @return integer
00807 **  
00808 **  @retval END_OF_BUFFER  While decoding, the end of buffer was reached.
00809 **  @retval char           The resultant decoded char.
00810 **  
00811 **  @see DoubleDecode();
00812 **  @see GetByte();
00813 */
00814 static int GetDecodedByte(HI_SESSION *Session, u_char *start,
00815                           u_char *end, u_char **ptr, 
00816                           URI_NORM_STATE *norm_state)
00817 {
00818     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00819     int iChar;
00820 
00821     iChar = GetByte(Session,start,end,ptr, norm_state);
00822     if(iChar == END_OF_BUFFER)
00823         return END_OF_BUFFER;
00824 
00825     if(ServerConf->double_decoding.on && (u_char)iChar == '%')
00826     {
00827         iChar = DoubleDecode(Session,start,end,ptr, norm_state);
00828     }
00829 
00830     /*
00831     **  Let's change '\' to '/' if possible
00832     */
00833     if(ServerConf->iis_backslash.on && (u_char)iChar == 0x5c)
00834     {
00835         if(hi_eo_generate_event(Session, ServerConf->iis_backslash.alert) &&
00836            !norm_state->param)
00837         {
00838             hi_eo_client_event_log(Session, HI_EO_CLIENT_IIS_BACKSLASH, 
00839                                    NULL, NULL);
00840         }
00841 
00842         iChar = 0x2f;
00843     }
00844 
00845     return iChar;
00846 }
00847 
00848 /*
00849 **  NAME
00850 **    DirTrav::
00851 */
00852 /**
00853 **  Set the ub_ptr and update the URI_NORM_STATE.
00854 **  
00855 **  The main point of this function is to take care of the details in
00856 **  updating the directory stack and setting the buffer pointer to the
00857 **  last directory.
00858 **  
00859 **  @param norm_state pointer to the normalization state struct
00860 **  @param ub_ptr     double pointer to the normalized buffer
00861 **  
00862 **  @return integer
00863 **  
00864 **  @retval HI_SUCCESS function successful
00865 **  
00866 **  @see hi_norm_uri()
00867 */
00868 static int DirTrav(HI_SESSION *Session, URI_NORM_STATE *norm_state,
00869                    u_char *ub_start,u_char **ub_ptr)
00870 {
00871     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
00872 
00873     if(norm_state->dir_count)
00874     {
00875         *ub_ptr = norm_state->dir_track[norm_state->dir_count - 1];
00876         
00877         /*
00878         **  Check to make sure that we aren't at the beginning
00879         */
00880         if(norm_state->dir_count >= 1)
00881         {
00882             norm_state->dir_count--;
00883         }
00884     }
00885     else
00886     {
00887         /*
00888         **  This is a special case where there was no / seen before
00889         **  we see a /../.  When this happens, we just reset the ub_ptr
00890         **  back to the beginning of the norm buffer and let the slash
00891         **  get written on the next iteration of the loop.
00892         */
00893         *ub_ptr = ub_start;
00894 
00895         /*
00896         **  Let's put the alert here for webroot dir traversal.
00897         */
00898         if(hi_eo_generate_event(Session, ServerConf->webroot.alert) &&
00899            !norm_state->param)
00900         {
00901             hi_eo_client_event_log(Session, HI_EO_CLIENT_WEBROOT_DIR,
00902                                    NULL, NULL);
00903         }
00904     }
00905 
00906     return HI_SUCCESS; 
00907 }
00908 
00909 /*
00910 **  NAME
00911 **    DirSet::
00912 */
00913 /**
00914 **  Set the directory by writing a '/' to the normalization buffer and
00915 **  updating the directory stack.
00916 **  
00917 **  This gets called after every slash that isn't a directory traversal.  We
00918 **  just write a '/' and then update the directory stack to point to the
00919 **  last directory, in the case of future directory traversals.
00920 **  
00921 **  @param norm_state pointer to the normalization state struct
00922 **  @param ub_ptr     double pointer to the normalized buffer
00923 **  
00924 **  @return integer
00925 **  
00926 **  @retval HI_SUCCESS function successful
00927 **  
00928 **  @see hi_norm_uri()
00929 */
00930 static int DirSet(URI_NORM_STATE *norm_state, u_char **ub_ptr)
00931 {
00932     /*
00933     **  Write the '/'.  Even if iDir is the END_OF_BUFFER we still
00934     **  write it because the '/' came before the END_OF_BUFFER.
00935     */
00936     **ub_ptr = '/';
00937 
00938     if(!norm_state->param)
00939     {
00940         norm_state->dir_track[norm_state->dir_count] = *ub_ptr;
00941         if(norm_state->dir_count < MAX_DIRS)
00942             norm_state->dir_count++;
00943     }
00944 
00945     (*ub_ptr)++;
00946 
00947     return HI_SUCCESS;
00948 }
00949 
00950 /*
00951 **  NAME
00952 **    DirNorm::
00953 */
00954 /**
00955 **  The main function for dealing with multiple slashes, self-referential
00956 **  directories, and directory traversals.
00957 **  
00958 **  This routine does GetDecodedByte() while looking for directory foo.  It's
00959 **  called every time that we see a slash in the main hi_norm_uri.  Most of
00960 **  the time we just enter this loop, find a non-directory-foo char and 
00961 **  return that char.  hi_norm_uri() takes care of the directory state
00962 **  updating and so forth.
00963 **  
00964 **  But when we run into trouble with directories, this function takes care
00965 **  of that.  We loop through multiple slashes until we get to the next
00966 **  directory.  We also loop through self-referential directories until we
00967 **  get to the next directory.  Then finally we deal with directory 
00968 **  traversals.
00969 **  
00970 **  With directory traversals we do a kind of "look ahead".  We verify that
00971 **  there is indeed a directory traversal, and then set the ptr back to
00972 **  the beginning of the '/', so when we iterate through hi_norm_uri() we
00973 **  catch it.
00974 **  
00975 **  The return value for this function is usually the character after
00976 **  the directory.  When there was a directory traversal, it returns the
00977 **  value DIR_TRAV.  And when END_OF_BUFFER is returned, it means that we've
00978 **  really hit the end of the buffer, or we were looping through multiple
00979 **  slashes and self-referential directories until the end of the URI
00980 **  buffer.
00981 **  
00982 **  @param ServerConf   pointer to the Server configuration
00983 **  @param start        pointer to the start of the URI buffer
00984 **  @param end          pointer to the end of the URI buffer
00985 **  @param ptr          pointer to the index in the URI buffer
00986 **  
00987 **  @return integer
00988 **  
00989 **  @retval END_OF_BUFFER   we've reached the end of buffer
00990 **  @retval DIR_TRAV        we found a directory traversal
00991 **  @retval char            return the next char after the directory
00992 **  
00993 **  @see hi_norm_uri()
00994 **  @see GetDecodedByte()
00995 */
00996 static int DirNorm(HI_SESSION *Session, u_char *start, u_char *end,
00997                    u_char **ptr, URI_NORM_STATE *norm_state)
00998 {
00999     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
01000     int iChar;
01001     int iDir;
01002     u_char *orig_ptr;
01003     u_char *dir_ptr;
01004 
01005     while((iChar = GetDecodedByte(Session, start, end, ptr, norm_state)) !=
01006           END_OF_BUFFER)
01007     {
01008         orig_ptr = *ptr;
01009 
01010         /*
01011         **  This is kind of a short cut to get out of here as soon as we
01012         **  can.  If the character is over 0x2f then we know that is can't
01013         **  be either the '.' or the '/', so we break and return the
01014         **  char.
01015         */
01016         if((u_char)iChar < 0x30)
01017         {
01018             /*
01019             **  We check for multiple slashes.  If we find multiple slashes
01020             **  then we just continue on until we find something interesting.
01021             */
01022             if(ServerConf->multiple_slash.on && (u_char)iChar == '/')
01023             {
01024                 if(hi_eo_generate_event(Session,
01025                                         ServerConf->multiple_slash.alert) &&
01026                    !norm_state->param)
01027                 {
01028                     hi_eo_client_event_log(Session,
01029                                            HI_EO_CLIENT_MULTI_SLASH,
01030                                            NULL, NULL);
01031                 }
01032 
01033                 continue;
01034             }
01035             /*
01036             **  This is where we start looking for self-referential dirs
01037             **  and directory traversals.
01038             */
01039             else if(ServerConf->directory.on && (u_char)iChar == '.' &&
01040                     !norm_state->param)
01041             {
01042                 iDir = GetDecodedByte(Session,start,end,ptr,norm_state);
01043                 if(iDir != END_OF_BUFFER)
01044                 {
01045                     if((u_char)iDir == '.')
01046                     {
01047                         /*
01048                         **  This sets the dir_ptr to the beginning of the
01049                         **  byte that may be a dir.  So if it is a slash,
01050                         **  we can get back to that slash and continue
01051                         **  processing.
01052                         */
01053                         dir_ptr = *ptr;
01054 
01055                         iDir = GetDecodedByte(Session,start,end,ptr,norm_state);
01056                         if(iDir != END_OF_BUFFER)
01057                         {
01058                             if((u_char)iDir == '/')
01059                             {
01060                                 /*
01061                                 **  We found a real live directory traversal
01062                                 **  so we reset the pointer to before the
01063                                 **  '/' and finish up after the return.
01064                                 */
01065                                 if(hi_eo_generate_event(Session,
01066                                                  ServerConf->directory.alert)&&
01067                                    !norm_state->param)
01068                                 {
01069                                     hi_eo_client_event_log(Session,
01070                                                          HI_EO_CLIENT_DIR_TRAV,
01071                                                          NULL, NULL);
01072                                 }
01073 
01074                                 *ptr = dir_ptr;
01075                                 return DIR_TRAV;
01076                             }
01077                         }
01078 
01079                         *ptr = orig_ptr;
01080                         return iChar;
01081                     }
01082                     else if((u_char)iDir == '/')
01083                     {
01084                         /*
01085                         **  We got a self-referential directory traversal.
01086                         **
01087                         **  Keep processing until we stop seeing self
01088                         **  referential directories.
01089                         */
01090                         if(hi_eo_generate_event(Session,
01091                                                 ServerConf->directory.alert) &&
01092                            !norm_state->param)
01093                         {
01094                             hi_eo_client_event_log(Session,
01095                                                    HI_EO_CLIENT_SELF_DIR_TRAV,
01096                                                    NULL, NULL);
01097                         }
01098 
01099                         continue;
01100                     }
01101                 }
01102   
01103                 /*
01104                 **  This means that we saw '.' and then another char, so
01105                 **  it was just a file/dir that started with a '.'.
01106                 */
01107                 *ptr = orig_ptr;
01108                 return iChar;
01109             }
01110         }
01111 
01112         /*
01113         **  This is where we write the chars after the slash
01114         */
01115         return iChar;
01116     }
01117 
01118     return END_OF_BUFFER;
01119 }
01120 
01121 /*
01122 **  NAME
01123 **    CheckLongDir::
01124 */
01125 /**
01126 **  This function checks for long directory names in the request URI.
01127 **  
01128 **  @param Session    pointer to the session
01129 **  @param norm_state pointer to the directory stack
01130 **  @param ub_ptr     current pointer in normalization buffer
01131 **  
01132 **  @return integer
01133 **  
01134 **  @retval HI_SUCCESS
01135 */
01136 static int CheckLongDir(HI_SESSION *Session, URI_NORM_STATE *norm_state, 
01137                         u_char *ub_ptr)
01138 {
01139     int    iDirLen;
01140     u_char *LastDir;
01141 
01142     /*
01143     **  First check that we are alerting on long directories and then
01144     **  check that we've seen a previous directory.
01145     */
01146     if(Session->server_conf->long_dir && norm_state->dir_count &&
01147        !norm_state->param)
01148     {
01149         LastDir = norm_state->dir_track[norm_state->dir_count - 1];
01150 
01151         iDirLen = ub_ptr - LastDir;
01152 
01153         if(iDirLen > Session->server_conf->long_dir &&
01154            hi_eo_generate_event(Session, HI_EO_CLIENT_OVERSIZE_DIR) &&
01155            !norm_state->param)
01156         {
01157             hi_eo_client_event_log(Session, HI_EO_CLIENT_OVERSIZE_DIR,
01158                                    NULL, NULL);
01159         }
01160     }
01161 
01162     return HI_SUCCESS;
01163 }
01164 
01165 /*
01166 **  NAME
01167 **    InspectUriChar::
01168 */
01169 /**
01170 **  This function inspects the normalized chars for any other processing
01171 **  that we need to do, such as directory traversals.
01172 **  
01173 **  The main things that we check for here are '/' and '?'.  There reason
01174 **  for '/' is that we do directory traversals.  If it's a slash, we call
01175 **  the routine that will normalize mutli-slashes, self-referential dirs,
01176 **  and dir traversals.  We do all that processing here and call the
01177 **  appropriate functions.
01178 **  
01179 **  The '?' is so we can mark the parameter field, and check for oversize
01180 **  directories one last time.  Once the parameter field is set, we don't
01181 **  do any more oversize directory checks since we aren't in the url
01182 **  any more.
01183 **  
01184 **  @param Session      pointer to the current session
01185 **  @param iChar        the char to inspect
01186 **  @param norm_state   the normalization state
01187 **  @param start        the start of the URI buffer
01188 **  @param end          the end of the URI buffer
01189 **  @param ptr          the address of the pointer index into the URI buffer
01190 **  @param ub_start     the start of the norm buffer
01191 **  @param ub_end       the end of the norm buffer
01192 **  @param ub_ptr       the address of the pointer index into the norm buffer
01193 **  
01194 **  @return integer
01195 **  
01196 **  @retval END_OF_BUFFER    we've reached the end of the URI or norm buffer
01197 **  @retval HI_NONFATAL_ERR  no special char, so just write the char and
01198 **                           increment the ub_ptr.
01199 **  @retval HI_SUCCESS       normalized the special char and already
01200 **                           incremented the buffers.
01201 */
01202 static INLINE int InspectUriChar(HI_SESSION *Session, int iChar,
01203                                  URI_NORM_STATE *norm_state,
01204                                  u_char *start, u_char *end, u_char **ptr,
01205                                  u_char *ub_start, u_char *ub_end,
01206                                  u_char **ub_ptr)
01207 {
01208     HTTPINSPECT_CONF *ServerConf = Session->server_conf;
01209     int iDir;
01210 
01211     /*
01212     **  Let's add absolute URI/proxy support everyone.
01213     */
01214     if(!norm_state->dir_count && (u_char)iChar == ':' &&
01215        hi_util_in_bounds(start, end, ((*ptr)+2)))
01216     {
01217         if(**ptr == '/' && *((*ptr)+1) == '/')
01218         {
01219             /*
01220             **  We've found absolute vodka.
01221             */
01222             if(!hi_util_in_bounds(ub_start, ub_end, ((*ub_ptr)+2)))
01223                 return END_OF_BUFFER;
01224 
01225             /*
01226             **  Write the :
01227             */
01228             **ub_ptr = (u_char)iChar;
01229             (*ub_ptr)++;
01230 
01231             /*
01232             **  This increments us past the first slash, so at the next
01233             **  slash we will track a directory.
01234             **
01235             **  The reason we do this is so that an attacker can't trick
01236             **  us into normalizing a directory away that ended in a :.
01237             **  For instance, if we got a URL that was separated in by a
01238             **  packet boundary like this, and we were looking for the
01239             **  URL real_dir:/file.html:
01240             **    real_dir://obfuscate_dir/../file.html
01241             **  we would normalize it with proxy support to:
01242             **    /file.html
01243             **  because we never tracked the :// as a valid directory.  So
01244             **  even though this isn't the best solution, it is the best
01245             **  we can do given that we are working with stateless
01246             **  inspection.
01247             */
01248             (*ptr)++;
01249 
01250             return HI_SUCCESS;
01251         }
01252     }
01253 
01254     /*
01255     **  Now that we have the "true" byte, we check this byte for other
01256     **  types of normalization:
01257     **    -  directory traversals
01258     **    -  multiple slashes
01259     */
01260     if((u_char)iChar == '/')
01261     {
01262         /*
01263         **  First thing we do is check for a long directory.
01264         */
01265         CheckLongDir(Session, norm_state, *ub_ptr);
01266 
01267         iDir = DirNorm(Session, start, end, ptr, norm_state);
01268 
01269         if(iDir == DIR_TRAV)
01270         {
01271             /*
01272             **  This is the case where we have a directory traversal.
01273             **
01274             **  The DirTrav function will reset the ub_ptr to the previous
01275             **  slash.  After that, we just continue through the loop because
01276             **  DirNorm has already set ptr to the slash, so we can just
01277             **  continue on.
01278             */
01279             DirTrav(Session,norm_state, ub_start, ub_ptr);
01280         }
01281         else
01282         {
01283             /*
01284             **  This is the case where we didn't have a directory traversal,
01285             **  and we are now just writing the char after the '/'.
01286             **
01287             **  We call DirSet, because all this function does is write a
01288             **  '/' into the buffer and increment the ub_ptr.  We then
01289             **  check the return code and return END_OF_BUFFER if
01290             **  needed.
01291             */
01292             DirSet(norm_state, ub_ptr);
01293             if(iDir == END_OF_BUFFER)
01294                 return END_OF_BUFFER;
01295 
01296             /*
01297             **  We check the bounds before we write the next byte
01298             */
01299             if(!hi_util_in_bounds(ub_start, ub_end, *ub_ptr))
01300                 return END_OF_BUFFER;
01301             
01302             /*
01303             **  Set the char to what we got in DirNorm()
01304             */
01305             /*
01306             **  Look for user-defined Non-Rfc chars.  If we find them
01307             **  then log an alert.
01308             */
01309             if(ServerConf->non_rfc_chars[(u_char)iDir])
01310             {
01311                 if(hi_eo_generate_event(Session, HI_EO_CLIENT_NON_RFC_CHAR) &&
01312                    !norm_state->param)
01313                 {
01314                     hi_eo_client_event_log(Session, HI_EO_CLIENT_NON_RFC_CHAR,
01315                                            NULL, NULL);
01316                 }
01317             }
01318 
01319             **ub_ptr = (u_char)iDir;
01320             (*ub_ptr)++;
01321         }
01322 
01323         return HI_SUCCESS;
01324     }
01325 
01326     if((u_char)iChar == '?')
01327     {
01328         /*
01329         **  We assume that this is the beginning of the parameter field, 
01330         **  and check for a long directory following.  Event though seeing
01331         **  a question mark does not guarantee the parameter field, thanks
01332         **  IIS.
01333         */
01334         CheckLongDir(Session, norm_state, *ub_ptr);
01335         norm_state->param = *ub_ptr;
01336     }
01337 
01338     /*
01339     **  This is neither char, so we just bail and let the loop finish
01340     **  for us.
01341     */
01342     return HI_NONFATAL_ERR;
01343 }
01344 
01345 /*
01346 **  NAME
01347 **    hi_norm_uri::
01348 */
01349 /**
01350 **  Normalize the URI into the URI normalize buffer.
01351 **  
01352 **  This is the routine that users call to normalize the URI.  It iterates
01353 **  through the URI buffer decoding the next character and is then checked
01354 **  for any directory problems before writing the decoded character into the
01355 **  normalizing buffer.
01356 **  
01357 **  We return the length of the normalized URI buffer in the variable,
01358 **  uribuf_size.  This value is passed in as the max size of the normalization
01359 **  buffer, which we then set in iMaxUriBufSize for later reference.
01360 **  
01361 **  If there was some sort of problem during normalizing we set the normalized
01362 **  URI buffer size to 0 and return HI_NONFATAL_ERR.
01363 **  
01364 **  @param ServerConf   the pointer to the server configuration
01365 **  @param uribuf       the pointer to the normalize uri buffer
01366 **  @param uribuf_size  the size of the normalize buffer
01367 **  @param uri          the pointer to the unnormalized uri buffer
01368 **  @param uri_size     the size of the unnormalized uri buffer
01369 **  
01370 **  @return integer
01371 **  
01372 **  @retval HI_NONFATAL_ERR there was a problem during normalizing, the
01373 **                          uribuf_size is also set to 0
01374 **  @retval HI_SUCCESS      Normalizing the URI was successful
01375 */
01376 int hi_norm_uri(HI_SESSION *Session, u_char *uribuf, int *uribuf_size,
01377                 u_char *uri, int uri_size)
01378 {
01379     HTTPINSPECT_CONF *ServerConf;
01380     int iChar;
01381     int iRet;
01382     int iMaxUriBufSize;
01383     URI_NORM_STATE norm_state;
01384     u_char *ub_ptr;
01385     u_char *ptr;
01386     u_char *start;
01387     u_char *end;
01388     u_char *ub_start;
01389     u_char *ub_end;
01390 
01391     ServerConf = Session->server_conf;
01392 
01393     iMaxUriBufSize = *uribuf_size;
01394 
01395     start = uri;
01396     end   = uri + uri_size;
01397     ub_start = uribuf;
01398     ub_end   = uribuf + iMaxUriBufSize;
01399 
01400     ub_ptr = uribuf;
01401     ptr    = uri;
01402 
01403     /*
01404     **  Initialize the URI directory normalization state
01405     */
01406     norm_state.dir_count = 0;
01407     norm_state.param     = NULL;
01408 
01409     while(hi_util_in_bounds(ub_start, ub_end, ub_ptr))
01410     {
01411 
01412         iChar = GetDecodedByte(Session, start, end, &ptr, &norm_state);
01413         if(iChar == END_OF_BUFFER)
01414             break;
01415 
01416         /*
01417         **  Look for user-defined Non-Rfc chars.  If we find them
01418         **  then log an alert.
01419         */
01420         if(ServerConf->non_rfc_chars[(u_char)iChar])
01421         {
01422             if(hi_eo_generate_event(Session, HI_EO_CLIENT_NON_RFC_CHAR) &&
01423                !norm_state.param)
01424             {
01425                 hi_eo_client_event_log(Session, HI_EO_CLIENT_NON_RFC_CHAR,
01426                                        NULL, NULL);
01427             }
01428         }
01429 
01430         if((iRet=InspectUriChar(Session, iChar, &norm_state, start, end, &ptr,
01431                            ub_start, ub_end, &ub_ptr)))
01432         {
01433             if(iRet == END_OF_BUFFER)
01434                 break;
01435 
01436             /*
01437             **  This is the default case when we don't want anything to do with
01438             **  the char besides writing the value into the buffer.
01439             */
01440             *ub_ptr = (u_char)iChar;
01441             ub_ptr++;
01442         }
01443     }
01444 
01445     /*
01446     **  Now that we are done, let's make sure that we didn't just have a
01447     **  single large directory, with the rest in the next packet.
01448     */
01449     CheckLongDir(Session, &norm_state, ub_ptr);
01450 
01451     /*
01452     **  This means that we got to the end of the URI, so we set the length,
01453     **  check it, and move on.
01454     */
01455     *uribuf_size = ub_ptr - ub_start;
01456 
01457     if(*uribuf_size > uri_size || *uribuf_size < 1)
01458         return HI_NONFATAL_ERR;
01459 
01460     return HI_SUCCESS;
01461 }
01462 
01463 /*
01464 **  NAME
01465 **    hi_norm_init::
01466 */
01467 /**
01468 **  Initialize the arrays neccessary to normalize the HTTP protocol fields.
01469 **  
01470 **  Currently, we set a hex_lookup array where we can convert the hex encoding
01471 **  that we encounter in the URI into numbers we deal with.
01472 **  
01473 **  @param GlobalConf  pointer to the global configuration of HttpInspect
01474 **  
01475 **  @return HI_SUCCESS  function successful
01476 */
01477 int hi_norm_init(HTTPINSPECT_GLOBAL_CONF *GlobalConf)
01478 {
01479     int iCtr;
01480     int iNum;
01481 
01482     memset(hex_lookup, NO_HEX_VAL, sizeof(hex_lookup));
01483     memset(valid_lookup, NO_HEX_VAL, sizeof(valid_lookup));
01484 
01485     /*
01486     **  Set the decimal number values
01487     */
01488     iNum = 0;
01489     for(iCtr = 48; iCtr < 58; iCtr++)
01490     {
01491         hex_lookup[iCtr] = iNum;
01492         valid_lookup[iCtr] = HEX_VAL;
01493         iNum++;
01494     }
01495 
01496     /*
01497     **  Set the upper case values.
01498     */
01499     iNum = 10;
01500     for(iCtr = 65; iCtr < 71; iCtr++)
01501     {
01502         hex_lookup[iCtr] = iNum;
01503         valid_lookup[iCtr] = HEX_VAL;
01504         iNum++;
01505     }
01506 
01507     iNum = 16;
01508     for(iCtr = 71; iCtr < 91; iCtr++)
01509     {
01510         hex_lookup[iCtr] = iNum;
01511         valid_lookup[iCtr] = BASE36_VAL;
01512         iNum++;
01513     }
01514 
01515     /*
01516     **  Set the lower case values.
01517     */
01518     iNum = 10;
01519     for(iCtr = 97; iCtr < 103; iCtr++)
01520     {
01521         hex_lookup[iCtr] = iNum;
01522         valid_lookup[iCtr] = HEX_VAL;
01523         iNum++;
01524     }
01525 
01526     iNum = 16;
01527     for(iCtr = 103; iCtr < 123; iCtr++)
01528     {
01529         hex_lookup[iCtr] = iNum;
01530         valid_lookup[iCtr] = BASE36_VAL;
01531         iNum++;
01532     }
01533 
01534     return HI_SUCCESS;
01535 }
01536 
01537 /*
01538 **  NAME
01539 **    hi_normalization::
01540 */
01541 /**
01542 **  Wrap the logic for normalizing different inspection modes.
01543 **  
01544 **  We call the various normalization modes here, and adjust the appropriate
01545 **  Session constructs.
01546 **  
01547 **  @param Session      pointer to the session structure.
01548 **  @param iInspectMode the type of inspection/normalization to do
01549 **  
01550 **  @return integer
01551 **  
01552 **  @retval HI_SUCCESS      function successful
01553 **  @retval HI_INVALID_ARG  invalid argument
01554 */
01555 int hi_normalization(HI_SESSION *Session, int iInspectMode)
01556 {
01557     int iRet;
01558 
01559     if(!Session)
01560     {
01561         return HI_INVALID_ARG;
01562     }
01563 
01564     /*
01565     **  Depending on the mode, we normalize the packet differently.
01566     **  Currently, we only have normalization routines for the client
01567     **  URI, so that's all we are interested in.
01568     **
01569     **  HI_SI_CLIENT_MODE:
01570     **    Inspect for HTTP client communication.
01571     */
01572     if(iInspectMode == HI_SI_CLIENT_MODE)
01573     {
01574         if((iRet = hi_client_norm((void *)Session)))
01575         {
01576             return iRet;
01577         }
01578     }
01579 
01580     return HI_SUCCESS;
01581 }