Main Page | Modules | Class List | Directories | File List | Class Members | File Members | Related Pages

hi_ui_iis_unicode_map.c

Go to the documentation of this file.
00001 /**
00002 **  @file       hi_ui_iis_unicode_map.c
00003 **  
00004 **  @author     Daniel Roelker <droelker@atlas.cs.cuc.edu>
00005 **  
00006 **  @brief      Functions for parsing the unicode map file
00007 **  
00008 **  This file contains the routines for parsing generated IIS unicode
00009 **  maps.  We open the map file, find where the requested codepage is
00010 **  located in it, convert that codepage's codepoint mappings, and store
00011 **  them in the supplied array.
00012 **  
00013 **  NOTES
00014 **    -  Initial development.  DJR
00015 */
00016 
00017 #ifdef HAVE_CONFIG_H
00018 #include "config.h"
00019 #endif
00020 
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <sys/types.h>
00025 
00026 #include "hi_ui_config.h"
00027 #include "hi_ui_iis_unicode_map.h"
00028 #include "hi_util_xmalloc.h"
00029 #include "hi_return_codes.h"
00030 
00031 #define MAX_BUFFER 50000  /* size in bytes of the line buffer FindCodePage reads into */
00032 #define CODEPAGE_SEPARATORS  " \t\n\r"  /* whitespace delimiters used to take the first token of a line or chunk */
00033 #define CODEPOINT_SEPARATORS ": \n\r"  /* delimiters that split one codepoint:ascii entry into its two fields */
00034 
00035 /*
00036 **  NAME
00037 **    FindCodePage::
00038 */
00039 /**
00040 **  Locate the codepage mapping the IIS Unicode Map file.
00041 **  
00042 **  We iterate through the file lines until we get to the codepage
00043 **  reference.  We then return that it was found successfully, and 
00044 **  the FILE pointer is located on the codepoint mapping line.
00045 **  
00046 **  @param fFile     the codemap file pointer
00047 **  @param iCodePage the codepage number
00048 **  
00049 **  @return int
00050 **  
00051 **  @retval HI_FATAL_ERR  Did not find the codepage listing.
00052 **  @retval HI_SUCCESS    function successful
00053 */
00054 static int FindCodePage(FILE *fFile, int iCodePage)
00055 {
00056     char buffer[MAX_BUFFER];
00057     char *pcToken;
00058     int  iCodePageTest;
00059     char *pcEnd;
00060     char *pcPtr;
00061 
00062     while(fgets(buffer, MAX_BUFFER, fFile))
00063     {
00064         pcToken = strtok_r(buffer, CODEPAGE_SEPARATORS, &pcPtr);
00065         if(!pcToken)
00066             continue;
00067 
00068         if(pcToken[0] == '#')
00069             continue;
00070 
00071         /*
00072         **  Is this a codepage or the beginning of a codemap
00073         */
00074         if(strchr(pcToken, ':'))
00075             continue;
00076 
00077         /*
00078         **  So we now have the beginning of a codepage number
00079         */
00080         iCodePageTest = strtol(pcToken, &pcEnd, 10);
00081         if(*pcEnd)
00082             continue;
00083 
00084         if(iCodePageTest == iCodePage)
00085             return HI_SUCCESS;
00086     }
00087 
00088     return HI_FATAL_ERR;
00089 }
00090 
00091 /*
00092 **  NAME
00093 **    MapCodePoints::
00094 */
00095 /**
00096 **  Read the codepoint mapping and covert to codepoint and ASCII.
00097 **  
00098 **  This is where the bulk of the work is done.  We read in 9 bytes at a time
00099 **  because the mappings are in chunks of 8 (+1 for the NULL at the end).  The
00100 **  chunks are as follows:
00101 **  
00102 **  xxxx:xx (the first set of 4 is the codepoint, the second set is the ASCII
00103 **  representation)
00104 **  
00105 **  We then convert and check these values before storing them in the
00106 **  supplied array.
00107 **  
00108 **  @param fFile           the unicode map file pointer
00109 **  @param iis_unicode_map the array to store the mappings in
00110 **  
00111 **  @return integer
00112 **  
00113 **  @retval HI_FATAL_ERR there was an error while parsing the file
00114 **  @retval HI_SUCCESS   function was successful
00115 */
00116 static int MapCodePoints(FILE *fFile, int *iis_unicode_map)
00117 {
00118     char buffer[9];
00119     char *pcPtr;
00120     char *pcEnd;
00121     char *pcToken;
00122     char *pcCodePoint;
00123     char *pcAsciiMap;
00124     int  iCodePoint;
00125     int  iAsciiMap;
00126     
00127 
00128     /*
00129     **  We should now be pointing to the beginning of the codemap area for
00130     **  the selected codepage.
00131     */
00132     while(fgets(buffer, 9, fFile))
00133     {
00134         pcToken = strtok_r(buffer, CODEPAGE_SEPARATORS, &pcPtr);
00135         if(!pcToken)
00136         {
00137             return HI_SUCCESS;
00138         }
00139 
00140         pcCodePoint = strtok_r(pcToken, CODEPOINT_SEPARATORS, &pcPtr);
00141         if(!pcCodePoint)
00142             return HI_FATAL_ERR;
00143 
00144         pcAsciiMap = strtok_r(NULL, CODEPOINT_SEPARATORS, &pcPtr);
00145         if(!pcAsciiMap)
00146             return HI_FATAL_ERR;
00147 
00148         iCodePoint = strtol(pcCodePoint, &pcEnd, 16);
00149         if(*pcEnd)
00150         {
00151             return HI_FATAL_ERR;
00152         }
00153 
00154         if(iCodePoint < 0 || iCodePoint > 65535)
00155         {
00156             return HI_FATAL_ERR;
00157         }
00158 
00159         iAsciiMap = strtol(pcAsciiMap, &pcEnd, 16);
00160         if(*pcEnd)
00161         {
00162             return HI_FATAL_ERR;
00163         }
00164 
00165         if(iAsciiMap < 0 || iAsciiMap > 0x7f)
00166         {
00167             return HI_FATAL_ERR;
00168         }
00169 
00170         iis_unicode_map[iCodePoint] = iAsciiMap;
00171 
00172         //printf("** iis_unicode_map[%s] = %s\n", pcCodePoint, pcAsciiMap);
00173         //printf("** iis_unicode_map[%.2x] = %.2x\n", iCodePoint, 
00174         //       (u_char)iAsciiMap);
00175     }
00176 
00177     return HI_FATAL_ERR;
00178 }
00179 
00180 /*
00181 **  NAME
00182 **    hi_ui_parse_iis_unicode_map::
00183 */
00184 /**
00185 **  Parses an IIS Unicode Map file and store in the supplied array.
00186 **  
00187 **  This routine allocates the necessary memory to store the array values
00188 **  in, and parses the supplied filename.
00189 **  
00190 **  @param iis_unicode_map  double pointer so we can allocate the memory
00191 **  @param filename         the name of the file to open and parse
00192 **  @param iCodePage        the codpage number to read the mappings from
00193 **  
00194 **  @return integer
00195 **  
00196 **  @retval HI_INVALID ARG     invalid argument
00197 **  @retval HI_MEM_ALLOC_FAIL  memory allocation failed
00198 **  @retval HI_INVALID_FILE    Could not open the supplied filename
00199 **  @retval HI_SUCCESS         function was successful
00200 */
00201 int hi_ui_parse_iis_unicode_map(int **iis_unicode_map, char *filename,
00202                                 int iCodePage)
00203 {
00204     int  iRet;
00205     FILE *fFile;
00206 
00207     if(!filename || iCodePage < 0)
00208     {
00209         return HI_INVALID_ARG;
00210     }
00211 
00212     fFile = fopen(filename, "r");
00213     if(fFile == NULL)
00214     {
00215         /*
00216         **  Couldn't open the file
00217         */
00218         return HI_INVALID_FILE;
00219     }
00220 
00221     *iis_unicode_map = (int *)xmalloc(sizeof(int) * 65536);
00222     if(*iis_unicode_map == NULL)
00223     {
00224         return HI_MEM_ALLOC_FAIL;
00225     }
00226 
00227     memset(*iis_unicode_map, HI_UI_NON_ASCII_CODEPOINT, (sizeof(int)*65536));
00228 
00229     /*
00230     **  Find the correct codepage
00231     */
00232     if((iRet = FindCodePage(fFile, iCodePage)))
00233     {
00234         //printf("** Did not find codepage\n");
00235         return iRet;
00236     }
00237 
00238     if((iRet = MapCodePoints(fFile, *iis_unicode_map)))
00239     {
00240         //printf("** Error while parsing codepage.\n");
00241         return iRet;
00242     }
00243 
00244     return HI_SUCCESS;
00245 }

Generated on Sun May 14 14:51:15 2006 by  doxygen 1.4.2