1 /*****************************************************************************/
2 /*                                                                           */
3 /*                                AREACODE.C                                 */
4 /*                                                                           */
5 /*     Portable library module to search for an area code in a database.     */
6 /*                                                                           */
7 /*                                                                           */
8 /*                                                                           */
9 /* (C) 1996,97  Ullrich von Bassewitz                                        */
10 /*              Wacholderweg 14                                              */
11 /*              D-70597 Stuttgart                                            */
12 /* EMail:       uz@musoftware.com                                            */
13 /*                                                                           */
14 /*                                                                           */
15 /* This software is provided 'as-is', without any express or implied         */
16 /* warranty.  In no event will the authors be held liable for any damages    */
17 /* arising from the use of this software.                                    */
18 /*                                                                           */
19 /* Permission is granted to anyone to use this software for any purpose,     */
20 /* including commercial applications, and to alter it and redistribute it    */
21 /* freely, subject to the following restrictions:                            */
22 /*                                                                           */
23 /* 1. The origin of this software must not be misrepresented; you must not   */
24 /*    claim that you wrote the original software. If you use this software   */
25 /*    in a product, an acknowledgment in the product documentation would be  */
26 /*    appreciated but is not required.                                       */
27 /* 2. Altered source versions must be plainly marked as such, and must not   */
28 /*    be misrepresented as being the original software.                      */
29 /* 3. This notice may not be removed or altered from any source              */
30 /*    distribution.                                                          */
31 /*                                                                           */
32 /*****************************************************************************/
33 
34 
35 
/*
 * The code assumes
 *      - 8-bit bytes
 *      - unsigned long is 32 bits wide. This may be changed by #defining u32
 *        to a data type that is a 32-bit unsigned integer when compiling
 *        this module.
 *      - the ASCII character set
 *
 * The code does *not* assume
 *      - a specific byte order. Currently the code auto-adjusts to big- or
 *        little-endian data. If you have something more exotic than that,
 *        you have to add conversion code.
 *
 */
49 
50 
51 
52 #include <stdlib.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <limits.h>
56 
57 #include "areacode.h"
58 
59 
60 
61 /*****************************************************************************/
62 /*                          Externally visible data                          */
63 /*****************************************************************************/
64 
65 
66 
/* Path of the area code data file that GetAreaCodeInfo opens. The initial
 * value is DATA_FILENAME if that macro is defined when this module is
 * compiled, and "areacode.dat" otherwise. The fallback name is rarely the
 * right one, so assign the real path to this variable *before* the first
 * call to GetAreaCodeInfo.
 */
#ifdef DATA_FILENAME
#  define AC_DEFAULT_FILENAME   DATA_FILENAME
#else
#  define AC_DEFAULT_FILENAME   "areacode.dat"
#endif
char* acFileName = AC_DEFAULT_FILENAME;
77 
/* Upper limit, in bytes, for the dynamic memory GetAreaCodeInfo may use to
 * hold a part of the area code table. A smaller limit means more disk
 * accesses, a larger one fewer; the function still works when the limit is
 * zero. For maximum performance the function needs 4 bytes per area code
 * stored in the data file. The default is 32 KB.
 */
unsigned long   acMaxMem        = 32UL * 1024UL;
84 
85 
86 
87 /*****************************************************************************/
88 /*                            Data and structures                            */
89 /*****************************************************************************/
90 
91 
92 
/* Pick an unsigned type that is 32 bits wide and call it u32. A definition
 * supplied from outside (e.g. on the compiler command line) always wins.
 * Otherwise the limits from <limits.h> are probed, trying unsigned long,
 * unsigned and unsigned short in that order; if none of them reports a
 * maximum of 2^32-1, unsigned long is used as a last resort. This may break
 * some older (non ISO compliant) compilers.
 */
#if !defined(u32)
#  if defined(ULONG_MAX) && (ULONG_MAX == 4294967295UL)
#    define u32             unsigned long
#  elif defined(UINT_MAX) && (UINT_MAX == 4294967295UL)
#    define u32             unsigned
#  elif defined(USHRT_MAX) && (USHRT_MAX == 4294967295UL)
#    define u32             unsigned short
#  else
#    define u32             unsigned long
#  endif
#endif
115 
/* Data file format version this module accepts (compared against the upper
 * 16 bits of the version word in the file header)
 */
#define acVersion       0x0100

/* Value of the header magic word when it is read without any conversion:
 * LittleMagic if no byte swapping is required, BigMagic if every 32 bit
 * value from the file has to be byte swapped.
 */
#define LittleMagic     0x35465768L
#define BigMagic        0x68574635L

/* Symbolic names for the two supported byte orders */
#define boLittleEndian  0
#define boBigEndian     1

/* The data file itself is stored in little endian (intel) format */
#define FileByteOrder   boLittleEndian
129 
130 /* This is the header data of the data file. It is not used anywhere in
131  * the code, just have a look at it since it describes the layout in the
132  * file.
133  */
134 typedef struct {
135     u32         Magic;
136     u32         Version;        /* Version in hi word, build in lo word */
137     u32         Count;
138     u32         AreaCodeStart;
139     u32         NameIndexStart;
140     u32         NameStart;
141 } PrefixHeader;
142 
143 /* This is what's really used: */
144 typedef struct {
145 
146     /* The file we read from */
147     FILE*       F;
148 
149     /* Machine byte order */
150     unsigned    ByteOrder;
151 
152     /* Stuff from the file header */
153     unsigned    Version;
154     unsigned    Build;
155     u32         Count;
156     u32         AreaCodeStart;
157     u32         NameIndexStart;
158     u32         NameStart;
159 
160     /* Control data */
161     long        First;
162     long        Last;
163     u32*        Table;
164 
165 } AreaCodeDesc;
166 
/* Translation table for translating the DOS code page used in the data file
 * (CP850 according to the original author) to ISO-8859-1. To save some
 * space, the table covers only values > 127, so it has to be indexed with
 * (character code - 128). Characters without an ISO-8859-1 equivalent are
 * replaced by a similar looking ASCII character or by 0xFE.
 *
 * The elements are unsigned char because most of the values do not fit into
 * a signed char.
 *
 * NOTE(review): the entries for 0x9B..0x9F look like CP437 (cent, pound,
 * yen, peseta, florin) rather than CP850 - confirm against the generator of
 * the data file.
 */
#ifdef CHARSET_ISO
static const unsigned char ISOMap [128] = {
    /* 0x80 */  0xC7, 0xFC, 0xE9, 0xE2, 0xE4, 0xE0, 0xE5, 0xE7,
    /* 0x88 */  0xEA, 0xEB, 0xE8, 0xEF, 0xEE, 0xEC, 0xC4, 0xC5,
    /* 0x90 */  0xC9, 0xE6, 0xC6, 0xF4, 0xF6, 0xF2, 0xFB, 0xF9,
    /* 0x98 */  0xFF, 0xD6, 0xDC, 0xA2, 0xA3, 0xA5, 0x50, 0x66,
    /* 0xA0 */  0xE1, 0xED, 0xF3, 0xFA, 0xF1, 0xD1, 0xAA, 0xBA,
    /* 0xA8 */  0xBF, 0x2D, 0xAC, 0xBD, 0xBC, 0xA1, 0xAB, 0xBB,
    /* 0xB0 */  0xFE, 0xFE, 0xFE, 0x7C, 0x2B, 0x2B, 0x2B, 0x2B,
    /* 0xB8 */  0x2B, 0x2B, 0x7C, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B,
    /* 0xC0 */  0x2B, 0x2B, 0x2B, 0x2B, 0x2D, 0x2B, 0x2B, 0x2B,
    /* 0xC8 */  0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2D, 0x2B, 0x2B,
    /* 0xD0 */  0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B,
    /* 0xD8 */  0x2B, 0x2B, 0x2B, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE,
    /* 0xE0 */  0x61, 0xDF, 0x63, 0x70, 0x5A, 0x73, 0xB5, 0x74,
    /* 0xE8 */  0x70, 0x54, 0x4F, 0x64, 0x38, 0x30, 0x65, 0x55,
    /* 0xF0 */  0x3D, 0xB1, 0x3E, 0x3C, 0x66, 0x4A, 0xF7, 0x7E,
    /* 0xF8 */  0xB0, 0xB7, 0xB7, 0x2F, 0x6E, 0xB2, 0xFE, 0xFF
};
#endif
190 
/* Reverse the order of the four low bytes of a 32 bit value, which converts
 * between big endian and little endian representation in either direction.
 * Beware: The macro evaluates its parameter more than once, so do not pass
 * expressions with side effects!
 */
#define _ByteSwap(V)    ((((V) & 0xFF000000) >> 24) |   \
                         (((V) & 0x00FF0000) >>  8) |   \
                         (((V) & 0x0000FF00) <<  8) |   \
                         (((V) & 0x000000FF) << 24))
198 
199 
200 
201 /*****************************************************************************/
202 /*                             Helper functions                              */
203 /*****************************************************************************/
204 
205 
206 
static u32 _ByteSwapIfNeeded (u32 D, unsigned ByteOrder)
/* Return D in the byte order selected by ByteOrder: D is passed through
 * untouched for boLittleEndian and byte swapped for any other value.
 */
{
    if (ByteOrder == boLittleEndian) {
        /* Nothing to do */
        return D;
    }

    /* Everything else is handled as big endian */
    return _ByteSwap (D);
}
216 
217 
218 
static u32 ByteSwapIfNeeded (u32 D, const AreaCodeDesc* Desc)
/* Same as _ByteSwapIfNeeded, but take the byte order from the descriptor */
{
    const unsigned Order = Desc->ByteOrder;
    return _ByteSwapIfNeeded (D, Order);
}
225 
226 
227 
_Load_u32(FILE * F,unsigned ByteOrder)228 static u32 _Load_u32 (FILE* F, unsigned ByteOrder)
229 /* Load an u32 from the current file position and swap it if needed */
230 {
231     u32 D;
232 
233     /* Read the data from the file */
234     fread (&D, sizeof (D), 1, F);
235 
236     /* Swap bytes if needed and return the result */
237     return _ByteSwapIfNeeded (D, ByteOrder);
238 }
239 
240 
241 
Load_u32(const AreaCodeDesc * Desc)242 static u32 Load_u32 (const AreaCodeDesc* Desc)
243 /* Load an u32 from the current file position and swap it if needed */
244 {
245     return _Load_u32 (Desc->F, Desc->ByteOrder);
246 }
247 
248 
249 
static unsigned LoadFileHeader (AreaCodeDesc* Desc)
/* Load the header of the data file Desc->F, starting at the current file
 * position, and fill in the ByteOrder, Version, Build, Count, AreaCodeStart,
 * NameIndexStart and NameStart fields of Desc.
 *
 * Returns
 *      acOk            if the header was read and looks sane
 *      acFileError     if a read error occurred
 *      acInvalidFile   if the file is too short, has an unknown magic word
 *                      or contains no area codes
 *      acWrongVersion  if the file version is not acVersion
 */
{
    u32 Version;

    /* Load the magic word in the format used in the file (do not convert) */
    u32 Magic = _Load_u32 (Desc->F, FileByteOrder);

    /* Do not look at the magic word if it could not be read: Report a read
     * error as such, and treat a file that is too short as invalid.
     */
    if (ferror (Desc->F)) {
        return acFileError;
    }
    if (feof (Desc->F)) {
        return acInvalidFile;
    }

    /* Check what we got from the file, determine the byte order */
    switch (Magic) {

        case BigMagic:
            /* The bytes arrived reversed, so all values need swapping */
            Desc->ByteOrder = boBigEndian;
            break;

        case LittleMagic:
            Desc->ByteOrder = boLittleEndian;
            break;

        default:
            /* OOPS - the file is probably not a valid data file */
            return acInvalidFile;

    }

    /* Now read the rest of the header data */
    Version               = Load_u32 (Desc);
    Desc->Version         = (Version >> 16);
    Desc->Build           = (Version & 0xFFFF);
    Desc->Count           = Load_u32 (Desc);
    Desc->AreaCodeStart   = Load_u32 (Desc);
    Desc->NameIndexStart  = Load_u32 (Desc);
    Desc->NameStart       = Load_u32 (Desc);

    /* Check for some error conditions */
    if (ferror (Desc->F)) {
        /* Some sort of file problem */
        return acFileError;
    } else if (feof (Desc->F) || Desc->Count == 0) {
        /* This should not happen on a valid file */
        return acInvalidFile;
    } else if (Desc->Version != acVersion) {
        return acWrongVersion;
    } else {
        /* Data is sane */
        return acOk;
    }
}
298 
299 
300 
EncodeNumber(const char * Phone)301 static u32 EncodeNumber (const char* Phone)
302 /* Encode the number we got from the caller into the internally used BCD
303  * format.
304  */
305 {
306     unsigned I;
307     unsigned Len;
308     u32 P = 0;          /* Initialize to make gcc happy */
309 
310     /* Get the amount of characters to convert */
311     Len = strlen (Phone);
312     if (Len > 8) {
313         Len = 8;
314     }
315 
316     /* Convert the characters */
317     for (I = 0; I < Len; I++) {
318         P = (P << 4) | ((unsigned) ((unsigned char) Phone [I]) & 0x0F);
319     }
320 
321     /* Fill the rest of the number with 0x0F */
322     I = 8 - Len;
323     while (I--) {
324         P = (P << 4) | 0x0F;
325     }
326 
327     /* Done - return the result */
328     return P;
329 }
330 
331 
332 
ReadPhone(const AreaCodeDesc * Desc,long Index)333 static u32 ReadPhone (const AreaCodeDesc* Desc, long Index)
334 /* Read the phone number that is located at the given index. If we have a
335  * part of the table already loaded into memory, use the memory copy, else
336  * read the phone number from disk.
337  */
338 {
339     if (Desc->Table && Index >= Desc->First && Index <= Desc->Last) {
340         /* Use the already loaded table, but don't forget to swap bytes */
341         return ByteSwapIfNeeded (Desc->Table [Index - Desc->First], Desc);
342     } else {
343         /* Load the value from the file */
344         fseek (Desc->F, Desc->AreaCodeStart + Index * sizeof (u32), SEEK_SET);
345         return Load_u32 (Desc);
346     }
347 }
348 
349 
350 
static void LoadTable (AreaCodeDesc* Desc)
/* Try to load the entries Desc->First..Desc->Last of the area code table
 * into a newly allocated memory block. The entries are stored as found in
 * the file, without byte swapping.
 *
 * On success, Desc->Table points to the block and the caller is responsible
 * for freeing it. If the range is empty, memory is short, or the data
 * cannot be read completely, Desc->Table is set to 0. This is not an error:
 * the table is only needed to speed things up, and entries can still be
 * read from the file one at a time.
 */
{
    size_t Count;
    u32*   Table;

    /* Nothing is loaded unless everything below succeeds */
    Desc->Table = 0;

    /* Reject an empty or inverted range */
    if (Desc->Last < Desc->First) {
        return;
    }
    Count = (size_t) (Desc->Last - Desc->First) + 1;

    Table = (u32*) malloc (Count * sizeof (u32));
    if (Table == 0) {
        /* Out of memory. The memory requirement halves with each iteration
         * of the search, so the caller may have more luck next time.
         */
        return;
    }

    /* Seek to the correct position in the file and read the data. Never
     * hand out a partially filled table, since the search would use its
     * uninitialized entries.
     */
    if (fseek (Desc->F, Desc->AreaCodeStart + Desc->First * sizeof (u32),
               SEEK_SET) != 0 ||
        fread (Table, sizeof (u32), Count, Desc->F) != Count) {
        free (Table);
        return;
    }

    Desc->Table = Table;
}
371 
372 
373 
static unsigned CalcCodeLen (u32 Code)
/* Return the number of characters in an encoded area code. This is the
 * count of nibbles, starting at the most significant one, that precede the
 * first 0x0F filler nibble (8 if there is no filler at all).
 */
{
    unsigned Digits = 0;
    u32      Nibble = 0xF0000000L;

    /* Walk down nibble by nibble until a filler or the end is reached */
    while (Nibble != 0 && (Code & Nibble) != Nibble) {
        Digits++;
        Nibble >>= 4;
    }

    return Digits;
}
389 
390 
391 
392 /*****************************************************************************/
393 /*                                   Code                                    */
394 /*****************************************************************************/
395 
396 
397 
unsigned GetAreaCodeInfo (acInfo* AC, const char* PhoneNumber)
/* Return - if possible - information about the area code of the given
 * number. The data file named by acFileName is opened, searched and closed
 * again on every call.
 *
 * The function returns one of the error codes defined in areacode.h. If the
 * returned value is acOk and an area code was found, AC->AreaCodeLen is the
 * number of characters of PhoneNumber that make up the area code and
 * AC->Info is the zero terminated info string stored for it. If there was
 * no error but no area code corresponds to the given number (or the number
 * is empty or a null pointer), the function returns acOk with an empty Info
 * field and an AreaCodeLen of zero. On any other return code, AC is left in
 * this cleared state as well.
 *
 * A failure to open, position or read the file gives acFileError; a file
 * that ends before the requested data gives acInvalidFile.
 */
{
    u32           Phone;                /* PhoneNumber encoded in BCD */
    long          First, Last, Current; /* For binary search */
    u32           CurrentVal;           /* The value at Table [Current] */
    unsigned      AreaCodeLen;          /* The length of the area code found */
    unsigned      Shift;                /* Bits not covered by the area code */
    unsigned char InfoLen = 0;          /* Length of info string */
    size_t        InfoRead;             /* Info bytes actually read */
    u32           NameOffset;           /* Offset of the name in the names */
    unsigned      RC = acOk;            /* Result code of the function */
    u32           Mask;
    AreaCodeDesc  Desc;


    /* Clear the fields of the AC struct. Also write a zero to the last
     * element of Info, so the string is terminated even if some problem
     * prevents the code below from executing correctly.
     */
    AC->Info [0]  = '\0';
    AC->Info [sizeof (AC->Info) - 1] = '\0';
    AC->AreaCodeLen = 0;

    /* If there is no number or the number is empty, return immediately */
    if (PhoneNumber == 0 || PhoneNumber [0] == '\0') {
        return acOk;
    }

    /* Open the database file, check for errors */
    Desc.F = fopen (acFileName, "rb");
    if (Desc.F == 0) {
        /* We had an error opening the file */
        return acFileError;
    }

    /* Initialize descriptor data where needed (no table loaded yet) */
    Desc.Table = 0;
    Desc.First = 0;
    Desc.Last  = -1;

    /* Read the header from the file */
    RC = LoadFileHeader (&Desc);
    if (RC != acOk) {
        /* Wrong file or file read error */
        goto ExitWithClose;
    }

    /* Convert the phone number into the internal representation */
    Phone = EncodeNumber (PhoneNumber);

    /* The loop below runs at least once since Count is not zero; the
     * assignments keep compilers from warning about uninitialized use.
     */
    Current    = 0;
    CurrentVal = 0;

    /* Now do a binary search over the data */
    First   = 0;
    Last    = (long) Desc.Count - 1;
    while (First <= Last) {

        /* If we have not read the table into memory yet, check if the
         * remaining range is small enough to do so now. Comparing entry
         * counts instead of byte counts avoids an overflow for huge tables.
         */
        if (Desc.Table == 0) {
            unsigned long Entries = (unsigned long) (Last - First) + 1;
            if (Entries <= acMaxMem / sizeof (u32)) {
                Desc.First = First;
                Desc.Last  = Last;
                LoadTable (&Desc);
            }
        }

        /* Set current to mid of range */
        Current = First + (Last - First) / 2;

        /* Get the phone number from that place */
        CurrentVal = ReadPhone (&Desc, Current);

        /* Do a compare */
        if (Phone > CurrentVal) {
            First = Current + 1;
        } else {
            Last = Current - 1;
            if (Phone == CurrentVal) {
                /* Set the condition to terminate the loop */
                First = Current;
            }
        }
    }

    /* First is the index of the first entry that is not less than Phone.
     * Put the index into Current and the value into CurrentVal. If First
     * points behind the last entry, there is no value to read.
     */
    if (Current != First) {
        Current = First;
        if (Current < (long) Desc.Count) {
            CurrentVal = ReadPhone (&Desc, Current);
        }
    }

    /* The table space (if any) is not needed any more */
    free (Desc.Table);
    Desc.Table = 0;

    /* A read error during the search makes its result meaningless */
    if (ferror (Desc.F)) {
        RC = acFileError;
        goto ExitWithClose;
    }

    /* If Current points behind the last entry, we did not find anything */
    if (Current >= (long) Desc.Count) {
        /* Not found */
        goto ExitWithClose;
    }

    /* Calculate the length of the area code. An entry without any digits
     * cannot be the area code of a number; bailing out here also keeps the
     * shift count below in the valid range 0..28.
     */
    AreaCodeLen = CalcCodeLen (CurrentVal);
    if (AreaCodeLen == 0) {
        /* Not found */
        goto ExitWithClose;
    }

    /* Check if the prefix is actually the first part of the phone number */
    Shift = (8 - AreaCodeLen) * 4;
    Mask  = (u32) ((0xFFFFFFFFUL << Shift) & 0xFFFFFFFFUL);
    if ((Phone & Mask) != (CurrentVal & Mask)) {
        /* They are different */
        goto ExitWithClose;
    }

    /* Ok, we have now definitely found the code. Current is the index of
     * the area code. Seek to the corresponding position in the name index,
     * get the name position from there and seek to that place.
     */
    if (fseek (Desc.F, (long) (Desc.NameIndexStart + Current * sizeof (u32)),
               SEEK_SET) != 0) {
        RC = acFileError;
        goto ReadFailed;
    }
    NameOffset = Load_u32 (&Desc);
    if (ferror (Desc.F) || feof (Desc.F)) {
        goto ReadFailed;
    }
    if (fseek (Desc.F, (long) (Desc.NameStart + NameOffset), SEEK_SET) != 0) {
        RC = acFileError;
        goto ReadFailed;
    }

    /* Read the length of the name. Never accept more than the info field
     * can hold together with the trailing zero.
     */
    if (fread (&InfoLen, 1, 1, Desc.F) != 1) {
        goto ReadFailed;
    }
    if (InfoLen > sizeof (AC->Info) - 1) {
        InfoLen = (unsigned char) (sizeof (AC->Info) - 1);
    }

    /* Read the info into the result struct and terminate it after the last
     * character that was actually read.
     */
    InfoRead = fread (AC->Info, 1, InfoLen, Desc.F);
    AC->Info [InfoRead] = '\0';
    if (InfoRead != InfoLen) {
        goto ReadFailed;
    }

#ifdef CHARSET_ISO
    /* Translate the info to the ISO-8859-1 charset */
    {
        size_t I;
        for (I = 0; I < InfoRead; I++) {
            unsigned char C = (unsigned char) AC->Info [I];
            if (C >= 128) {
                AC->Info [I] = (char) ISOMap [C - 128];
            }
        }
    }
#endif

    /* Everything is in place, publish the length of the area code */
    AC->AreaCodeLen = AreaCodeLen;
    goto ExitWithClose;

ReadFailed:
    /* The name could not be retrieved. Return an empty result and tell a
     * real I/O error from a file that is just too short.
     */
    AC->Info [0] = '\0';
    if (RC == acOk) {
        RC = ferror (Desc.F) ? acFileError : acInvalidFile;
    }

ExitWithClose:
    /* Close the data file */
    fclose (Desc.F);

    /* Done, return the result */
    return RC;
}
565 
566 
567 
568