1 /*****************************************************************************/
2 /* */
3 /* AREACODE.C */
4 /* */
5 /* Portable library module to search for an area code in a database. */
6 /* */
7 /* */
8 /* */
9 /* (C) 1996,97 Ullrich von Bassewitz */
10 /* Wacholderweg 14 */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@musoftware.com */
13 /* */
14 /* */
15 /* This software is provided 'as-is', without any express or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
18 /* */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
22 /* */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
30 /* distribution. */
31 /* */
32 /*****************************************************************************/
33
34
35
36 /*
37 * The code assumes
38 * - 8 bit bytes
 * - unsigned long is 32 bit. This may be changed by #defining u32 to
 *   an unsigned 32 bit data type when compiling this module.
41 * - ascii character set
42 *
43 * The code does *not* assume
44 * - a specific byte order. Currently the code autoadjusts to big or
45 * little endian data. If you have something more weird than that,
46 * you have to add conversion code.
47 *
48 */
49
50
51
52 #include <stdlib.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <limits.h>
56
57 #include "areacode.h"
58
59
60
61 /*****************************************************************************/
62 /* Externally visible data */
63 /*****************************************************************************/
64
65
66
/* Name of the area code data file that GetAreaCodeInfo opens. When the
 * module is compiled with DATA_FILENAME #defined, that value is the default;
 * otherwise the default is the plain name "areacode.dat", which is probably
 * not what you want. In that case, assign the correct filename to this
 * variable *before* the first call to GetAreaCodeInfo.
 */
#ifndef DATA_FILENAME
char* acFileName = "areacode.dat";
#else
char* acFileName = DATA_FILENAME;
#endif
77
/* Upper limit (in bytes) for the dynamic memory GetAreaCodeInfo may use to
 * hold a part of the area code table. Less memory means more disk access and
 * vice versa; a value of zero is allowed and disables the in-memory table.
 * For maximum performance, 4 bytes per area code stored in the data file are
 * needed. The default is 32KB.
 */
unsigned long acMaxMem = 32UL * 1024UL;
84
85
86
87 /*****************************************************************************/
88 /* Data and structures */
89 /*****************************************************************************/
90
91
92
/* Define u32 as an unsigned quantity with 32 bits unless the user has already
 * #defined it. The first of unsigned long, unsigned and unsigned short whose
 * maximum value (taken from limits.h) is exactly 2^32-1 is chosen. If none
 * matches, unsigned long is used as a last resort. This may break some older
 * (non ISO compliant) compilers.
 */
#ifndef u32
#  if defined(ULONG_MAX) && (ULONG_MAX == 4294967295UL)
#    define u32 unsigned long
#  elif defined(UINT_MAX) && (UINT_MAX == 4294967295UL)
#    define u32 unsigned
#  elif defined(USHRT_MAX) && (USHRT_MAX == 4294967295UL)
#    define u32 unsigned short
#  else
#    define u32 unsigned long
#  endif
#endif
115
/* Data file version supported by this module (compared against the high
 * word of the version field in the file header).
 */
#define acVersion       0x100

/* The magic word as it appears when read in little and big endian format */
#define LittleMagic     0x35465768L
#define BigMagic        0x68574635L

/* Identifiers for the byte ordering */
enum {
    boLittleEndian = 0,
    boBigEndian    = 1
};

/* The data file itself is stored in little endian (intel) format */
#define FileByteOrder   boLittleEndian
129
130 /* This is the header data of the data file. It is not used anywhere in
131 * the code, just have a look at it since it describes the layout in the
132 * file.
133 */
134 typedef struct {
135 u32 Magic;
136 u32 Version; /* Version in hi word, build in lo word */
137 u32 Count;
138 u32 AreaCodeStart;
139 u32 NameIndexStart;
140 u32 NameStart;
141 } PrefixHeader;
142
143 /* This is what's really used: */
144 typedef struct {
145
146 /* The file we read from */
147 FILE* F;
148
149 /* Machine byte order */
150 unsigned ByteOrder;
151
152 /* Stuff from the file header */
153 unsigned Version;
154 unsigned Build;
155 u32 Count;
156 u32 AreaCodeStart;
157 u32 NameIndexStart;
158 u32 NameStart;
159
160 /* Control data */
161 long First;
162 long Last;
163 u32* Table;
164
165 } AreaCodeDesc;
166
167 /* Translation table for translation CP850 --> ISO-8859-1. To save some space,
168 * the table covers only values > 127
169 */
170 #ifdef CHARSET_ISO
/* Maps source characters 0x80..0xFF (index = character - 128) to ISO-8859-1.
 * Characters without an ISO-8859-1 counterpart are approximated by a similar
 * looking ASCII character or replaced by 0xFE. The table is read-only and
 * holds values above 127, so it is declared const unsigned char instead of
 * relying on the implementation-defined conversion to plain char.
 *
 * Fixed entries: 0x96 (u circumflex) now maps to 0xFB instead of 0xFC (u
 * umlaut), and 0xAB (one half) now maps to 0xBD instead of 0xC6 (AE).
 *
 * NOTE(review): entries 0x9B..0x9F follow the CP437 layout (cent, pound, yen,
 * Pts, f) rather than CP850 -- confirm which code page the data file uses.
 */
static const unsigned char ISOMap [128] = {
    0xC7, 0xFC, 0xE9, 0xE2, 0xE4, 0xE0, 0xE5, 0xE7,     /* 0x80 - 0x87 */
    0xEA, 0xEB, 0xE8, 0xEF, 0xEE, 0xEC, 0xC4, 0xC5,     /* 0x88 - 0x8F */
    0xC9, 0xE6, 0xC6, 0xF4, 0xF6, 0xF2, 0xFB, 0xF9,     /* 0x90 - 0x97 */
    0xFF, 0xD6, 0xDC, 0xA2, 0xA3, 0xA5, 0x50, 0x66,     /* 0x98 - 0x9F */
    0xE1, 0xED, 0xF3, 0xFA, 0xF1, 0xD1, 0xAA, 0xBA,     /* 0xA0 - 0xA7 */
    0xBF, 0x2D, 0xAC, 0xBD, 0xBC, 0xA1, 0xAB, 0xBB,     /* 0xA8 - 0xAF */
    0xFE, 0xFE, 0xFE, 0x7C, 0x2B, 0x2B, 0x2B, 0x2B,     /* 0xB0 - 0xB7 */
    0x2B, 0x2B, 0x7C, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B,     /* 0xB8 - 0xBF */
    0x2B, 0x2B, 0x2B, 0x2B, 0x2D, 0x2B, 0x2B, 0x2B,     /* 0xC0 - 0xC7 */
    0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2D, 0x2B, 0x2B,     /* 0xC8 - 0xCF */
    0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B,     /* 0xD0 - 0xD7 */
    0x2B, 0x2B, 0x2B, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE,     /* 0xD8 - 0xDF */
    0x61, 0xDF, 0x63, 0x70, 0x5A, 0x73, 0xB5, 0x74,     /* 0xE0 - 0xE7 */
    0x70, 0x54, 0x4F, 0x64, 0x38, 0x30, 0x65, 0x55,     /* 0xE8 - 0xEF */
    0x3D, 0xB1, 0x3E, 0x3C, 0x66, 0x4A, 0xF7, 0x7E,     /* 0xF0 - 0xF7 */
    0xB0, 0xB7, 0xB7, 0x2F, 0x6E, 0xB2, 0xFE, 0xFF      /* 0xF8 - 0xFF */
};
189 #endif
190
/* Reverse the order of the four bytes of a 32 bit value, converting between
 * big endian and little endian format in either direction.
 * Beware: The macro evaluates its parameter more than once!
 */
#define _ByteSwap(Val)  ((((Val) & 0xFF000000) >> 24) |         \
                         (((Val) & 0x00FF0000) >>  8) |         \
                         (((Val) & 0x0000FF00) <<  8) |         \
                         (((Val) & 0x000000FF) << 24))
198
199
200
201 /*****************************************************************************/
202 /* Helper functions */
203 /*****************************************************************************/
204
205
206
static u32 _ByteSwapIfNeeded (u32 D, unsigned ByteOrder)
/* Return D in the byte order requested by ByteOrder: unchanged for
 * boLittleEndian, with all four bytes reversed for any other value.
 */
{
    if (ByteOrder == boLittleEndian) {
        /* Nothing to do */
        return D;
    }

    /* Everything else is treated as big endian */
    return _ByteSwap (D);
}
216
217
218
static u32 ByteSwapIfNeeded (u32 D, const AreaCodeDesc* Desc)
/* Same as _ByteSwapIfNeeded, but take the byte order from the descriptor */
{
    const unsigned Order = Desc->ByteOrder;

    return _ByteSwapIfNeeded (D, Order);
}
225
226
227
_Load_u32(FILE * F,unsigned ByteOrder)228 static u32 _Load_u32 (FILE* F, unsigned ByteOrder)
229 /* Load an u32 from the current file position and swap it if needed */
230 {
231 u32 D;
232
233 /* Read the data from the file */
234 fread (&D, sizeof (D), 1, F);
235
236 /* Swap bytes if needed and return the result */
237 return _ByteSwapIfNeeded (D, ByteOrder);
238 }
239
240
241
Load_u32(const AreaCodeDesc * Desc)242 static u32 Load_u32 (const AreaCodeDesc* Desc)
243 /* Load an u32 from the current file position and swap it if needed */
244 {
245 return _Load_u32 (Desc->F, Desc->ByteOrder);
246 }
247
248
249
static unsigned LoadFileHeader (AreaCodeDesc* Desc)
/* Load the header of the data file in Desc->F and fill the ByteOrder,
 * Version, Build, Count, AreaCodeStart, NameIndexStart and NameStart fields
 * of Desc. Return one of the acXXX codes:
 *
 *   acOk            The header was read and looks sane.
 *   acFileError     The stream reported a read error.
 *   acInvalidFile   Unknown magic word, truncated header or no entries.
 *   acWrongVersion  The file version is not acVersion.
 */
{
    u32 Version;

    /* Load the magic word in the format used in the file (do not convert) */
    u32 Magic = _Load_u32 (Desc->F, FileByteOrder);

    /* Look at the stream state before interpreting the magic word, so a read
     * error is reported as such and not as an invalid file.
     */
    if (ferror (Desc->F)) {
        return acFileError;
    }
    if (feof (Desc->F)) {
        /* Not even a complete magic word */
        return acInvalidFile;
    }

    /* Check what we got from the file, determine the byte order. Since the
     * value was not converted, seeing the swapped magic means that all values
     * must be byte swapped after loading.
     */
    switch (Magic) {

        case BigMagic:
            Desc->ByteOrder = boBigEndian;
            break;

        case LittleMagic:
            Desc->ByteOrder = boLittleEndian;
            break;

        default:
            /* OOPS - the file is probably not a valid data file */
            return acInvalidFile;

    }

    /* Now read the rest of the header data */
    Version              = Load_u32 (Desc);
    Desc->Version        = (Version >> 16);
    Desc->Build          = (Version & 0xFFFF);
    Desc->Count          = Load_u32 (Desc);
    Desc->AreaCodeStart  = Load_u32 (Desc);
    Desc->NameIndexStart = Load_u32 (Desc);
    Desc->NameStart      = Load_u32 (Desc);

    /* Check for some error conditions */
    if (ferror (Desc->F)) {
        /* Some sort of file problem */
        return acFileError;
    } else if (feof (Desc->F) || Desc->Count == 0) {
        /* This should not happen on a valid file */
        return acInvalidFile;
    } else if (Desc->Version != acVersion) {
        return acWrongVersion;
    } else {
        /* Data is sane */
        return acOk;
    }
}
298
299
300
EncodeNumber(const char * Phone)301 static u32 EncodeNumber (const char* Phone)
302 /* Encode the number we got from the caller into the internally used BCD
303 * format.
304 */
305 {
306 unsigned I;
307 unsigned Len;
308 u32 P = 0; /* Initialize to make gcc happy */
309
310 /* Get the amount of characters to convert */
311 Len = strlen (Phone);
312 if (Len > 8) {
313 Len = 8;
314 }
315
316 /* Convert the characters */
317 for (I = 0; I < Len; I++) {
318 P = (P << 4) | ((unsigned) ((unsigned char) Phone [I]) & 0x0F);
319 }
320
321 /* Fill the rest of the number with 0x0F */
322 I = 8 - Len;
323 while (I--) {
324 P = (P << 4) | 0x0F;
325 }
326
327 /* Done - return the result */
328 return P;
329 }
330
331
332
ReadPhone(const AreaCodeDesc * Desc,long Index)333 static u32 ReadPhone (const AreaCodeDesc* Desc, long Index)
334 /* Read the phone number that is located at the given index. If we have a
335 * part of the table already loaded into memory, use the memory copy, else
336 * read the phone number from disk.
337 */
338 {
339 if (Desc->Table && Index >= Desc->First && Index <= Desc->Last) {
340 /* Use the already loaded table, but don't forget to swap bytes */
341 return ByteSwapIfNeeded (Desc->Table [Index - Desc->First], Desc);
342 } else {
343 /* Load the value from the file */
344 fseek (Desc->F, Desc->AreaCodeStart + Index * sizeof (u32), SEEK_SET);
345 return Load_u32 (Desc);
346 }
347 }
348
349
350
static void LoadTable (AreaCodeDesc* Desc)
/* Try to load the entries Desc->First .. Desc->Last of the area code table
 * into memory. On success, Desc->Table points to a malloc'ed copy of the raw
 * (not byte swapped) file data. On any failure - out of memory, seek error or
 * incomplete read - Desc->Table is set to 0, so callers fall back to reading
 * single entries from the file instead of using uninitialized memory.
 */
{
    size_t Count       = (size_t) (Desc->Last - Desc->First + 1);
    size_t SpaceNeeded = Count * sizeof (u32);
    u32*   Table       = malloc (SpaceNeeded);

    if (Table == 0) {
        /* Out of memory. There is no problem with this now since we do
         * not really need the table in core memory (it speeds things up,
         * that's all). In addition to that, the memory requirement halves
         * with each iteration, so maybe we have more luck next time.
         */
        Desc->Table = 0;
        return;
    }

    /* Seek to the correct position in the file and read the data. Keep the
     * table only if all requested entries have actually been read.
     */
    if (fseek (Desc->F, Desc->AreaCodeStart + Desc->First * sizeof (u32), SEEK_SET) != 0 ||
        fread (Table, sizeof (*Table), Count, Desc->F) != Count) {
        free (Table);
        Desc->Table = 0;
        return;
    }

    Desc->Table = Table;
}
371
372
373
static unsigned CalcCodeLen (u32 Code)
/* Calculate the length of a given (encoded) area code in characters: count
 * the nibbles, starting with the most significant one, up to (but not
 * including) the first nibble that has the filler value 0x0F. The result is
 * in the range 0..8.
 */
{
    unsigned Len  = 0;
    u32      Mask = 0xF0000000L;

    /* Walk down nibble by nibble until the filler or the end is reached */
    while (Mask != 0 && (Code & Mask) != Mask) {
        Len++;
        Mask >>= 4;
    }

    return Len;
}
389
390
391
392 /*****************************************************************************/
393 /* Code */
394 /*****************************************************************************/
395
396
397
GetAreaCodeInfo(acInfo * AC,const char * PhoneNumber)398 unsigned GetAreaCodeInfo (acInfo* AC, const char* PhoneNumber)
399 /* Return - if possible - an information for the area code of the given number.
400 * The function returns one of the error codes defined in areacode.h. If the
401 * returned value is acOk, the AC struct is filled with the data of the
402 * area code found. If we did not have an error, but there is no area code
403 * that corresponds to the given number, the function returns acOk, but the
404 * AC struct is filled with an empty Info field and a AreaCodeLen of zero.
405 */
406 {
407 u32 Phone; /* PhoneNumber encoded in BCD */
408 long First, Last, Current; /* For binary search */
409 u32 CurrentVal; /* The value at Table [Current] */
410 unsigned AreaCodeLen; /* The length of the area code found */
411 unsigned char InfoLen; /* Length of info string */
412 unsigned RC = acOk; /* Result code of the function */
413 u32 Mask;
414 AreaCodeDesc Desc;
415
416
417 /* Clear the fields of the AC struct. Write a zero to the last field of
418 * Info - this field is never written to by the rest of the code. So by
419 * setting this to zero, we will assure a terminated string in case some
420 * problem prevents the code below from executing correctly.
421 */
422 AC->Info [0] = '\0';
423 AC->Info [sizeof (AC->Info) - 1] = '\0';
424 AC->AreaCodeLen = 0;
425
426 /* If the number is empty, return immidiately */
427 if (strlen (PhoneNumber) == 0) {
428 return acOk;
429 }
430
431 /* Open the database file, check for errors */
432 Desc.F = fopen (acFileName, "rb");
433 if (Desc.F == 0) {
434 /* We had an error opening the file */
435 return acFileError;
436 }
437
438 /* Initialize descriptor data where needed */
439 Desc.Table = 0;
440
441 /* Read the header from the file */
442 RC = LoadFileHeader (&Desc);
443 if (RC != acOk) {
444 /* Wrong file or file read error */
445 goto ExitWithClose;
446 }
447
448 /* Convert the phone number into the internal representation */
449 Phone = EncodeNumber (PhoneNumber);
450
451 /* Add dead code to work around gcc warnings */
452 Current = 0;
453 CurrentVal = 0;
454
455 /* Now do a binary search over the data */
456 First = 0;
457 Last = (long) Desc.Count - 1;
458 while (First <= Last) {
459
460 /* If we don't have read the table into memory, check if we can do
461 * so now.
462 */
463 if (Desc.Table == 0) {
464 u32 NeedMemory = (Last - First + 1) * sizeof (u32);
465 if (NeedMemory <= acMaxMem) {
466 /* Ok, the current part of the table is now small enough to
467 * load it into memory.
468 */
469 Desc.First = First;
470 Desc.Last = Last;
471 LoadTable (&Desc);
472 }
473 }
474
475 /* Set current to mid of range */
476 Current = (Last + First) / 2;
477
478 /* Get the phone number from that place */
479 CurrentVal = ReadPhone (&Desc, Current);
480
481 /* Do a compare */
482 if (Phone > CurrentVal) {
483 First = Current + 1;
484 } else {
485 Last = Current - 1;
486 if (Phone == CurrentVal) {
487 /* Set the condition to terminate the loop */
488 First = Current;
489 }
490 }
491 }
492
493 /* First is the index of the area code, we eventually found. Put the index
494 * into Current and the value into CurrentVal.
495 */
496 if (Current != First) {
497 Current = First;
498 CurrentVal = ReadPhone (&Desc, Current);
499 }
500
501 /*
502 * We may now delete an eventually allocated table space since it is
503 * not needed any more.
504 */
505 free (Desc.Table);
506 Desc.Table = 0;
507
508 /* If Current points behind Last, we did not find anything */
509 if (Current >= (long) Desc.Count) {
510 /* Not found */
511 goto ExitWithClose;
512 }
513
514 /* Calculate the length of the area code */
515 AreaCodeLen = CalcCodeLen (CurrentVal);
516
517 /* Check if the Prefix is actually the first part of the phone number */
518 Mask = 0xFFFFFFFFL << ((8 - AreaCodeLen) * 4);
519 if ((Phone & Mask) != (CurrentVal & Mask)) {
520 /* They are different */
521 goto ExitWithClose;
522 }
523
524 /* Ok, we have now definitely found the code. Set up the data structure,
525 * we return to the caller.
526 */
527 AC->AreaCodeLen = AreaCodeLen;
528
529 /* Current is the index of the area code. Seek to the corresponding
530 * position in the name index, get the name position from there and seek
531 * to that place.
532 */
533 fseek (Desc.F, Desc.NameIndexStart + Current * sizeof (u32), SEEK_SET);
534 fseek (Desc.F, Desc.NameStart + Load_u32 (&Desc), SEEK_SET);
535
536 /* Read the length of the name and add the trailing zero to the info
537 * field in the result struct.
538 */
539 fread (&InfoLen, 1, 1, Desc.F);
540 AC->Info [InfoLen] = '\0';
541
542 /* Read the info into the result struct */
543 fread (AC->Info, 1, InfoLen, Desc.F);
544
545 #ifdef CHARSET_ISO
546 /* Translate the info to the ISO-8859-1 charset */
547 {
548 unsigned I;
549 for (I = 0; I < InfoLen; I++) {
550 unsigned char C = (unsigned char) AC->Info [I];
551 if (C >= 128) {
552 AC->Info [I] = ISOMap [C - 128];
553 }
554 }
555 }
556 #endif
557
558 ExitWithClose:
559 /* Close the data file */
560 fclose (Desc.F);
561
562 /* Done, return the result */
563 return RC;
564 }
565
566
567
568