/* RCS/CVS revision identifier string for this source file. */
static char const rcsid[] = "$Id: ncbisam.c,v 6.34 2007/11/06 19:20:06 coulouri Exp $";
2 
3 /* $Id: ncbisam.c,v 6.34 2007/11/06 19:20:06 coulouri Exp $
4 * ===========================================================================
5 *
6 *                            PUBLIC DOMAIN NOTICE
7 *               National Center for Biotechnology Information
8 *
9 *  This software/database is a "United States Government Work" under the
10 *  terms of the United States Copyright Act.  It was written as part of
11 *  the author's official duties as a United States Government employee and
12 *  thus cannot be copyrighted.  This software/database is freely available
13 *  to the public for use. The National Library of Medicine and the U.S.
14 *  Government have not placed any restriction on its use or reproduction.
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 *  Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================
27 *
28 * File Name:  $RCSfile: ncbisam.c,v $
29 *
30 * Author:  Sergei Shavirin
31 *
32 * Initial Version Creation Date: 02/24/1997
33 *
34 * $Revision: 6.34 $
35 *
36 * File Description:
37 *         Main file for ISAM library
38 *
39 * $Log: ncbisam.c,v $
40 * Revision 6.34  2007/11/06 19:20:06  coulouri
41 * fix memory allocation; resolves blast-rt#15351152
42 *
43 * Revision 6.33  2006/06/21 13:55:06  camacho
44 * Fixed from Ilya Dondoshansky in s_ISAMBufferReadLine
45 * Change FILEREAD_BUFFER_SIZE from 1MB to 64k
46 *
47 * Revision 6.32  2006/05/10 20:47:15  camacho
48 * From Ilya Dondoshansky: In ISAMMakeStringIndex: read large chunks from file instead of one line at a time.
49 *
50 * Revision 6.31  2005/07/28 14:57:10  coulouri
51 * remove dead code
52 *
53 * Revision 6.30  2003/05/30 17:25:37  coulouri
54 * add rcsid
55 *
56 * Revision 6.29  2003/04/14 19:52:31  camacho
57 * Added ISAMUninitSearch
58 *
59 * Revision 6.28  2002/09/23 19:48:09  camacho
60 * Avoid the use of data->db_fd when searching the ISAM databases
61 *
62 * Revision 6.27  2002/09/20 15:17:12  camacho
63 * Fixed file descriptor leak
64 *
65 * Revision 6.26  2002/04/04 17:57:10  camacho
66 * Fixed binary search implementation in NISAMSearch
67 *
68 * Revision 6.25  2002/04/02 20:51:21  camacho
69 * Fixed bug in NISAMSearch
70 *
71 * Revision 6.24  2002/01/18 18:53:13  madden
72 * Changes to research the last page if appropriate
73 *
74 * Revision 6.23  2001/07/09 14:17:24  madden
75 * Fix PC-lint complaints from R. Williams
76 *
77 * Revision 6.22  2001/06/08 20:31:01  madden
78 * Fix memory leaks
79 *
80 * Revision 6.21  2000/08/04 19:54:17  shavirin
81 * Fixed problem with counting line when data not tested to be non- unique.
82 *
83 * Revision 6.20  2000/07/18 19:29:27  shavirin
84 * Added new parameter test_non_unique to suppress check for non-unique
85 * strings ids in the database - default - TRUE.
86 *
87 * Revision 6.19  2000/07/13 16:43:48  shavirin
88 * Fixed checking of order in String index creation.
89 *
90 * Revision 6.17  2000/02/11 21:14:07  madden
91 * Allocate MasterPos of correct (smaller) size
92 *
93 * Revision 6.16  1999/12/18 15:27:50  egorov
94 * Fix NT compilation problem
95 *
96 * Revision 6.15  1999/12/17 20:47:05  egorov
97 * Fix 'gcc -Wall' warnings
98 *
99 * Revision 6.14  1999/12/06 20:56:12  egorov
100 * fwrite() writes two bytes for '\n' if file is open not in binary mode,
101 * so MakeISAMIndex worked incorrectly.
102 * What to Blast programs, formatdb now works correctly on NT machine.
103 *
104 * Revision 6.13  1999/11/08 19:05:21  shavirin
105 * Fixed minor SGI warning.
106 *
107 * Revision 6.12  1999/08/25 20:18:49  shavirin
108 * Added possibility to store user-specified Int4 options in the index
109 * header.
110 *
111 * Revision 6.11  1999/03/17 21:38:04  kans
112 * Int4Ptr argument must point to Int4
113 *
114 * Revision 6.10  1999/03/17 20:56:24  shavirin
115 * Fixed warning "long int format"
116 *
117 * Revision 6.9  1999/02/19 22:01:14  madden
118 * Use memory-mapping and binary search on second numerical index
119 *
120 * Revision 6.8  1998/07/13 15:31:14  egorov
121 * make error message more understandable
122 *
123 * Revision 6.7  1998/05/28 17:18:04  shavirin
124 * Fixed non-intialized variable warning.
125 *
126 * Revision 6.6  1998/02/23 17:45:28  shavirin
127 * Fixed problem in sorted file checkup.
128 *
129 * Revision 6.5  1997/12/02 20:06:31  shavirin
130 * Fixed Macintosh warnings
131 *
132 * Revision 6.4  1997/12/02 19:38:17  shavirin
133 * Added variables
134 *
135 * Revision 6.3  1997/12/02 18:05:12  shavirin
136 * Removed typecast warnings in sprintf and sscanf
137 *
138 * Revision 6.2  1997/11/28 15:50:10  shavirin
139 * Added default returned value in the function ISAMGetDataNumber()
140 *
141 * Revision 6.1  1997/11/07 16:17:43  shavirin
142 * Added new function SISAMFindAllData() returned info about redundant keys
143 *
144 * Revision 6.0  1997/08/25 18:53:27  madden
145 * Revision changed to 6.0
146 *
147 * Revision 1.17  1997/05/16 17:08:44  shavirin
148 * Removed printf()
149 *
150 * Revision 1.16  1997/05/16 16:16:00  shavirin
151 * Added LIBCALLBACK to definition of ISAMUidCompare()
152 *
153 * Revision 1.15  1997/05/12 19:55:05  shavirin
154 * Some fixes type-changes to support ISAMCreateDatabase() API
155 *
156 * Revision 1.13  1997/05/09 14:12:06  shavirin
157 * Fixed memory leakage and added ErrLogPrintf()
158 *
159 * Revision 1.12  1997/05/08 21:18:08  shavirin
160 * Added generic function ISAMSearchTerm(), that will search complete ISAM
161 * string database created by ISAMCreateDatabase() function. Returns array of
162 * found uids corresponing given string term and gived bit-field mask
163 *
164 * Revision 1.10  1997/05/06 21:36:15  shavirin
165 * Added set of function for Coded Array compression implementation
166 *
167  * Revision 1.9  1997/05/05  18:17:22  shavirin
168  * Added support for platforms without memory mapping
169  *
170  * Revision 1.8  1997/05/05  14:37:54  shavirin
171  * Fixed usage of Numeric ISAM with Windows NT platform
172  *
173  * Revision 1.7  1997/05/01  20:10:57  shavirin
174  * Fixed few errors discovered on Macintoch
175  *
176  * Revision 1.6  1997/05/01  17:24:33  shavirin
177  * Added String ISAM index functionality
178  *
179  * Revision 1.5  1997/02/26  01:28:11  shavirin
180  * Fixed difference in definitions of SISAMSearch() function
181  *
182  * Revision 1.4  1997/02/25  22:16:49  shavirin
183  * Changed general API of ISAM library .
184  *
185  * Revision 1.3  1997/02/25  19:38:12  shavirin
186  * Added defence aginst little-big endian platforms
187  *
188  * Revision 1.2  1997/02/25  15:33:56  shavirin
189  * Return value will be ISAMNoError if number is not found,
190  * byt Data and Index will be set to ISAMNotFound
191  *
192  * Revision 1.1  1997/02/24  21:06:52  shavirin
193  * Initial revision
194  *
195 *
196 * ==========================================================================
197 */
198 #include <ncbi.h>
199 #include <readdb.h>
200 #include <ncbisami.h>
201 
202 /****************************************************************************/
/* INTERNAL FUNCTION DEFINITIONS  */
204 /****************************************************************************/
205 
/* Forward declarations of the file-local helpers.  The group is compiled
   only when NONO is defined.  The ISAMMakeStringIndex prototype carries
   the idx_option parameter so that it agrees with the definition in this
   file.
   NOTE(review): ISAMMakeNumericIndex, ISAMInitSearch and the other
   helpers whose definitions are not checked here may need the same
   treatment - confirm against their definitions. */
#ifdef NONO
static Int4 GetPageNumElements(ISAMDataPtr data, Int4 SampleNum,
                               Int4Ptr Start);

/* ---------------------- ISAMInitSearch --------------------------
   Purpose:     Initialize ISAM Search. Checks for any errors

   Parameters:  ISAM Object
   Returns:     ISAM Error Code
   NOTE:        No need to call this function first.
  ------------------------------------------------------------------*/
static ISAMErrorCode ISAMInitSearch(ISAMObjectPtr object);

static ISAMErrorCode ISAMMakeNumericIndex(
                               ISAMDataPtr data,
                               Int4 page_size
                               );

static ISAMErrorCode ISAMMakeStringIndex(
                                 ISAMDataPtr data,
                                 Int4 page_size,
                                 Int4 idx_option
                                 );
static Boolean ISAMCheckIfSorted(ISAMDataPtr data);
ISAMErrorCode ISAMCountLines(ISAMDataPtr data);
static Int4 ISAMReadLine(ISAMDataPtr data);
static Int4 ISAMDiffChar(CharPtr Term, CharPtr Key, Boolean IgnoreCase);
static void ISAMExtractData(CharPtr KeyData,
                            CharPtr PNTR Key, CharPtr PNTR Data);
static CharPtr ISAMReadFileInMemory(CharPtr filename);

/* Coded Array - Field Array handling functions */

static Boolean ISAMWriteNBits10(ISAMTmpCAPtr cap, Int4 number);
static Boolean ISAMWriteBitNumber(ISAMTmpCAPtr cap, Int4 number);
static Uint4Ptr ISAMDecompressCA(Uint1Ptr buffer, Int4 length,
                                 Int4 num_bits, Int4 num_uids);
static Boolean ISAMCreateCA(ISAMTmpCAPtr cap, ISAMUidFieldPtr data,
                            Int4 num_uids);
static ISAMTmpCAPtr ISAMTmpCANew(void);
static void ISAMTmpCAFree(ISAMTmpCAPtr cap);
static Boolean ISAMCreateFA(ISAMTmpCAPtr fcap,
                             ISAMUidFieldPtr data, Int4 num_uids);
static Uint4Ptr ISAMDecompressFA(Uint1Ptr buffer, Int4 num_uids);
static ISAMErrorCode ISAMDumpTermEntry(ISAMTmpCAPtr cap, FILE *off_fd,
                                       FILE *db_fd, ISAMUidFieldPtr uidf,
                                       Int4 count, Int4Ptr offset);
static int LIBCALLBACK ISAMUidCompare(VoidPtr i, VoidPtr j);

#endif
255 
256 /****************************************************************************/
/* EXTERNAL FUNCTIONS  */
258 /****************************************************************************/
259 
260 /* ----------------------  ISAMObjectNew --------------------------
261    Purpose:     Creates ISAM object structure with default parameters
262 
263    Parameters:  type - Type of ISAM (Numeric, String etc. )
264    Returns:     Poiner to created object structure
265    NOTE:        Page size is set to default value if 0
266 
267   ------------------------------------------------------------------*/
ISAMObjectNew(ISAMType type,CharPtr DBFile,CharPtr IndexFile)268 ISAMObjectPtr ISAMObjectNew(ISAMType type,       /* Type of ISAM */
269                             CharPtr DBFile,      /* ISAM Database file */
270                             CharPtr IndexFile   /* ISAM Index file */
271                             )
272 {
273     ISAMDataPtr data;
274     Char name[MAX_FILENAME_LEN];
275 #ifndef OS_UNIX
276 	CharPtr ch, ch1;
277 #endif
278 
279     if(DBFile == NULL)
280         return NULL;
281 
282     if((data = (ISAMDataPtr)MemNew(sizeof(ISAMData))) == NULL)
283         return NULL;
284 
285     data->type = type;
286     data->DBFileName = StringSave(DBFile);
287 
288 
289     if(IndexFile == NULL) {
290 #ifdef OS_UNIX
291         if(type == ISAMNumeric || type == ISAMNumericNoData)
292             sprintf(name, "%s.nisam", DBFile);
293         else
294             sprintf(name, "%s.isam", DBFile);
295 
296         data->IndexFileName = StringSave(name);
297 #else
298         sprintf(name, "%s", DBFile);
299 
300         /* Looking for last  '.' in the filename */
301         for(ch = name; (ch = StringChr(ch, '.')) != NULL; ch1=ch)
302             continue;
303 
304         if(ch1 != NULL) {
305             if(type == ISAMNumeric || type == ISAMNumericNoData)
306                 sprintf(ch1, ".nsm");
307             else
308                 sprintf(ch1, ".ism");
309         } else {
310             if(type == ISAMNumeric || type == ISAMNumericNoData)
311                 sprintf(name, "%s.nsm", DBFile);
312             else
313                 sprintf(name, "%s.ism", DBFile);
314         }
315 
316         data->IndexFileName = StringSave(name);
317 #endif
318     } else {
319         data->IndexFileName = StringSave(IndexFile);
320     }
321 
322     if(type == ISAMNumeric || type == ISAMNumericNoData)
323         data->PageSize = DEFAULT_NISAM_SIZE;
324     else
325         data->PageSize = DEFAULT_SISAM_SIZE;
326 
327     data->initialized = FALSE;
328     data->KeySamples = NULL;
329     data->KeyDataSamples = NULL;
330     data->test_non_unique = TRUE; /* default - to check non-unique data */
331 
332     return (ISAMObjectPtr) data;
333 }
334 /* ----------------------  ISAMSetUpCAInfo --------------------------
335    Purpose:     Added toISAM object Coded Array filenames information
336 
337    Parameters:  CAName - Common name for all CA/FA DB and offset files
338                 CADBExt - exetntio for CA/FA DB files
339                 CAOffExt - extention for CA/FA offset files
340                 MaxOffset - threshhold offset for starting write new
341                             file
342 
343    Returns:     ISAM Error Code
344    NOTE:        MaxOffset is set to default value if 10.000.000
345   ------------------------------------------------------------------*/
ISAMSetUpCAInfo(ISAMObjectPtr object,Int4 MaxOffset,CharPtr CAName,CharPtr CADBExt,CharPtr CAOffExt)346 ISAMErrorCode ISAMSetUpCAInfo(ISAMObjectPtr object, Int4 MaxOffset,
347                               CharPtr CAName, CharPtr CADBExt,
348                               CharPtr CAOffExt)
349 {
350     ISAMDataPtr data;
351 
352     if(object == NULL)
353         return ISAMBadParameter;
354 
355     data = (ISAMDataPtr) object;
356 
357     data->CAName = StringSave(CAName);
358     data->CADBExt = StringSave(CADBExt);
359     data->CAOffExt = StringSave(CAOffExt);
360     if(MaxOffset != 0)
361         data->CAMaxOffset = MaxOffset;
362     else
363         data->CAMaxOffset = DEFAULT_CA_MAX_OFFSET;
364 
365     return ISAMNoError;
366 }
367 
368 /* ---------------------- ISAMUpdateDatabase ------------------------
369    Purpose:
370 
371    Parameters:
372 
373    Returns:
374   ------------------------------------------------------------------*/
ISAMUpdateDatabase(CharPtr InFile,CharPtr NewDBDir,Int4 MaxOffset,CharPtr BaseName,CharPtr DBExt,CharPtr IndexExt,CharPtr OffExt,CharPtr CodeExt)375 ISAMErrorCode ISAMUpdateDatabase(CharPtr InFile,
376                                  CharPtr NewDBDir,
377                                  Int4 MaxOffset,
378                                  CharPtr BaseName,
379                                  CharPtr DBExt,
380                                  CharPtr IndexExt,
381                                  CharPtr OffExt,
382                                  CharPtr CodeExt)
383 {
384 
385     return ISAMNotImplemented;
386 }
387 
ISAMTmpCANew(void)388 static ISAMTmpCAPtr ISAMTmpCANew(void)
389 {
390     ISAMTmpCAPtr cap;
391 
392     cap = MemNew(sizeof(ISAMTmpCA));
393 
394     cap->allocated = CA_TMP_CHUNK;
395     cap->buffer = (Uint1Ptr) MemNew(cap->allocated);
396 
397     return cap;
398 }
399 
/* Releases a temporary coded-array descriptor and its work buffer.
   Passing NULL is allowed and does nothing. */
static void ISAMTmpCAFree(ISAMTmpCAPtr cap)
{
    if(cap != NULL) {
        MemFree(cap->buffer);
        MemFree(cap);
    }
}
408 
409 /* ------------------------------------------------------------------
410                 This is handler for HeapSort function
411    ------------------------------------------------------------------*/
ISAMUidCompare(VoidPtr i,VoidPtr j)412 static int LIBCALLBACK ISAMUidCompare(VoidPtr i, VoidPtr j)
413 {
414   if (*(Int4Ptr)i > *(Int4Ptr)j)
415     return (1);
416   if (*(Int4Ptr)i < *(Int4Ptr)j)
417     return (-1);
418   return (0);
419 }
420 
ISAMWriteBitNumber(ISAMTmpCAPtr cap,Int4 number)421 static Boolean ISAMWriteBitNumber(ISAMTmpCAPtr cap, Int4 number)
422 {
423     Int4 template;
424 
425     if(cap->num_bits == 0)
426         return TRUE;
427 
428     template = PowersOfTwo[cap->num_bits - 1];
429 
430     for(; template; template >>= 1) {
431 
432         if(number & template)
433             cap->buffer[cap->byte_num] |= OneBit[cap->bit_num] ;
434 
435         if(++cap->bit_num > 7) {
436             cap->bit_num = 0;
437             if((++cap->byte_num >= cap->allocated)) {
438                 cap->allocated += CA_TMP_CHUNK;
439                 cap->buffer = Realloc(cap->buffer, cap->allocated);
440             }
441         }
442     }
443     return TRUE;
444 }
445 
ISAMWriteNBits10(ISAMTmpCAPtr cap,Int4 number)446 static Boolean ISAMWriteNBits10(ISAMTmpCAPtr cap, Int4 number)
447 {
448     register Int4 i;
449 
450     for(i = 0; i < number; i++) {
451         cap->buffer[cap->byte_num] |= OneBit[cap->bit_num];
452         if(++cap->bit_num > 7) {
453             cap->bit_num = 0;
454             if((++cap->byte_num >= cap->allocated)) {
455                 cap->allocated += CA_TMP_CHUNK;
456                 cap->buffer = Realloc(cap->buffer, cap->allocated);
457             }
458         }
459     }
460 
461     /* Now wriiting 0 bit in the end of set of 1's */
462 
463     if(++cap->bit_num > 7) {
464         cap->bit_num = 0;
465         if((++cap->byte_num >= cap->allocated)) {
466             cap->allocated += CA_TMP_CHUNK;
467             cap->buffer = Realloc(cap->buffer, cap->allocated);
468         }
469     }
470     return TRUE;
471 }
472 
ISAMCreateCA(ISAMTmpCAPtr cap,ISAMUidFieldPtr data,Int4 num_uids)473 static Boolean ISAMCreateCA(ISAMTmpCAPtr cap,
474                             ISAMUidFieldPtr data,
475                             Int4 num_uids)
476 {
477     Nlm_FloatHi AverageDiff;
478     Int4 base, number, prev_number;
479     Int4 i, diff, dividend;
480 
481     if(cap == NULL || data == NULL)
482         return FALSE;
483 
484     cap->byte_num = 0;
485     cap->bit_num  = 0;
486     cap->length   = 0;
487     cap->num_uids = num_uids;
488     MemSet(cap->buffer, 0, cap->allocated);
489 
490 
491     if((AverageDiff = (Nlm_FloatHi)(data[num_uids-1].uid - num_uids + 1) /
492         (Nlm_FloatHi)num_uids ) < 1) {
493 	AverageDiff = 1;
494     }
495 
496     cap->num_bits = Log2(AverageDiff);
497     base = PowersOfTwo[cap->num_bits];
498 
499     prev_number = -1;
500 
501     for(i = 0; i < num_uids; i++) {
502         number = data[i].uid;
503 
504         if (number <= prev_number) {
505             ErrLogPrintf("%s\n%s%ld%s%ld\n",
506                          "Bad record number in writing to coded array!",
507                          "Number: ", number,
508                          "  Previous Number: ", prev_number);
509             ISAMTmpCAFree(cap);
510             return FALSE;
511         }
512 
513         diff = number - prev_number - 1;
514 
515         dividend = diff/base;
516 
517         ISAMWriteNBits10(cap, dividend);
518         ISAMWriteBitNumber(cap, diff);
519         prev_number = number;
520     }
521 
522     cap->length = cap->byte_num + (cap->bit_num != 0 ? 1 : 0);
523 
524     return TRUE;
525 }
526 
ISAMCreateFA(ISAMTmpCAPtr cap,ISAMUidFieldPtr uidf,Int4 num_uids)527 static Boolean ISAMCreateFA(ISAMTmpCAPtr cap,
528                              ISAMUidFieldPtr uidf, Int4 num_uids)
529 {
530     Int4 i, j;
531     Int4 byte_start;
532 
533     if(cap == NULL || uidf == NULL)
534         return FALSE;
535 
536     cap->byte_num = 0;
537     cap->length   = 0;
538     cap->num_uids = num_uids;
539     MemSet(cap->buffer, 0, cap->allocated);
540 
541     for(i = 0; i < num_uids; i++) {
542 
543         byte_start = cap->byte_num;
544 
545         for(j = 0, cap->bit_num = 0; j < 32; j++) {
546             if(uidf[i].field & PowersOfTwo[j]) {
547                 cap->buffer[cap->byte_num] |= j;
548 
549                 if((++cap->byte_num >= cap->allocated)) {
550                     cap->allocated += CA_TMP_CHUNK;
551                     cap->buffer = Realloc(cap->buffer, cap->allocated);
552                 }
553 
554                 cap->bit_num++;
555             }
556         }
557 
558         for(j = 0; j < cap->bit_num -1; j++) {
559             cap->buffer[byte_start+j] |=PowersOfTwo[7];
560         }
561     }
562 
563     cap->length = cap->byte_num;
564     return TRUE;
565 }
566 
ISAMDumpTermEntry(ISAMTmpCAPtr cap,FILE * off_fd,FILE * db_fd,ISAMUidFieldPtr uidf,Int4 count,Int4Ptr offset)567 static ISAMErrorCode ISAMDumpTermEntry(ISAMTmpCAPtr cap, FILE *off_fd,
568                                        FILE *db_fd,
569                                        ISAMUidFieldPtr uidf,
570                                        Int4 count, Int4Ptr offset)
571 {
572     Int4 i, j, offset_out, ca_offset, num_bits;
573     Uint4 numbers[32];
574     Uint4 bit_flag = 0;
575     Int4 length;
576 
577     offset_out = ftell(off_fd);
578     ca_offset  = ftell(db_fd);
579     MemSet(numbers, 0, sizeof(numbers));
580 
581     HeapSort(uidf, count, sizeof(Uint4)*2, ISAMUidCompare);
582 
583     for(i = 0; i < count; i++) {
584         for(j = 0; j < 32; j++) {
585             if(uidf[i].field & PowersOfTwo[j]) {
586                 numbers[j]++;
587                 bit_flag |= PowersOfTwo[j];
588             }
589         }
590     }
591 
592     /* Calculating and writting code and field arrays */
593 
594     if(!ISAMCreateCA(cap, uidf, count)) {
595         ErrLogPrintf("Cannot create coded array. Formating failed.\n");
596         return ISAMInvalidFormat;
597     }
598     num_bits = cap->num_bits;
599 
600     FileWrite(cap->buffer, 1, cap->length, db_fd);
601     length = cap->length;
602 
603     if(!ISAMCreateFA(cap, uidf, count)) {
604         ErrLogPrintf("Cannot create field array. Formating failed.");
605         return ISAMInvalidFormat;
606     }
607 
608     FileWrite(cap->buffer, 1, cap->length, db_fd);
609 
610     /* ------- Now writting header ---------- */
611 
612     FileWrite(&count, 1, sizeof(Uint4), off_fd);
613     FileWrite(&ca_offset, 1, sizeof(Uint4), off_fd);
614     FileWrite(&length, 1, sizeof(Uint4), off_fd);
615     FileWrite(&num_bits, 1, sizeof(Uint4), off_fd);
616     FileWrite(&bit_flag, 1, sizeof(Uint4), off_fd);
617 
618     for(j = 0; j < 32; j++) {
619         if(numbers[j] > 0)
620             FileWrite(&numbers[j], 1, sizeof(Uint4), off_fd);
621     }
622 
623     *offset = offset_out;
624     return ISAMNoError;
625 }
626 
/* Character classification used by the buffered line reader.
   IS_END_BUF: the character marks the end of the data (EOF or NUL).
   IS_NEWLINE: the character ends a line (end of data, '\n' or '\r').
   The argument is parenthesised so that expression arguments expand
   correctly; it is still evaluated more than once, so it must not have
   side effects. */
#define IS_END_BUF(ch) ((ch) == EOF || (ch) == '\0')
#define IS_NEWLINE(ch) (IS_END_BUF(ch) || (ch) == '\n' || (ch) == '\r')
629 
630 /** Reads one line from a buffer. If end of buffer is reached before the next
631  * newline character, line is not returned, and the start of line is saved
632  * in a "remainder" string.
633  * @param buffer Start of buffer to read from; pointer to the start of next line
634  *               on exit. If line is unfinished, output pointer is the same as
635  *               input pointer. [in|out]
636  * @param buffer_length Length of input and output buffer. [in|out]
637  * @param line_length Length of the current line, even if unfinished. [out]
638  * @return TRUE if full line has been read.
639  */
640 static Boolean
s_ISAMBufferReadLine(char ** buffer,Int4 * buffer_length,Int4 * line_length)641 s_ISAMBufferReadLine(char* *buffer, Int4* buffer_length, Int4* line_length)
642 {
643     char* ptr;
644     Int4 length;
645     Boolean success = TRUE;
646     Boolean end_of_file = FALSE;
647 
648     if (!buffer)
649         return 0;
650 
651     for (ptr = *buffer, length = 0;
652          (length < *buffer_length) && !IS_NEWLINE(*ptr); ++ptr, ++length);
653 
654     /* Check if end of buffer (file) has been reached. */
655     /* If buffer_length has been reached, consider this line as unfinished,
656        even if a full line has actually been found, because we were unable
657        to reach the start of the next line. */
658     if (length == *buffer_length) {
659         success = FALSE;
660     } else if (IS_END_BUF(*ptr)) {
661         end_of_file = TRUE;
662     } else if (IS_NEWLINE(*ptr)) {
663         /* If new line has been reached, and this is not the end of buffer,
664            skip the white space before the start of the next line. */
665         while ((length < *buffer_length) && IS_WHITESP(*ptr)) {
666             ++length;
667             ++ptr;
668         }
669     }
670 
671     *line_length = (ptr - *buffer);
672 
673     if (success) {
674         *buffer = ptr;
675         if (end_of_file)
676             *buffer_length = 0;
677         else
678             *buffer_length -= *line_length;
679     }
680 
681     return success;
682 }
683 
684 /* returns NULL terminated string \n\r are removed */
685 
ISAMReadLine(ISAMDataPtr data)686 static Int4 ISAMReadLine(ISAMDataPtr data)
687 {
688     Int4 i = 0;
689     Int4 ch;
690     Int4 MaxChars;
691     FILE *fd = data->db_fd;
692 
693     MaxChars = data->max_line_size-1;
694 
695     for(i = 0; (( ch = getc(fd)) != EOF) ; i++)  {
696         if((ch == '\n') || (ch == '\r'))
697             break;
698         data->line[i] = (Char) ch;
699 
700         if(i == MaxChars) { /* Reallocating line buffer */
701             data->max_line_size += LINE_SIZE_CHUNK;
702             data->line = Realloc(data->line,  data->max_line_size);
703             MaxChars = data->max_line_size-1;
704         }
705     }
706     data->line[i] = NULLB;
707 
708     /* Finding first character on new line */
709 
710     while((ch = getc(fd)) != EOF) {
711         if(IS_WHITESP(ch)) {
712             continue;
713         } else {
714             ungetc(ch, fd);
715             break;
716         }
717     }
718 
719     return i;
720 }
721 
ISAMCheckIfSorted(ISAMDataPtr data)722 static Boolean ISAMCheckIfSorted(ISAMDataPtr data)
723 {
724     CharPtr prevline = NULL;
725     Int4 length;
726     CharPtr chptr;
727 
728     if(data == NULL || data->db_fd == NULL || data->max_line_size == 0)
729         return FALSE;
730 
731     rewind(data->db_fd);
732 
733     if (data->sorting_done)
734         return TRUE;
735 
736     data->NumTerms = 0;
737     prevline = MemNew(data->max_line_size);
738 
739 
740     if(data->type == ISAMString || data->type == ISAMStringDatabase) {
741         while(ISAMReadLine(data) > 0) {
742             data->NumTerms++;
743 
744             /* If not testing data - lines eventually should be counted */
745             if(data->test_non_unique) {
746                 if((chptr = StringChr(data->line, ISAM_DATA_CHAR)) != NULL)
747                     *chptr = NULLB;
748 
749                 if (StringCmp(data->line, prevline) <= 0) {
750                     ErrPostEx(SEV_WARNING, 0, 0, "Non-unique or not-sorted string IDs found %d line: '%s' %d line: '%s'", data->NumTerms, data->line, data->NumTerms-1, prevline);
751                 }
752                 length = StringLen(data->line)+1;
753                 StringNCpy_0(prevline, data->line,
754                              length > LINE_SIZE_CHUNK ? LINE_SIZE_CHUNK : length);
755             }
756         }
757     } else {
758         return FALSE;
759     }
760 
761     rewind(data->db_fd);
762     MemFree(prevline);
763     return(TRUE);
764 }
765 
766 /* ---------------------- ISAMMakeStringIndex ---------------------
767    Purpose:     To create String ISAM Intex file for Database file
768 
769    Parameters:  ISAM Data
770    Returns:     ISAM itemized error code
771    NOTE:        Special default rules for UNIX platform
772    ------------------------------------------------------------------*/
ISAMMakeStringIndex(ISAMDataPtr data,Int4 page_size,Int4 idx_option)773 static ISAMErrorCode ISAMMakeStringIndex(
774                                  ISAMDataPtr data,
775                                  Int4 page_size,   /* ISAM page size */
776                                  Int4 idx_option   /* Option for upper layer */
777                                  )
778 {
779     Int4 TermCount, Pos, count, SampleCount;
780     Int4Ptr MasterPos, SamplePos;
781     Int4 OffsetPos;
782     FILE *tf_fd;
783     Int4 Version = ISAM_VERSION;
784     Uint4 value;
785 
786     if(page_size != 0)
787         data->PageSize = page_size;
788     else
789         data->PageSize = DEFAULT_SISAM_SIZE;
790 
791     if((data->db_fd = FileOpen(data->DBFileName, "r")) == NULL)
792         return ISAMBadFileName;
793 
794 
795     /* Temporary space for line initialy set to MAX_LINE_SIZE
796        byt will be realocated if some line exceed this limit */
797 
798     if(data->max_line_size == 0) {
799         data->max_line_size = LINE_SIZE_CHUNK;
800         data->line = MemNew(LINE_SIZE_CHUNK);
801     }
802 
803     /* This function will also split data if strings are
804        identical and finaly count lines*/
805 
806     if(!ISAMCheckIfSorted(data))
807         return ISAMNoOrder;
808 
809     /* Obtain the term offsets; select the sample terms. */
810 
811     MasterPos = (Int4 *)Nlm_Malloc(sizeof(Int4) * (((data->NumTerms+1)/(data->PageSize))+2));
812 
813     Pos = TermCount = SampleCount = 0;
814 
815 #define FILEREAD_BUFFER_SIZE 0x00010000
816     {
817         char buffer[FILEREAD_BUFFER_SIZE];
818         Int4 buffer_length = FILEREAD_BUFFER_SIZE;
819         char *buffer_ptr = buffer;
820         Int4 bytes_read;
821 
822         Pos = ftell(data->db_fd);
823 
824         while ((bytes_read =
825                FileRead(buffer_ptr, 1, buffer_length, data->db_fd)) > 0) {
826             Int4 line_length;
827             /* Lines are always read beginning at the start of the original
828                buffer. */
829             buffer_length = bytes_read + (buffer_ptr - buffer);
830             buffer_ptr = buffer;
831             while (buffer_length > 0 &&
832                    s_ISAMBufferReadLine(&buffer_ptr, &buffer_length,
833                                         &line_length)) {
834                 if (TermCount++ % data->PageSize == 0)
835                     MasterPos[SampleCount++] = SwapUint4(Pos);
836                 Pos += line_length;
837             }
838             /* If an unfinished line is left, copy it to the start of the
839                buffer, and set buffer pointer so that next file chunk is read
840                into location immediately following the unfinished line. */
841             if (buffer_length > 0 && line_length > 0) {
842                 Int4 file_pos = ftell(data->db_fd);
843                 memmove(buffer, buffer_ptr, line_length);
844                 buffer_ptr = buffer + line_length;
845                 ASSERT(Pos ==  file_pos - line_length);
846                 buffer_length = FILEREAD_BUFFER_SIZE - line_length;
847             } else {
848                 buffer_ptr = buffer;
849                 buffer_length = FILEREAD_BUFFER_SIZE;
850             }
851         }
852     }
853 
854     MasterPos[SampleCount] = SwapUint4(Pos);
855 
856     /* Create the sample file. */
857 
858     if (!(tf_fd = FileOpen(data->IndexFileName, "wb")))
859     {
860 	MemFree(MasterPos);
861         return ISAMBadFileName;
862     }
863 
864     /* Write the term counts and offsets to the sample file. */
865     value = SwapUint4(Version);
866     FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
867     value = SwapUint4(data->type);
868     FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
869     value = SwapUint4(FileLength(data->DBFileName)); /* Length of DB file */
870     FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
871     value = SwapUint4(TermCount);
872     FileWrite((CharPtr)&value,    sizeof(Int4), 1, tf_fd);
873     value = SwapUint4(SampleCount);
874     FileWrite((CharPtr)&value,  sizeof(Int4), 1, tf_fd);
875     value = SwapUint4(data->PageSize);
876     FileWrite((CharPtr)&value,      sizeof(Int4), 1, tf_fd);
877     value = SwapUint4(data->max_line_size);
878     FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
879     value = SwapUint4(idx_option);
880     FileWrite(&value, sizeof(Int4), 1, tf_fd);
881     value = SwapUint4(0);      /* This space reserved for future use */
882     FileWrite(&value, sizeof(Int4), 1, tf_fd);
883 
884     if(data->PageSize != MEMORY_ONLY_PAGE_SIZE)
885         FileWrite((CharPtr)MasterPos, sizeof(Int4), SampleCount+1, tf_fd);
886 
887     /* Leave space for the offsets of the selected terms. */
888 
889     OffsetPos = ftell(tf_fd);
890     SamplePos = (Int4 *)MemNew((SampleCount + 1) * sizeof(Int4));
891     FileWrite((CharPtr)SamplePos, sizeof(Int4), SampleCount+1, tf_fd);
892 
893     /* Copy the selected terms to the sample file. */
894 
895     for (count = 0; count < SampleCount; count++) {
896         SamplePos[count] = SwapUint4(ftell(tf_fd));
897         fseek(data->db_fd, SwapUint4(MasterPos[count]), SEEK_SET);
898         ISAMReadLine(data);
899         fprintf(tf_fd,"%s%c",data->line, NULLB);
900     }
901 
902     SamplePos[SampleCount] = ftell(tf_fd);
903 
904     /* Replace the space-holding zeroes with the offsets of the selected
905        terms.*/
906 
907     fseek(tf_fd, OffsetPos, SEEK_SET);
908     FileWrite((CharPtr)SamplePos, sizeof(Int4), SampleCount+1, tf_fd);
909 
910     FileClose(tf_fd);
911     FileClose(data->db_fd);
912     data->db_fd = NULL;
913 
914     MemFree(SamplePos);
915     MemFree(MasterPos);
916 
917     MemFree(data->line);
918     data->max_line_size = 0;
919 
920     return ISAMNoError;
921 }
922 
923 /* ------------------- ISAMReadFileInMemory  -----------------------
924    Purpose:     Function reads data from file into a buffer
925 
926    Parameters:  filename -  Name of file to read file
927 
928    Returns:     Pointer to allocated buffer.
929   ------------------------------------------------------------------*/
930 
ISAMReadFileInMemory(CharPtr filename)931 static CharPtr ISAMReadFileInMemory(CharPtr filename)
932 {
933     CharPtr  in_buff;
934     Int4     new_size = BUFF_SIZE_CHUNK;
935     Int4     bytes = 0, buff_len = 0;
936     FILE *fd;
937 
938     if(filename == NULL)
939         return NULL;
940 
941     if((fd = FileOpen(filename, "rb")) == NULL)
942         return NULL;
943 
944     /* initial allocation of memory */
945 
946     if((in_buff = MemNew(BUFF_SIZE_CHUNK)) == NULL) {
947         ErrLogPrintf("Error in allocating memory\n");
948         FileClose(fd);
949         return NULL;
950     }
951 
952     while ((bytes = FileRead(in_buff + buff_len, 1,
953                              BUFF_SIZE_CHUNK, fd)) > 0) {
954         new_size += bytes;
955         buff_len += bytes;
956 
957         if ((in_buff = Realloc(in_buff, new_size)) == NULL) {
958             ErrLogPrintf("Error in reallocating memory\n");
959             FileClose(fd);
960             return NULL;
961         }
962     }
963 
964     FileClose(fd);
965     return(in_buff);
966 }
967 
968 /* ---------------------- ISAMMakeNumericIndex ---------------------
969    Purpose:     To create Numeric ISAM Intex file for Database file
970 
971    Parameters:  ISAM Data
972    Returns:     ISAM itemized error code
973    NOTE:        Special default rules for UNIX platform
974    ------------------------------------------------------------------*/
ISAMMakeNumericIndex(ISAMDataPtr data,Int4 page_size,Int4 idx_option)975 static ISAMErrorCode ISAMMakeNumericIndex(
976                                   ISAMDataPtr data,
977                                   Int4 page_size,  /* ISAM page size */
978                                   Int4 idx_option  /* Option for upper layer */
979                                   )
980 {
981     Int4 i, NumTerms, value;
982     Int4 MaxSamples, SampleCount;
983     Uint4Ptr KeyInfo, KeySamples;
984     NISAMKeyDataPtr KeyDataInfo, KeyDataSamples;
985     Boolean NoData;
986     FILE *fd;
987     Int4 Version = ISAM_VERSION;
988 
989     NoData = (data->type == ISAMNumericNoData);
990 
991     NumTerms = FileLength(data->DBFileName) /
992         (NoData ? sizeof(Uint4) : sizeof(NISAMKeyData));
993 
994     if(!Nlm_MemMapAvailable()) {
995         if((data->FileStart =
996             ISAMReadFileInMemory(data->DBFileName)) == NULL)
997             return ISAMBadFileName;
998         if (NoData)
999             KeyInfo = (Uint4Ptr) data->FileStart;
1000         else
1001             KeyDataInfo = (NISAMKeyDataPtr)data->FileStart;
1002     } else {
1003         if((data->mmp = Nlm_MemMapInit(data->DBFileName)) == NULL)
1004             return ISAMMemMap;
1005         if (NoData)
1006             KeyInfo = (Uint4Ptr) data->mmp->mmp_begin;
1007         else
1008             KeyDataInfo = (NISAMKeyDataPtr)data->mmp->mmp_begin;
1009     }
1010 
1011     if(page_size != 0)
1012         data->PageSize = page_size;
1013     else
1014         data->PageSize = DEFAULT_NISAM_SIZE;
1015 
1016 #ifndef CHECK_ORDER
1017     for (i = 1; i < NumTerms; i++) {
1018         if (NoData) {
1019             if (SwapUint4(KeyInfo[i]) <= SwapUint4(KeyInfo[i-1]))
1020                 break;
1021         } else {
1022             if (SwapUint4(KeyDataInfo[i].key) <= SwapUint4(KeyDataInfo[i-1].key))
1023                 break;
1024         }
1025     }
1026 
1027     if (i < NumTerms) {
1028         ErrLogPrintf("NIsam key file %s not in sorted order!\n",
1029                      data->DBFileName);
1030 
1031         if (NoData) {
1032             ErrLogPrintf("unsorted or non-unique elements:"
1033                          "#%ld, #%ld :  %ld, %ld\n",
1034                          i-1, i, SwapUint4(KeyInfo[i-1]),
1035                          SwapUint4(KeyInfo[i]));
1036         } else {
1037             ErrLogPrintf("unsorted or non-unique elements:"
1038                          "#%ld, #%ld : %ld, %ld\n",
1039                          i-1, i, SwapUint4(KeyDataInfo[i-1].key),
1040                          SwapUint4(KeyDataInfo[i].key));
1041         }
1042         return ISAMNoOrder;
1043     }
1044 #endif
1045 
1046     /* Obtain the term offsets; select the sample terms. */
1047 
1048     MaxSamples = NumTerms/data->PageSize + 4;
1049 
1050     if (NoData)
1051         KeySamples = (Uint4Ptr)MemNew(sizeof(Uint4)*(MaxSamples+1));
1052     else
1053         KeyDataSamples = (NISAMKeyDataPtr) MemNew(sizeof(NISAMKeyData)*
1054                                                   (MaxSamples+1));
1055     SampleCount = 0;
1056 
1057     for (i = 0; i < NumTerms; i++) {
1058         if (i % data->PageSize == 0) {
1059             if (NoData)
1060                 KeySamples[SampleCount] = KeyInfo[i];
1061             else
1062                 KeyDataSamples[SampleCount] = KeyDataInfo[i];
1063             SampleCount++;
1064         }
1065     }
1066 
1067     if (NoData) {
1068         KeySamples[SampleCount] = SwapUint4(UINT4_MAX);
1069     } else {
1070         KeyDataSamples[SampleCount].key = SwapUint4(UINT4_MAX);
1071         KeyDataSamples[SampleCount].data = SwapUint4(0);
1072     }
1073 
1074     /* Create the sample file. */
1075 
1076     if((fd = FileOpen(data->IndexFileName, "wb")) == NULL)
1077       return ISAMBadFileName;
1078 
1079     /* Write the term counts and offsets to the sample file. */
1080 
1081     value = SwapUint4(Version);         /* Index version */
1082     FileWrite((CharPtr)&value, sizeof(Int4), 1, fd);
1083     value = SwapUint4(data->type);      /* Index type */
1084     FileWrite(&value, sizeof(Int4), 1, fd);
1085     value = SwapUint4(FileLength(data->DBFileName)); /* Length of DB file */
1086     FileWrite(&value, sizeof(Int4), 1, fd);
1087     value = SwapUint4(NumTerms);        /* Number of terms in DB file */
1088     FileWrite(&value, sizeof(Int4), 1, fd);
1089     value = SwapUint4(SampleCount);     /* Number of elements in index file */
1090     FileWrite(&value, sizeof(Int4), 1, fd);
1091     value = SwapUint4(data->PageSize);  /* Page size of ISAM */
1092     FileWrite(&value, sizeof(Int4), 1, fd);
1093     value = SwapUint4(0);      /* 0  max_line-size for strings here */
1094     FileWrite(&value, sizeof(Int4), 1, fd);
1095     value = SwapUint4(idx_option);  /* Option for the upper layer */
1096     FileWrite(&value, sizeof(Int4), 1, fd);
1097     value = SwapUint4(0);      /* This space reserved for future use */
1098     FileWrite(&value, sizeof(Int4), 1, fd);
1099 
1100     if (NoData) /* No swaping neeeded here */
1101         FileWrite((VoidPtr)KeySamples, sizeof(Uint4), SampleCount+1, fd);
1102     else
1103         FileWrite((VoidPtr)KeyDataSamples, sizeof(NISAMKeyData),
1104                   SampleCount+1, fd);
1105 
1106     FileClose(fd);
1107 
1108     if(data->mmp != NULL) {
1109         Nlm_MemMapFini(data->mmp);
1110         data->mmp = NULL;
1111     } else {
1112         MemFree(data->FileStart);
1113         data->FileStart = NULL;
1114     }
1115 
1116     if (NoData)
1117         MemFree(KeySamples);
1118     else
1119         MemFree(KeyDataSamples);
1120 
1121     return ISAMNoError;
1122 }
1123 
1124 /* ---------------------- ISAMMakeIndex --------------------------
1125    Purpose:     To create ISAM Intex file for Database file
1126 
1127    Parameters:  ISAM Object
1128    Returns:     ISAM itemized error code
1129    NOTE:        Special default rules for UNIX platform
1130   ------------------------------------------------------------------*/
ISAMMakeIndex(ISAMObjectPtr object,Int4 page_size,Int4 idx_option)1131 ISAMErrorCode ISAMMakeIndex(ISAMObjectPtr object,
1132                             Int4 page_size,       /* ISAM page size */
1133                             Int4 idx_option       /* Option for upper layer */
1134                             )
1135 {
1136     ISAMDataPtr data;
1137 
1138     if(object == NULL)
1139         return ISAMBadParameter;
1140 
1141     data = (ISAMDataPtr) object;
1142 
1143     if(data->type == ISAMString || data->type == ISAMStringDatabase)
1144         return ISAMMakeStringIndex(data, page_size, idx_option);
1145     else if (data->type == ISAMNumeric || data->type == ISAMNumericNoData)
1146         return ISAMMakeNumericIndex(data, page_size, idx_option);
1147     else
1148         return ISAMNotImplemented;
1149 }
1150 
1151 /* ---------------------- ISAMCreateDatabase ------------------------
1152    Purpose:     To create coded array/offsets and ISAM database files
1153                 from input files in special form:
1154                 All files are in sorted order and sorted through
1155                 format: <term><\2><uid><field-bit mask><CR>
1156 
1157    Parameters:  ISAM Object
1158                 files - list of sorted files to process
1159    Returns:     ISAM itemized error code
1160   ------------------------------------------------------------------*/
ISAMCreateDatabase(CharPtr PNTR files,Int4 num_files,Int4 MaxOffset,CharPtr BaseName,CharPtr DBExt,CharPtr IndexExt,CharPtr OffExt,CharPtr CodeExt)1161 ISAMErrorCode ISAMCreateDatabase(CharPtr PNTR files,
1162                                  Int4 num_files,
1163                                  Int4 MaxOffset,
1164                                  CharPtr BaseName,
1165                                  CharPtr DBExt,
1166                                  CharPtr IndexExt,
1167                                  CharPtr OffExt,
1168                                  CharPtr CodeExt)
1169 
1170 {
1171     ISAMDataPtr data;
1172     ISAMErrorCode error;
1173     Char DBName[MAX_FILENAME_LEN], filename[MAX_FILENAME_LEN];
1174     FILE *ca_fd, *off_fd, *out_fd;
1175     long count = 0, ca_count = 0, files_count;
1176     Int4 offset;
1177     unsigned long uid_in, uid_last, field_in;
1178     CharPtr chptr;
1179     CharPtr term, value;
1180     Int4 uidf_allocated;
1181     CharPtr prevterm;
1182     ISAMTmpCAPtr cap;
1183     ISAMUidFieldPtr uidf;
1184     long int lvalue1, lvalue2;
1185 
1186     if(BaseName == NULL || files == NULL)
1187       return ISAMBadParameter;
1188 
1189     if(DBExt != NULL)
1190       sprintf(DBName, "%s.%s", BaseName, DBExt);
1191     else
1192       sprintf(DBName, "%s", BaseName);
1193 
1194     if(IndexExt != NULL)
1195       sprintf(filename, "%s.%s", BaseName, IndexExt);
1196 
1197     if((data = (ISAMDataPtr) ISAMObjectNew(ISAMStringDatabase, DBName,
1198                                            IndexExt == NULL ?
1199                                            NULL : filename)) == NULL) {
1200         ErrLogPrintf("Creating of ISAM object failed\n");
1201         return ISAMMiscError;
1202     }
1203 
1204     data->CAName = StringSave(BaseName);
1205     data->CADBExt = StringSave(CodeExt);
1206     data->CAOffExt = StringSave(OffExt);
1207 
1208     if(MaxOffset != 0)
1209         data->CAMaxOffset = MaxOffset;
1210     else
1211         data->CAMaxOffset = DEFAULT_CA_MAX_OFFSET;
1212 
1213     sprintf(filename, "%s%ld.%s",
1214             data->CAName, (long) ca_count, data->CADBExt);
1215 
1216     if((out_fd = FileOpen(data->DBFileName, "bw")) == NULL)
1217         return ISAMBadFileName;
1218 
1219     sprintf(filename, "%s%ld.%s",
1220             data->CAName, (long) ca_count, data->CADBExt);
1221 
1222     if((ca_fd  = FileOpen(filename, "bw")) == NULL)
1223         return ISAMBadFileName;
1224 
1225     sprintf(filename, "%s%ld.%s",
1226             data->CAName, (long) ca_count, data->CAOffExt);
1227     if((off_fd = FileOpen(filename, "bw")) == NULL)
1228         return ISAMBadFileName;
1229 
1230     if(data->max_line_size == 0) {
1231         data->max_line_size = LINE_SIZE_CHUNK;
1232         data->line = MemNew(LINE_SIZE_CHUNK);
1233     }
1234 
1235     uidf = MemNew(sizeof(ISAMUidField)*UID_NUM_CHUNK);
1236     uidf_allocated = UID_NUM_CHUNK;
1237 
1238     cap = ISAMTmpCANew();
1239 
1240     for(files_count = 0; files_count < num_files; files_count++) {
1241 
1242         if((data->db_fd = FileOpen(files[ca_count], "r")) == NULL)
1243             return ISAMBadFileName;
1244 
1245         /* Reading first entry */
1246 
1247         ISAMReadLine(data);
1248         if((chptr = StringChr(data->line, ISAM_DATA_CHAR)) == NULL) {
1249           ErrLogPrintf("No ISAM delimiter char present in input. \n"
1250                        "Line: \"%s\" \n",
1251                        data->line);
1252             return ISAMMiscError;
1253         }
1254 
1255         *chptr = NULLB;
1256         term = data->line;
1257         value = chptr + 1;
1258 
1259         if((sscanf(value, "%ld %ld",
1260                    &lvalue1, &lvalue2)) != 2 || lvalue2 == 0){
1261             ErrLogPrintf("Invalidly formatted input file\n");
1262             return ISAMMiscError;
1263         }
1264 
1265         uidf[count].uid = lvalue1;
1266         uidf[count].field = lvalue2;
1267 
1268         uid_last = uidf[count].uid;
1269 
1270         if(++count > UID_NUM_CHUNK) {
1271           uidf_allocated += UID_NUM_CHUNK;
1272           uidf = Realloc(uidf, sizeof(ISAMUidField)*uidf_allocated);
1273         }
1274 
1275         prevterm = MemNew(LINE_SIZE_CHUNK);
1276         StringNCpy(prevterm, term, LINE_SIZE_CHUNK);
1277 
1278         /* Reading to the end of file */
1279 
1280         while(ISAMReadLine(data) > 0) {
1281           if(data->line[0] == NULLB)
1282             continue;
1283           if((chptr = StringChr(data->line, ISAM_DATA_CHAR)) == NULL) {
1284             ErrLogPrintf("No ISAM delimiter precent in the input\n"
1285                          "Line: \"%s\" \nPrevterm: \"%s\"\n",
1286                          data->line, prevterm);
1287             return ISAMMiscError;
1288           }
1289 
1290             *chptr = NULLB;
1291             term = data->line;
1292             value = ++chptr;
1293 
1294             /* Yes, we got new term, so closing information about
1295                previous */
1296 
1297             if(StringCmp(term, prevterm)) {
1298               if((error = ISAMDumpTermEntry(cap, off_fd, ca_fd,
1299                                             uidf, count, &offset)) !=
1300                    ISAMNoError) {
1301                   ErrLogPrintf("Failed to dump entry. All failed!\n"
1302                                "Term: \"%s\"\n", prevterm);
1303                     return error;
1304                 }
1305 
1306                 if(offset > data->CAMaxOffset) {
1307                     FileClose(ca_fd);
1308                     FileClose(off_fd);
1309                     ca_count++;
1310 
1311                     sprintf(filename, "%s%ld.%s",
1312                             data->CAName, (long) ca_count, data->CADBExt);
1313                     if((ca_fd  = FileOpen(filename, "bw")) == NULL)
1314                         return ISAMBadFileName;
1315 
1316                     sprintf(filename, "%s%ld.%s",
1317                             data->CAName, (long) ca_count, data->CAOffExt);
1318                     if((off_fd = FileOpen(filename, "bw")) == NULL)
1319                         return ISAMBadFileName;
1320                 }
1321 
1322                 fprintf(out_fd, "%s%c%ld %ld\n", prevterm,
1323                         ISAM_DATA_CHAR, (long) offset, (long) ca_count);
1324                 count = 0;
1325                 StringNCpy(prevterm, term, LINE_SIZE_CHUNK);
1326                 uid_last = -1;
1327             }
1328 
1329             if((sscanf(value,
1330                        "%ld %ld", &uid_in, &lvalue2)) != 2 || lvalue2 == 0) {
1331                 ErrLogPrintf("Invalidly formatted database. Database creation "
1332                              "failed.\nValue = \"%s\", Field = %ld\n",
1333                              value, (long) field_in);
1334                 return ISAMInvalidFormat;
1335             }
1336 
1337             uid_in = lvalue1;
1338             field_in = lvalue2;
1339 
1340             if(uid_last == uid_in) {
1341                 uidf[count].field |= field_in;
1342             } else {
1343                 uidf[count].uid = uid_in;
1344                 uidf[count].field = field_in;
1345                 uid_last = uid_in;
1346                 count++;
1347             }
1348 
1349             if(count >= uidf_allocated) {
1350                 uidf_allocated += UID_NUM_CHUNK;
1351                 uidf = Realloc(uidf, sizeof(ISAMUidField)*uidf_allocated);
1352             }
1353         }
1354         /* Writting last entry */
1355 
1356         if((error =
1357             ISAMDumpTermEntry(cap, off_fd, ca_fd,
1358                               uidf, count, &offset)) != ISAMNoError) {
1359             ErrLogPrintf("Failed to dump entry. All failed!\n");
1360             return error;
1361         }
1362 
1363         fprintf(out_fd, "%s%c%ld %ld\n", prevterm,
1364                 ISAM_DATA_CHAR, (long) offset, (long) ca_count);
1365 
1366         FileClose(data->db_fd);
1367         data->db_fd = NULL;
1368     }
1369 
1370     ISAMTmpCAFree(cap);
1371     MemFree(uidf);
1372     MemFree(prevterm);
1373 
1374     FileClose(ca_fd);
1375     FileClose(out_fd);
1376     FileClose(off_fd);
1377 
1378     if((error = ISAMMakeIndex((VoidPtr)data, 0, 0)) != ISAMNoError) {
1379         ErrLogPrintf("Failed to create ISAM String Index All failed!\n");
1380         return error;
1381     }
1382     ISAMObjectFree((VoidPtr)data);
1383     return ISAMNoError;
1384 }
1385 
1386 /* ---------------------- ISAMInitSearch --------------------------
1387    Purpose:     Initialize ISAM Numeric Search. Checks for any errors
1388 
1389    Parameters:  ISAM Object
1390    Returns:     ISAM Error Code
1391    NOTE:        None
1392    ------------------------------------------------------------------*/
ISAMInitSearch(ISAMObjectPtr object)1393 static ISAMErrorCode ISAMInitSearch(ISAMObjectPtr object)
1394 {
1395 
1396     Int4Ptr FileInfo;
1397     Int4 Version, IsamType, DBFileLength;
1398     ISAMDataPtr data;
1399     Int4 reserved2;
1400 
1401     if(object == NULL)
1402         return ISAMBadParameter;
1403 
1404     data = (ISAMDataPtr) object;
1405 
1406     if(data->initialized == TRUE)
1407         return ISAMNoError;
1408 
1409     if(!Nlm_MemMapAvailable()) {
1410         if((data->FileStart =
1411             ISAMReadFileInMemory(data->IndexFileName)) == NULL)
1412             return ISAMBadFileName;
1413         FileInfo = (Int4Ptr)data->FileStart;
1414     } else {
1415         if((data->mmp = Nlm_MemMapInit(data->IndexFileName)) == NULL)
1416             return ISAMMemMap;
1417 
1418         FileInfo = (Int4Ptr)data->mmp->mmp_begin;
1419     }
1420     /* For numeric search. */
1421     data->mfp = NlmOpenMFILE(data->DBFileName);
1422 
1423     /* Check for consistence of files and parameters */
1424 
1425     if((Version   = SwapUint4(FileInfo[0])) != ISAM_VERSION)
1426         return ISAMBadVersion;
1427 
1428     if((IsamType  = SwapUint4(FileInfo[1])) != data->type)
1429         return ISAMBadType;
1430 
1431     data->NumTerms      = SwapUint4(FileInfo[3]);
1432     data->NumSamples    = SwapUint4(FileInfo[4]);
1433     data->PageSize      = SwapUint4(FileInfo[5]);
1434     data->max_line_size = SwapUint4(FileInfo[6]);
1435 
1436     if(data->PageSize != MEMORY_ONLY_PAGE_SIZE) {
1437         /* Special case of memory-only index */
1438         if((DBFileLength =
1439             SwapUint4(FileInfo[2])) != FileLength(data->DBFileName))
1440             return ISAMWrongFile;
1441     }
1442 
1443     /* This space reserved for future use */
1444 
1445     data->idx_option    =  SwapUint4(FileInfo[7]);
1446     reserved2           =  SwapUint4(FileInfo[8]);
1447 
1448     if(data->max_line_size != 0)
1449         data->line = MemNew(data->max_line_size + 1);
1450 
1451     if(data->type == ISAMNumeric)
1452         data->KeyDataSamples = (NISAMKeyDataPtr)(FileInfo + 9);
1453     else
1454         data->KeySamples = (Uint4Ptr)(FileInfo + 9);
1455 
1456     data->initialized = TRUE;
1457     return ISAMNoError;
1458 }
1459 
1460 /* ------------------------ ISAMGetIdxOption ------------------------
1461    Purpose:     Returns user specified option from ISAM database
1462 
1463    Parameters:  ISAM object
1464    Returns:     User specified option (set while formating)
1465    NOTE:        None
1466   ------------------------------------------------------------------*/
ISAMGetIdxOption(ISAMObjectPtr object,Int4Ptr idx_option)1467 ISAMErrorCode ISAMGetIdxOption(ISAMObjectPtr object, Int4Ptr idx_option)
1468 {
1469     ISAMDataPtr data;
1470     ISAMErrorCode error;
1471 
1472     if(object == NULL)
1473         return ISAMMiscError;
1474 
1475     data = (ISAMDataPtr) object;
1476 
1477     if(data->initialized == FALSE) {
1478         if((error = ISAMInitSearch(object)) != ISAMNoError)
1479             return error;
1480     }
1481 
1482     *idx_option = data->idx_option;
1483 
1484     return ISAMNoError;
1485 }
1486 
1487 /* ------------------------ ISAMGetIdxOption ------------------------
1488    Purpose:     To set option to check or not check for non-unique
1489                 elements
1490    Parameters:  ISAM object
1491    Returns:     None
1492    NOTE:        None
1493   ------------------------------------------------------------------*/
ISAMSetCheckForNonUnique(ISAMObjectPtr object,Boolean test_non_unique)1494 void ISAMSetCheckForNonUnique(ISAMObjectPtr object, Boolean test_non_unique)
1495 {
1496     ISAMDataPtr data;
1497 
1498     if(object == NULL)
1499         return;
1500 
1501     data = (ISAMDataPtr) object;
1502 
1503     data->test_non_unique = test_non_unique;
1504 
1505     return;
1506 }
1507 
/* ----------------------- ISAMSetDataSorted ------------------------
   Purpose:     To mark the data as already sorted and to record the
                number of terms
   Parameters:  object    - ISAM object; NULL is ignored
                num_terms - number of terms stored in the object
   Returns:     None
   NOTE:        None
  ------------------------------------------------------------------*/
void ISAMSetDataSorted(ISAMObjectPtr object, Int4 num_terms)
{
    ISAMDataPtr data = (ISAMDataPtr) object;

    if(data == NULL)
        return;

    data->sorting_done = TRUE;
    data->NumTerms = num_terms;
}
1514 
1515 /* ---------------------- ISAMUninitSearch --------------------------
1516    Purpose:     Uninitialize an ISAM search (free all allocated and used
1517                 buffers and unmap and close all mapped/opened files).
1518                 Undoes what the ISAMInitSearch function does.
1519    Parameters:  ISAM object
1520    Returns:     ISAM Error Code
1521    NOTE:        None
1522   ------------------------------------------------------------------*/
ISAMUninitSearch(ISAMObjectPtr object)1523 ISAMErrorCode ISAMUninitSearch(ISAMObjectPtr object)
1524 {
1525     ISAMDataPtr data = NULL;
1526 
1527     if (!object)
1528         return ISAMBadParameter;
1529 
1530     if ( !(data = (ISAMDataPtr) object))
1531         return ISAMBadParameter;
1532 
1533     if (data->initialized == FALSE)
1534         return ISAMNoError;
1535 
1536     if (data->mmp != NULL) {
1537         Nlm_MemMapFini(data->mmp);
1538         data->mmp = NULL;
1539     } else {
1540         MemFree(data->FileStart);
1541         data->FileStart = NULL;
1542     }
1543 
1544     if (data->db_fd != NULL)
1545         FileClose(data->db_fd);
1546 
1547     NlmCloseMFILE(data->mfp);
1548 
1549     if (data->max_line_size != 0) {
1550         data->max_line_size = 0;
1551         MemFree(data->line);
1552         data->line = NULL;
1553     }
1554 
1555     data->initialized = FALSE;
1556 
1557     return ISAMNoError;
1558 }
1559 /* ---------------------- ISAMObjectFree --------------------------
1560    Purpose:     To terminate all allocated and used buffers
1561                 unmap and close all mapped/opened files
1562    Parameters:  ISAM object
1563    Returns:     None
1564    NOTE:        None
1565   ------------------------------------------------------------------*/
1566 
ISAMObjectFree(ISAMObjectPtr object)1567 void ISAMObjectFree(ISAMObjectPtr object)
1568 {
1569     ISAMDataPtr data = (ISAMDataPtr) object;
1570 
1571     if (ISAMUninitSearch(object) != ISAMNoError)
1572         return;
1573 
1574     if((data = (ISAMDataPtr) object) == NULL)
1575         return;
1576 
1577     MemFree(data->DBFileName);
1578     MemFree(data->IndexFileName);
1579     MemFree(data->CAName);
1580     MemFree(data->CADBExt);
1581     MemFree(data->CAOffExt);
1582 
1583     MemFree(data);
1584 
1585     return;
1586 }
1587 
/* Computes where page SampleNum begins and how many elements it holds.
   *Start receives the index of the first element of the page
   (SampleNum * PageSize).  The last page (SampleNum + 1 == NumSamples)
   holds whatever remains up to NumTerms; every other page holds
   PageSize elements. */
static Int4 GetPageNumElements(ISAMDataPtr data, Int4 SampleNum,
			       Int4Ptr Start)
{
    Int4 page_start = SampleNum * data->PageSize;

    *Start = page_start;

    if (SampleNum + 1 == data->NumSamples)
        return data->NumTerms - page_start;

    return data->PageSize;
}
1599 
1600 #define NCBISAM_ITER_MAX 30
1601 /* ------------------------ NISAMSearch ----------------------------
1602    Purpose:     Main search function of Numeric ISAM
1603 
1604    Parameters:  Key - integer to search
1605                 Data - returned value (for NISAM with data)
1606                 Index - internal index in database
1607    Returns:     ISAM Error Code
1608    NOTE:        None
1609   ------------------------------------------------------------------*/
NISAMSearch(ISAMObjectPtr object,Uint4 Number,Uint4Ptr Data,Uint4Ptr Index)1610 ISAMErrorCode NISAMSearch(ISAMObjectPtr object,
1611                           Uint4    Number,
1612                           Uint4Ptr Data,
1613                           Uint4Ptr Index
1614                           )
1615 {
1616     Boolean found;
1617     ISAMDataPtr data;
1618     Int4 Start = 0, Stop, SampleNum;
1619     Int4 NumElements, *KeyPage, *KeyPageStart;
1620     Int4 first, last, current, type;
1621     Boolean NoData;
1622     NISAMKeyDataPtr KeyDataPage=NULL, KeyDataSamples, KeyDataPageStart;
1623     Uint4Ptr KeySamples;
1624     Uint4 Key;
1625     ISAMErrorCode error;
1626 
1627     if((data = (ISAMDataPtr) object) == NULL)
1628         return ISAMBadParameter;
1629 
1630     if(data->initialized == FALSE) {
1631         if((error = ISAMInitSearch(object)) != ISAMNoError)
1632             return error;
1633     }
1634 
1635     NoData = (data->type == ISAMNumericNoData);
1636     KeyDataSamples = data->KeyDataSamples;
1637     KeySamples = data->KeySamples;
1638     type = data->type;
1639 
1640     /* search the sample file. */
1641 
1642     Stop = data->NumSamples -1;
1643 
1644     if (!data->lastKeyDataPage || Number <= data->first_gi || Number >= data->last_gi)
1645     {
1646        while(Stop >= Start) {
1647         SampleNum = ((Uint4)(Stop + Start)) >> 1;
1648         if (type == ISAMNumericNoData)
1649             Key = SwapUint4(KeySamples[SampleNum]);
1650         else
1651             Key = SwapUint4(KeyDataSamples[SampleNum].key);
1652 
1653         /* If this is an exact match, return the master term number. */
1654 
1655         if (Key == Number) {
1656             if (Data != NULL) {
1657                 if (NoData) {
1658                     *Data = SampleNum * data->PageSize;
1659                 } else {
1660                     *Data = SwapUint4(data->KeyDataSamples[SampleNum].data);
1661                 }
1662             }
1663             if(Index != NULL)
1664                 *Index = SampleNum * data->PageSize;
1665 
1666 	    /* NULL this out so we don't confuse the next lookup. */
1667 	    data->lastKeyDataPage = NULL;
1668 
1669             return ISAMNoError;
1670          }
1671 
1672          /* Otherwise, search for the next sample. */
1673          if ( Number < Key )
1674             Stop = --SampleNum;
1675          else
1676             Start = SampleNum +1;
1677        }
1678 
1679        /* If the term is out of range altogether, report not finding it. */
1680 
1681        if ( (SampleNum < 0) || (SampleNum >= data->NumSamples)) {
1682 
1683         if (Data != NULL)
1684             *Data = ISAMNotFound;
1685 
1686         if(Index != NULL)
1687             *Index = ISAMNotFound;
1688 
1689         return ISAMNotFound;
1690       }
1691 
1692       /* load the appropriate page of numbers into memory. */
1693 
1694       NumElements = GetPageNumElements(data, SampleNum, &Start);
1695       first = Start;
1696       last = Start + NumElements - 1;
1697 
1698       if (NoData) {
1699           if (data->mfp->mfile_true)
1700           {
1701               NlmSeekInMFILE(data->mfp, Start*sizeof(Int4), SEEK_SET);
1702               KeyPageStart = (Int4Ptr) data->mfp->mmp;
1703               KeyPage = KeyPageStart - Start;
1704           }
1705           else
1706           {
1707               KeyPageStart = (Int4Ptr) MemNew((NumElements + 1) * sizeof(Int4));
1708               NlmSeekInMFILE(data->mfp, Start*sizeof(Int4), SEEK_SET);
1709               NlmReadMFILE((Uint1Ptr)KeyPageStart, sizeof(Int4), NumElements,
1710                       data->mfp);
1711               KeyPage = KeyPageStart - Start;
1712           }
1713       } else {
1714           if (data->mfp->mfile_true)
1715           {
1716               NlmSeekInMFILE(data->mfp, Start*sizeof(NISAMKeyData), SEEK_SET);
1717               KeyDataPageStart = (NISAMKeyDataPtr) data->mfp->mmp;
1718               KeyDataPage = KeyDataPageStart - Start;
1719               /* The following data is used if the next lookup is on the same page. */
1720               data->first_gi = SwapUint4(KeyDataPage[first].key);
1721               data->last_gi = SwapUint4(KeyDataPage[last].key);
1722               data->first = first;
1723               data->last = last;
1724               data->lastKeyDataPage = KeyDataPage;
1725           }
1726           else
1727           {
1728               KeyDataPageStart = (NISAMKeyDataPtr) MemNew((NumElements + 1) *
1729                                                  sizeof(NISAMKeyData));
1730               NlmSeekInMFILE(data->mfp, Start*sizeof(NISAMKeyData), SEEK_SET);
1731               NlmReadMFILE((Uint1Ptr)KeyDataPageStart, sizeof(NISAMKeyData), NumElements,
1732                       data->mfp);
1733               KeyDataPage = KeyDataPageStart - Start;
1734           }
1735       }
1736     }
1737     else
1738     {
1739         first = data->first;
1740         last = data->last;
1741         KeyDataPage = data->lastKeyDataPage;
1742     }
1743 
1744     found = FALSE;
1745     /* Search the page for the number. */
1746     if (NoData) {
1747         while (first <= last)
1748         {
1749             current = (first+last)/2;
1750             Key = SwapUint4(KeyPage[current]);
1751             if (Key > Number)
1752                 last = --current;
1753             else if (Key < Number)
1754                 first = ++current;
1755             else
1756             {
1757                 found = TRUE;
1758                 break;
1759             }
1760         }
1761     } else {
1762         while (first <= last)
1763         {
1764             current = (first+last)/2;
1765             Key = SwapUint4(KeyDataPage[current].key);
1766             if (Key > Number)
1767                 last = --current;
1768             else if (Key < Number)
1769                 first = ++current;
1770             else
1771             {
1772                 found = TRUE;
1773                 break;
1774             }
1775         }
1776     }
1777 
1778 
1779     if (found == FALSE) /* not found. */
1780     {
1781 
1782         if (Data != NULL)
1783             *Data = ISAMNotFound;
1784 
1785         if(Index != NULL)
1786             *Index = ISAMNotFound;
1787 
1788 	if (data->mfp->mfile_true == FALSE)
1789 	{
1790     	    if (NoData)
1791        	     	KeyPageStart = MemFree(KeyPageStart);
1792        	    else
1793        	     	KeyDataPageStart = MemFree(KeyDataPageStart);
1794 	}
1795 
1796         return ISAMNotFound;
1797     }
1798 
1799     if (Data != NULL) {
1800         if (NoData)
1801             *Data = Start + current;
1802         else
1803             *Data =  SwapUint4(KeyDataPage[current].data);
1804     }
1805 
1806     if(Index != NULL)
1807         *Index = Start + current;
1808 
1809     if (data->mfp->mfile_true == FALSE)
1810     {
1811     	    if (NoData)
1812        	     	KeyPageStart = MemFree(KeyPageStart);
1813        	    else
1814        	     	KeyDataPageStart = MemFree(KeyDataPageStart);
1815     }
1816 
1817     return ISAMNoError;
1818 }
1819 
1820 /* ---------------------- NISAMSearchList --------------------------
1821    Purpose:       Perform search of multiple Keys
1822 
1823    Parameters:    NumKeys - number of input keys
1824                   Keys - array of keys
1825    Returns:       Data - array of returned values
1826                   Index - array of internal indexes
1827    NOTE:          None
1828   ------------------------------------------------------------------*/
NISAMSearchList(ISAMObjectPtr object,Int4 NumKeys,Uint4Ptr Keys,Uint4Ptr Data,Uint4Ptr Index)1829 ISAMErrorCode NISAMSearchList(ISAMObjectPtr object,
1830                               Int4     NumKeys,
1831                               Uint4Ptr Keys,
1832                               Uint4Ptr Data,
1833                               Uint4Ptr Index
1834                               )
1835 {
1836     Int4 count;
1837     ISAMErrorCode error;
1838 
1839     if (object == NULL || Data == NULL)
1840         return ISAMBadParameter;
1841 
1842     for (count = 0; count < NumKeys; count++) {
1843         if((error = NISAMSearch(object, Keys[count],
1844                                 Data + count, Index + count)) < 0)
1845             return error;
1846     }
1847 
1848     return ISAMNoError;
1849 }
1850 
ISAMDecompressCA(Uint1Ptr buffer,Int4 length,Int4 num_bits,Int4 num_uids)1851 static Uint4Ptr ISAMDecompressCA(Uint1Ptr buffer, Int4 length,
1852                                  Int4 num_bits, Int4 num_uids)
1853 {
1854     Uint4Ptr data;
1855     Int4 diff, dividend;
1856     Int4 i, template, base;
1857     Int4 byte_num = 0, bit_num = 0;
1858 
1859     if(buffer == NULL || num_uids == 0)
1860         return NULL;
1861 
1862     data = MemNew(sizeof(Uint4)*num_uids);
1863 
1864     base = PowersOfTwo[num_bits];
1865 
1866     for(i = 0; i < num_uids; i++) {
1867 
1868         diff = dividend = 0;
1869 
1870         if(num_bits != 0)
1871             template = PowersOfTwo[num_bits - 1];
1872         else
1873             template = 0;
1874 
1875         /* Reading dividend first */
1876 
1877         while(buffer[byte_num] & OneBit[bit_num]) {
1878             dividend++;
1879             if(++bit_num > 7) {
1880                 bit_num = 0;
1881                 byte_num++;
1882             }
1883         }
1884 
1885         /* And skipping following 0 bit */
1886 
1887         if(++bit_num > 7) {
1888             bit_num = 0;
1889             byte_num++;
1890         }
1891 
1892         for(; template; template >>= 1) {
1893             if(buffer[byte_num] & OneBit[bit_num])
1894                 diff |= template;
1895 
1896             if(++bit_num > 7) {
1897                 bit_num = 0;
1898                 byte_num++;
1899             }
1900         }
1901         data[i] = dividend*base + diff + (i == 0 ? 0 : (data[i-1] + 1));
1902 
1903     }  /* Over all uids */
1904 
1905     return data;
1906 }
1907 
ISAMDecompressFA(Uint1Ptr buffer,Int4 num_uids)1908 static Uint4Ptr ISAMDecompressFA(Uint1Ptr buffer, Int4 num_uids)
1909 {
1910     Uint4Ptr fields;
1911     Int4 i, j;
1912 
1913     if(buffer == NULL || num_uids == 0)
1914         return NULL;
1915 
1916     fields = MemNew(sizeof(Uint4)*num_uids);
1917 
1918     for(i = 0, j =0; j < num_uids; i++) {
1919 
1920         fields[j] |= PowersOfTwo[buffer[i] & FA_Mask];
1921 
1922         if(!(buffer[i] & 0x80)) {
1923             j++;
1924         }
1925     }
1926 
1927     return fields;
1928 }
1929 
1930 /* ------------------------ ISAMSearchTerm -------------------------
1931    Purpose:     Main search function of complete String ISAM
1932 
1933    Parameters:  object - ISAM Object
1934                 term_in - input string
1935                 field_mask - fields to search in 0 and -1 mean search
1936                 all fields
1937                 uid - array of returned uids
1938                 count number of returned uids
1939    Returns:     ISAM Error Code
1940    NOTE:        Initialization done with first call to this function
1941   ------------------------------------------------------------------*/
ISAMSearchTerm(ISAMObjectPtr object,CharPtr term_in,Uint4 field_mask,Uint4Ptr PNTR uid_out,Int4Ptr count)1942 ISAMErrorCode ISAMSearchTerm(ISAMObjectPtr object, CharPtr term_in,
1943                              Uint4 field_mask, Uint4Ptr PNTR uid_out,
1944                              Int4Ptr count)
1945 {
1946     ISAMErrorCode error;
1947     ISAMDataPtr data;
1948     CharPtr term = NULL, value = NULL;
1949     Uint1Ptr buffer;
1950     Int4 ca_count  = 0, num_uids = 0, num_bits, field_len=0;
1951     Int4 offset, i, j, length;
1952     Char filename[MAX_FILENAME_LEN];
1953     FILE *ca_fd, *off_fd;
1954     Uint4 bit_flag = 0;
1955     Uint4 numbers[32];
1956     Uint4 index;
1957     Uint4Ptr field, uid;
1958     long int lvalue1, lvalue2;
1959 
1960     if((data = (ISAMDataPtr) object) == NULL)
1961         return ISAMBadParameter;
1962 
1963     /* First searching for term in database */
1964 
1965     if((error = SISAMSearch(object, term_in,
1966                             0, &term, &value, &index)) != ISAMNoError) {
1967         *count = 0;
1968         *uid_out = NULL;
1969         return error;
1970     }
1971 
1972     MemFree(term);
1973 
1974     /* Now retriving information about uids and fields */
1975 
1976     if((sscanf(value, "%ld %ld", &lvalue1, &lvalue2)) != 2) {
1977         ErrLogPrintf("Error in database formating (%s)\n", value);
1978         return error;
1979     }
1980 
1981     offset = lvalue1;
1982     ca_count = lvalue2;
1983 
1984     MemFree(value);
1985 
1986     /* Opening corresponding files */
1987 
1988     sprintf(filename, "%s%ld.%s",
1989             data->CAName, (long) ca_count, data->CAOffExt);
1990 
1991     if((off_fd = FileOpen(filename, "r")) == NULL)
1992         return ISAMBadFileName;
1993 
1994     sprintf(filename, "%s%ld.%s",
1995             data->CAName, (long) ca_count, data->CADBExt);
1996 
1997     if((ca_fd  = FileOpen(filename, "r")) == NULL)
1998         return ISAMBadFileName;
1999 
2000     /* Getting header/offset information */
2001 
2002     MemSet(numbers, 0, sizeof(numbers));
2003 
2004     if((fseek(off_fd, offset, SEEK_SET)) != 0)
2005         return ISAMFseekFailed;
2006 
2007 
2008     FileRead(&num_uids, 1, sizeof(Uint4), off_fd);
2009     FileRead(&offset, 1, sizeof(Uint4), off_fd);
2010     FileRead(&length, 1, sizeof(Uint4), off_fd);
2011     FileRead(&num_bits, 1, sizeof(Uint4), off_fd);
2012     FileRead(&bit_flag, 1, sizeof(Uint4), off_fd);
2013 
2014     if(field_mask == 0)
2015         field_mask = (Uint4)(-1);
2016 
2017     if(!(bit_flag & field_mask)) { /* Do not satisfy given bitmask */
2018         *count = 0;
2019         *uid_out = NULL;
2020         return ISAMNotFound;
2021     }
2022 
2023     for(j = 0; j < 32; j++) {
2024         if(bit_flag & PowersOfTwo[j]) {
2025             FileRead(&numbers[j], 1, sizeof(Uint4), off_fd);
2026             field_len += numbers[j];
2027         }
2028     }
2029 
2030     /* Now reading uids and fields from CA/FA file */
2031 
2032     fseek(ca_fd, offset, SEEK_SET);
2033     buffer = MemNew(length);
2034     FileRead(buffer, 1, length, ca_fd);
2035 
2036     if((uid = ISAMDecompressCA(buffer, length,
2037                                 num_bits, num_uids)) == NULL) {
2038         ErrLogPrintf("Cannot decompress coded array. Retrieve failed.");
2039         return ISAMMiscError;
2040     }
2041 
2042     MemFree(buffer);
2043     buffer = MemNew(field_len);
2044 
2045     FileRead(buffer, 1, field_len, ca_fd);
2046 
2047     if((field = ISAMDecompressFA(buffer, num_uids)) == NULL) {
2048         ErrLogPrintf("Cannot decompress fields array. Retrieve failed.");
2049         return ISAMMiscError;
2050     }
2051 
2052     /* Now filtering returned uids by field_mask */
2053 
2054     for(i = 0, j = 0; i < num_uids; i++) {
2055         if(field[i] & field_mask) {
2056             uid[j] = uid[i];
2057             j++;
2058         }
2059     }
2060 
2061     *count = j;
2062     *uid_out = uid;
2063 
2064     MemFree(field);
2065     MemFree(buffer);
2066     FileClose(ca_fd);
2067     FileClose(off_fd);
2068 
2069     return ISAMNoError;
2070 }
2071 
ISAMGetDataNumber(CharPtr KeyData)2072 static Int4 ISAMGetDataNumber(CharPtr KeyData)
2073 {
2074     CharPtr chptr;
2075     Int4 count, value;
2076     long int lvalue;
2077 
2078     if((chptr = StringChr(KeyData, ISAM_DATA_CHAR)) != NULL) {
2079         chptr++;
2080         if((count  = sscanf(chptr, "%ld", &lvalue)) != 1)
2081             return -1;
2082         else {
2083             value = lvalue;
2084             return value;
2085         }
2086     }
2087     return -1;
2088 }
2089 
2090 /*
2091   This returns the position of the first character that differs
2092    between the query Term and the Isam Key, or -1 if they are identical.
2093 */
ISAMDiffChar(CharPtr Term,CharPtr Key,Boolean IgnoreCase)2094 static Int4 ISAMDiffChar(CharPtr Term, CharPtr Key, Boolean IgnoreCase)
2095 
2096 {
2097     CharPtr Start = Term;
2098 
2099     if(IgnoreCase) {
2100         while(*Term && (TO_UPPER(*Term) == TO_UPPER(*Key))) {
2101             Term++;
2102             Key++;
2103         }
2104     } else {
2105         while(*Term && (*Term == *Key)) {
2106             Term++;
2107             Key++;
2108         }
2109     }
2110 
2111     if(*Term != NULLB)
2112         return((Int4)(Term - Start));
2113 
2114     for(;;) {
2115         if (ENDS_ISAM_KEY(Key))
2116             return(-1);
2117 
2118         if (*Key != ' ')
2119             break;
2120 
2121         Key++;
2122     }
2123 
2124     return((Int4)(Term - Start));
2125 }
2126 
2127 #define ID_DATA_CHUNK 16
SISAMFindAllData(ISAMObjectPtr object,CharPtr term_in,Int4Ptr PNTR ids_out,Int4Ptr count_out)2128 ISAMErrorCode SISAMFindAllData(ISAMObjectPtr object,
2129                                CharPtr term_in,
2130                                Int4Ptr PNTR ids_out,
2131                                Int4Ptr count_out)
2132 {
2133     ISAMDataPtr   data;
2134     ISAMErrorCode error;
2135     Int4          index, Start, Stop;
2136     Int4          i, Diff = 0, SampleNum, Pos;
2137     Int4          TermNum, count, NumBytes, allocated;
2138     Int4Ptr       ids;
2139     Uint4Ptr      SamplePos;
2140     CharPtr       Page, Key, FileStart, Ptr, chptr;
2141     CharPtr       value, key_out;
2142 
2143     if((data = (ISAMDataPtr) object) == NULL)
2144         return ISAMBadParameter;
2145 
2146     *count_out = 0;
2147     *ids_out = NULL;
2148 
2149     if(data->initialized == FALSE) {
2150         if((error = ISAMInitSearch(object)) != ISAMNoError)
2151             return error;
2152     }
2153 
2154     if((error = SISAMSearch(object, term_in, 0, &key_out,
2155                             &value, (Uint4Ptr) &index)) != ISAMNoError) {
2156         return error;
2157     }
2158 
2159     MemFree(key_out);
2160     MemFree(value);
2161 
2162     if(data->mmp != NULL)
2163         FileStart = (CharPtr)data->mmp->mmp_begin;
2164     else
2165         FileStart = data->FileStart;
2166 
2167     if(data->PageSize != MEMORY_ONLY_PAGE_SIZE)
2168         SamplePos = data->KeySamples + data->NumSamples + 1;
2169     else
2170         SamplePos = data->KeySamples;
2171 
2172     SampleNum = index / data->PageSize;
2173     TermNum   = index % data->PageSize;
2174 
2175     Start = SampleNum; Stop = SampleNum;
2176     if(TermNum == 0) { /* Exact match. Borders must be checked */
2177         for(i = 1; Diff == -1 && (SampleNum - i) >= 0; i++) {
2178             Key = FileStart + SwapUint4(SamplePos[SampleNum-i]);
2179             if((Diff = ISAMDiffChar(term_in, Key, TRUE)) == -1)
2180                 Start = SampleNum - i;
2181         }
2182         for(i = 1; Diff == -1 && (SampleNum + i) < data->NumSamples; i++) {
2183             Key = FileStart + SwapUint4(SamplePos[SampleNum + i]);
2184             if((Diff = ISAMDiffChar(term_in, Key, TRUE)) == -1)
2185                 Stop = SampleNum + i;
2186         }
2187         if(Start == Stop) { /* We have to load 2 pages */
2188             if(Start-- < 0) Start = 0;
2189             if(Stop++ > data->NumSamples -1)
2190                 Stop = data->NumSamples -1;
2191         }
2192     } else {
2193         Stop++;
2194     }
2195 
2196     /* Reading all in memory */
2197 
2198     Pos = SwapUint4(data->KeySamples[Start]);
2199     NumBytes = SwapUint4(data->KeySamples[Stop]) - Pos;
2200     Page = (CharPtr) MemNew(NumBytes + 1);
2201     NlmSeekInMFILE(data->mfp, Pos, SEEK_SET);
2202     NlmReadMFILE((Uint1Ptr)Page, sizeof(Char), NumBytes, data->mfp);
2203     Page[NumBytes] = NULLB;
2204 
2205     /* Now removing all \n and \r characters */
2206 
2207     for(chptr = Page; *chptr != NULLB; chptr++) {
2208         if(*chptr == '\n' || *chptr == '\r')
2209             *chptr = NULLB;
2210     }
2211 
2212     /* Search the page for the term. */
2213 
2214     allocated = ID_DATA_CHUNK;
2215     ids = MemNew(sizeof(Int4) * allocated);
2216     count = 0;
2217 
2218     Ptr = Page;
2219     while (Ptr - Page < NumBytes) {
2220         Diff = ISAMDiffChar(term_in, Ptr, TRUE);
2221 
2222         if (Diff == -1) {
2223             if(count >= allocated) {
2224                 allocated += ID_DATA_CHUNK;
2225                 ids = Realloc(ids, allocated * sizeof(Int4));
2226             }
2227             ids[count] = ISAMGetDataNumber(Ptr);
2228             count++;
2229         }
2230         Ptr += StringLen(Ptr);
2231 
2232         while(Ptr - Page < NumBytes && *Ptr == NULLB)
2233             Ptr++;
2234     }
2235 
2236     *count_out = count;
2237     *ids_out   = ids;
2238 
2239     MemFree(Page);
2240 
2241     return ISAMNoError;
2242 }
2243 
/*
  Splits a "key<ISAM_DATA_CHAR>data" line into separately allocated
   copies of its two halves.  Key and Data may each be NULL when that
   half is not wanted.  A line without ISAM_DATA_CHAR gives the whole
   line as key and an empty string as data.  Nothing is stored when
   KeyData is NULL.
*/
static void ISAMExtractData(CharPtr KeyData,
                            CharPtr PNTR Key, CharPtr PNTR Data)
{
    CharPtr scratch, separator;

    if (KeyData == NULL)
        return;

    /* Work on a private copy so the caller's line is left untouched. */
    scratch   = StringSave(KeyData);
    separator = StringChr(scratch, ISAM_DATA_CHAR);

    if(separator != NULL)
        *separator = NULLB;     /* cut the key off at the separator */

    if(Data != NULL)
        *Data = StringSave(separator != NULL ? separator + 1 : "");

    if(Key != NULL)
        *Key = StringSave(scratch);

    MemFree(scratch);
}
2269 
SISAMSearch(ISAMObjectPtr object,CharPtr term_in,Int4 flags,CharPtr PNTR term_out,CharPtr PNTR value,Uint4Ptr index)2270 ISAMErrorCode SISAMSearch(ISAMObjectPtr object,
2271                           CharPtr term_in,
2272                           Int4 flags,
2273                           CharPtr PNTR term_out,
2274                           CharPtr PNTR value,
2275                           Uint4Ptr index)
2276 {
2277     ISAMDataPtr   data;
2278     ISAMErrorCode error;
2279 
2280     Int4 Diff, Start, Stop, SampleNum, Length, Pos;
2281     Int4 TermNum, NumBytes, FoundShort = -1;
2282     CharPtr Page, Key, FileStart, Ptr, ShortTerm, chptr;
2283     Uint4Ptr SamplePos;
2284     Boolean IgnoreCase, Short, Follow;
2285 
2286     if((data = (ISAMDataPtr) object) == NULL)
2287         return ISAMBadParameter;
2288 
2289     if(data->initialized == FALSE) {
2290         if((error = ISAMInitSearch(object)) != ISAMNoError)
2291             return error;
2292     }
2293 
2294     IgnoreCase = TRUE;  /* We will set this option to avoid more
2295                            complications */
2296 
2297     /* search the sample file first */
2298 
2299     Start = 0;
2300     Stop = data->NumSamples -1;
2301     Length = StringLen(term_in);
2302     ShortTerm= MemNew(data->max_line_size);
2303 
2304     Follow     = (Boolean)(flags & ISAM_FOLLOW_KEY);
2305     Short      = (Boolean)((flags & ISAM_SHORT_KEY) || Follow);
2306 
2307     if(data->mmp != NULL)
2308         FileStart = (CharPtr)data->mmp->mmp_begin;
2309     else
2310         FileStart = data->FileStart;
2311 
2312     if(data->PageSize != MEMORY_ONLY_PAGE_SIZE)
2313         SamplePos = data->KeySamples + data->NumSamples + 1;
2314     else
2315         SamplePos = data->KeySamples;
2316 
2317     while(Stop >= Start) {
2318         SampleNum = ((Uint4)(Stop + Start)) >> 1;
2319 
2320         Key = FileStart + SwapUint4(SamplePos[SampleNum]);
2321         Diff = ISAMDiffChar(term_in, Key, IgnoreCase);
2322 
2323         /* If this is an exact match, return the master term number. */
2324         if (Diff == -1) {
2325             ISAMExtractData(Key, term_out, value);
2326             *index = data->PageSize * SampleNum;
2327             MemFree(ShortTerm);
2328             return ISAMNoError;
2329         }
2330 
2331         /* If the key is a superset of the sample term, backup until just
2332            before the term. */
2333         if (Short && (Diff >= Length)) {
2334             if (SampleNum > 0)
2335               SampleNum--;
2336 
2337             if (IgnoreCase) {
2338 	      while((SampleNum > 0) &&
2339 		    (StrNICmp(term_in,
2340                               FileStart+SwapUint4(SamplePos[SampleNum]),
2341                               Length) == 0))
2342                   SampleNum--;
2343             } else {
2344                 while((SampleNum > 0) &&
2345                       (StrNCmp(term_in,
2346                                FileStart + SwapUint4(SamplePos[SampleNum]),
2347                                Length) == 0))
2348                     SampleNum--;
2349             }
2350 
2351             FoundShort = SampleNum + 1;
2352             Ptr = FileStart + SwapUint4(SamplePos[SampleNum+1]);
2353             StringCpy(ShortTerm, Ptr);
2354             break;
2355         } else
2356             /* If preceding is desired, note the key.  */
2357 
2358             if (Follow) {
2359                 FoundShort = SampleNum;
2360                 StringCpy(ShortTerm, Key);
2361             }
2362 
2363         /* Otherwise, search for the next sample. */
2364         if (IgnoreCase ? TO_LOWER(term_in[Diff]) < TO_LOWER(Key[Diff]) :
2365             term_in[Diff] < Key[Diff])
2366             Stop = --SampleNum;
2367         else
2368             Start = SampleNum +1;
2369     }
2370 
2371     /* If the term is out of range altogether, report not finding it. */
2372 
2373     if ( (SampleNum < 0) || (SampleNum >= data->NumSamples)) {
2374         MemFree(ShortTerm);
2375         return ISAMNotFound;
2376     }
2377 
2378     /* load the appropriate page of terms into memory. */
2379 
2380     Pos = SwapUint4(data->KeySamples[SampleNum]);
2381 
2382     NumBytes = SwapUint4(data->KeySamples[SampleNum + 1]) - Pos;
2383     Page = (CharPtr) MemNew(NumBytes + 1);
2384     NlmSeekInMFILE(data->mfp, Pos, SEEK_SET);
2385     NlmReadMFILE((Uint1Ptr)Page, sizeof(Char), NumBytes, data->mfp);
2386     Page[NumBytes] = NULLB;
2387 
2388     /* Now removing all \n and \r characters */
2389 
2390     for(chptr = Page; *chptr != NULLB; chptr++) {
2391         if(*chptr == '\n' || *chptr == '\r')
2392             *chptr = NULLB;
2393     }
2394 
2395     /* Search the page for the term. */
2396     TermNum = 0;
2397     Ptr = Page;
2398     while (Ptr - Page < NumBytes) {
2399         Diff = ISAMDiffChar(term_in, Ptr, IgnoreCase);
2400 
2401         if (Diff == -1) /* Complete match */
2402             break;
2403 
2404         if (Short && (Diff >= Length)) /* Partialy complete */
2405             break;
2406 
2407         /* Just next available term accepted */
2408 
2409         if (Follow && (IgnoreCase ?
2410                        TO_UPPER(term_in[Diff]) < TO_UPPER(Ptr [Diff]) :
2411                        term_in[Diff] < Ptr [Diff]))
2412             break;
2413 
2414         Ptr += StringLen(Ptr);
2415 
2416         while(Ptr - Page < NumBytes && *Ptr == NULLB)
2417             Ptr++;
2418 
2419         TermNum++;
2420     }
2421 
2422     /* If we didn't find a match in the page, then we failed, unless the
2423        items that begins the next page is a match (only possible if
2424        ISAM_SHORT_KEY or ISAM_FOLLOW_KEY was specified. */
2425     if (Ptr - Page == NumBytes) {
2426 
2427         MemFree(Page);
2428 
2429         if (FoundShort >= 0) {
2430             ISAMExtractData(ShortTerm, term_out, value);
2431             *index = data->PageSize * FoundShort;
2432             MemFree(ShortTerm);
2433             return ISAMNoError;
2434         } else {
2435             *index = (Uint4) -1;
2436             MemFree(ShortTerm);
2437             return ISAMNotFound;
2438         }
2439     }
2440 
2441     /* Otherwise, we found a match. */
2442     ISAMExtractData(Ptr, term_out, value);
2443 
2444     *index = (data->PageSize * SampleNum) + TermNum;
2445 
2446     MemFree(Page);
2447     MemFree(ShortTerm);
2448 
2449     return ISAMNoError;
2450 }
2451 
2452 /* ------------------------  NISAMFindKey ---------------------------
2453    Purpose:     Return Key value by absolute internal index
2454 
2455    Parameters:  Index - absolute internal index
2456    Returns:     Key   - corresponding key value
2457    Data  - corresponding data value
2458    NOTE:
2459    ------------------------------------------------------------------*/
NISAMFindKey(ISAMObjectPtr object,Int4 Index,Uint4Ptr Key,Uint4Ptr Data)2460 ISAMErrorCode NISAMFindKey(ISAMObjectPtr object,
2461                            Int4 Index,
2462                            Uint4Ptr Key,
2463                            Uint4Ptr Data
2464                            )
2465 {
2466     return NISAMFindKeys(object, Index, Index, Key, Data);
2467 }
2468 
2469 
2470 /* ----------------------  NISAMFindKeys -------------------------
2471    Purpose:     Retuns set of Key/Data pairs from
2472    First to Last internal index
2473 
2474    Parameters:  First - beginning of interval
2475                 Last  - the end of interval
2476    Returns:     Keys - array of Keys
2477                 Data - array of Data
2478    NOTE:        None
2479   ------------------------------------------------------------------*/
NISAMFindKeys(ISAMObjectPtr object,Int4 First,Int4 Last,Uint4Ptr Keys,Uint4Ptr Data)2480 ISAMErrorCode NISAMFindKeys(ISAMObjectPtr object,
2481                             Int4     First,
2482                             Int4     Last,
2483                             Uint4Ptr Keys,
2484                             Uint4Ptr Data
2485                             )
2486 {
2487     ISAMDataPtr data = (ISAMDataPtr)object;
2488     Int4 TotalNums, count;
2489     Int4Ptr KeyPage;
2490     NISAMKeyDataPtr KeyDataPage;
2491     Boolean NoData = (data->type == ISAMNumericNoData);
2492     ISAMErrorCode error;
2493 
2494     if(data == NULL)
2495         return ISAMBadParameter;
2496 
2497     if(data->initialized == FALSE) {
2498         if((error = ISAMInitSearch(object)) != ISAMNoError)
2499             return error;
2500     }
2501 
2502     if ((First < 0) || (Last >= data->NumTerms) || (First > Last))
2503         return ISAMBadParameter;
2504 
2505     TotalNums = Last-First + 1;
2506 
2507     if (NoData) {
2508         KeyPage = (Int4Ptr)MemNew((TotalNums + 1) * sizeof(Int4));
2509         NlmSeekInMFILE(data->mfp, First*sizeof(Int4), SEEK_SET);
2510         NlmReadMFILE((Uint1Ptr)KeyPage, sizeof(Int4), TotalNums, data->mfp);
2511     } else {
2512         KeyDataPage = (NISAMKeyDataPtr)MemNew((TotalNums + 1) *
2513                                               sizeof(NISAMKeyData));
2514         NlmSeekInMFILE(data->mfp, First*sizeof(NISAMKeyData), SEEK_SET);
2515         NlmReadMFILE((Uint1Ptr)KeyDataPage, sizeof(NISAMKeyData), TotalNums,
2516                 data->mfp);
2517     }
2518 
2519     if (NoData) {
2520         for (count = 0; count < TotalNums; count++) {
2521             if (Keys != NULL)
2522                 Keys[count] = SwapUint4(KeyPage[count]);
2523             if (Data != NULL)
2524                 Data[count] = First + count;
2525         }
2526     } else {
2527         for (count = 0; count < TotalNums; count++) {
2528             if (Keys != NULL)
2529                 Keys[count] = SwapUint4(KeyDataPage[count].key);
2530             if (Data != NULL)
2531                 Data[count] = SwapUint4(KeyDataPage[count].data);
2532         }
2533     }
2534 
2535     if (NoData)
2536         MemFree(KeyPage);
2537     else
2538         MemFree(KeyDataPage);
2539 
2540     return ISAMNoError;
2541 }
2542 
2543 /* ------------------------  ISAMNumTerms ---------------------------
2544    Purpose:     Returns total number of terms in ISAM database
2545 
2546    Parameters:  ISAM object
2547    Returns:     Number of terms
2548    NOTE:        None
2549   ------------------------------------------------------------------*/
ISAMNumTerms(ISAMObjectPtr object,Int4Ptr terms)2550 ISAMErrorCode ISAMNumTerms(ISAMObjectPtr object, Int4Ptr terms)
2551 {
2552     ISAMDataPtr data = (ISAMDataPtr) object;
2553     ISAMErrorCode error;
2554 
2555     if(data == NULL || terms == NULL)
2556         return ISAMBadParameter;
2557 
2558     if(data->initialized == FALSE) {
2559         if((error = ISAMInitSearch(object)) != ISAMNoError)
2560             return error;
2561     }
2562 
2563     *terms = data->NumTerms;
2564     return ISAMNoError;
2565 }
2566 
2567 /****************************************************************************/
/* INTERNAL FUNCTIONS  */
2569 /****************************************************************************/
2570 
ISAMCountLines(ISAMDataPtr data)2571 ISAMErrorCode ISAMCountLines(ISAMDataPtr data)
2572      /* this returns the number of lines in a file. */
2573 {
2574     if(data == NULL)
2575         return ISAMBadParameter;
2576 
2577     data->NumTerms = 0;
2578     rewind(data->db_fd);
2579     while(ISAMReadLine(data) > 0)
2580         data->NumTerms++;
2581 
2582     rewind(data->db_fd);
2583     return ISAMNoError;
2584 }
2585 
#ifdef NISAM_TEST_MODULE
/* Self-test for Numeric ISAM: reads every key/data pair with
   NISAMFindKeys, then looks each key up again with NISAMSearch and
   checks that the same data value and position come back.
   Both the key file and an already existing index file must be given;
   this test does not build the index itself.
   Returns 0 when every key round-trips, 1 otherwise. */
Int2 Main(void)
{
    ISAMObjectPtr object;
    Int4 i, terms, key_failed=0;
    Uint4Ptr Keys = NULL, Data = NULL;
    Uint4 Value;
    Uint4 Index;
    ISAMErrorCode error;
    Int2 status = 1;
    CharPtr PNTR argv = GetArgv();
    Int4         argc = GetArgc();

    /* argv[2] is used below, so two arguments are required. */
    if(argc < 3) {

        printf("USAGE: %s <key file name> <index filename>\n", argv[0]);
        return 1;
    }

    if((object = ISAMObjectNew(ISAMNumeric, argv[1], argv[2])) == NULL) {
        printf("Failed to create ISAM object.\n");
        return 1;
    }

    if((error = ISAMNumTerms(object, &terms)) != ISAMNoError) {
        printf("Failed to return number of terms. "
               "Error code is %d\n", (int) error);
        goto done;
    }

    printf("Number of terms is %ld\n", (long) terms);

    Keys = (Uint4Ptr) MemNew(terms*sizeof(Uint4));
    Data = (Uint4Ptr) MemNew(terms*sizeof(Uint4));

    if(Keys == NULL || Data == NULL) {
        printf("Failed to allocate memory for %ld terms.\n", (long) terms);
        goto done;
    }

    if((error = NISAMFindKeys(object, 0,
                              terms-1, Keys, Data)) != ISAMNoError) {
        printf("Failed to find keys. Error code is %d\n", (int) error);
        goto done;
    }

    for(i=0; i < terms; i++) {
        if(i%1000 == 0)
            printf("Passed number %ld\n", (long) i);

        if((error = NISAMSearch(object, Keys[i],
                                &Value, &Index)) != ISAMNoError) {
            printf("Failed to search. Error code is %d\n", (int) error);
            goto done;
        }

        if(Value != Data[i] || Index != (Uint4) i) {
            printf("ISAM failed for key = %lu\n "
                   "Index: %lu expected %ld\n"
                   "Value: %lu expected %lu\n",
                   (unsigned long) Keys[i],
                   (unsigned long) Index, (long) i,
                   (unsigned long) Value, (unsigned long) Data[i]
                   );
            /* Give up after a bit more than 100 reported failures. */
            if(key_failed++ > 100)
                break;
        }
    }

    if(key_failed == 0) {
        printf("Test succeeded\n");
        status = 0;
    }

 done:
    if(Keys != NULL)
        MemFree(Keys);
    if(Data != NULL)
        MemFree(Data);
    ISAMObjectFree(object);
    return status;
}
#endif
#ifdef SISAM_TEST_MODULE

/* Self-test for String ISAM: builds the index when no index file name
   is given, then looks up the key of every line of the key file (the
   loop stops after line number 5000) and checks that the same key and
   position are returned.
   Returns 0 when every lookup agrees, 1 otherwise. */
Int2 Main(void)
{
    ISAMErrorCode error;
    CharPtr data, key_out, chptr;
    CharPtr index_name;
    Uint4 index;
    ISAMObjectPtr isamp;
    Char tmpbuff[1024];
    FILE *fd = NULL;
    Int4 i, mismatches = 0;
    size_t len;
    Int2 status = 1;
    CharPtr PNTR argv = GetArgv();
    Int4         argc = GetArgc();

    if(argc < 2) {
        printf("USAGE: %s <key file name> [<index file name>]\n", argv[0]);
        return 1;
    }

    /* The index file name is optional. */
    index_name = (argc > 2) ? argv[2] : NULL;

    if((isamp = ISAMObjectNew(ISAMString, argv[1], index_name)) == NULL) {
        printf("Failed to create ISAM object.\n");
        return 1;
    }

    if(index_name == NULL) {
        printf("Indexing file %s ...\n", argv[1]);

        if((error = ISAMMakeIndex(isamp, 1)) != ISAMNoError) {
            printf("Creating of index failed with error code %d\n",
                   (int) error);
            goto done;
        }
    }

    if((fd = FileOpen(argv[1], "r")) == NULL) {
        printf("Failed to open file %s\n", argv[1]);
        goto done;
    }

    for(i=0; fgets(tmpbuff, sizeof(tmpbuff), fd) != NULL; i++) {

        /* Drop the line terminator only if there is one, so a last
           line without newline keeps its final character. */
        len = StringLen(tmpbuff);
        while(len > 0 &&
              (tmpbuff[len-1] == '\n' || tmpbuff[len-1] == '\r'))
            tmpbuff[--len] = NULLB;

        if((chptr = StringChr(tmpbuff, ISAM_DATA_CHAR)) != NULL)
            *chptr = NULLB;

        if((error = SISAMSearch(isamp, tmpbuff, 0, &key_out,
                                &data, &index)) != ISAMNoError) {
            printf("Search failed with error code %d\n"
                   "String: %s\n", (int) error, tmpbuff);
            goto done;
        }

        if(index != (Uint4) i) {
            mismatches++;
            printf("Position mismatch:\n"
                   "String: %s\nData: %s\n"
                   "Position: %lu (expected %ld)\n",
                   key_out, data, (unsigned long) index, (long) i);
        }
        if(StringCmp(tmpbuff, key_out)) {
            mismatches++;
            printf("String mismatch:\n"
                   "String: %s\nExpected: %s\n"
                   "Data: %s\n"
                   "Position: %lu (expected %ld)\n",
                   key_out, tmpbuff, data, (unsigned long) index, (long) i);
        }
        if(i%100 == 0)
            printf("Passed index %ld\n", (long) i);

        MemFree(key_out);
        MemFree(data);
        if(i == 5000)
            break;
    }

    if(mismatches == 0) {
        printf("TEST SUCCESSFUL!!!\n");
        status = 0;
    }

 done:
    if(fd != NULL)
        FileClose(fd);
    ISAMObjectFree(isamp);
    return status;
}
#endif
2729 
2730 
2731 
2732 
2733