1 static char const rcsid[] = "$Id: ncbisam.c,v 6.34 2007/11/06 19:20:06 coulouri Exp $";
2
3 /* $Id: ncbisam.c,v 6.34 2007/11/06 19:20:06 coulouri Exp $
4 * ===========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================
27 *
28 * File Name: $RCSfile: ncbisam.c,v $
29 *
30 * Author: Sergei Shavirin
31 *
32 * Initial Version Creation Date: 02/24/1997
33 *
34 * $Revision: 6.34 $
35 *
36 * File Description:
37 * Main file for ISAM library
38 *
39 * $Log: ncbisam.c,v $
40 * Revision 6.34 2007/11/06 19:20:06 coulouri
41 * fix memory allocation; resolves blast-rt#15351152
42 *
43 * Revision 6.33 2006/06/21 13:55:06 camacho
44 * Fixed from Ilya Dondoshansky in s_ISAMBufferReadLine
45 * Change FILEREAD_BUFFER_SIZE from 1MB to 64k
46 *
47 * Revision 6.32 2006/05/10 20:47:15 camacho
48 * From Ilya Dondoshansky: In ISAMMakeStringIndex: read large chunks from file instead of one line at a time.
49 *
50 * Revision 6.31 2005/07/28 14:57:10 coulouri
51 * remove dead code
52 *
53 * Revision 6.30 2003/05/30 17:25:37 coulouri
54 * add rcsid
55 *
56 * Revision 6.29 2003/04/14 19:52:31 camacho
57 * Added ISAMUninitSearch
58 *
59 * Revision 6.28 2002/09/23 19:48:09 camacho
60 * Avoid the use of data->db_fd when searching the ISAM databases
61 *
62 * Revision 6.27 2002/09/20 15:17:12 camacho
63 * Fixed file descriptor leak
64 *
65 * Revision 6.26 2002/04/04 17:57:10 camacho
66 * Fixed binary search implementation in NISAMSearch
67 *
68 * Revision 6.25 2002/04/02 20:51:21 camacho
69 * Fixed bug in NISAMSearch
70 *
71 * Revision 6.24 2002/01/18 18:53:13 madden
72 * Changes to research the last page if appropriate
73 *
74 * Revision 6.23 2001/07/09 14:17:24 madden
75 * Fix PC-lint complaints from R. Williams
76 *
77 * Revision 6.22 2001/06/08 20:31:01 madden
78 * Fix memory leaks
79 *
80 * Revision 6.21 2000/08/04 19:54:17 shavirin
81 * Fixed problem with counting line when data not tested to be non- unique.
82 *
83 * Revision 6.20 2000/07/18 19:29:27 shavirin
84 * Added new parameter test_non_unique to suppress check for non-unique
85 * strings ids in the database - default - TRUE.
86 *
87 * Revision 6.19 2000/07/13 16:43:48 shavirin
88 * Fixed checking of order in String index creation.
89 *
90 * Revision 6.17 2000/02/11 21:14:07 madden
91 * Allocate MasterPos of correct (smaller) size
92 *
93 * Revision 6.16 1999/12/18 15:27:50 egorov
94 * Fix NT compilation problem
95 *
96 * Revision 6.15 1999/12/17 20:47:05 egorov
97 * Fix 'gcc -Wall' warnings
98 *
99 * Revision 6.14 1999/12/06 20:56:12 egorov
100 * fwrite() writes two bytes for '\n' if file is open not in binary mode,
101 * so MakeISAMIndex worked incorrectly.
102 * What to Blast programs, formatdb now works correctly on NT machine.
103 *
104 * Revision 6.13 1999/11/08 19:05:21 shavirin
105 * Fixed minor SGI warning.
106 *
107 * Revision 6.12 1999/08/25 20:18:49 shavirin
108 * Added possibility to store user-specified Int4 options in the index
109 * header.
110 *
111 * Revision 6.11 1999/03/17 21:38:04 kans
112 * Int4Ptr argument must point to Int4
113 *
114 * Revision 6.10 1999/03/17 20:56:24 shavirin
115 * Fixed warning "long int format"
116 *
117 * Revision 6.9 1999/02/19 22:01:14 madden
118 * Use memory-mapping and binary search on second numerical index
119 *
120 * Revision 6.8 1998/07/13 15:31:14 egorov
121 * make error message more understandable
122 *
123 * Revision 6.7 1998/05/28 17:18:04 shavirin
124 * Fixed non-intialized variable warning.
125 *
126 * Revision 6.6 1998/02/23 17:45:28 shavirin
127 * Fixed problem in sorted file checkup.
128 *
129 * Revision 6.5 1997/12/02 20:06:31 shavirin
130 * Fixed Macintosh warnings
131 *
132 * Revision 6.4 1997/12/02 19:38:17 shavirin
133 * Added variables
134 *
135 * Revision 6.3 1997/12/02 18:05:12 shavirin
136 * Removed typecast warnings in sprintf and sscanf
137 *
138 * Revision 6.2 1997/11/28 15:50:10 shavirin
139 * Added default returned value in the function ISAMGetDataNumber()
140 *
141 * Revision 6.1 1997/11/07 16:17:43 shavirin
142 * Added new function SISAMFindAllData() returned info about redundant keys
143 *
144 * Revision 6.0 1997/08/25 18:53:27 madden
145 * Revision changed to 6.0
146 *
147 * Revision 1.17 1997/05/16 17:08:44 shavirin
148 * Removed printf()
149 *
150 * Revision 1.16 1997/05/16 16:16:00 shavirin
151 * Added LIBCALLBACK to definition of ISAMUidCompare()
152 *
153 * Revision 1.15 1997/05/12 19:55:05 shavirin
154 * Some fixes type-changes to support ISAMCreateDatabase() API
155 *
156 * Revision 1.13 1997/05/09 14:12:06 shavirin
157 * Fixed memory leakage and added ErrLogPrintf()
158 *
159 * Revision 1.12 1997/05/08 21:18:08 shavirin
160 * Added generic function ISAMSearchTerm(), that will search complete ISAM
161 * string database created by ISAMCreateDatabase() function. Returns array of
162 * found uids corresponing given string term and gived bit-field mask
163 *
164 * Revision 1.10 1997/05/06 21:36:15 shavirin
165 * Added set of function for Coded Array compression implementation
166 *
167 * Revision 1.9 1997/05/05 18:17:22 shavirin
168 * Added support for platforms without memory mapping
169 *
170 * Revision 1.8 1997/05/05 14:37:54 shavirin
171 * Fixed usage of Numeric ISAM with Windows NT platform
172 *
173 * Revision 1.7 1997/05/01 20:10:57 shavirin
174 * Fixed few errors discovered on Macintoch
175 *
176 * Revision 1.6 1997/05/01 17:24:33 shavirin
177 * Added String ISAM index functionality
178 *
179 * Revision 1.5 1997/02/26 01:28:11 shavirin
180 * Fixed difference in definitions of SISAMSearch() function
181 *
182 * Revision 1.4 1997/02/25 22:16:49 shavirin
183 * Changed general API of ISAM library .
184 *
185 * Revision 1.3 1997/02/25 19:38:12 shavirin
186 * Added defence aginst little-big endian platforms
187 *
188 * Revision 1.2 1997/02/25 15:33:56 shavirin
189 * Return value will be ISAMNoError if number is not found,
190 * byt Data and Index will be set to ISAMNotFound
191 *
192 * Revision 1.1 1997/02/24 21:06:52 shavirin
193 * Initial revision
194 *
195 *
196 * ==========================================================================
197 */
198 #include <ncbi.h>
199 #include <readdb.h>
200 #include <ncbisami.h>
201
202 /****************************************************************************/
203 /* INTERNAL FUNCTION DEFINITIONS */
204 /****************************************************************************/
205
206 #ifdef NONO
207 static Int4 GetPageNumElements(ISAMDataPtr data, Int4 SampleNum,
208 Int4Ptr Start);
209
210 /* ---------------------- ISAMInitSearch --------------------------
211 Purpose: Initialize ISAM Search. Checks for any errors
212
213 Parameters: ISAM Object
214 Returns: ISAM Error Code
215 NOTE: No need to call this function first.
216 ------------------------------------------------------------------*/
217 static ISAMErrorCode ISAMInitSearch(ISAMObjectPtr object);
218
219 static ISAMErrorCode ISAMMakeNumericIndex(
220 ISAMDataPtr data,
221 Int4 page_size
222 );
223
224 static ISAMErrorCode ISAMMakeStringIndex(
225 ISAMDataPtr data,
226 Int4 page_size
227 );
228 static Boolean ISAMCheckIfSorted(ISAMDataPtr data);
229 ISAMErrorCode ISAMCountLines(ISAMDataPtr data);
230 static Int4 ISAMReadLine(ISAMDataPtr data);
231 static Int4 ISAMDiffChar(CharPtr Term, CharPtr Key, Boolean IgnoreCase);
232 static void ISAMExtractData(CharPtr KeyData,
233 CharPtr PNTR Key, CharPtr PNTR Data);
234 static CharPtr ISAMReadFileInMemory(CharPtr filename);
235
236 /* Coded Array - Field Array handling functions */
237
238 static Boolean ISAMWriteNBits10(ISAMTmpCAPtr cap, Int4 number);
239 static Boolean ISAMWriteBitNumber(ISAMTmpCAPtr cap, Int4 number);
240 static Uint4Ptr ISAMDecompressCA(Uint1Ptr buffer, Int4 length,
241 Int4 num_bits, Int4 num_uids);
242 static Boolean ISAMCreateCA(ISAMTmpCAPtr cap, ISAMUidFieldPtr data,
243 Int4 num_uids);
244 static ISAMTmpCAPtr ISAMTmpCANew(void);
245 static void ISAMTmpCAFree(ISAMTmpCAPtr cap);
246 static Boolean ISAMCreateFA(ISAMTmpCAPtr fcap,
247 ISAMUidFieldPtr data, Int4 num_uids);
248 static Uint4Ptr ISAMDecompressFA(Uint1Ptr buffer, Int4 num_uids);
249 static ISAMErrorCode ISAMDumpTermEntry(ISAMTmpCAPtr cap, FILE *off_fd,
250 FILE *db_fd, ISAMUidFieldPtr uidf,
251 Int4 count, Int4Ptr offset);
252 static int LIBCALLBACK ISAMUidCompare(VoidPtr i, VoidPtr j);
253
254 #endif
255
256 /****************************************************************************/
257 /* EXTERNAL FUNCTIONS */
258 /****************************************************************************/
259
260 /* ---------------------- ISAMObjectNew --------------------------
261 Purpose: Creates ISAM object structure with default parameters
262
263 Parameters: type - Type of ISAM (Numeric, String etc. )
264 Returns: Poiner to created object structure
265 NOTE: Page size is set to default value if 0
266
267 ------------------------------------------------------------------*/
ISAMObjectNew(ISAMType type,CharPtr DBFile,CharPtr IndexFile)268 ISAMObjectPtr ISAMObjectNew(ISAMType type, /* Type of ISAM */
269 CharPtr DBFile, /* ISAM Database file */
270 CharPtr IndexFile /* ISAM Index file */
271 )
272 {
273 ISAMDataPtr data;
274 Char name[MAX_FILENAME_LEN];
275 #ifndef OS_UNIX
276 CharPtr ch, ch1;
277 #endif
278
279 if(DBFile == NULL)
280 return NULL;
281
282 if((data = (ISAMDataPtr)MemNew(sizeof(ISAMData))) == NULL)
283 return NULL;
284
285 data->type = type;
286 data->DBFileName = StringSave(DBFile);
287
288
289 if(IndexFile == NULL) {
290 #ifdef OS_UNIX
291 if(type == ISAMNumeric || type == ISAMNumericNoData)
292 sprintf(name, "%s.nisam", DBFile);
293 else
294 sprintf(name, "%s.isam", DBFile);
295
296 data->IndexFileName = StringSave(name);
297 #else
298 sprintf(name, "%s", DBFile);
299
300 /* Looking for last '.' in the filename */
301 for(ch = name; (ch = StringChr(ch, '.')) != NULL; ch1=ch)
302 continue;
303
304 if(ch1 != NULL) {
305 if(type == ISAMNumeric || type == ISAMNumericNoData)
306 sprintf(ch1, ".nsm");
307 else
308 sprintf(ch1, ".ism");
309 } else {
310 if(type == ISAMNumeric || type == ISAMNumericNoData)
311 sprintf(name, "%s.nsm", DBFile);
312 else
313 sprintf(name, "%s.ism", DBFile);
314 }
315
316 data->IndexFileName = StringSave(name);
317 #endif
318 } else {
319 data->IndexFileName = StringSave(IndexFile);
320 }
321
322 if(type == ISAMNumeric || type == ISAMNumericNoData)
323 data->PageSize = DEFAULT_NISAM_SIZE;
324 else
325 data->PageSize = DEFAULT_SISAM_SIZE;
326
327 data->initialized = FALSE;
328 data->KeySamples = NULL;
329 data->KeyDataSamples = NULL;
330 data->test_non_unique = TRUE; /* default - to check non-unique data */
331
332 return (ISAMObjectPtr) data;
333 }
334 /* ---------------------- ISAMSetUpCAInfo --------------------------
335 Purpose: Added toISAM object Coded Array filenames information
336
337 Parameters: CAName - Common name for all CA/FA DB and offset files
338 CADBExt - exetntio for CA/FA DB files
339 CAOffExt - extention for CA/FA offset files
340 MaxOffset - threshhold offset for starting write new
341 file
342
343 Returns: ISAM Error Code
344 NOTE: MaxOffset is set to default value if 10.000.000
345 ------------------------------------------------------------------*/
ISAMSetUpCAInfo(ISAMObjectPtr object,Int4 MaxOffset,CharPtr CAName,CharPtr CADBExt,CharPtr CAOffExt)346 ISAMErrorCode ISAMSetUpCAInfo(ISAMObjectPtr object, Int4 MaxOffset,
347 CharPtr CAName, CharPtr CADBExt,
348 CharPtr CAOffExt)
349 {
350 ISAMDataPtr data;
351
352 if(object == NULL)
353 return ISAMBadParameter;
354
355 data = (ISAMDataPtr) object;
356
357 data->CAName = StringSave(CAName);
358 data->CADBExt = StringSave(CADBExt);
359 data->CAOffExt = StringSave(CAOffExt);
360 if(MaxOffset != 0)
361 data->CAMaxOffset = MaxOffset;
362 else
363 data->CAMaxOffset = DEFAULT_CA_MAX_OFFSET;
364
365 return ISAMNoError;
366 }
367
368 /* ---------------------- ISAMUpdateDatabase ------------------------
369 Purpose:
370
371 Parameters:
372
373 Returns:
374 ------------------------------------------------------------------*/
ISAMUpdateDatabase(CharPtr InFile,CharPtr NewDBDir,Int4 MaxOffset,CharPtr BaseName,CharPtr DBExt,CharPtr IndexExt,CharPtr OffExt,CharPtr CodeExt)375 ISAMErrorCode ISAMUpdateDatabase(CharPtr InFile,
376 CharPtr NewDBDir,
377 Int4 MaxOffset,
378 CharPtr BaseName,
379 CharPtr DBExt,
380 CharPtr IndexExt,
381 CharPtr OffExt,
382 CharPtr CodeExt)
383 {
384
385 return ISAMNotImplemented;
386 }
387
ISAMTmpCANew(void)388 static ISAMTmpCAPtr ISAMTmpCANew(void)
389 {
390 ISAMTmpCAPtr cap;
391
392 cap = MemNew(sizeof(ISAMTmpCA));
393
394 cap->allocated = CA_TMP_CHUNK;
395 cap->buffer = (Uint1Ptr) MemNew(cap->allocated);
396
397 return cap;
398 }
399
/* Releases a temporary coded-array object and its buffer.
   A NULL argument is ignored. */
static void ISAMTmpCAFree(ISAMTmpCAPtr cap)
{
    if(cap != NULL) {
        MemFree(cap->buffer);
        MemFree(cap);
    }
}
408
409 /* ------------------------------------------------------------------
410 This is handler for HeapSort function
411 ------------------------------------------------------------------*/
ISAMUidCompare(VoidPtr i,VoidPtr j)412 static int LIBCALLBACK ISAMUidCompare(VoidPtr i, VoidPtr j)
413 {
414 if (*(Int4Ptr)i > *(Int4Ptr)j)
415 return (1);
416 if (*(Int4Ptr)i < *(Int4Ptr)j)
417 return (-1);
418 return (0);
419 }
420
ISAMWriteBitNumber(ISAMTmpCAPtr cap,Int4 number)421 static Boolean ISAMWriteBitNumber(ISAMTmpCAPtr cap, Int4 number)
422 {
423 Int4 template;
424
425 if(cap->num_bits == 0)
426 return TRUE;
427
428 template = PowersOfTwo[cap->num_bits - 1];
429
430 for(; template; template >>= 1) {
431
432 if(number & template)
433 cap->buffer[cap->byte_num] |= OneBit[cap->bit_num] ;
434
435 if(++cap->bit_num > 7) {
436 cap->bit_num = 0;
437 if((++cap->byte_num >= cap->allocated)) {
438 cap->allocated += CA_TMP_CHUNK;
439 cap->buffer = Realloc(cap->buffer, cap->allocated);
440 }
441 }
442 }
443 return TRUE;
444 }
445
ISAMWriteNBits10(ISAMTmpCAPtr cap,Int4 number)446 static Boolean ISAMWriteNBits10(ISAMTmpCAPtr cap, Int4 number)
447 {
448 register Int4 i;
449
450 for(i = 0; i < number; i++) {
451 cap->buffer[cap->byte_num] |= OneBit[cap->bit_num];
452 if(++cap->bit_num > 7) {
453 cap->bit_num = 0;
454 if((++cap->byte_num >= cap->allocated)) {
455 cap->allocated += CA_TMP_CHUNK;
456 cap->buffer = Realloc(cap->buffer, cap->allocated);
457 }
458 }
459 }
460
461 /* Now wriiting 0 bit in the end of set of 1's */
462
463 if(++cap->bit_num > 7) {
464 cap->bit_num = 0;
465 if((++cap->byte_num >= cap->allocated)) {
466 cap->allocated += CA_TMP_CHUNK;
467 cap->buffer = Realloc(cap->buffer, cap->allocated);
468 }
469 }
470 return TRUE;
471 }
472
ISAMCreateCA(ISAMTmpCAPtr cap,ISAMUidFieldPtr data,Int4 num_uids)473 static Boolean ISAMCreateCA(ISAMTmpCAPtr cap,
474 ISAMUidFieldPtr data,
475 Int4 num_uids)
476 {
477 Nlm_FloatHi AverageDiff;
478 Int4 base, number, prev_number;
479 Int4 i, diff, dividend;
480
481 if(cap == NULL || data == NULL)
482 return FALSE;
483
484 cap->byte_num = 0;
485 cap->bit_num = 0;
486 cap->length = 0;
487 cap->num_uids = num_uids;
488 MemSet(cap->buffer, 0, cap->allocated);
489
490
491 if((AverageDiff = (Nlm_FloatHi)(data[num_uids-1].uid - num_uids + 1) /
492 (Nlm_FloatHi)num_uids ) < 1) {
493 AverageDiff = 1;
494 }
495
496 cap->num_bits = Log2(AverageDiff);
497 base = PowersOfTwo[cap->num_bits];
498
499 prev_number = -1;
500
501 for(i = 0; i < num_uids; i++) {
502 number = data[i].uid;
503
504 if (number <= prev_number) {
505 ErrLogPrintf("%s\n%s%ld%s%ld\n",
506 "Bad record number in writing to coded array!",
507 "Number: ", number,
508 " Previous Number: ", prev_number);
509 ISAMTmpCAFree(cap);
510 return FALSE;
511 }
512
513 diff = number - prev_number - 1;
514
515 dividend = diff/base;
516
517 ISAMWriteNBits10(cap, dividend);
518 ISAMWriteBitNumber(cap, diff);
519 prev_number = number;
520 }
521
522 cap->length = cap->byte_num + (cap->bit_num != 0 ? 1 : 0);
523
524 return TRUE;
525 }
526
ISAMCreateFA(ISAMTmpCAPtr cap,ISAMUidFieldPtr uidf,Int4 num_uids)527 static Boolean ISAMCreateFA(ISAMTmpCAPtr cap,
528 ISAMUidFieldPtr uidf, Int4 num_uids)
529 {
530 Int4 i, j;
531 Int4 byte_start;
532
533 if(cap == NULL || uidf == NULL)
534 return FALSE;
535
536 cap->byte_num = 0;
537 cap->length = 0;
538 cap->num_uids = num_uids;
539 MemSet(cap->buffer, 0, cap->allocated);
540
541 for(i = 0; i < num_uids; i++) {
542
543 byte_start = cap->byte_num;
544
545 for(j = 0, cap->bit_num = 0; j < 32; j++) {
546 if(uidf[i].field & PowersOfTwo[j]) {
547 cap->buffer[cap->byte_num] |= j;
548
549 if((++cap->byte_num >= cap->allocated)) {
550 cap->allocated += CA_TMP_CHUNK;
551 cap->buffer = Realloc(cap->buffer, cap->allocated);
552 }
553
554 cap->bit_num++;
555 }
556 }
557
558 for(j = 0; j < cap->bit_num -1; j++) {
559 cap->buffer[byte_start+j] |=PowersOfTwo[7];
560 }
561 }
562
563 cap->length = cap->byte_num;
564 return TRUE;
565 }
566
ISAMDumpTermEntry(ISAMTmpCAPtr cap,FILE * off_fd,FILE * db_fd,ISAMUidFieldPtr uidf,Int4 count,Int4Ptr offset)567 static ISAMErrorCode ISAMDumpTermEntry(ISAMTmpCAPtr cap, FILE *off_fd,
568 FILE *db_fd,
569 ISAMUidFieldPtr uidf,
570 Int4 count, Int4Ptr offset)
571 {
572 Int4 i, j, offset_out, ca_offset, num_bits;
573 Uint4 numbers[32];
574 Uint4 bit_flag = 0;
575 Int4 length;
576
577 offset_out = ftell(off_fd);
578 ca_offset = ftell(db_fd);
579 MemSet(numbers, 0, sizeof(numbers));
580
581 HeapSort(uidf, count, sizeof(Uint4)*2, ISAMUidCompare);
582
583 for(i = 0; i < count; i++) {
584 for(j = 0; j < 32; j++) {
585 if(uidf[i].field & PowersOfTwo[j]) {
586 numbers[j]++;
587 bit_flag |= PowersOfTwo[j];
588 }
589 }
590 }
591
592 /* Calculating and writting code and field arrays */
593
594 if(!ISAMCreateCA(cap, uidf, count)) {
595 ErrLogPrintf("Cannot create coded array. Formating failed.\n");
596 return ISAMInvalidFormat;
597 }
598 num_bits = cap->num_bits;
599
600 FileWrite(cap->buffer, 1, cap->length, db_fd);
601 length = cap->length;
602
603 if(!ISAMCreateFA(cap, uidf, count)) {
604 ErrLogPrintf("Cannot create field array. Formating failed.");
605 return ISAMInvalidFormat;
606 }
607
608 FileWrite(cap->buffer, 1, cap->length, db_fd);
609
610 /* ------- Now writting header ---------- */
611
612 FileWrite(&count, 1, sizeof(Uint4), off_fd);
613 FileWrite(&ca_offset, 1, sizeof(Uint4), off_fd);
614 FileWrite(&length, 1, sizeof(Uint4), off_fd);
615 FileWrite(&num_bits, 1, sizeof(Uint4), off_fd);
616 FileWrite(&bit_flag, 1, sizeof(Uint4), off_fd);
617
618 for(j = 0; j < 32; j++) {
619 if(numbers[j] > 0)
620 FileWrite(&numbers[j], 1, sizeof(Uint4), off_fd);
621 }
622
623 *offset = offset_out;
624 return ISAMNoError;
625 }
626
627 #define IS_END_BUF(ch) (ch == EOF || ch == '\0')
628 #define IS_NEWLINE(ch) (IS_END_BUF(ch) || (ch == '\n' || ch == '\r'))
629
630 /** Reads one line from a buffer. If end of buffer is reached before the next
631 * newline character, line is not returned, and the start of line is saved
632 * in a "remainder" string.
633 * @param buffer Start of buffer to read from; pointer to the start of next line
634 * on exit. If line is unfinished, output pointer is the same as
635 * input pointer. [in|out]
636 * @param buffer_length Length of input and output buffer. [in|out]
637 * @param line_length Length of the current line, even if unfinished. [out]
638 * @return TRUE if full line has been read.
639 */
640 static Boolean
s_ISAMBufferReadLine(char ** buffer,Int4 * buffer_length,Int4 * line_length)641 s_ISAMBufferReadLine(char* *buffer, Int4* buffer_length, Int4* line_length)
642 {
643 char* ptr;
644 Int4 length;
645 Boolean success = TRUE;
646 Boolean end_of_file = FALSE;
647
648 if (!buffer)
649 return 0;
650
651 for (ptr = *buffer, length = 0;
652 (length < *buffer_length) && !IS_NEWLINE(*ptr); ++ptr, ++length);
653
654 /* Check if end of buffer (file) has been reached. */
655 /* If buffer_length has been reached, consider this line as unfinished,
656 even if a full line has actually been found, because we were unable
657 to reach the start of the next line. */
658 if (length == *buffer_length) {
659 success = FALSE;
660 } else if (IS_END_BUF(*ptr)) {
661 end_of_file = TRUE;
662 } else if (IS_NEWLINE(*ptr)) {
663 /* If new line has been reached, and this is not the end of buffer,
664 skip the white space before the start of the next line. */
665 while ((length < *buffer_length) && IS_WHITESP(*ptr)) {
666 ++length;
667 ++ptr;
668 }
669 }
670
671 *line_length = (ptr - *buffer);
672
673 if (success) {
674 *buffer = ptr;
675 if (end_of_file)
676 *buffer_length = 0;
677 else
678 *buffer_length -= *line_length;
679 }
680
681 return success;
682 }
683
684 /* returns NULL terminated string \n\r are removed */
685
ISAMReadLine(ISAMDataPtr data)686 static Int4 ISAMReadLine(ISAMDataPtr data)
687 {
688 Int4 i = 0;
689 Int4 ch;
690 Int4 MaxChars;
691 FILE *fd = data->db_fd;
692
693 MaxChars = data->max_line_size-1;
694
695 for(i = 0; (( ch = getc(fd)) != EOF) ; i++) {
696 if((ch == '\n') || (ch == '\r'))
697 break;
698 data->line[i] = (Char) ch;
699
700 if(i == MaxChars) { /* Reallocating line buffer */
701 data->max_line_size += LINE_SIZE_CHUNK;
702 data->line = Realloc(data->line, data->max_line_size);
703 MaxChars = data->max_line_size-1;
704 }
705 }
706 data->line[i] = NULLB;
707
708 /* Finding first character on new line */
709
710 while((ch = getc(fd)) != EOF) {
711 if(IS_WHITESP(ch)) {
712 continue;
713 } else {
714 ungetc(ch, fd);
715 break;
716 }
717 }
718
719 return i;
720 }
721
ISAMCheckIfSorted(ISAMDataPtr data)722 static Boolean ISAMCheckIfSorted(ISAMDataPtr data)
723 {
724 CharPtr prevline = NULL;
725 Int4 length;
726 CharPtr chptr;
727
728 if(data == NULL || data->db_fd == NULL || data->max_line_size == 0)
729 return FALSE;
730
731 rewind(data->db_fd);
732
733 if (data->sorting_done)
734 return TRUE;
735
736 data->NumTerms = 0;
737 prevline = MemNew(data->max_line_size);
738
739
740 if(data->type == ISAMString || data->type == ISAMStringDatabase) {
741 while(ISAMReadLine(data) > 0) {
742 data->NumTerms++;
743
744 /* If not testing data - lines eventually should be counted */
745 if(data->test_non_unique) {
746 if((chptr = StringChr(data->line, ISAM_DATA_CHAR)) != NULL)
747 *chptr = NULLB;
748
749 if (StringCmp(data->line, prevline) <= 0) {
750 ErrPostEx(SEV_WARNING, 0, 0, "Non-unique or not-sorted string IDs found %d line: '%s' %d line: '%s'", data->NumTerms, data->line, data->NumTerms-1, prevline);
751 }
752 length = StringLen(data->line)+1;
753 StringNCpy_0(prevline, data->line,
754 length > LINE_SIZE_CHUNK ? LINE_SIZE_CHUNK : length);
755 }
756 }
757 } else {
758 return FALSE;
759 }
760
761 rewind(data->db_fd);
762 MemFree(prevline);
763 return(TRUE);
764 }
765
766 /* ---------------------- ISAMMakeStringIndex ---------------------
767 Purpose: To create String ISAM Intex file for Database file
768
769 Parameters: ISAM Data
770 Returns: ISAM itemized error code
771 NOTE: Special default rules for UNIX platform
772 ------------------------------------------------------------------*/
ISAMMakeStringIndex(ISAMDataPtr data,Int4 page_size,Int4 idx_option)773 static ISAMErrorCode ISAMMakeStringIndex(
774 ISAMDataPtr data,
775 Int4 page_size, /* ISAM page size */
776 Int4 idx_option /* Option for upper layer */
777 )
778 {
779 Int4 TermCount, Pos, count, SampleCount;
780 Int4Ptr MasterPos, SamplePos;
781 Int4 OffsetPos;
782 FILE *tf_fd;
783 Int4 Version = ISAM_VERSION;
784 Uint4 value;
785
786 if(page_size != 0)
787 data->PageSize = page_size;
788 else
789 data->PageSize = DEFAULT_SISAM_SIZE;
790
791 if((data->db_fd = FileOpen(data->DBFileName, "r")) == NULL)
792 return ISAMBadFileName;
793
794
795 /* Temporary space for line initialy set to MAX_LINE_SIZE
796 byt will be realocated if some line exceed this limit */
797
798 if(data->max_line_size == 0) {
799 data->max_line_size = LINE_SIZE_CHUNK;
800 data->line = MemNew(LINE_SIZE_CHUNK);
801 }
802
803 /* This function will also split data if strings are
804 identical and finaly count lines*/
805
806 if(!ISAMCheckIfSorted(data))
807 return ISAMNoOrder;
808
809 /* Obtain the term offsets; select the sample terms. */
810
811 MasterPos = (Int4 *)Nlm_Malloc(sizeof(Int4) * (((data->NumTerms+1)/(data->PageSize))+2));
812
813 Pos = TermCount = SampleCount = 0;
814
815 #define FILEREAD_BUFFER_SIZE 0x00010000
816 {
817 char buffer[FILEREAD_BUFFER_SIZE];
818 Int4 buffer_length = FILEREAD_BUFFER_SIZE;
819 char *buffer_ptr = buffer;
820 Int4 bytes_read;
821
822 Pos = ftell(data->db_fd);
823
824 while ((bytes_read =
825 FileRead(buffer_ptr, 1, buffer_length, data->db_fd)) > 0) {
826 Int4 line_length;
827 /* Lines are always read beginning at the start of the original
828 buffer. */
829 buffer_length = bytes_read + (buffer_ptr - buffer);
830 buffer_ptr = buffer;
831 while (buffer_length > 0 &&
832 s_ISAMBufferReadLine(&buffer_ptr, &buffer_length,
833 &line_length)) {
834 if (TermCount++ % data->PageSize == 0)
835 MasterPos[SampleCount++] = SwapUint4(Pos);
836 Pos += line_length;
837 }
838 /* If an unfinished line is left, copy it to the start of the
839 buffer, and set buffer pointer so that next file chunk is read
840 into location immediately following the unfinished line. */
841 if (buffer_length > 0 && line_length > 0) {
842 Int4 file_pos = ftell(data->db_fd);
843 memmove(buffer, buffer_ptr, line_length);
844 buffer_ptr = buffer + line_length;
845 ASSERT(Pos == file_pos - line_length);
846 buffer_length = FILEREAD_BUFFER_SIZE - line_length;
847 } else {
848 buffer_ptr = buffer;
849 buffer_length = FILEREAD_BUFFER_SIZE;
850 }
851 }
852 }
853
854 MasterPos[SampleCount] = SwapUint4(Pos);
855
856 /* Create the sample file. */
857
858 if (!(tf_fd = FileOpen(data->IndexFileName, "wb")))
859 {
860 MemFree(MasterPos);
861 return ISAMBadFileName;
862 }
863
864 /* Write the term counts and offsets to the sample file. */
865 value = SwapUint4(Version);
866 FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
867 value = SwapUint4(data->type);
868 FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
869 value = SwapUint4(FileLength(data->DBFileName)); /* Length of DB file */
870 FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
871 value = SwapUint4(TermCount);
872 FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
873 value = SwapUint4(SampleCount);
874 FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
875 value = SwapUint4(data->PageSize);
876 FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
877 value = SwapUint4(data->max_line_size);
878 FileWrite((CharPtr)&value, sizeof(Int4), 1, tf_fd);
879 value = SwapUint4(idx_option);
880 FileWrite(&value, sizeof(Int4), 1, tf_fd);
881 value = SwapUint4(0); /* This space reserved for future use */
882 FileWrite(&value, sizeof(Int4), 1, tf_fd);
883
884 if(data->PageSize != MEMORY_ONLY_PAGE_SIZE)
885 FileWrite((CharPtr)MasterPos, sizeof(Int4), SampleCount+1, tf_fd);
886
887 /* Leave space for the offsets of the selected terms. */
888
889 OffsetPos = ftell(tf_fd);
890 SamplePos = (Int4 *)MemNew((SampleCount + 1) * sizeof(Int4));
891 FileWrite((CharPtr)SamplePos, sizeof(Int4), SampleCount+1, tf_fd);
892
893 /* Copy the selected terms to the sample file. */
894
895 for (count = 0; count < SampleCount; count++) {
896 SamplePos[count] = SwapUint4(ftell(tf_fd));
897 fseek(data->db_fd, SwapUint4(MasterPos[count]), SEEK_SET);
898 ISAMReadLine(data);
899 fprintf(tf_fd,"%s%c",data->line, NULLB);
900 }
901
902 SamplePos[SampleCount] = ftell(tf_fd);
903
904 /* Replace the space-holding zeroes with the offsets of the selected
905 terms.*/
906
907 fseek(tf_fd, OffsetPos, SEEK_SET);
908 FileWrite((CharPtr)SamplePos, sizeof(Int4), SampleCount+1, tf_fd);
909
910 FileClose(tf_fd);
911 FileClose(data->db_fd);
912 data->db_fd = NULL;
913
914 MemFree(SamplePos);
915 MemFree(MasterPos);
916
917 MemFree(data->line);
918 data->max_line_size = 0;
919
920 return ISAMNoError;
921 }
922
923 /* ------------------- ISAMReadFileInMemory -----------------------
924 Purpose: Function reads data from file into a buffer
925
926 Parameters: filename - Name of file to read file
927
928 Returns: Pointer to allocated buffer.
929 ------------------------------------------------------------------*/
930
ISAMReadFileInMemory(CharPtr filename)931 static CharPtr ISAMReadFileInMemory(CharPtr filename)
932 {
933 CharPtr in_buff;
934 Int4 new_size = BUFF_SIZE_CHUNK;
935 Int4 bytes = 0, buff_len = 0;
936 FILE *fd;
937
938 if(filename == NULL)
939 return NULL;
940
941 if((fd = FileOpen(filename, "rb")) == NULL)
942 return NULL;
943
944 /* initial allocation of memory */
945
946 if((in_buff = MemNew(BUFF_SIZE_CHUNK)) == NULL) {
947 ErrLogPrintf("Error in allocating memory\n");
948 FileClose(fd);
949 return NULL;
950 }
951
952 while ((bytes = FileRead(in_buff + buff_len, 1,
953 BUFF_SIZE_CHUNK, fd)) > 0) {
954 new_size += bytes;
955 buff_len += bytes;
956
957 if ((in_buff = Realloc(in_buff, new_size)) == NULL) {
958 ErrLogPrintf("Error in reallocating memory\n");
959 FileClose(fd);
960 return NULL;
961 }
962 }
963
964 FileClose(fd);
965 return(in_buff);
966 }
967
968 /* ---------------------- ISAMMakeNumericIndex ---------------------
969 Purpose: To create Numeric ISAM Intex file for Database file
970
971 Parameters: ISAM Data
972 Returns: ISAM itemized error code
973 NOTE: Special default rules for UNIX platform
974 ------------------------------------------------------------------*/
ISAMMakeNumericIndex(ISAMDataPtr data,Int4 page_size,Int4 idx_option)975 static ISAMErrorCode ISAMMakeNumericIndex(
976 ISAMDataPtr data,
977 Int4 page_size, /* ISAM page size */
978 Int4 idx_option /* Option for upper layer */
979 )
980 {
981 Int4 i, NumTerms, value;
982 Int4 MaxSamples, SampleCount;
983 Uint4Ptr KeyInfo, KeySamples;
984 NISAMKeyDataPtr KeyDataInfo, KeyDataSamples;
985 Boolean NoData;
986 FILE *fd;
987 Int4 Version = ISAM_VERSION;
988
989 NoData = (data->type == ISAMNumericNoData);
990
991 NumTerms = FileLength(data->DBFileName) /
992 (NoData ? sizeof(Uint4) : sizeof(NISAMKeyData));
993
994 if(!Nlm_MemMapAvailable()) {
995 if((data->FileStart =
996 ISAMReadFileInMemory(data->DBFileName)) == NULL)
997 return ISAMBadFileName;
998 if (NoData)
999 KeyInfo = (Uint4Ptr) data->FileStart;
1000 else
1001 KeyDataInfo = (NISAMKeyDataPtr)data->FileStart;
1002 } else {
1003 if((data->mmp = Nlm_MemMapInit(data->DBFileName)) == NULL)
1004 return ISAMMemMap;
1005 if (NoData)
1006 KeyInfo = (Uint4Ptr) data->mmp->mmp_begin;
1007 else
1008 KeyDataInfo = (NISAMKeyDataPtr)data->mmp->mmp_begin;
1009 }
1010
1011 if(page_size != 0)
1012 data->PageSize = page_size;
1013 else
1014 data->PageSize = DEFAULT_NISAM_SIZE;
1015
1016 #ifndef CHECK_ORDER
1017 for (i = 1; i < NumTerms; i++) {
1018 if (NoData) {
1019 if (SwapUint4(KeyInfo[i]) <= SwapUint4(KeyInfo[i-1]))
1020 break;
1021 } else {
1022 if (SwapUint4(KeyDataInfo[i].key) <= SwapUint4(KeyDataInfo[i-1].key))
1023 break;
1024 }
1025 }
1026
1027 if (i < NumTerms) {
1028 ErrLogPrintf("NIsam key file %s not in sorted order!\n",
1029 data->DBFileName);
1030
1031 if (NoData) {
1032 ErrLogPrintf("unsorted or non-unique elements:"
1033 "#%ld, #%ld : %ld, %ld\n",
1034 i-1, i, SwapUint4(KeyInfo[i-1]),
1035 SwapUint4(KeyInfo[i]));
1036 } else {
1037 ErrLogPrintf("unsorted or non-unique elements:"
1038 "#%ld, #%ld : %ld, %ld\n",
1039 i-1, i, SwapUint4(KeyDataInfo[i-1].key),
1040 SwapUint4(KeyDataInfo[i].key));
1041 }
1042 return ISAMNoOrder;
1043 }
1044 #endif
1045
1046 /* Obtain the term offsets; select the sample terms. */
1047
1048 MaxSamples = NumTerms/data->PageSize + 4;
1049
1050 if (NoData)
1051 KeySamples = (Uint4Ptr)MemNew(sizeof(Uint4)*(MaxSamples+1));
1052 else
1053 KeyDataSamples = (NISAMKeyDataPtr) MemNew(sizeof(NISAMKeyData)*
1054 (MaxSamples+1));
1055 SampleCount = 0;
1056
1057 for (i = 0; i < NumTerms; i++) {
1058 if (i % data->PageSize == 0) {
1059 if (NoData)
1060 KeySamples[SampleCount] = KeyInfo[i];
1061 else
1062 KeyDataSamples[SampleCount] = KeyDataInfo[i];
1063 SampleCount++;
1064 }
1065 }
1066
1067 if (NoData) {
1068 KeySamples[SampleCount] = SwapUint4(UINT4_MAX);
1069 } else {
1070 KeyDataSamples[SampleCount].key = SwapUint4(UINT4_MAX);
1071 KeyDataSamples[SampleCount].data = SwapUint4(0);
1072 }
1073
1074 /* Create the sample file. */
1075
1076 if((fd = FileOpen(data->IndexFileName, "wb")) == NULL)
1077 return ISAMBadFileName;
1078
1079 /* Write the term counts and offsets to the sample file. */
1080
1081 value = SwapUint4(Version); /* Index version */
1082 FileWrite((CharPtr)&value, sizeof(Int4), 1, fd);
1083 value = SwapUint4(data->type); /* Index type */
1084 FileWrite(&value, sizeof(Int4), 1, fd);
1085 value = SwapUint4(FileLength(data->DBFileName)); /* Length of DB file */
1086 FileWrite(&value, sizeof(Int4), 1, fd);
1087 value = SwapUint4(NumTerms); /* Number of terms in DB file */
1088 FileWrite(&value, sizeof(Int4), 1, fd);
1089 value = SwapUint4(SampleCount); /* Number of elements in index file */
1090 FileWrite(&value, sizeof(Int4), 1, fd);
1091 value = SwapUint4(data->PageSize); /* Page size of ISAM */
1092 FileWrite(&value, sizeof(Int4), 1, fd);
1093 value = SwapUint4(0); /* 0 max_line-size for strings here */
1094 FileWrite(&value, sizeof(Int4), 1, fd);
1095 value = SwapUint4(idx_option); /* Option for the upper layer */
1096 FileWrite(&value, sizeof(Int4), 1, fd);
1097 value = SwapUint4(0); /* This space reserved for future use */
1098 FileWrite(&value, sizeof(Int4), 1, fd);
1099
1100 if (NoData) /* No swaping neeeded here */
1101 FileWrite((VoidPtr)KeySamples, sizeof(Uint4), SampleCount+1, fd);
1102 else
1103 FileWrite((VoidPtr)KeyDataSamples, sizeof(NISAMKeyData),
1104 SampleCount+1, fd);
1105
1106 FileClose(fd);
1107
1108 if(data->mmp != NULL) {
1109 Nlm_MemMapFini(data->mmp);
1110 data->mmp = NULL;
1111 } else {
1112 MemFree(data->FileStart);
1113 data->FileStart = NULL;
1114 }
1115
1116 if (NoData)
1117 MemFree(KeySamples);
1118 else
1119 MemFree(KeyDataSamples);
1120
1121 return ISAMNoError;
1122 }
1123
1124 /* ---------------------- ISAMMakeIndex --------------------------
1125 Purpose: To create ISAM Intex file for Database file
1126
1127 Parameters: ISAM Object
1128 Returns: ISAM itemized error code
1129 NOTE: Special default rules for UNIX platform
1130 ------------------------------------------------------------------*/
ISAMMakeIndex(ISAMObjectPtr object,Int4 page_size,Int4 idx_option)1131 ISAMErrorCode ISAMMakeIndex(ISAMObjectPtr object,
1132 Int4 page_size, /* ISAM page size */
1133 Int4 idx_option /* Option for upper layer */
1134 )
1135 {
1136 ISAMDataPtr data;
1137
1138 if(object == NULL)
1139 return ISAMBadParameter;
1140
1141 data = (ISAMDataPtr) object;
1142
1143 if(data->type == ISAMString || data->type == ISAMStringDatabase)
1144 return ISAMMakeStringIndex(data, page_size, idx_option);
1145 else if (data->type == ISAMNumeric || data->type == ISAMNumericNoData)
1146 return ISAMMakeNumericIndex(data, page_size, idx_option);
1147 else
1148 return ISAMNotImplemented;
1149 }
1150
1151 /* ---------------------- ISAMCreateDatabase ------------------------
1152 Purpose: To create coded array/offsets and ISAM database files
1153 from input files in special form:
1154 All files are in sorted order and sorted through
1155 format: <term><\2><uid><field-bit mask><CR>
1156
1157 Parameters: ISAM Object
1158 files - list of sorted files to process
1159 Returns: ISAM itemized error code
1160 ------------------------------------------------------------------*/
ISAMCreateDatabase(CharPtr PNTR files,Int4 num_files,Int4 MaxOffset,CharPtr BaseName,CharPtr DBExt,CharPtr IndexExt,CharPtr OffExt,CharPtr CodeExt)1161 ISAMErrorCode ISAMCreateDatabase(CharPtr PNTR files,
1162 Int4 num_files,
1163 Int4 MaxOffset,
1164 CharPtr BaseName,
1165 CharPtr DBExt,
1166 CharPtr IndexExt,
1167 CharPtr OffExt,
1168 CharPtr CodeExt)
1169
1170 {
1171 ISAMDataPtr data;
1172 ISAMErrorCode error;
1173 Char DBName[MAX_FILENAME_LEN], filename[MAX_FILENAME_LEN];
1174 FILE *ca_fd, *off_fd, *out_fd;
1175 long count = 0, ca_count = 0, files_count;
1176 Int4 offset;
1177 unsigned long uid_in, uid_last, field_in;
1178 CharPtr chptr;
1179 CharPtr term, value;
1180 Int4 uidf_allocated;
1181 CharPtr prevterm;
1182 ISAMTmpCAPtr cap;
1183 ISAMUidFieldPtr uidf;
1184 long int lvalue1, lvalue2;
1185
1186 if(BaseName == NULL || files == NULL)
1187 return ISAMBadParameter;
1188
1189 if(DBExt != NULL)
1190 sprintf(DBName, "%s.%s", BaseName, DBExt);
1191 else
1192 sprintf(DBName, "%s", BaseName);
1193
1194 if(IndexExt != NULL)
1195 sprintf(filename, "%s.%s", BaseName, IndexExt);
1196
1197 if((data = (ISAMDataPtr) ISAMObjectNew(ISAMStringDatabase, DBName,
1198 IndexExt == NULL ?
1199 NULL : filename)) == NULL) {
1200 ErrLogPrintf("Creating of ISAM object failed\n");
1201 return ISAMMiscError;
1202 }
1203
1204 data->CAName = StringSave(BaseName);
1205 data->CADBExt = StringSave(CodeExt);
1206 data->CAOffExt = StringSave(OffExt);
1207
1208 if(MaxOffset != 0)
1209 data->CAMaxOffset = MaxOffset;
1210 else
1211 data->CAMaxOffset = DEFAULT_CA_MAX_OFFSET;
1212
1213 sprintf(filename, "%s%ld.%s",
1214 data->CAName, (long) ca_count, data->CADBExt);
1215
1216 if((out_fd = FileOpen(data->DBFileName, "bw")) == NULL)
1217 return ISAMBadFileName;
1218
1219 sprintf(filename, "%s%ld.%s",
1220 data->CAName, (long) ca_count, data->CADBExt);
1221
1222 if((ca_fd = FileOpen(filename, "bw")) == NULL)
1223 return ISAMBadFileName;
1224
1225 sprintf(filename, "%s%ld.%s",
1226 data->CAName, (long) ca_count, data->CAOffExt);
1227 if((off_fd = FileOpen(filename, "bw")) == NULL)
1228 return ISAMBadFileName;
1229
1230 if(data->max_line_size == 0) {
1231 data->max_line_size = LINE_SIZE_CHUNK;
1232 data->line = MemNew(LINE_SIZE_CHUNK);
1233 }
1234
1235 uidf = MemNew(sizeof(ISAMUidField)*UID_NUM_CHUNK);
1236 uidf_allocated = UID_NUM_CHUNK;
1237
1238 cap = ISAMTmpCANew();
1239
1240 for(files_count = 0; files_count < num_files; files_count++) {
1241
1242 if((data->db_fd = FileOpen(files[ca_count], "r")) == NULL)
1243 return ISAMBadFileName;
1244
1245 /* Reading first entry */
1246
1247 ISAMReadLine(data);
1248 if((chptr = StringChr(data->line, ISAM_DATA_CHAR)) == NULL) {
1249 ErrLogPrintf("No ISAM delimiter char present in input. \n"
1250 "Line: \"%s\" \n",
1251 data->line);
1252 return ISAMMiscError;
1253 }
1254
1255 *chptr = NULLB;
1256 term = data->line;
1257 value = chptr + 1;
1258
1259 if((sscanf(value, "%ld %ld",
1260 &lvalue1, &lvalue2)) != 2 || lvalue2 == 0){
1261 ErrLogPrintf("Invalidly formatted input file\n");
1262 return ISAMMiscError;
1263 }
1264
1265 uidf[count].uid = lvalue1;
1266 uidf[count].field = lvalue2;
1267
1268 uid_last = uidf[count].uid;
1269
1270 if(++count > UID_NUM_CHUNK) {
1271 uidf_allocated += UID_NUM_CHUNK;
1272 uidf = Realloc(uidf, sizeof(ISAMUidField)*uidf_allocated);
1273 }
1274
1275 prevterm = MemNew(LINE_SIZE_CHUNK);
1276 StringNCpy(prevterm, term, LINE_SIZE_CHUNK);
1277
1278 /* Reading to the end of file */
1279
1280 while(ISAMReadLine(data) > 0) {
1281 if(data->line[0] == NULLB)
1282 continue;
1283 if((chptr = StringChr(data->line, ISAM_DATA_CHAR)) == NULL) {
1284 ErrLogPrintf("No ISAM delimiter precent in the input\n"
1285 "Line: \"%s\" \nPrevterm: \"%s\"\n",
1286 data->line, prevterm);
1287 return ISAMMiscError;
1288 }
1289
1290 *chptr = NULLB;
1291 term = data->line;
1292 value = ++chptr;
1293
1294 /* Yes, we got new term, so closing information about
1295 previous */
1296
1297 if(StringCmp(term, prevterm)) {
1298 if((error = ISAMDumpTermEntry(cap, off_fd, ca_fd,
1299 uidf, count, &offset)) !=
1300 ISAMNoError) {
1301 ErrLogPrintf("Failed to dump entry. All failed!\n"
1302 "Term: \"%s\"\n", prevterm);
1303 return error;
1304 }
1305
1306 if(offset > data->CAMaxOffset) {
1307 FileClose(ca_fd);
1308 FileClose(off_fd);
1309 ca_count++;
1310
1311 sprintf(filename, "%s%ld.%s",
1312 data->CAName, (long) ca_count, data->CADBExt);
1313 if((ca_fd = FileOpen(filename, "bw")) == NULL)
1314 return ISAMBadFileName;
1315
1316 sprintf(filename, "%s%ld.%s",
1317 data->CAName, (long) ca_count, data->CAOffExt);
1318 if((off_fd = FileOpen(filename, "bw")) == NULL)
1319 return ISAMBadFileName;
1320 }
1321
1322 fprintf(out_fd, "%s%c%ld %ld\n", prevterm,
1323 ISAM_DATA_CHAR, (long) offset, (long) ca_count);
1324 count = 0;
1325 StringNCpy(prevterm, term, LINE_SIZE_CHUNK);
1326 uid_last = -1;
1327 }
1328
1329 if((sscanf(value,
1330 "%ld %ld", &uid_in, &lvalue2)) != 2 || lvalue2 == 0) {
1331 ErrLogPrintf("Invalidly formatted database. Database creation "
1332 "failed.\nValue = \"%s\", Field = %ld\n",
1333 value, (long) field_in);
1334 return ISAMInvalidFormat;
1335 }
1336
1337 uid_in = lvalue1;
1338 field_in = lvalue2;
1339
1340 if(uid_last == uid_in) {
1341 uidf[count].field |= field_in;
1342 } else {
1343 uidf[count].uid = uid_in;
1344 uidf[count].field = field_in;
1345 uid_last = uid_in;
1346 count++;
1347 }
1348
1349 if(count >= uidf_allocated) {
1350 uidf_allocated += UID_NUM_CHUNK;
1351 uidf = Realloc(uidf, sizeof(ISAMUidField)*uidf_allocated);
1352 }
1353 }
1354 /* Writting last entry */
1355
1356 if((error =
1357 ISAMDumpTermEntry(cap, off_fd, ca_fd,
1358 uidf, count, &offset)) != ISAMNoError) {
1359 ErrLogPrintf("Failed to dump entry. All failed!\n");
1360 return error;
1361 }
1362
1363 fprintf(out_fd, "%s%c%ld %ld\n", prevterm,
1364 ISAM_DATA_CHAR, (long) offset, (long) ca_count);
1365
1366 FileClose(data->db_fd);
1367 data->db_fd = NULL;
1368 }
1369
1370 ISAMTmpCAFree(cap);
1371 MemFree(uidf);
1372 MemFree(prevterm);
1373
1374 FileClose(ca_fd);
1375 FileClose(out_fd);
1376 FileClose(off_fd);
1377
1378 if((error = ISAMMakeIndex((VoidPtr)data, 0, 0)) != ISAMNoError) {
1379 ErrLogPrintf("Failed to create ISAM String Index All failed!\n");
1380 return error;
1381 }
1382 ISAMObjectFree((VoidPtr)data);
1383 return ISAMNoError;
1384 }
1385
1386 /* ---------------------- ISAMInitSearch --------------------------
1387 Purpose: Initialize ISAM Numeric Search. Checks for any errors
1388
1389 Parameters: ISAM Object
1390 Returns: ISAM Error Code
1391 NOTE: None
1392 ------------------------------------------------------------------*/
ISAMInitSearch(ISAMObjectPtr object)1393 static ISAMErrorCode ISAMInitSearch(ISAMObjectPtr object)
1394 {
1395
1396 Int4Ptr FileInfo;
1397 Int4 Version, IsamType, DBFileLength;
1398 ISAMDataPtr data;
1399 Int4 reserved2;
1400
1401 if(object == NULL)
1402 return ISAMBadParameter;
1403
1404 data = (ISAMDataPtr) object;
1405
1406 if(data->initialized == TRUE)
1407 return ISAMNoError;
1408
1409 if(!Nlm_MemMapAvailable()) {
1410 if((data->FileStart =
1411 ISAMReadFileInMemory(data->IndexFileName)) == NULL)
1412 return ISAMBadFileName;
1413 FileInfo = (Int4Ptr)data->FileStart;
1414 } else {
1415 if((data->mmp = Nlm_MemMapInit(data->IndexFileName)) == NULL)
1416 return ISAMMemMap;
1417
1418 FileInfo = (Int4Ptr)data->mmp->mmp_begin;
1419 }
1420 /* For numeric search. */
1421 data->mfp = NlmOpenMFILE(data->DBFileName);
1422
1423 /* Check for consistence of files and parameters */
1424
1425 if((Version = SwapUint4(FileInfo[0])) != ISAM_VERSION)
1426 return ISAMBadVersion;
1427
1428 if((IsamType = SwapUint4(FileInfo[1])) != data->type)
1429 return ISAMBadType;
1430
1431 data->NumTerms = SwapUint4(FileInfo[3]);
1432 data->NumSamples = SwapUint4(FileInfo[4]);
1433 data->PageSize = SwapUint4(FileInfo[5]);
1434 data->max_line_size = SwapUint4(FileInfo[6]);
1435
1436 if(data->PageSize != MEMORY_ONLY_PAGE_SIZE) {
1437 /* Special case of memory-only index */
1438 if((DBFileLength =
1439 SwapUint4(FileInfo[2])) != FileLength(data->DBFileName))
1440 return ISAMWrongFile;
1441 }
1442
1443 /* This space reserved for future use */
1444
1445 data->idx_option = SwapUint4(FileInfo[7]);
1446 reserved2 = SwapUint4(FileInfo[8]);
1447
1448 if(data->max_line_size != 0)
1449 data->line = MemNew(data->max_line_size + 1);
1450
1451 if(data->type == ISAMNumeric)
1452 data->KeyDataSamples = (NISAMKeyDataPtr)(FileInfo + 9);
1453 else
1454 data->KeySamples = (Uint4Ptr)(FileInfo + 9);
1455
1456 data->initialized = TRUE;
1457 return ISAMNoError;
1458 }
1459
1460 /* ------------------------ ISAMGetIdxOption ------------------------
1461 Purpose: Returns user specified option from ISAM database
1462
1463 Parameters: ISAM object
1464 Returns: User specified option (set while formating)
1465 NOTE: None
1466 ------------------------------------------------------------------*/
ISAMGetIdxOption(ISAMObjectPtr object,Int4Ptr idx_option)1467 ISAMErrorCode ISAMGetIdxOption(ISAMObjectPtr object, Int4Ptr idx_option)
1468 {
1469 ISAMDataPtr data;
1470 ISAMErrorCode error;
1471
1472 if(object == NULL)
1473 return ISAMMiscError;
1474
1475 data = (ISAMDataPtr) object;
1476
1477 if(data->initialized == FALSE) {
1478 if((error = ISAMInitSearch(object)) != ISAMNoError)
1479 return error;
1480 }
1481
1482 *idx_option = data->idx_option;
1483
1484 return ISAMNoError;
1485 }
1486
1487 /* ------------------------ ISAMGetIdxOption ------------------------
1488 Purpose: To set option to check or not check for non-unique
1489 elements
1490 Parameters: ISAM object
1491 Returns: None
1492 NOTE: None
1493 ------------------------------------------------------------------*/
ISAMSetCheckForNonUnique(ISAMObjectPtr object,Boolean test_non_unique)1494 void ISAMSetCheckForNonUnique(ISAMObjectPtr object, Boolean test_non_unique)
1495 {
1496 ISAMDataPtr data;
1497
1498 if(object == NULL)
1499 return;
1500
1501 data = (ISAMDataPtr) object;
1502
1503 data->test_non_unique = test_non_unique;
1504
1505 return;
1506 }
1507
/* ----------------------- ISAMSetDataSorted ------------------------
   Purpose:     To mark the data of an ISAM object as already sorted
                and to record its number of terms
   Parameters:  object    - ISAM object, NULL is ignored
                num_terms - number of terms stored in the object
   Returns:     None
   NOTE:        None
  ------------------------------------------------------------------*/
void ISAMSetDataSorted(ISAMObjectPtr object, Int4 num_terms)
{
    ISAMDataPtr data = (ISAMDataPtr) object;

    /* Same NULL handling as ISAMSetCheckForNonUnique. */
    if(data == NULL)
        return;

    data->sorting_done = TRUE;
    data->NumTerms = num_terms;
}
1514
1515 /* ---------------------- ISAMUninitSearch --------------------------
1516 Purpose: Uninitialize an ISAM search (free all allocated and used
1517 buffers and unmap and close all mapped/opened files).
1518 Undoes what the ISAMInitSearch function does.
1519 Parameters: ISAM object
1520 Returns: ISAM Error Code
1521 NOTE: None
1522 ------------------------------------------------------------------*/
ISAMUninitSearch(ISAMObjectPtr object)1523 ISAMErrorCode ISAMUninitSearch(ISAMObjectPtr object)
1524 {
1525 ISAMDataPtr data = NULL;
1526
1527 if (!object)
1528 return ISAMBadParameter;
1529
1530 if ( !(data = (ISAMDataPtr) object))
1531 return ISAMBadParameter;
1532
1533 if (data->initialized == FALSE)
1534 return ISAMNoError;
1535
1536 if (data->mmp != NULL) {
1537 Nlm_MemMapFini(data->mmp);
1538 data->mmp = NULL;
1539 } else {
1540 MemFree(data->FileStart);
1541 data->FileStart = NULL;
1542 }
1543
1544 if (data->db_fd != NULL)
1545 FileClose(data->db_fd);
1546
1547 NlmCloseMFILE(data->mfp);
1548
1549 if (data->max_line_size != 0) {
1550 data->max_line_size = 0;
1551 MemFree(data->line);
1552 data->line = NULL;
1553 }
1554
1555 data->initialized = FALSE;
1556
1557 return ISAMNoError;
1558 }
1559 /* ---------------------- ISAMObjectFree --------------------------
1560 Purpose: To terminate all allocated and used buffers
1561 unmap and close all mapped/opened files
1562 Parameters: ISAM object
1563 Returns: None
1564 NOTE: None
1565 ------------------------------------------------------------------*/
1566
ISAMObjectFree(ISAMObjectPtr object)1567 void ISAMObjectFree(ISAMObjectPtr object)
1568 {
1569 ISAMDataPtr data = (ISAMDataPtr) object;
1570
1571 if (ISAMUninitSearch(object) != ISAMNoError)
1572 return;
1573
1574 if((data = (ISAMDataPtr) object) == NULL)
1575 return;
1576
1577 MemFree(data->DBFileName);
1578 MemFree(data->IndexFileName);
1579 MemFree(data->CAName);
1580 MemFree(data->CADBExt);
1581 MemFree(data->CAOffExt);
1582
1583 MemFree(data);
1584
1585 return;
1586 }
1587
/* Stores in *Start the index of the first term of page SampleNum and
   returns the number of terms on that page: a full PageSize, except for
   the last page, which gets whatever remains up to NumTerms. */
static Int4 GetPageNumElements(ISAMDataPtr data, Int4 SampleNum,
                               Int4Ptr Start)
{
    Int4 page_start = SampleNum * data->PageSize;

    *Start = page_start;

    if (SampleNum + 1 == data->NumSamples)
        return data->NumTerms - page_start;

    return data->PageSize;
}
1599
1600 #define NCBISAM_ITER_MAX 30
1601 /* ------------------------ NISAMSearch ----------------------------
1602 Purpose: Main search function of Numeric ISAM
1603
1604 Parameters: Key - integer to search
1605 Data - returned value (for NIASM with data)
1606 Index - internal index in database
1607 Returns: ISAM Error Code
1608 NOTE: None
1609 ------------------------------------------------------------------*/
NISAMSearch(ISAMObjectPtr object,Uint4 Number,Uint4Ptr Data,Uint4Ptr Index)1610 ISAMErrorCode NISAMSearch(ISAMObjectPtr object,
1611 Uint4 Number,
1612 Uint4Ptr Data,
1613 Uint4Ptr Index
1614 )
1615 {
1616 Boolean found;
1617 ISAMDataPtr data;
1618 Int4 Start = 0, Stop, SampleNum;
1619 Int4 NumElements, *KeyPage, *KeyPageStart;
1620 Int4 first, last, current, type;
1621 Boolean NoData;
1622 NISAMKeyDataPtr KeyDataPage=NULL, KeyDataSamples, KeyDataPageStart;
1623 Uint4Ptr KeySamples;
1624 Uint4 Key;
1625 ISAMErrorCode error;
1626
1627 if((data = (ISAMDataPtr) object) == NULL)
1628 return ISAMBadParameter;
1629
1630 if(data->initialized == FALSE) {
1631 if((error = ISAMInitSearch(object)) != ISAMNoError)
1632 return error;
1633 }
1634
1635 NoData = (data->type == ISAMNumericNoData);
1636 KeyDataSamples = data->KeyDataSamples;
1637 KeySamples = data->KeySamples;
1638 type = data->type;
1639
1640 /* search the sample file. */
1641
1642 Stop = data->NumSamples -1;
1643
1644 if (!data->lastKeyDataPage || Number <= data->first_gi || Number >= data->last_gi)
1645 {
1646 while(Stop >= Start) {
1647 SampleNum = ((Uint4)(Stop + Start)) >> 1;
1648 if (type == ISAMNumericNoData)
1649 Key = SwapUint4(KeySamples[SampleNum]);
1650 else
1651 Key = SwapUint4(KeyDataSamples[SampleNum].key);
1652
1653 /* If this is an exact match, return the master term number. */
1654
1655 if (Key == Number) {
1656 if (Data != NULL) {
1657 if (NoData) {
1658 *Data = SampleNum * data->PageSize;
1659 } else {
1660 *Data = SwapUint4(data->KeyDataSamples[SampleNum].data);
1661 }
1662 }
1663 if(Index != NULL)
1664 *Index = SampleNum * data->PageSize;
1665
1666 /* NULL this out so we don't confuse the next lookup. */
1667 data->lastKeyDataPage = NULL;
1668
1669 return ISAMNoError;
1670 }
1671
1672 /* Otherwise, search for the next sample. */
1673 if ( Number < Key )
1674 Stop = --SampleNum;
1675 else
1676 Start = SampleNum +1;
1677 }
1678
1679 /* If the term is out of range altogether, report not finding it. */
1680
1681 if ( (SampleNum < 0) || (SampleNum >= data->NumSamples)) {
1682
1683 if (Data != NULL)
1684 *Data = ISAMNotFound;
1685
1686 if(Index != NULL)
1687 *Index = ISAMNotFound;
1688
1689 return ISAMNotFound;
1690 }
1691
1692 /* load the appropriate page of numbers into memory. */
1693
1694 NumElements = GetPageNumElements(data, SampleNum, &Start);
1695 first = Start;
1696 last = Start + NumElements - 1;
1697
1698 if (NoData) {
1699 if (data->mfp->mfile_true)
1700 {
1701 NlmSeekInMFILE(data->mfp, Start*sizeof(Int4), SEEK_SET);
1702 KeyPageStart = (Int4Ptr) data->mfp->mmp;
1703 KeyPage = KeyPageStart - Start;
1704 }
1705 else
1706 {
1707 KeyPageStart = (Int4Ptr) MemNew((NumElements + 1) * sizeof(Int4));
1708 NlmSeekInMFILE(data->mfp, Start*sizeof(Int4), SEEK_SET);
1709 NlmReadMFILE((Uint1Ptr)KeyPageStart, sizeof(Int4), NumElements,
1710 data->mfp);
1711 KeyPage = KeyPageStart - Start;
1712 }
1713 } else {
1714 if (data->mfp->mfile_true)
1715 {
1716 NlmSeekInMFILE(data->mfp, Start*sizeof(NISAMKeyData), SEEK_SET);
1717 KeyDataPageStart = (NISAMKeyDataPtr) data->mfp->mmp;
1718 KeyDataPage = KeyDataPageStart - Start;
1719 /* The following data is used if the next lookup is on the same page. */
1720 data->first_gi = SwapUint4(KeyDataPage[first].key);
1721 data->last_gi = SwapUint4(KeyDataPage[last].key);
1722 data->first = first;
1723 data->last = last;
1724 data->lastKeyDataPage = KeyDataPage;
1725 }
1726 else
1727 {
1728 KeyDataPageStart = (NISAMKeyDataPtr) MemNew((NumElements + 1) *
1729 sizeof(NISAMKeyData));
1730 NlmSeekInMFILE(data->mfp, Start*sizeof(NISAMKeyData), SEEK_SET);
1731 NlmReadMFILE((Uint1Ptr)KeyDataPageStart, sizeof(NISAMKeyData), NumElements,
1732 data->mfp);
1733 KeyDataPage = KeyDataPageStart - Start;
1734 }
1735 }
1736 }
1737 else
1738 {
1739 first = data->first;
1740 last = data->last;
1741 KeyDataPage = data->lastKeyDataPage;
1742 }
1743
1744 found = FALSE;
1745 /* Search the page for the number. */
1746 if (NoData) {
1747 while (first <= last)
1748 {
1749 current = (first+last)/2;
1750 Key = SwapUint4(KeyPage[current]);
1751 if (Key > Number)
1752 last = --current;
1753 else if (Key < Number)
1754 first = ++current;
1755 else
1756 {
1757 found = TRUE;
1758 break;
1759 }
1760 }
1761 } else {
1762 while (first <= last)
1763 {
1764 current = (first+last)/2;
1765 Key = SwapUint4(KeyDataPage[current].key);
1766 if (Key > Number)
1767 last = --current;
1768 else if (Key < Number)
1769 first = ++current;
1770 else
1771 {
1772 found = TRUE;
1773 break;
1774 }
1775 }
1776 }
1777
1778
1779 if (found == FALSE) /* not found. */
1780 {
1781
1782 if (Data != NULL)
1783 *Data = ISAMNotFound;
1784
1785 if(Index != NULL)
1786 *Index = ISAMNotFound;
1787
1788 if (data->mfp->mfile_true == FALSE)
1789 {
1790 if (NoData)
1791 KeyPageStart = MemFree(KeyPageStart);
1792 else
1793 KeyDataPageStart = MemFree(KeyDataPageStart);
1794 }
1795
1796 return ISAMNotFound;
1797 }
1798
1799 if (Data != NULL) {
1800 if (NoData)
1801 *Data = Start + current;
1802 else
1803 *Data = SwapUint4(KeyDataPage[current].data);
1804 }
1805
1806 if(Index != NULL)
1807 *Index = Start + current;
1808
1809 if (data->mfp->mfile_true == FALSE)
1810 {
1811 if (NoData)
1812 KeyPageStart = MemFree(KeyPageStart);
1813 else
1814 KeyDataPageStart = MemFree(KeyDataPageStart);
1815 }
1816
1817 return ISAMNoError;
1818 }
1819
1820 /* ---------------------- NISAMSearchList --------------------------
1821 Purpose: Perform search of multiple Keys
1822
1823 Parameters: NumKeys - number of input keys
1824 Keys - array of keys
1825 Returns: Data - array of returned values
1826 Index - array of internal indexes
1827 NOTE: None
1828 ------------------------------------------------------------------*/
NISAMSearchList(ISAMObjectPtr object,Int4 NumKeys,Uint4Ptr Keys,Uint4Ptr Data,Uint4Ptr Index)1829 ISAMErrorCode NISAMSearchList(ISAMObjectPtr object,
1830 Int4 NumKeys,
1831 Uint4Ptr Keys,
1832 Uint4Ptr Data,
1833 Uint4Ptr Index
1834 )
1835 {
1836 Int4 count;
1837 ISAMErrorCode error;
1838
1839 if (object == NULL || Data == NULL)
1840 return ISAMBadParameter;
1841
1842 for (count = 0; count < NumKeys; count++) {
1843 if((error = NISAMSearch(object, Keys[count],
1844 Data + count, Index + count)) < 0)
1845 return error;
1846 }
1847
1848 return ISAMNoError;
1849 }
1850
ISAMDecompressCA(Uint1Ptr buffer,Int4 length,Int4 num_bits,Int4 num_uids)1851 static Uint4Ptr ISAMDecompressCA(Uint1Ptr buffer, Int4 length,
1852 Int4 num_bits, Int4 num_uids)
1853 {
1854 Uint4Ptr data;
1855 Int4 diff, dividend;
1856 Int4 i, template, base;
1857 Int4 byte_num = 0, bit_num = 0;
1858
1859 if(buffer == NULL || num_uids == 0)
1860 return NULL;
1861
1862 data = MemNew(sizeof(Uint4)*num_uids);
1863
1864 base = PowersOfTwo[num_bits];
1865
1866 for(i = 0; i < num_uids; i++) {
1867
1868 diff = dividend = 0;
1869
1870 if(num_bits != 0)
1871 template = PowersOfTwo[num_bits - 1];
1872 else
1873 template = 0;
1874
1875 /* Reading dividend first */
1876
1877 while(buffer[byte_num] & OneBit[bit_num]) {
1878 dividend++;
1879 if(++bit_num > 7) {
1880 bit_num = 0;
1881 byte_num++;
1882 }
1883 }
1884
1885 /* And skipping following 0 bit */
1886
1887 if(++bit_num > 7) {
1888 bit_num = 0;
1889 byte_num++;
1890 }
1891
1892 for(; template; template >>= 1) {
1893 if(buffer[byte_num] & OneBit[bit_num])
1894 diff |= template;
1895
1896 if(++bit_num > 7) {
1897 bit_num = 0;
1898 byte_num++;
1899 }
1900 }
1901 data[i] = dividend*base + diff + (i == 0 ? 0 : (data[i-1] + 1));
1902
1903 } /* Over all uids */
1904
1905 return data;
1906 }
1907
ISAMDecompressFA(Uint1Ptr buffer,Int4 num_uids)1908 static Uint4Ptr ISAMDecompressFA(Uint1Ptr buffer, Int4 num_uids)
1909 {
1910 Uint4Ptr fields;
1911 Int4 i, j;
1912
1913 if(buffer == NULL || num_uids == 0)
1914 return NULL;
1915
1916 fields = MemNew(sizeof(Uint4)*num_uids);
1917
1918 for(i = 0, j =0; j < num_uids; i++) {
1919
1920 fields[j] |= PowersOfTwo[buffer[i] & FA_Mask];
1921
1922 if(!(buffer[i] & 0x80)) {
1923 j++;
1924 }
1925 }
1926
1927 return fields;
1928 }
1929
1930 /* ------------------------ ISAMSearchTerm -------------------------
1931 Purpose: Main search function of complete String ISAM
1932
1933 Parameters: object - ISAM Object
1934 term_in - input string
1935 field_mask - fields to search in 0 and -1 mean search
1936 all fields
1937 uid - array of returned uids
1938 count number of returned uids
1939 Returns: ISAM Error Code
1940 NOTE: Initialization done with first call to this function
1941 ------------------------------------------------------------------*/
ISAMSearchTerm(ISAMObjectPtr object,CharPtr term_in,Uint4 field_mask,Uint4Ptr PNTR uid_out,Int4Ptr count)1942 ISAMErrorCode ISAMSearchTerm(ISAMObjectPtr object, CharPtr term_in,
1943 Uint4 field_mask, Uint4Ptr PNTR uid_out,
1944 Int4Ptr count)
1945 {
1946 ISAMErrorCode error;
1947 ISAMDataPtr data;
1948 CharPtr term = NULL, value = NULL;
1949 Uint1Ptr buffer;
1950 Int4 ca_count = 0, num_uids = 0, num_bits, field_len=0;
1951 Int4 offset, i, j, length;
1952 Char filename[MAX_FILENAME_LEN];
1953 FILE *ca_fd, *off_fd;
1954 Uint4 bit_flag = 0;
1955 Uint4 numbers[32];
1956 Uint4 index;
1957 Uint4Ptr field, uid;
1958 long int lvalue1, lvalue2;
1959
1960 if((data = (ISAMDataPtr) object) == NULL)
1961 return ISAMBadParameter;
1962
1963 /* First searching for term in database */
1964
1965 if((error = SISAMSearch(object, term_in,
1966 0, &term, &value, &index)) != ISAMNoError) {
1967 *count = 0;
1968 *uid_out = NULL;
1969 return error;
1970 }
1971
1972 MemFree(term);
1973
1974 /* Now retriving information about uids and fields */
1975
1976 if((sscanf(value, "%ld %ld", &lvalue1, &lvalue2)) != 2) {
1977 ErrLogPrintf("Error in database formating (%s)\n", value);
1978 return error;
1979 }
1980
1981 offset = lvalue1;
1982 ca_count = lvalue2;
1983
1984 MemFree(value);
1985
1986 /* Opening corresponding files */
1987
1988 sprintf(filename, "%s%ld.%s",
1989 data->CAName, (long) ca_count, data->CAOffExt);
1990
1991 if((off_fd = FileOpen(filename, "r")) == NULL)
1992 return ISAMBadFileName;
1993
1994 sprintf(filename, "%s%ld.%s",
1995 data->CAName, (long) ca_count, data->CADBExt);
1996
1997 if((ca_fd = FileOpen(filename, "r")) == NULL)
1998 return ISAMBadFileName;
1999
2000 /* Getting header/offset information */
2001
2002 MemSet(numbers, 0, sizeof(numbers));
2003
2004 if((fseek(off_fd, offset, SEEK_SET)) != 0)
2005 return ISAMFseekFailed;
2006
2007
2008 FileRead(&num_uids, 1, sizeof(Uint4), off_fd);
2009 FileRead(&offset, 1, sizeof(Uint4), off_fd);
2010 FileRead(&length, 1, sizeof(Uint4), off_fd);
2011 FileRead(&num_bits, 1, sizeof(Uint4), off_fd);
2012 FileRead(&bit_flag, 1, sizeof(Uint4), off_fd);
2013
2014 if(field_mask == 0)
2015 field_mask = (Uint4)(-1);
2016
2017 if(!(bit_flag & field_mask)) { /* Do not satisfy given bitmask */
2018 *count = 0;
2019 *uid_out = NULL;
2020 return ISAMNotFound;
2021 }
2022
2023 for(j = 0; j < 32; j++) {
2024 if(bit_flag & PowersOfTwo[j]) {
2025 FileRead(&numbers[j], 1, sizeof(Uint4), off_fd);
2026 field_len += numbers[j];
2027 }
2028 }
2029
2030 /* Now reading uids and fields from CA/FA file */
2031
2032 fseek(ca_fd, offset, SEEK_SET);
2033 buffer = MemNew(length);
2034 FileRead(buffer, 1, length, ca_fd);
2035
2036 if((uid = ISAMDecompressCA(buffer, length,
2037 num_bits, num_uids)) == NULL) {
2038 ErrLogPrintf("Cannot decompress coded array. Retrieve failed.");
2039 return ISAMMiscError;
2040 }
2041
2042 MemFree(buffer);
2043 buffer = MemNew(field_len);
2044
2045 FileRead(buffer, 1, field_len, ca_fd);
2046
2047 if((field = ISAMDecompressFA(buffer, num_uids)) == NULL) {
2048 ErrLogPrintf("Cannot decompress fields array. Retrieve failed.");
2049 return ISAMMiscError;
2050 }
2051
2052 /* Now filtering returned uids by field_mask */
2053
2054 for(i = 0, j = 0; i < num_uids; i++) {
2055 if(field[i] & field_mask) {
2056 uid[j] = uid[i];
2057 j++;
2058 }
2059 }
2060
2061 *count = j;
2062 *uid_out = uid;
2063
2064 MemFree(field);
2065 MemFree(buffer);
2066 FileClose(ca_fd);
2067 FileClose(off_fd);
2068
2069 return ISAMNoError;
2070 }
2071
ISAMGetDataNumber(CharPtr KeyData)2072 static Int4 ISAMGetDataNumber(CharPtr KeyData)
2073 {
2074 CharPtr chptr;
2075 Int4 count, value;
2076 long int lvalue;
2077
2078 if((chptr = StringChr(KeyData, ISAM_DATA_CHAR)) != NULL) {
2079 chptr++;
2080 if((count = sscanf(chptr, "%ld", &lvalue)) != 1)
2081 return -1;
2082 else {
2083 value = lvalue;
2084 return value;
2085 }
2086 }
2087 return -1;
2088 }
2089
2090 /*
2091 This returns the position of the first character that differs
2092 between the query Term and the Isam Key, or -1 if they are identical.
2093 */
ISAMDiffChar(CharPtr Term,CharPtr Key,Boolean IgnoreCase)2094 static Int4 ISAMDiffChar(CharPtr Term, CharPtr Key, Boolean IgnoreCase)
2095
2096 {
2097 CharPtr Start = Term;
2098
2099 if(IgnoreCase) {
2100 while(*Term && (TO_UPPER(*Term) == TO_UPPER(*Key))) {
2101 Term++;
2102 Key++;
2103 }
2104 } else {
2105 while(*Term && (*Term == *Key)) {
2106 Term++;
2107 Key++;
2108 }
2109 }
2110
2111 if(*Term != NULLB)
2112 return((Int4)(Term - Start));
2113
2114 for(;;) {
2115 if (ENDS_ISAM_KEY(Key))
2116 return(-1);
2117
2118 if (*Key != ' ')
2119 break;
2120
2121 Key++;
2122 }
2123
2124 return((Int4)(Term - Start));
2125 }
2126
2127 #define ID_DATA_CHUNK 16
SISAMFindAllData(ISAMObjectPtr object,CharPtr term_in,Int4Ptr PNTR ids_out,Int4Ptr count_out)2128 ISAMErrorCode SISAMFindAllData(ISAMObjectPtr object,
2129 CharPtr term_in,
2130 Int4Ptr PNTR ids_out,
2131 Int4Ptr count_out)
2132 {
2133 ISAMDataPtr data;
2134 ISAMErrorCode error;
2135 Int4 index, Start, Stop;
2136 Int4 i, Diff = 0, SampleNum, Pos;
2137 Int4 TermNum, count, NumBytes, allocated;
2138 Int4Ptr ids;
2139 Uint4Ptr SamplePos;
2140 CharPtr Page, Key, FileStart, Ptr, chptr;
2141 CharPtr value, key_out;
2142
2143 if((data = (ISAMDataPtr) object) == NULL)
2144 return ISAMBadParameter;
2145
2146 *count_out = 0;
2147 *ids_out = NULL;
2148
2149 if(data->initialized == FALSE) {
2150 if((error = ISAMInitSearch(object)) != ISAMNoError)
2151 return error;
2152 }
2153
2154 if((error = SISAMSearch(object, term_in, 0, &key_out,
2155 &value, (Uint4Ptr) &index)) != ISAMNoError) {
2156 return error;
2157 }
2158
2159 MemFree(key_out);
2160 MemFree(value);
2161
2162 if(data->mmp != NULL)
2163 FileStart = (CharPtr)data->mmp->mmp_begin;
2164 else
2165 FileStart = data->FileStart;
2166
2167 if(data->PageSize != MEMORY_ONLY_PAGE_SIZE)
2168 SamplePos = data->KeySamples + data->NumSamples + 1;
2169 else
2170 SamplePos = data->KeySamples;
2171
2172 SampleNum = index / data->PageSize;
2173 TermNum = index % data->PageSize;
2174
2175 Start = SampleNum; Stop = SampleNum;
2176 if(TermNum == 0) { /* Exact match. Borders must be checked */
2177 for(i = 1; Diff == -1 && (SampleNum - i) >= 0; i++) {
2178 Key = FileStart + SwapUint4(SamplePos[SampleNum-i]);
2179 if((Diff = ISAMDiffChar(term_in, Key, TRUE)) == -1)
2180 Start = SampleNum - i;
2181 }
2182 for(i = 1; Diff == -1 && (SampleNum + i) < data->NumSamples; i++) {
2183 Key = FileStart + SwapUint4(SamplePos[SampleNum + i]);
2184 if((Diff = ISAMDiffChar(term_in, Key, TRUE)) == -1)
2185 Stop = SampleNum + i;
2186 }
2187 if(Start == Stop) { /* We have to load 2 pages */
2188 if(Start-- < 0) Start = 0;
2189 if(Stop++ > data->NumSamples -1)
2190 Stop = data->NumSamples -1;
2191 }
2192 } else {
2193 Stop++;
2194 }
2195
2196 /* Reading all in memory */
2197
2198 Pos = SwapUint4(data->KeySamples[Start]);
2199 NumBytes = SwapUint4(data->KeySamples[Stop]) - Pos;
2200 Page = (CharPtr) MemNew(NumBytes + 1);
2201 NlmSeekInMFILE(data->mfp, Pos, SEEK_SET);
2202 NlmReadMFILE((Uint1Ptr)Page, sizeof(Char), NumBytes, data->mfp);
2203 Page[NumBytes] = NULLB;
2204
2205 /* Now removing all \n and \r characters */
2206
2207 for(chptr = Page; *chptr != NULLB; chptr++) {
2208 if(*chptr == '\n' || *chptr == '\r')
2209 *chptr = NULLB;
2210 }
2211
2212 /* Search the page for the term. */
2213
2214 allocated = ID_DATA_CHUNK;
2215 ids = MemNew(sizeof(Int4) * allocated);
2216 count = 0;
2217
2218 Ptr = Page;
2219 while (Ptr - Page < NumBytes) {
2220 Diff = ISAMDiffChar(term_in, Ptr, TRUE);
2221
2222 if (Diff == -1) {
2223 if(count >= allocated) {
2224 allocated += ID_DATA_CHUNK;
2225 ids = Realloc(ids, allocated * sizeof(Int4));
2226 }
2227 ids[count] = ISAMGetDataNumber(Ptr);
2228 count++;
2229 }
2230 Ptr += StringLen(Ptr);
2231
2232 while(Ptr - Page < NumBytes && *Ptr == NULLB)
2233 Ptr++;
2234 }
2235
2236 *count_out = count;
2237 *ids_out = ids;
2238
2239 MemFree(Page);
2240
2241 return ISAMNoError;
2242 }
2243
static void ISAMExtractData(CharPtr KeyData,
                            CharPtr PNTR Key, CharPtr PNTR Data)
{
    /* Splits an ISAM line "key<ISAM_DATA_CHAR>data" into two freshly
       allocated strings.  Key and Data may each be NULL when the caller
       does not need that part.  A line without ISAM_DATA_CHAR yields an
       empty data string.  Nothing is written when KeyData is NULL. */
    CharPtr line_copy;
    CharPtr separator;

    if (KeyData == NULL) {
        return;
    }

    /* Work on a private copy so the input line is never modified. */
    line_copy = StringSave(KeyData);
    separator = StringChr(line_copy, ISAM_DATA_CHAR);

    if (separator != NULL) {
        /* Cut the copy in two at the separator. */
        *separator = NULLB;
    }

    if (Data != NULL) {
        if (separator != NULL) {
            *Data = StringSave(separator + 1);
        } else {
            *Data = StringSave("");
        }
    }

    if (Key != NULL) {
        *Key = StringSave(line_copy);
    }

    MemFree(line_copy);
}
2269
SISAMSearch(ISAMObjectPtr object,CharPtr term_in,Int4 flags,CharPtr PNTR term_out,CharPtr PNTR value,Uint4Ptr index)2270 ISAMErrorCode SISAMSearch(ISAMObjectPtr object,
2271 CharPtr term_in,
2272 Int4 flags,
2273 CharPtr PNTR term_out,
2274 CharPtr PNTR value,
2275 Uint4Ptr index)
2276 {
2277 ISAMDataPtr data;
2278 ISAMErrorCode error;
2279
2280 Int4 Diff, Start, Stop, SampleNum, Length, Pos;
2281 Int4 TermNum, NumBytes, FoundShort = -1;
2282 CharPtr Page, Key, FileStart, Ptr, ShortTerm, chptr;
2283 Uint4Ptr SamplePos;
2284 Boolean IgnoreCase, Short, Follow;
2285
2286 if((data = (ISAMDataPtr) object) == NULL)
2287 return ISAMBadParameter;
2288
2289 if(data->initialized == FALSE) {
2290 if((error = ISAMInitSearch(object)) != ISAMNoError)
2291 return error;
2292 }
2293
2294 IgnoreCase = TRUE; /* We will set this option to avoid more
2295 complications */
2296
2297 /* search the sample file first */
2298
2299 Start = 0;
2300 Stop = data->NumSamples -1;
2301 Length = StringLen(term_in);
2302 ShortTerm= MemNew(data->max_line_size);
2303
2304 Follow = (Boolean)(flags & ISAM_FOLLOW_KEY);
2305 Short = (Boolean)((flags & ISAM_SHORT_KEY) || Follow);
2306
2307 if(data->mmp != NULL)
2308 FileStart = (CharPtr)data->mmp->mmp_begin;
2309 else
2310 FileStart = data->FileStart;
2311
2312 if(data->PageSize != MEMORY_ONLY_PAGE_SIZE)
2313 SamplePos = data->KeySamples + data->NumSamples + 1;
2314 else
2315 SamplePos = data->KeySamples;
2316
2317 while(Stop >= Start) {
2318 SampleNum = ((Uint4)(Stop + Start)) >> 1;
2319
2320 Key = FileStart + SwapUint4(SamplePos[SampleNum]);
2321 Diff = ISAMDiffChar(term_in, Key, IgnoreCase);
2322
2323 /* If this is an exact match, return the master term number. */
2324 if (Diff == -1) {
2325 ISAMExtractData(Key, term_out, value);
2326 *index = data->PageSize * SampleNum;
2327 MemFree(ShortTerm);
2328 return ISAMNoError;
2329 }
2330
2331 /* If the key is a superset of the sample term, backup until just
2332 before the term. */
2333 if (Short && (Diff >= Length)) {
2334 if (SampleNum > 0)
2335 SampleNum--;
2336
2337 if (IgnoreCase) {
2338 while((SampleNum > 0) &&
2339 (StrNICmp(term_in,
2340 FileStart+SwapUint4(SamplePos[SampleNum]),
2341 Length) == 0))
2342 SampleNum--;
2343 } else {
2344 while((SampleNum > 0) &&
2345 (StrNCmp(term_in,
2346 FileStart + SwapUint4(SamplePos[SampleNum]),
2347 Length) == 0))
2348 SampleNum--;
2349 }
2350
2351 FoundShort = SampleNum + 1;
2352 Ptr = FileStart + SwapUint4(SamplePos[SampleNum+1]);
2353 StringCpy(ShortTerm, Ptr);
2354 break;
2355 } else
2356 /* If preceding is desired, note the key. */
2357
2358 if (Follow) {
2359 FoundShort = SampleNum;
2360 StringCpy(ShortTerm, Key);
2361 }
2362
2363 /* Otherwise, search for the next sample. */
2364 if (IgnoreCase ? TO_LOWER(term_in[Diff]) < TO_LOWER(Key[Diff]) :
2365 term_in[Diff] < Key[Diff])
2366 Stop = --SampleNum;
2367 else
2368 Start = SampleNum +1;
2369 }
2370
2371 /* If the term is out of range altogether, report not finding it. */
2372
2373 if ( (SampleNum < 0) || (SampleNum >= data->NumSamples)) {
2374 MemFree(ShortTerm);
2375 return ISAMNotFound;
2376 }
2377
2378 /* load the appropriate page of terms into memory. */
2379
2380 Pos = SwapUint4(data->KeySamples[SampleNum]);
2381
2382 NumBytes = SwapUint4(data->KeySamples[SampleNum + 1]) - Pos;
2383 Page = (CharPtr) MemNew(NumBytes + 1);
2384 NlmSeekInMFILE(data->mfp, Pos, SEEK_SET);
2385 NlmReadMFILE((Uint1Ptr)Page, sizeof(Char), NumBytes, data->mfp);
2386 Page[NumBytes] = NULLB;
2387
2388 /* Now removing all \n and \r characters */
2389
2390 for(chptr = Page; *chptr != NULLB; chptr++) {
2391 if(*chptr == '\n' || *chptr == '\r')
2392 *chptr = NULLB;
2393 }
2394
2395 /* Search the page for the term. */
2396 TermNum = 0;
2397 Ptr = Page;
2398 while (Ptr - Page < NumBytes) {
2399 Diff = ISAMDiffChar(term_in, Ptr, IgnoreCase);
2400
2401 if (Diff == -1) /* Complete match */
2402 break;
2403
2404 if (Short && (Diff >= Length)) /* Partialy complete */
2405 break;
2406
2407 /* Just next available term accepted */
2408
2409 if (Follow && (IgnoreCase ?
2410 TO_UPPER(term_in[Diff]) < TO_UPPER(Ptr [Diff]) :
2411 term_in[Diff] < Ptr [Diff]))
2412 break;
2413
2414 Ptr += StringLen(Ptr);
2415
2416 while(Ptr - Page < NumBytes && *Ptr == NULLB)
2417 Ptr++;
2418
2419 TermNum++;
2420 }
2421
2422 /* If we didn't find a match in the page, then we failed, unless the
2423 items that begins the next page is a match (only possible if
2424 ISAM_SHORT_KEY or ISAM_FOLLOW_KEY was specified. */
2425 if (Ptr - Page == NumBytes) {
2426
2427 MemFree(Page);
2428
2429 if (FoundShort >= 0) {
2430 ISAMExtractData(ShortTerm, term_out, value);
2431 *index = data->PageSize * FoundShort;
2432 MemFree(ShortTerm);
2433 return ISAMNoError;
2434 } else {
2435 *index = (Uint4) -1;
2436 MemFree(ShortTerm);
2437 return ISAMNotFound;
2438 }
2439 }
2440
2441 /* Otherwise, we found a match. */
2442 ISAMExtractData(Ptr, term_out, value);
2443
2444 *index = (data->PageSize * SampleNum) + TermNum;
2445
2446 MemFree(Page);
2447 MemFree(ShortTerm);
2448
2449 return ISAMNoError;
2450 }
2451
2452 /* ------------------------ NISAMFindKey ---------------------------
2453 Purpose: Return Key value by absolute internal index
2454
2455 Parameters: Index - absolute internal index
2456 Returns: Key - corresponding key value
2457 Data - corresponding data value
2458 NOTE:
2459 ------------------------------------------------------------------*/
NISAMFindKey(ISAMObjectPtr object,Int4 Index,Uint4Ptr Key,Uint4Ptr Data)2460 ISAMErrorCode NISAMFindKey(ISAMObjectPtr object,
2461 Int4 Index,
2462 Uint4Ptr Key,
2463 Uint4Ptr Data
2464 )
2465 {
2466 return NISAMFindKeys(object, Index, Index, Key, Data);
2467 }
2468
2469
2470 /* ---------------------- NISAMFindKeys -------------------------
2471 Purpose: Retuns set of Key/Data pairs from
2472 First to Last internal index
2473
2474 Parameters: First - beginning of interval
2475 Last - the end of interval
2476 Returns: Keys - array of Keys
2477 Data - array of Data
2478 NOTE: None
2479 ------------------------------------------------------------------*/
NISAMFindKeys(ISAMObjectPtr object,Int4 First,Int4 Last,Uint4Ptr Keys,Uint4Ptr Data)2480 ISAMErrorCode NISAMFindKeys(ISAMObjectPtr object,
2481 Int4 First,
2482 Int4 Last,
2483 Uint4Ptr Keys,
2484 Uint4Ptr Data
2485 )
2486 {
2487 ISAMDataPtr data = (ISAMDataPtr)object;
2488 Int4 TotalNums, count;
2489 Int4Ptr KeyPage;
2490 NISAMKeyDataPtr KeyDataPage;
2491 Boolean NoData = (data->type == ISAMNumericNoData);
2492 ISAMErrorCode error;
2493
2494 if(data == NULL)
2495 return ISAMBadParameter;
2496
2497 if(data->initialized == FALSE) {
2498 if((error = ISAMInitSearch(object)) != ISAMNoError)
2499 return error;
2500 }
2501
2502 if ((First < 0) || (Last >= data->NumTerms) || (First > Last))
2503 return ISAMBadParameter;
2504
2505 TotalNums = Last-First + 1;
2506
2507 if (NoData) {
2508 KeyPage = (Int4Ptr)MemNew((TotalNums + 1) * sizeof(Int4));
2509 NlmSeekInMFILE(data->mfp, First*sizeof(Int4), SEEK_SET);
2510 NlmReadMFILE((Uint1Ptr)KeyPage, sizeof(Int4), TotalNums, data->mfp);
2511 } else {
2512 KeyDataPage = (NISAMKeyDataPtr)MemNew((TotalNums + 1) *
2513 sizeof(NISAMKeyData));
2514 NlmSeekInMFILE(data->mfp, First*sizeof(NISAMKeyData), SEEK_SET);
2515 NlmReadMFILE((Uint1Ptr)KeyDataPage, sizeof(NISAMKeyData), TotalNums,
2516 data->mfp);
2517 }
2518
2519 if (NoData) {
2520 for (count = 0; count < TotalNums; count++) {
2521 if (Keys != NULL)
2522 Keys[count] = SwapUint4(KeyPage[count]);
2523 if (Data != NULL)
2524 Data[count] = First + count;
2525 }
2526 } else {
2527 for (count = 0; count < TotalNums; count++) {
2528 if (Keys != NULL)
2529 Keys[count] = SwapUint4(KeyDataPage[count].key);
2530 if (Data != NULL)
2531 Data[count] = SwapUint4(KeyDataPage[count].data);
2532 }
2533 }
2534
2535 if (NoData)
2536 MemFree(KeyPage);
2537 else
2538 MemFree(KeyDataPage);
2539
2540 return ISAMNoError;
2541 }
2542
2543 /* ------------------------ ISAMNumTerms ---------------------------
2544 Purpose: Returns total number of terms in ISAM database
2545
2546 Parameters: ISAM object
2547 Returns: Number of terms
2548 NOTE: None
2549 ------------------------------------------------------------------*/
ISAMNumTerms(ISAMObjectPtr object,Int4Ptr terms)2550 ISAMErrorCode ISAMNumTerms(ISAMObjectPtr object, Int4Ptr terms)
2551 {
2552 ISAMDataPtr data = (ISAMDataPtr) object;
2553 ISAMErrorCode error;
2554
2555 if(data == NULL || terms == NULL)
2556 return ISAMBadParameter;
2557
2558 if(data->initialized == FALSE) {
2559 if((error = ISAMInitSearch(object)) != ISAMNoError)
2560 return error;
2561 }
2562
2563 *terms = data->NumTerms;
2564 return ISAMNoError;
2565 }
2566
/****************************************************************************/
/* INTERNAL FUNCTIONS */
/****************************************************************************/
2570
ISAMCountLines(ISAMDataPtr data)2571 ISAMErrorCode ISAMCountLines(ISAMDataPtr data)
2572 /* this returns the number of lines in a file. */
2573 {
2574 if(data == NULL)
2575 return ISAMBadParameter;
2576
2577 data->NumTerms = 0;
2578 rewind(data->db_fd);
2579 while(ISAMReadLine(data) > 0)
2580 data->NumTerms++;
2581
2582 rewind(data->db_fd);
2583 return ISAMNoError;
2584 }
2585
#ifdef NISAM_TEST_MODULE
/* Self test for numeric ISAM (compiled only with NISAM_TEST_MODULE).
   Reads all key/data pairs with NISAMFindKeys, then searches for every
   key with NISAMSearch and reports entries whose value or index differ.
   Both the key file and the index file name are required arguments.
   Returns 1 on usage or library errors, 0 otherwise. */
Int2 Main(void)
{
    ISAMObjectPtr object;
    Int4 i, terms, key_failed=0;
    Uint4Ptr Keys = NULL, Data = NULL;
    Uint4 Value;
    Uint4 Index;
    ISAMErrorCode error;
    Int2 status = 1;
    CharPtr PNTR argv = GetArgv();
    Int4 argc = GetArgc();

    /* argv[1] and argv[2] are both used below */
    if(argc < 3) {

        printf("USAGE: %s <key file name> <index filename>\n", argv[0]);
        return 1;
    }

    if((object = ISAMObjectNew(ISAMNumeric, argv[1], argv[2])) == NULL) {
        printf("Failed to create ISAM object.\n");
        return 1;
    }

    /* if((error = ISAMMakeIndex(object, 0)) != ISAMNoError) {
       printf("Failed to create numerical index. "
       "Error code is %d\n", error);
       return 1;
       } */

    if((error = ISAMNumTerms(object, &terms)) != ISAMNoError) {
        printf("Failed to return number of terms. "
               "Error code is %d\n", (int) error);
        goto cleanup;
    }

    printf("Number of terms is %d\n", (int) terms);

    Keys = (Uint4Ptr) MemNew(terms*sizeof(Uint4));
    Data = (Uint4Ptr) MemNew(terms*sizeof(Uint4));

    if((error = NISAMFindKeys(object, 0,
                              terms-1, Keys, Data)) != ISAMNoError) {
        printf("Failed to find keys. Error code is %d\n", (int) error);
        goto cleanup;
    }

    for(i=0; i < terms; i++) {
        if(i%1000 == 0)
            printf("Passed number %d\n", (int) i);

        if((error = NISAMSearch(object, Keys[i],
                                &Value, &Index)) != ISAMNoError) {
            printf("Failed to search. Error code is %d\n", (int) error);
            goto cleanup;
        }

        if(Value != Data[i] || Index != (Uint4) i) {
            printf("ISAM failed for key = %d\n "
                   "Index: %d expected %d\n"
                   "Value: %d expected %d\n",
                   (int) Keys[i], (int) Index, (int) i,
                   (int) Value, (int) Data[i]
                   );
            /* Give up after too many failures */
            if(key_failed++ > 100)
                break;
        }
    }
    if(key_failed == 0)
        printf("Test succeeded\n");

    status = 0;

 cleanup:
    /* Shared by the normal exit and all error exits after object creation */
    MemFree(Keys);
    MemFree(Data);
    ISAMObjectFree(object);
    return status;
}
#endif
#ifdef SISAM_TEST_MODULE

/* Self test for string ISAM (compiled only with SISAM_TEST_MODULE).
   Optionally builds the index (when no index file name is given), then
   searches for the key of every line of the key file (up to line 5000)
   and reports position or string mismatches.
   Returns 1 on usage or library errors, 0 otherwise. */
Int2 Main(void)
{
    ISAMErrorCode error;
    CharPtr data, key_out, chptr, index_name;
    Uint4 index;
    ISAMObjectPtr isamp;
    Char tmpbuff[1024];
    FILE *fd = NULL;
    Int4 i, len, mismatches = 0;
    Int2 status = 1;
    CharPtr PNTR argv = GetArgv();
    Int4 argc = GetArgc();

    if(argc < 2) {
        printf("USAGE: %s <key file name> [<index file name>]\n", argv[0]);
        return 1;
    }

    /* The index file name is optional */
    index_name = (argc > 2) ? argv[2] : NULL;

    if((isamp = ISAMObjectNew(ISAMString, argv[1], index_name)) == NULL) {
        printf("Failed to create ISAM object.\n");
        return 1;
    }

    if(index_name == NULL) {
        printf("Indexing file %s ...\n", argv[1]);

        if((error = ISAMMakeIndex(isamp, 1)) != ISAMNoError) {
            printf("Creating of index failed with error code %d\n",
                   (int) error);
            goto cleanup;
        }
    }

    if((fd = FileOpen(argv[1], "r")) == NULL) {
        printf("Failed to open file %s\n", argv[1]);
        goto cleanup;
    }

    for(i=0; fgets(tmpbuff, sizeof(tmpbuff), fd) != NULL; i++) {

        /* Strip the line terminator only; a last line without a newline
           must keep its final character. */
        len = (Int4) StringLen(tmpbuff);
        while(len > 0 &&
              (tmpbuff[len-1] == '\n' || tmpbuff[len-1] == '\r'))
            tmpbuff[--len] = NULLB;

        /* Only the key part of the line is searched for */
        if((chptr = StringChr(tmpbuff, ISAM_DATA_CHAR)) != NULL)
            *chptr = NULLB;

        if((error = SISAMSearch(isamp, tmpbuff, 0, &key_out,
                                &data, &index)) != ISAMNoError) {
            printf("Search failed with error code %d\n"
                   "String: %s\n", (int) error, tmpbuff);
            goto cleanup;
        }

        if(index != (Uint4) i) {
            mismatches++;
            printf("Position mismatch:\n"
                   "String: %s\nData: %s\n"
                   "Position: %d (expected %d)\n",
                   key_out, data, (int) index, (int) i);
        }
        if(StringCmp(tmpbuff, key_out)) {
            mismatches++;
            printf("String mismatch:\n"
                   "String: %s\nExpected: %s\n"
                   "Data: %s\n"
                   "Position: %d (expected %d)\n",
                   key_out, tmpbuff, data, (int) index, (int) i);
        }
        if(i%100 == 0)
            printf("Passed index %d\n", (int) i);

        MemFree(key_out);
        MemFree(data);
        if(i == 5000)
            break;
    }

    /* Do not claim success when mismatches were reported above */
    if(mismatches == 0)
        printf("TEST SUCCESSFUL!!!\n");

    status = 0;

 cleanup:
    /* Shared by the normal exit and all error exits after object creation */
    if(fd != NULL)
        FileClose(fd);
    ISAMObjectFree(isamp);
    return status;
}
#endif
2729
2730
2731
2732
2733