1 /*
2 * ===========================================================================
3 *
4 * COPYRIGHT NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a Government employee and thus cannot
10 * be copyrighted. This software/database is freely available to the
11 * public for use without a copyright notice. Restrictions cannot be
12 * placed on its present or future use.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the National Library of
16 * Medicine (NLM) and the U. S. Government do not and cannot warrant the
17 * performance or results that may be obtained by using this software or
18 * data. The NLM and the U. S. Government disclaim all warranties as to
19 * performance, merchantability or fitness for any particular purpose.
20 *
21 * Please see that the author is suitably cited in any work or product
22 * based on this material.
23 *
24 * ===========================================================================
25 *
26 * RCS $Id: cdromlib.c,v 6.1 2004/04/01 13:43:05 lavr Exp $
27 *
28 * Authors: Greg Schuler, Jim Ostell, Jonathan Kans, Jonathan Epstein
29 *
30 * Original Creation Date: 9-91
31 *
32 * File Description:
33 * Functions in this file are the I/O primitives needed to retrieve data
34 * from the Entrez CD-ROMs, releases 12.0 and earlier. For releases 13.0
35 * and later, use the functions in cdnewlib.c
36 *
37 *
38 * Modifications:
39 * --------------------------------------------------------------------------
40 * Date Name Description of modification
41 * ------- ---------- -----------------------------------------------------
42 * 06-29-94 Schuler AsnRead/AsnWrite functions moved to objentr.c
43 * 07-11-94 Schuler Removed #include <cdrom.h> (no longer needed!)
44 * 07-13-94 Schuler Moved CdTermFree to cdentrez.c
45 * 08-04-94 Kans Fixed bug resulting in reading too many term pages
46 * 11-16-94 Schuler Typecasts for picky compilers
47 *
48 * 05-19-95 Schuler Added rcs Log directive for automatic insertion of
49 * modification comments.
50 *
51 * Revision $Log: cdromlib.c,v $
52 * Revision Revision 6.1 2004/04/01 13:43:05 lavr
53 * Revision Spell "occurred", "occurrence", and "occurring"
54 * Revision
55 * Revision Revision 6.0 1997/08/25 18:13:10 madden
56 * Revision Revision changed to 6.0
57 * Revision
58 * Revision Revision 5.0 1996/05/28 13:55:34 ostell
59 * Revision Set to revision 5.0
60 * Revision
61 * Revision 4.0 1995/07/26 13:50:32 ostell
62 * force revision to 4.0
63 *
64 * Revision 2.48 1995/05/16 14:36:20 schuler
65 * Automatic comment insertion enabled
66 *
67 *
68 * ==========================================================================
69 */
70
71 #define REVISION_STR "$Revision: 6.1 $"
72
73 #include <cdromlib.h>
74
75 #ifdef _OLD_CdEntrez_
76
77 static char * _this_module = "CdEntrez";
78 #undef THIS_MODULE
79 #define THIS_MODULE _this_module
80 static char * _this_file = __FILE__;
81 #undef THIS_FILE
82 #define THIS_FILE _this_file
83
84 /* =========================================================================
85 * CONSTANTS & MACROS
86 */
87
88 #define CURRENT_FORMAT_VERSION 0 /* the only data format accepted: CdInitialize() compares it with vi->format */
89
90 #define BLKSIZE ((size_t)vi->field_bucket_size) /* sizeof term index block on cdrom; dereferences vi, so vi must be non-NULL */
91
/* File-name prefix codes.
 * NOTE(review): presumably indices into sPref[] (ml, aa, nt, med, seq) --
 * confirm against MakePath(), which is not visible here. */
92 #define PREF_ML TYP_ML
93 #define PREF_AA TYP_AA
94 #define PREF_NT TYP_NT
95 #define PREF_MED (NTYPE+0)
96 #define PREF_SEQ (NTYPE+1)
97
/* File-name suffix codes: document types first, then NTYPE + field code,
 * then three extra codes after the NFLD fields.
 * NOTE(review): the mapping onto sSuff[] is done elsewhere -- confirm. */
98 #define SUF_ML TYP_ML
99 #define SUF_AA TYP_AA
100 #define SUF_NT TYP_NT
101 #define SUF_WORD (NTYPE+FLD_WORD)
102 #define SUF_MESH (NTYPE+FLD_MESH)
103 #define SUF_KYWD (NTYPE+FLD_KYWD)
104 #define SUF_AUTH (NTYPE+FLD_AUTH)
105 #define SUF_JOUR (NTYPE+FLD_JOUR)
106 #define SUF_ORGN (NTYPE+FLD_ORGN)
107 #define SUF_ACCN (NTYPE+FLD_ACCN)
108 #define SUF_GENE (NTYPE+FLD_GENE)
109 #define SUF_PROT (NTYPE+FLD_PROT)
110 #define SUF_ECNO (NTYPE+FLD_ECNO)
111 #define SUF_HIER (NTYPE+FLD_ORGN_HIER)
112 #define SUF_DATE (NTYPE+FLD_DATE)
113 #define SUF_ASN (NTYPE+NFLD)
114 #define SUF_REC (NTYPE+NFLD+1)
115 #define SUF_UID (NTYPE+NFLD+2)
116
/* File-extension codes; the values follow the order of the sExtn[] entries
 * ("dat", "idx", "lst", "pst", "lnk"). */
117 #define EXT_DAT 0
118 #define EXT_IDX 1
119 #define EXT_LST 2
120 #define EXT_PST 3
121 #define EXT_LNK 4
122
/* Compression protocol codes.
 * NOTE(review): presumably the values stored in DecompInfo.compr -- confirm. */
123 #define COMPR_NONE 0
124 #define COMPR_HUFFMAN 1
125 #define COMPR_LZW1 2
126 /* etc...*/
127 #define COMPR_DONT_KNOW 0xFF
128
129
130 #define HUFFMAN_SENTINEL 256
131
132
133 typedef struct {
134 DocUid uid; /* MEDLINE UI or seq-id of Bioseq */
135 DocType type; /* document type code (ml/nt/aa) */
136 Int4 entry_offset , /* offset into entry file(s) */
137 sum_offset , /* offset into summary file */
138 link_offset ; /* offset into link file */
139 } UidIdx, PNTR UidIdxPtr;
140
141 typedef struct decompinfo {
142 AsnIoPtr aip;
143 FILE *fp;
144 Uint1 compr; /* compression protocol */
145 unsigned int mask; /* used internally for Huffman */
146 unsigned int byte; /* used internally for Huffman */
147 Uint4 bytes_left; /* count of remaining bytes for uncompressed protocol */
148 struct decompinfo PNTR next;
149 } DecompInfo, PNTR DecompInfoPtr;
150
151
152 /* =========================================================================
153 * VARIABLES
154 */
155
156 static DecompInfoPtr DecompInfoList = NULL; /* NOTE(review): presumably head of the DecompInfo chain -- users not visible here */
157 static Int4 numinits; /* init counter: bumped by CdInit()/CdInitialize(), dropped by CdFini() */
158 static CharPtr buffer; /* shared scratch buffer: max(type_bucket_size, field_bucket_size, 512 Int4s) bytes */
159
160 static CharPtr sPath [NDIR]; /* one PATH_MAX+1 byte path buffer per directory code, allocated in CdInitialize() */
161
162 static char *sSdir[] = { "", "data", "sequence", "medline", "terms", /* default subdirectory names, indexed by directory code */
163 "index", "links", "" };
164 static char *sPref[] = { "ml", "aa", "nt", "med", "seq" };
165 static char *sSuff[] = { "word", "mesh", "kywd", "auth", "jour", "orgn",
166 "accn", "gene", "prot", "ecno", "hier", "date",
167 "fkey", "prop", "subs", "mloc",
168 "ml", "aa", "nt",
169 "asn", "rec", "uid" };
170 static char *sExtn[] = { "dat", "idx", "lst", "pst", "lnk" }; /* same order as the EXT_* codes */
171
172 static Boolean bAppendVer = FALSE; /* saved/restored per medium (CdMediaInfo.bAppendVer) */
173 static Boolean upperCaseIt = FALSE; /* when TRUE, CdInitialize() upper-cases the default subdirectory names */
174
175 static EntrezInfoPtr vi = NULL; /* info record of the active medium, read by CdInitialize() */
176 static Int4Ptr type_bucket_index[NTYPE]; /* from the .idx files */
177
/* Working state shared by CdDetailedInfo() and its CdFmtInfo() callback */
178 static size_t detInfoCharCount; /* characters needed, accumulated while countOnly is TRUE */
179 static CharPtr CdDetailedBuf = NULL; /* heap text returned by CdDetailedInfo(); released in CdFini() */
180 static Boolean countOnly; /* TRUE: CdFmtInfo() only counts; FALSE: it appends to CdDetailedBuf */
181
182 /* for saving the last term.idx file used */
183
184 static DocType term_idx_type = -1;
185 static DocField term_idx_field = -1;
186 static Int2 term_idx_count = 0;
187 static CharPtr PNTR term_idx_str;
188 static FILE *IdxFilePtr[NTYPE+2]; /* uid index files kept open between lookups when HoldIdxOpen is TRUE */
189 static Boolean HoldIdxOpen = FALSE; /* copied from the medium's HOLD_IDX_OPEN setting in SwapInCd() */
190
191
/* Integers read from the index and postings files are passed through
 * SwapInt2()/SwapInt4() before use (see UidIdxGet() and CdTrmUidsFil()).
 * NOTE(review): the identity macros on big-endian hosts suggest the files
 * hold big-endian integers -- confirm against the data format. */
192 #ifdef IS_BIG_ENDIAN
193 /* no swapping needed: define do-nothing macros */
194 #define SwapInt2(X) (X)
195 #define SwapInt4(X) (X)
196 #else
197 /* give prototypes for byte swapping functions */
198 static Int2 NEAR SwapInt2 PROTO((Int2));
199 static Int4 NEAR SwapInt4 PROTO((Int4));
200 #endif
201
202
203 /*****************************************************************************
204 *
205 * Private Function Prototypes
206 *
207 *****************************************************************************/
208 static Boolean NEAR CdInitialize PROTO((CharPtr,CharPtr,CharPtr,Int2Ptr)); /* (root path, volume-label out, .val path, version out) */
209 static Boolean NEAR CdSetPath PROTO((Int2,CharPtr)); /* (directory code, new path) */
210 static Boolean NEAR SaveCdMediaContext PROTO((CharPtr media_name));
211 static void NEAR ExtraInitWork PROTO((void));
212 static Boolean NEAR ValidateType PROTO((DocType type));
213 static Boolean NEAR ValidateField PROTO((DocType type, DocField field));
214 static Boolean NEAR ValidateUid PROTO((DocType type, DocUid uid));
215 static CharPtr NEAR MakePath PROTO((Int2 nSdir,Int2 nPref,Int2 nSuf, Int2 nExtn));
216 static Boolean NEAR LoadUidIndex PROTO((DocType type));
217 static Int2 NEAR LoadTrmIndex PROTO((DocType type, DocField field));
218 static void NEAR FreeTrmIndex PROTO((void));
219 /**** not used in reading cdrom ******
220 static Int4 NEAR MergeSegOffset PROTO((Int4 seg, Int4 offset));
221 *************************************/
222 static Boolean NEAR SplitSegOffset PROTO((Int4 value, Int2Ptr segptr, Int4Ptr offsetptr));
223 static FILE * NEAR CdDocFil PROTO((DocType type, DocUid uid, UidIdxPtr idx));
224
/* The next functions are declared without NEAR. Four of them are handed out
 * as callbacks: SwapOutCd/SwapInCd are stored in the media record, and
 * CdInitMedia/CdFmtInfo are passed to ParseMedia(). */
225 static Boolean SwapOutCd PROTO((VoidPtr med));
226 static Boolean SwapInCd PROTO((VoidPtr med));
227 static void NEAR ForceCdFini PROTO((void));
228 static Boolean CdInitMedia PROTO((VoidPtr med));
229 static Boolean CdFmtInfo PROTO((VoidPtr medName));
230
231 static CdTermPtr NEAR CdTrmLocate PROTO((CharPtr term, Int2 page));
232 static UidIdxPtr NEAR UidIdxGet PROTO((DocType type, DocUid uid, UidIdxPtr idx));
233 static void NEAR linksort PROTO((Int4Ptr uids, Int4Ptr wts, Int4 n));
234 static DecompInfoPtr NEAR DecompInit PROTO((FILE *fp));
235 static Boolean NEAR DecompFini PROTO((AsnIoPtr aip, DecompInfoPtr dip));
236 static void NEAR DecompInfoFree PROTO((DecompInfoPtr dcp));
237 static Int2 LIBCALLBACK DecompReadFunc PROTO((Pointer p, CharPtr buff, Uint2 count));
238 static Int2 HuffmanRead PROTO((DecompInfoPtr dcp, CharPtr buff, Uint2 count));
239 static Boolean NEAR IsOKMagic PROTO((Uint4 magic, CharPtr volume_label));
240 static CdTermPtr CdTermRead PROTO((Int2 type, Int2 field, CharPtr ptr, CharPtr bufr, Int2 page));
241
242 /*****************************************************************************
243 *
244 * General purpose public functions
245 *
246 *****************************************************************************/
247
248 /*****************************************************************************
249 *
250 * Term-page cache and other session state used by the functions below
251 *
252 *****************************************************************************/
253 static CharPtr trmbuf; /* for term pages */
254 static DocType trmtype; /* type of last term used in trmbuf */
255 static DocField trmfield; /* field of last term used in trmbuf */
256 static Int2 trmpage, /* page # of first page in trmbuf */
257 trmpages; /* number of pages in memory */
258 static size_t trmpagesrequest; /* how many bytes to read (5 * BLKSIZE) */
259
260 static Boolean oldStyleCfgFile; /* TRUE when the config file has no MEDIA entry (set in CdInit()) */
261
262
263 static Int2 nCdVer; /* version reported by CdInitialize(); non-zero means "already set up" to CdInit() */
/* Message formats handed to ErrPostEx(), indexed by ERR_CD_* code; some
 * entries take one or two printf-style arguments. */
264 static char *sCdError [] = {
265 "",
266 "Memory allocation error",
267 "File create error",
268 #ifdef WIN_MSWIN
269 "File open error on %Fs",
270 #else
271 "File open error on %s",
272 #endif
273 "File seek error",
274 "File read error",
275 "File write error",
276 "Bad database type code [%d]",
277 "Bad field code [%d]",
278 "No terms for type/field [%d/%d]",
279 "Bad uid number [%ld]",
280 "Bad directory number [%d]",
281 "Cannot read new data format",
282 "Index files out of date",
283 "Data decompression error",
284 "Programmer error"
285 };
286
287 static CdTermPtr cdtrmcache [10]; /* cache of most recent CdTrmFind results */
288
289 /*****************************************************************************
290 *
291 * CdInit()
292 * uses environment variables to configure initialization
293 *
294 *****************************************************************************/
295
CdInit(void)296 Boolean CdInit (void)
297
298 {
299 char media[64];
300
301 ConfigInit();
302
303 if (nCdVer) {
304 numinits++;
305 return TRUE; /* already setup */
306 }
307
308 oldStyleCfgFile = FALSE;
309
310 GetAppParam ("ncbi", "NCBI", "MEDIA", "", media, sizeof media);
311
312 /* This is a work-around to provide backwards compatibility for old */
313 /* config files which do not specify MEDIA */
314 if (media[0] == '\0')
315 {
316 StrCpy(media, "NCBI");
317 SetSoleMedia();
318 oldStyleCfgFile = TRUE;
319 }
320
321 return (ParseMedia(CdInitMedia, MEDIUM_CD | MEDIUM_DISK) != 0);
322 }
323
324
CdInitMedia(VoidPtr med)325 static Boolean CdInitMedia(VoidPtr med)
326
327 {
328 char CdRootPath[PATH_MAX];
329 char sVol[32];
330 char datvalpath[PATH_MAX];
331 CharPtr mediaName = (CharPtr) med;
332
333 GetAppParam ("ncbi", mediaName, "ROOT", "", CdRootPath, sizeof CdRootPath);
334
335 vi = NULL;
336
337 bAppendVer = FALSE;
338 upperCaseIt = FALSE;
339
340 /* "VAL" overrides "ROOT" for purposes of finding first copy of .val */
341 if (GetAppParam ("ncbi", mediaName, "VAL", CdRootPath, datvalpath, sizeof datvalpath))
342 CdSetPath (DIR_VAL, CdRootPath);
343
344 FileBuildPath(datvalpath, NULL, NULL);
345 if (! CdInitialize (CdRootPath, sVol, datvalpath, &nCdVer)) {
346 return FALSE;
347 }
348
349 trmpagesrequest =(size_t)(5 * BLKSIZE); /* number of termpages to request */
350
351 if (GetAppParam ("ncbi", mediaName, "IDX", "", CdRootPath, sizeof CdRootPath))
352 CdSetPath (DIR_IDX, CdRootPath);
353
354 /* work-around to find alternate index files when using old-style */
355 /* configuration file */
356 if (oldStyleCfgFile)
357 {
358 if (StrICmp(sVol, "SeqData") == 0 &&
359 GetAppParam ("ncbi", mediaName, "SEQIDX", "", CdRootPath,
360 sizeof CdRootPath))
361 {
362 CdSetPath (DIR_IDX, CdRootPath);
363 }
364 if (StrICmp(sVol, "MedData") == 0 &&
365 GetAppParam ("ncbi", mediaName, "MEDIDX", "", CdRootPath,
366 sizeof CdRootPath))
367 {
368 CdSetPath (DIR_IDX, CdRootPath);
369 }
370 }
371
372 if (GetAppParam ("ncbi", mediaName, "LNK", "", CdRootPath, sizeof CdRootPath))
373 CdSetPath (DIR_LNK, CdRootPath);
374 if (GetAppParam ("ncbi", mediaName, "MED", "", CdRootPath, sizeof CdRootPath))
375 CdSetPath (DIR_MED, CdRootPath);
376 if (GetAppParam ("ncbi", mediaName, "SEQ", "", CdRootPath, sizeof CdRootPath))
377 CdSetPath (DIR_SEQ, CdRootPath);
378 if (GetAppParam ("ncbi", mediaName, "TRM", "", CdRootPath, sizeof CdRootPath))
379 CdSetPath (DIR_TRM, CdRootPath);
380 SaveCdMediaContext(mediaName);
381
382 return TRUE;
383 }
384
385
SaveCdMediaContext(CharPtr media_name)386 static Boolean NEAR SaveCdMediaContext(CharPtr media_name)
387
388 {
389 MediaPtr media;
390 CdMediaInfoPtr cdm;
391 int i;
392 char ejectable[10];
393 char buffer[100];
394
395 media = PreInitMedia(media_name);
396
397 if (media == NULL)
398 return FALSE;
399
400 if (media->inited_partial || (media->media_type != MEDIUM_CD &&
401 media->media_type != MEDIUM_DISK))
402 return TRUE;
403
404 media->swapOutMedia = SwapOutCd;
405 media->swapInMedia = SwapInCd;
406 GetAppParam ("ncbi", media_name, "EJECTABLE", "0", ejectable, sizeof ejectable);
407
408 cdm = (CdMediaInfoPtr) MemNew(sizeof(CdMediaInfo));
409 cdm->ejectable = atoi(ejectable);
410 cdm->device_name = NULL;
411 cdm->raw_device_name = NULL;
412 cdm->mount_point = NULL;
413 cdm->mount_cmd = NULL;
414
415 if (GetAppParam ("ncbi", media_name, "DEVICE_NAME", "", buffer, sizeof buffer))
416 {
417 cdm->device_name = StringSave(buffer);
418 }
419 if (GetAppParam ("ncbi", media_name, "RAW_DEVICE_NAME", "", buffer, sizeof buffer))
420 {
421 cdm->raw_device_name = StringSave(buffer);
422 }
423 if (GetAppParam ("ncbi", media_name, "MOUNT_POINT", "", buffer, sizeof buffer))
424 {
425 cdm->mount_point = StringSave(buffer);
426 }
427 if (GetAppParam ("ncbi", media_name, "MOUNT_CMD", "", buffer, sizeof buffer))
428 {
429 cdm->mount_cmd = StringSave(buffer);
430 }
431 cdm->hold_idx_open = FALSE;
432 if (GetAppParam ("ncbi", media_name, "HOLD_IDX_OPEN", "", buffer, sizeof buffer))
433 {
434 cdm->hold_idx_open = StringICmp(buffer, "TRUE") == 0;
435 }
436
437 media->media_info = (VoidPtr) cdm;
438
439 for (i = 0; i < NDIR; i++)
440 {
441 cdm->sPath[i] = sPath[i];
442 sPath[i] = NULL;
443 }
444
445 media->entrez_info = vi;
446 cdm->bAppendVer = bAppendVer;
447 cdm->upperCaseIt = upperCaseIt;
448
449 media->inited_partial = TRUE;
450
451 return TRUE;
452 }
453
454
SwapOutCd(VoidPtr curm)455 static Boolean SwapOutCd(VoidPtr curm)
456 {
457 int i;
458 MediaPtr CurMedia = (MediaPtr) curm;
459 CdMediaInfoPtr cmip;
460
461 if (CurMedia != NULL)
462 {
463 cmip = (CdMediaInfoPtr) CurMedia->media_info;
464 CurMedia->entrez_info = vi;
465 vi = NULL; /* avoid freeing it */
466
467 for (i = 0; i < NDIR; i++)
468 { /* copy and avoid freeing */
469 cmip->sPath[i] = sPath[i];
470 sPath[i] = NULL;
471 }
472
473 ForceCdFini();
474 }
475
476 return TRUE;
477 }
478
479
SwapInCd(VoidPtr med)480 static Boolean SwapInCd(VoidPtr med)
481 {
482 MediaPtr newMedia = (MediaPtr) med;
483 int i;
484 CdMediaInfoPtr cmip;
485
486 cmip = (CdMediaInfoPtr) newMedia->media_info;
487
488 for (i = 0; i < NDIR; i++)
489 { /* load up sPath */
490 sPath[i] = cmip->sPath[i];
491 }
492 vi = newMedia->entrez_info;
493
494 bAppendVer = cmip->bAppendVer;
495 upperCaseIt = cmip->upperCaseIt;
496 HoldIdxOpen = cmip->hold_idx_open;
497
498 ExtraInitWork();
499
500 return TRUE;
501 }
502
503
ExtraInitWork()504 static void NEAR ExtraInitWork()
505
506 {
507 size_t bufsize;
508 int i;
509
510 /* initialize cached CdTermPtr array */
511 for (i = 0; i < 10; i++) {
512 cdtrmcache [i] = NULL;
513 }
514
515 term_idx_type = -1;
516 term_idx_field = -1;
517
518 if (buffer == NULL) {
519 bufsize = (size_t) MAX (MAX ((size_t) vi->type_bucket_size, (size_t) vi->field_bucket_size), sizeof (Int4) * 512);
520 buffer = (CharPtr) MemNew(bufsize);
521 }
522 }
523
524
ForceCdFini(void)525 static void NEAR ForceCdFini(void)
526
527 {
528 Int4 sav_numinits = numinits;
529 CharPtr savDetailedBuf;
530
531 ConfigInit(); /* simulate Init() to balance Fini() */
532 savDetailedBuf = CdDetailedBuf;
533 CdDetailedBuf = NULL; /* avoid freeing in Fini() */
534 numinits = 1;
535 CdFini();
536 numinits = sav_numinits;
537 CdDetailedBuf = savDetailedBuf;
538 }
539
540
541 /*****************************************************************************
542 *
543 * CdFini()
544 * closes cdromlib session
545 *
546 *****************************************************************************/
CdFini(void)547 Boolean CdFini (void)
548
549 {
550 Int2 i;
551 CdTermPtr trmptr;
552
553 ConfigFini();
554 numinits--;
555 if (numinits) /* haven't fixed all initializations yet */
556 return TRUE;
557
558 /* free cached CdTermPtr array */
559 for (i = 0; i < 10; i++) {
560 trmptr = cdtrmcache [i];
561 if (trmptr != NULL) {
562 if (trmptr->term != NULL) {
563 MemFree (trmptr->term);
564 }
565 MemFree (trmptr);
566 }
567 cdtrmcache [i] = NULL;
568 }
569
570 buffer = (CharPtr) MemFree(buffer);
571 FreeTrmIndex();
572 for (i = 0; i < NDIR; i++)
573 sPath[i] = (CharPtr) MemFree(sPath[i]);
574 for (i = 0; i < NTYPE; i++)
575 if (i != TYP_NT)
576 type_bucket_index[i] = (Int4Ptr) MemFree(type_bucket_index[i]);
577 else
578 type_bucket_index[i] = NULL; /* NT and AA use same index */
579 vi = EntrezInfoFree(vi);
580 trmbuf = (CharPtr) MemFree(trmbuf);
581 trmpages = 0;
582 nCdVer = 0;
583 bAppendVer = FALSE;
584 upperCaseIt = FALSE;
585
586 for (i = 0; i < NTYPE+2; i++)
587 {
588 if (IdxFilePtr[i] != NULL)
589 {
590 FileClose(IdxFilePtr[i]);
591 IdxFilePtr[i] = NULL;
592 }
593 }
594
595 CdDetailedBuf = (CharPtr) MemFree(CdDetailedBuf);
596
597 return TRUE;
598 }
599
600
601 /* =========================================================================
602 * PUBLIC FUNCTION BODIES
603 */
604
605
606 /* -------------------- CdInitialize() --------------------------------
607 * CdInitialize -- Initializes the library
608 *
609 * Parameters: sCdRoot: CD-ROM root path
610 * sVolume: pointer to volume name buffer (VOLUME_MAX)
611 * ver: pointer to version number buffer
612 *
613 * Return value: TRUE: Success.
614 * FALSE: Failure; refer to error code.
615 *
616 * Notes: 1. The file cdromdat.val must be in the specified root path.
617 * 2. Default paths strings for various subdirectories are
618 * created by this function below the specified root path.
619 * Use CdSetPath() to override the defaults.
620 */
621
CdInitialize(CharPtr sCdRoot,CharPtr sVolume,CharPtr datvalpath,Int2Ptr ver)622 static Boolean NEAR CdInitialize (CharPtr sCdRoot, CharPtr sVolume, CharPtr datvalpath, Int2Ptr ver)
623
624 {
625 Int2 i;
626 AsnIoPtr aip;
627 Char drctry [16];
628 CharPtr p;
629 size_t bufsize;
630
631
632 *sVolume = '\0';
633 *ver = 0;
634
635 numinits++; /* count the number of initialization calls */
636
637 if (vi != NULL) { /* already initialized ! */
638 StringCpy (sVolume, vi->volume_label);
639 *ver = vi->version;
640 return TRUE;
641 }
642
643 /* initialize cached CdTermPtr array */
644 for (i = 0; i < 10; i++) {
645 cdtrmcache [i] = NULL;
646 }
647
648 term_idx_type = -1;
649 term_idx_field = -1;
650
651 for (i = 0; i < NTYPE+2; i++)
652 IdxFilePtr[i] = NULL;
653
654 /* initialize storage for path names */
655 for (i = 0; i < NDIR; i++)
656 if (sPath[i] == NULL)
657 sPath[i] = (CharPtr) MemNew(PATH_MAX + 1);
658
659 /* initialize root path string variable */
660 StringNCpy (sPath[DIR_ROOT], sCdRoot, PATH_MAX);
661 FileBuildPath(sPath[DIR_ROOT], NULL, NULL);
662
663 /* read the CDROMLIB.INF file */
664 if ((aip = EntrezInfoOpen (datvalpath)) == NULL)
665 return FALSE;
666
667 /* set default paths for subdirectories */
668 for (i=1; i<NDIR; i++) {
669 StringCpy (sPath[i], sPath[DIR_ROOT]);
670 StringCpy (drctry, sSdir[i]);
671 if (upperCaseIt) {
672 p = drctry;
673 while (*p != '\0') {
674 *p = TO_UPPER (*p);
675 p++;
676 }
677 }
678 FileBuildPath(sPath[i], drctry, NULL);
679 }
680
681 vi = EntrezInfoAsnRead(aip, NULL);
682 AsnIoClose(aip);
683 if (vi == NULL)
684 return FALSE;
685
686 /* check for incompatible format */
687 if (vi->format != CURRENT_FORMAT_VERSION) {
688 ErrPostEx(SEV_ERROR, ERR_CD_BADFORMAT, 0, sCdError[ERR_CD_BADFORMAT]);
689 return FALSE;
690 }
691
692 if (buffer == NULL) {
693 bufsize = (size_t) MAX (MAX ((size_t) vi->type_bucket_size, (size_t) vi->field_bucket_size), sizeof (Int4) * 512);
694 buffer = (CharPtr) MemNew(bufsize);
695 }
696
697 StringCpy (sVolume, vi->volume_label);
698 *ver = vi->version;
699 return TRUE;
700 }
701
702 /*****************************************************************************
703 *
704 * CdGetInfo()
705 * Gets Entrez info pointer
706 *
707 *****************************************************************************/
CdGetInfo(void)708 EntrezInfoPtr CdGetInfo (void)
709
710 {
711 return vi;
712 }
713
714
715 /*****************************************************************************
716 *
717 * CdFmtInfo()
718 * Formats CD-ROM specific "detailed info" and either stores the number
719 * of characters required to format the text, or concatentates the
720 * formatted string to a global string
721 *
722 *****************************************************************************/
CdFmtInfo(VoidPtr medName)723 static Boolean CdFmtInfo(VoidPtr medName)
724 {
725 char buf[256];
726 MediaPtr media;
727 CharPtr mediaName = (CharPtr) medName;
728 CdMediaInfoPtr cdm;
729
730
731 if ((media = PreInitMedia(mediaName)) == NULL || media->invalid ||
732 (cdm = (CdMediaInfoPtr) media->media_info) == NULL)
733 {
734 return FALSE;
735 }
736
737 if (media->media_type == MEDIUM_CD)
738 StrCpy(buf, "\n CD-ROM image from ");
739 else
740 StrCpy(buf, "\n Hard disk image from ");
741 if (cdm->sPath[DIR_ROOT] == NULL)
742 {
743 StrCat(buf, "<location unknown>");
744 } else {
745 StrCat(buf, cdm->sPath[DIR_ROOT]);
746 }
747 if (media->entrez_info != NULL && media->entrez_info->volume_label != NULL)
748 {
749 StrCat(buf, "\n Volume label is ");
750 StrCat(buf, media->entrez_info->volume_label);
751 }
752 if (media->formal_name == NULL)
753 {
754 StrCat(buf, "\n [ this medium has no formal name ]");
755 }
756 else {
757 StrCat(buf, "\n Formal name is ");
758 StrCat(buf, media->formal_name);
759 }
760 StrCat(buf, "\n");
761
762 if (countOnly)
763 {
764 detInfoCharCount += StringLen(buf);
765 } else {
766 StrCat(CdDetailedBuf, buf);
767 }
768
769 /* always return FALSE, so that ParseMedia() will refrain from setting */
770 /* validity flags */
771 return FALSE;
772 }
773
774
775 /*****************************************************************************
776 *
777 * CdDetailedInfo()
778 * Gets formatted text information about the current status, or returns
779 * NULL; the text (if any) is stored in a statically allocated buffer
780 *
781 *****************************************************************************/
782
CdDetailedInfo(void)783 CharPtr CdDetailedInfo (void)
784
785 {
786 if (numinits == 0) /* not yet initialized */
787 {
788 if (CdDetailedBuf == NULL)
789 {
790 CdDetailedBuf = StringSave("CD-ROM and HARD DISK access information is not currently available\n");
791 }
792 return CdDetailedBuf;
793 }
794 detInfoCharCount = 0;
795 countOnly = TRUE;
796 ParseMedia(CdFmtInfo, MEDIUM_CD | MEDIUM_DISK);
797 countOnly = FALSE;
798 if (detInfoCharCount == 0)
799 return NULL;
800 if (CdDetailedBuf != NULL)
801 {
802 CdDetailedBuf = (CharPtr) MemFree(CdDetailedBuf);
803 }
804 CdDetailedBuf = (CharPtr) MemNew(detInfoCharCount + 200);
805 StrCpy(CdDetailedBuf, "CD-ROM and HARD DISK ACCESS\n");
806 if (CurMediaType() == MEDIUM_CD || CurMediaType() == MEDIUM_DISK)
807 {
808 StrCat(CdDetailedBuf, " Currently active medium is ");
809 StrCat(CdDetailedBuf, (GetCurMedia())->formal_name);
810 StrCat(CdDetailedBuf, "\n");
811 }
812 ParseMedia(CdFmtInfo, MEDIUM_CD | MEDIUM_DISK);
813 return CdDetailedBuf;
814 }
815
816 /* -------------------- CdSetPath() ---------------------------------
817 */
CdSetPath(Int2 dir,CharPtr path)818 static Boolean NEAR CdSetPath (Int2 dir, CharPtr path)
819
820 {
821 int k = 0;
822
823 if (path != NULL)
824 k = StringLen(path);
825
826 if ((dir<2) || (dir>=NDIR) || (k==0)) {
827 ErrPostEx(SEV_ERROR, ERR_CD_BADDIR, 0, sCdError[ERR_CD_BADDIR], dir);
828 return FALSE;
829 }
830 StringCpy (sPath[dir], path);
831 FileBuildPath(sPath[dir], NULL, NULL);
832 return TRUE;
833 }
834
835 /*****************************************************************************
836 *
837 * UidIdxGet(type, uid, idx)
838 *
839 *****************************************************************************/
UidIdxGet(DocType type,DocUid uid,UidIdxPtr idx)840 static UidIdxPtr NEAR UidIdxGet (DocType type, DocUid uid, UidIdxPtr idx)
841
842 {
843 Int4Ptr ip;
844 Int2 i, j, l, r;
845 FILE * fp;
846 CharPtr path;
847 struct idxrec {
848 DocUid uid;
849 Int4 entry_offset,
850 link_offset;
851 } PNTR idxptr;
852
853 if (! ValidateUid(type, uid))
854 return NULL;
855
856 if (type == TYP_SEQ) /* AA, NT, SEQ all the same */
857 type = TYP_AA;
858
859 if (type_bucket_index[type] == NULL)
860 {
861 if (! LoadUidIndex(type))
862 return NULL;
863 }
864
865 ip = type_bucket_index[type];
866 r = vi->types[type].num_bucket - 1;
867 l = 0;
868 j = 0;
869 while ((l <= r) && (! ((ip[j] <= uid) && (ip[j+1] > uid))))
870 {
871 j = (l + r) / 2;
872 if (uid > ip[j])
873 l = j + 1;
874 else
875 r = j - 1;
876 }
877
878 if (type != TYP_ML)
879 type = TYP_SEQ;
880
881 if ((fp = IdxFilePtr[type]) == NULL)
882 {
883 path = MakePath (DIR_IDX, type, SUF_UID, EXT_LST);
884 if ((fp = FileOpen(path, "rb")) == NULL)
885 {
886 ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
887 return NULL;
888 }
889 }
890
891 fseek(fp, (long)j * (long)vi->type_bucket_size, SEEK_SET);
892
893 j = FileRead(buffer, 1, vi->type_bucket_size, fp);
894
895 if (HoldIdxOpen)
896 {
897 IdxFilePtr[type] = fp;
898 } else {
899 FileClose(fp);
900 }
901
902 if (j == 0)
903 {
904 ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
905 return NULL;
906 }
907
908 idxptr = (struct idxrec PNTR) buffer;
909 j = vi->type_bucket_size / sizeof(struct idxrec);
910 for (i = 0; i < j; i++, idxptr++)
911 {
912 if (uid == SwapInt4(idxptr->uid))
913 {
914 if (idx == NULL)
915 idx = (UidIdxPtr) MemNew(sizeof(UidIdx));
916 else
917 MemFill(idx, '\0', sizeof(UidIdx));
918
919 idx->type = type;
920 idx->uid = uid;
921 idx->entry_offset = SwapInt4(idxptr->entry_offset);
922 idx->sum_offset = 0;
923 idx->link_offset = SwapInt4(idxptr->link_offset);
924 if (type == TYP_SEQ)
925 {
926 if (idx->entry_offset & 0x80000000)
927 idx->type = TYP_AA;
928 else
929 idx->type = TYP_NT;
930 }
931 return idx;
932 }
933 }
934
935 return NULL;
936 }
937
938 /* -------------------- CdTrmPageCt() --------------------------------
939 * CdTrmPageCt -- returns the number of term pages for a type/field pair.
940 *
941 * Parameters: type: database code.
942 * field: field code.
943 *
944 * Return value: non-zero: Success; page count.
945 * zero: Failure; refer to error code.
946 */
947
CdTrmPageCt(DocType type,DocField field)948 Int2 CdTrmPageCt (DocType type, DocField field)
949
950 {
951 if (!ValidateType (type)) return 0;
952 if (!ValidateField (type, field)) return 0;
953 return (Int2) vi->types[type].fields[field].num_bucket;
954 }
955
956
957 /* -------------------- CdTrmLookup() --------------------------------
958 * CdTrmLookup -- returns the first page that COULD contain a term.
959 *
960 * Parameters: type: database code.
961 * field: field code.
962 * term: term (or term fragment) to lookup.
963 *
964 * Return value: non-negative: Success; page number. (zero-based)
965 * negative: Failure; refer to error code.
966 */
967
CdTrmLookup(DocType type,DocField field,CharPtr term)968 Int2 CdTrmLookup (DocType type, DocField field, CharPtr term)
969
970 {
971 int i;
972
973 if (!LoadTrmIndex (type, field))
974 return(-1);
975
976 for (i=0; i< term_idx_count; i++) {
977 if (MeshStringICmp (term_idx_str[i], term) >= 0)
978 return MAX (0,i-2);
979 }
980 return MAX (0,term_idx_count-2);
981 }
982
983
984 /* -------------------- CdTrmPages() ---------------------------------
985 * CdTrmPages -- fetches a range of term pages from the CD-ROM.
986 *
987 * Parameters: type: database code.
988 * field: field code.
989 * pg: page number of first page to read.
990 * ct: number of pages to read.
991 * buffer: buffer to receive the data.
992 *
993 * Return value: non-zero: Success; number of pages read.
994 * zero: Failure; refer to error code.
995 *
996 * Notes: The term pages contain a series of variable-length term records,
997 * each of which is an ASCII string with the following structure:
998 *
999 * <term>\t<c1>\t<c2>\t<offset>\n
1000 *
1001 * term: term
1002 * c1: count of 'special' occurrences.
1003 * c2: count of total occurrences. ** NOTE **
1004 * offset: offset in postings file of list of document numbers.
1005 * \t: tab character (?).
1006 * \n: newline character ('\x0A').
1007 *
1008 * A term record may cross a page boundary.
1009 */
1010
CdTrmPages(DocType type,DocField field,Int2 pg)1011 Int2 CdTrmPages (DocType type, DocField field, Int2 pg)
1012
1013 {
1014 CharPtr path, buff;
1015 FILE *fd;
1016 Int4 offset;
1017 size_t bytes;
1018
1019 if ((type == trmtype) && (field == trmfield) && (pg == trmpage) && (trmpages))
1020 return trmpages;
1021
1022 if (trmbuf == NULL)
1023 trmbuf = (CharPtr) MemNew(trmpagesrequest + 2); /* allow terminating 00 */
1024 buff = trmbuf; /* use local static buffer */
1025 /* need to fill buffer with NULL's */
1026 MemFill(buff, 0, trmpagesrequest + 2);
1027 trmpages = 0; /* no pages loaded */
1028
1029 if (!ValidateType (type)) return 0;
1030 if (!ValidateField (type, field)) return 0;
1031
1032 path = MakePath (DIR_TRM, type, field, EXT_LST);
1033 if ((fd = FileOpen(path, "rb")) ==NULL) {
1034 ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1035 return 0;
1036 }
1037 offset = (long) pg * BLKSIZE;
1038 fseek (fd, offset, SEEK_SET);
1039 bytes = FileRead(buff, 1, trmpagesrequest, fd);
1040 FileClose (fd);
1041
1042 if (bytes == trmpagesrequest) /* got the extra page */
1043 bytes -= BLKSIZE;
1044 trmpages = (Int2)(bytes/BLKSIZE);
1045 if (bytes % BLKSIZE) /* got a partial last page */
1046 trmpages++;
1047 trmtype = type;
1048 trmfield = field;
1049 trmpage = pg;
1050 /* may have to switch \n for \r */
1051 return trmpages;
1052 }
1053
1054
/* --------------------  CdTrmUidsFil ()  --------------------------------
 *  CdTrmUidsFil -- retrieves a list of uids for a term and writes them
 *                  to a file.
 *
 *  Parameters:    type:      database code.
 *                 field:     field code.
 *                 offset:    offset into postings file.
 *                 count:     number of uids.
 *                 filename:  name of file to receive the results.
 *                 append:    if TRUE the uids are appended to the file,
 *                            otherwise the file is overwritten.
 *
 *  Return value:  non-zero:  Success; number of documents (same as count).
 *                 zero:      Failure; refer to error code.
 *
 *  Notes:  the offset value is obtained by:
 *            1) looking up a term (using CdTrmLookup()).
 *            2) loading term pages (using CdTrmPages()).
 *            3) finding the term in the loaded pages.
 */
1072
Int4 CdTrmUidsFil (DocType type, DocField field, Int4 offset, Int4 count, CharPtr filename, Boolean append)

{
    /*
     * Copies count uids from the postings file (EXT_PST) of (type, field),
     * starting at byte offset, into the file filename.  The uids are moved
     * through the shared static buffer one block at a time and passed
     * through SwapInt4 before being written.  Returns count when every uid
     * was copied, otherwise 0.
     */
    Int4     i;
    FILE    *fd1;        /* postings file (input) */
    FILE    *fd2;        /* result file (output)  */
    CharPtr  path;
    Int4Ptr  ptr;
    size_t   cnt;        /* uids handled in the current block */
    Int4     cntr;       /* uids still to be copied           */

    if (!ValidateType (type)) return 0;
    if (!ValidateField (type, field)) return 0;

    /* a negative count would be converted to an enormous size_t below */
    if (count < 0) return 0;

    path = MakePath (DIR_TRM, type, field, EXT_PST);
    if ((fd1 = FileOpen(path, "rb")) == NULL)
    {
        ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
        return 0;
    }

    if ((fd2 = FileOpen(filename, append ? "ab" : "wb")) == NULL)
    {
        FileClose (fd1);
        ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], filename);
        return 0;
    }

    cntr = count;
    ptr = (Int4Ptr) buffer;

    if (fseek (fd1, offset, SEEK_SET) != 0)
    {
        /* leave cntr untouched so that failure is reported below */
        ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
    }
    else
    {
        while (cntr > 0)
        {
            cnt = (size_t) MIN (cntr, (Int4)(BLKSIZE / sizeof(Int4)));

            /* a short read would otherwise push stale buffer contents out */
            if (FileRead (buffer, sizeof (Int4), cnt, fd1) != cnt)
            {
                ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
                break;
            }
            for (i = 0; i < (Int4) cnt; i++)
                ptr[i] = SwapInt4(ptr[i]);

            /* anything less than cnt items written is a failure */
            if (FileWrite (buffer, sizeof(Int4), cnt, fd2) != cnt)
            {
                ErrPostEx(SEV_ERROR, ERR_CD_FILEWRITE, 0, sCdError[ERR_CD_FILEWRITE]);
                break;
            }
            cntr -= (Int4) cnt;
        }
    }

    FileClose (fd1);
    FileClose (fd2);

    if (cntr)            /* didn't finish */
        return 0;
    return count;
}
1133
1134 /* -------------------- CdTrmUidsMem () --------------------------------
1135 * CdTrmUidsMem -- retrieves a list of uids for a term.
1136 *
1137 * Parameters: type: database code.
1138 * field: field code.
1139 * offset: offset into postings file.
1140 * count: number of uids.
1141 * mem: storage to receive the results.
1142 *
1143 * Return value: non-zero: Success; number of documents (same as count).
1144 * zero: Failure; refer to error code.
1145 *
1146 * Notes: the offset value is obtained by:
1147 * 1) looking up a term (using CdTrmLookup()).
1148 * 2) loading term pages (using CdTrmPages()).
1149 * 3) finding the term in the loaded pages.
1150 */
1151
Int4 CdTrmUidsMem (DocType type, DocField field, Int4 offset, Int4 count, DocUidPtr mem)

{
    /*
     * Reads up to count uids from the postings file (EXT_PST) of
     * (type, field), starting at byte offset, straight into mem and passes
     * each one through SwapInt4.  Returns the number of uids actually
     * read, which may be smaller than count; 0 on failure.
     */
    Int4     i;
    FILE    *fd1;
    CharPtr  path;
    size_t   cnt;

    if (!ValidateField (type, field))
        return 0;
    if (mem == NULL)
        return 0;

    /* a negative count would be converted to an enormous size_t below */
    if (count < 0)
        return 0;

    path = MakePath (DIR_TRM, type, field, EXT_PST);
    if ((fd1 = FileOpen(path, "rb")) == NULL)
    {
        ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
        return 0;
    }

    if (fseek (fd1, offset, SEEK_SET) != 0)
    {
        /* do not hand back uids taken from some other position */
        FileClose (fd1);
        ErrPostEx(SEV_WARNING, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
        return 0;
    }

    cnt = FileRead ((VoidPtr)mem, sizeof (Int4), (size_t) count, fd1);
    FileClose (fd1);

    /* only the items that were really read are converted */
    for (i = 0; i < (Int4) cnt; i++)
        mem[i] = SwapInt4(mem[i]);

    return (Int4) cnt;
}
1181
1182 /* -------------------- CdDocAsnOpen() -----------------------------------
1183 * CdDocAsnOpen -- returns an active AsnIoPtr for a document.
1184 *
1185 * Parameters: type: class code (ML/AA/NT).
1186 * uid: unique identifier
1187 *
1188 * Return value: non-null: Success; active asnioptr
1189 * null: Failure; refer to error code.
1190 *
1191 * For TYP_ML, the value is a Medline-entry
1192 * For TYP_AA or TYP_NT it is a Bioseq-set.
1193 */
1194
CdDocAsnOpen(DocType type,DocUid uid)1195 AsnIoPtr CdDocAsnOpen (DocType type, DocUid uid)
1196
1197 {
1198 FILE * fd2;
1199 AsnIoPtr aip;
1200 DecompInfoPtr decomp;
1201
1202 fd2 = CdDocFil (type, uid, NULL);
1203 if (fd2 == NULL)
1204 return NULL;
1205
1206 if (vi->no_compression)
1207 { /* no compression on this data source */
1208 aip = AsnIoNew(ASNIO_BIN_IN, fd2, NULL, NULL, NULL);
1209 }
1210 else { /* use alternate read function for compressed data sources */
1211 decomp = DecompInit(fd2);
1212 aip = AsnIoNew(ASNIO_BIN_IN, fd2, decomp, DecompReadFunc, NULL);
1213 if (aip == NULL)
1214 DecompFini(NULL, decomp);
1215 decomp->aip = aip;
1216 }
1217
1218 return aip;
1219 }
1220
1221 /*****************************************************************************
1222 *
1223 * CdDocAsnClose(aip)
1224 * closes an aip opened by CdDocAsnOpen
1225 *
1226 *****************************************************************************/
AsnIoPtr CdDocAsnClose (AsnIoPtr aip)

{
    /*
     * Counterpart of CdDocAsnOpen.  For compressed data sources the
     * decompression state belonging to aip is finished first; the stream
     * itself is closed in every case.  Always returns NULL.
     */
    if (vi->no_compression)
    {
        AsnIoClose(aip);
        return NULL;
    }

    DecompFini(aip, NULL);
    AsnIoClose(aip);
    return NULL;
}
1239
1240 /* =========================================================================
1241 * PRIVATE FUNCTION BODIES
1242 */
1243
ValidateUid(DocType type,DocUid uid)1244 static Boolean NEAR ValidateUid (DocType type, DocUid uid)
1245
1246 {
1247 EntrezTypeDataPtr tdp;
1248 DocType tmp;
1249
1250 if (! ValidateType(type))
1251 return FALSE;
1252
1253 tmp = type;
1254 if (tmp == TYP_SEQ)
1255 tmp = TYP_AA;
1256
1257 tdp = &vi->types[tmp];
1258 if ((uid >= tdp->minuid) && (uid <= tdp->maxuid))
1259 return TRUE;
1260
1261 if (type == TYP_SEQ)
1262 {
1263 tdp = &vi->types[TYP_NT];
1264 if ((uid >= tdp->minuid) && (uid <= tdp->maxuid))
1265 return TRUE;
1266 }
1267
1268 return FALSE;
1269 }
1270
ValidateType(DocType type)1271 static Boolean NEAR ValidateType (DocType type)
1272
1273 {
1274 if (((type < 0) || (type >= NTYPE)) && (type != TYP_SEQ)) {
1275 ErrPostEx(SEV_ERROR, ERR_CD_BADTYPE, 0, sCdError[ERR_CD_BADTYPE], type);
1276 return FALSE;
1277 }
1278 return TRUE;
1279 }
1280
ValidateField(DocType type,DocField field)1281 static Boolean NEAR ValidateField (DocType type, DocField field)
1282
1283 {
1284 if (type<0 || type>=NTYPE) {
1285 ErrPostEx(SEV_ERROR, ERR_CD_BADTYPE, 0, sCdError[ERR_CD_BADTYPE], type);
1286 return FALSE;
1287 }
1288 if (field<0 || field>=NFLD) {
1289 ErrPostEx(SEV_ERROR, ERR_CD_BADFIELD, 0, sCdError[ERR_CD_BADFIELD], field);
1290 return FALSE;
1291 }
1292 if (vi->types[type].fields[field].num_bucket == 0) {
1293 ErrPostEx(SEV_ERROR, ERR_CD_NOTERMS, 0, sCdError[ERR_CD_NOTERMS], type, field);
1294 return FALSE;
1295 }
1296 return TRUE;
1297 }
1298
MakePath(Int2 nSdir,Int2 nPref,Int2 nSuff,Int2 nExtn)1299 static CharPtr NEAR MakePath (Int2 nSdir, Int2 nPref, Int2 nSuff, Int2 nExtn)
1300
1301 {
1302 Char ltemp[8], filename[60];
1303 Char c;
1304 CharPtr p;
1305
1306 StringCpy (buffer, sPath[nSdir]);
1307
1308 StringCpy (filename, sPref[nPref]);
1309 StringCat (filename, sSuff[nSuff]);
1310 StringCat (filename, ".");
1311 if (nExtn <0) {
1312 c = (char) -nExtn;
1313 ltemp[0] = (char) ('0' + (c/100));
1314 ltemp[1] = (char) ('0' + ((c%100)/10));
1315 ltemp[2] = (char) ('0' + (c%10));
1316 ltemp[3] = '\0';
1317 StringCat (filename, ltemp);
1318 }
1319 else
1320 StringCat (filename, sExtn[nExtn]);
1321
1322 if (bAppendVer)
1323 StringCat (filename, ";1");
1324
1325 if (upperCaseIt) {
1326 p = filename;
1327 while (*p != '\0') {
1328 *p = TO_UPPER (*p);
1329 p++;
1330 }
1331 }
1332
1333 FileBuildPath(buffer, NULL, filename);
1334 return buffer;
1335 }
1336
LoadUidIndex(DocType type)1337 static Boolean NEAR LoadUidIndex (DocType type)
1338
1339 {
1340 Int2 i;
1341 size_t n;
1342 Int4Ptr p;
1343 CharPtr path;
1344 FILE *fd;
1345 Int4 header [3];
1346 Int4 version;
1347 Int4 issue;
1348
1349 if (!ValidateType (type))
1350 return FALSE;
1351
1352 if ((type == TYP_SEQ) || (type == TYP_NT))
1353 type = TYP_AA;
1354
1355 n = (size_t) vi->types[type].num_bucket + 1;
1356 p = type_bucket_index[type];
1357 if (p != NULL)
1358 return TRUE;
1359
1360 p = (Int4Ptr) MemNew(sizeof(Int4) * n);
1361 p[n-1] = INT4_MAX; /* put sentinel at end */
1362 n--;
1363
1364 if (type == TYP_AA)
1365 {
1366 type = TYP_SEQ;
1367 }
1368
1369 path = MakePath (DIR_IDX, type, SUF_UID, EXT_IDX);
1370 if ((fd = FileOpen (path, "rb")) ==NULL) {
1371 MemFree (p);
1372 ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1373 return FALSE;
1374 }
1375 if (vi->version != 0 || vi->issue != 0) { /* for compatibility with pre-release 6 data */
1376 if (FileRead ((CharPtr)header, sizeof(Int4), 3, fd) != 3) {
1377 FileClose (fd);
1378 MemFree (p);
1379 ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
1380 return FALSE;
1381 }
1382 if (! IsOKMagic((Uint4) SwapInt4(header[1]), vi->volume_label))
1383 {
1384 ErrPostEx(SEV_ERROR, ERR_CD_BADINDEX, 0, sCdError[ERR_CD_BADINDEX]);
1385 return FALSE;
1386 }
1387 header [2] = SwapInt4 (header [2]);
1388 version = (Int4) vi->version;
1389 issue = (Int4) vi->issue;
1390 if (header [2] != ((version << 16) | issue)) {
1391 ErrPostEx(SEV_ERROR, ERR_CD_BADINDEX, 0, sCdError[ERR_CD_BADINDEX]);
1392 return FALSE;
1393 }
1394 }
1395 if (FileRead ((CharPtr)p, sizeof(Int4), n, fd) !=n) {
1396 FileClose (fd);
1397 MemFree (p);
1398 ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
1399 return FALSE;
1400 }
1401 FileClose (fd);
1402
1403 if (type == TYP_SEQ)
1404 {
1405 type_bucket_index[TYP_NT] = p;
1406 type_bucket_index[TYP_AA] = p;
1407 } else {
1408 type_bucket_index[type] = p;
1409 }
1410 for (i=0; i< (Int2) n; i++, p++)
1411 *p = SwapInt4 (*p);
1412 return TRUE;
1413 }
1414
LoadTrmIndex(DocType type,DocField field)1415 static Int2 NEAR LoadTrmIndex (DocType type, DocField field)
1416
1417 {
1418 Int2 i, k, c, buckets;
1419 CharPtr path, p;
1420 Int4 bytes;
1421 FILE *fd;
1422
1423 if (!ValidateType(type)) return 0;
1424 if (!ValidateField(type, field)) return 0;
1425
1426 if (type == term_idx_type && field == term_idx_field)
1427 return term_idx_count;
1428
1429 path = MakePath (DIR_TRM, type, field, EXT_IDX);
1430 if ((fd = FileOpen (path, "r")) ==NULL) {
1431 ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1432 return 0;
1433 }
1434
1435 if (term_idx_count > 0) FreeTrmIndex();
1436
1437 buckets = (Int2)vi->types[type].fields[field].num_bucket;
1438 bytes = (buckets + 1) * sizeof(CharPtr);
1439 if ((term_idx_str = (CharPtr PNTR) MemNew((size_t)bytes)) ==NULL) {
1440 FileClose (fd);
1441 ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
1442 return(0);
1443 }
1444
1445 for (i=0,c=0; c!=EOF; ) {
1446 for (p=buffer, k=0; k<128; k++) {
1447 c = fgetc(fd);
1448 if (c == EOF) break;
1449 if (c == '\n' || c == '\r') {
1450 *p = '\0';
1451 break;
1452 }
1453 *p++ = (char) TO_LOWER(c);
1454 }
1455 while (c != '\n' && c != '\r' && c != EOF) {
1456 c = fgetc(fd);
1457 }
1458 *p = '\0';
1459 if (c != EOF && i < buckets) {
1460 if ((term_idx_str[i] = StringSave(buffer)) ==NULL) {
1461 FileClose(fd);
1462 term_idx_count = i;
1463 FreeTrmIndex();
1464 ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
1465 return 0;
1466 }
1467 i++;
1468 }
1469 }
1470 FileClose (fd);
1471 term_idx_count = i;
1472 term_idx_type = type;
1473 term_idx_field = field;
1474 return term_idx_count;
1475 }
1476
FreeTrmIndex(void)1477 static void NEAR FreeTrmIndex (void)
1478
1479 {
1480 int i;
1481
1482 for (i=0; i<term_idx_count; i++)
1483 {
1484 MemFree(term_idx_str[i]);
1485 }
1486 term_idx_str = (CharPtr PNTR) MemFree(term_idx_str);
1487 term_idx_count = 0;
1488 term_idx_type = -1;
1489 term_idx_field = -1;
1490 }
1491
EntrezInfoOpen(CharPtr dirname)1492 extern AsnIoPtr EntrezInfoOpen (CharPtr dirname)
1493
1494 {
1495 CharPtr p, buf, endpath;
1496 AsnIoPtr aip = NULL;
1497 FILE * fp;
1498
1499
1500 buf = (CharPtr) MemNew(PATH_MAX);
1501 p = StringMove(buf, dirname);
1502 endpath = buf + StringLen (buf);
1503 p = StringMove(p , "cdromdat.val;1");
1504 p -= 2; /* point to the semi-colon */
1505 *p = '\0'; /* wipe-out the semi-colon */
1506 if ((fp = FileOpen(buf, "rb")) == NULL)
1507 {
1508 *p = ';'; /* put back the semi-colon */
1509 if ((fp = FileOpen(buf, "rb")) != NULL)
1510 bAppendVer = TRUE;
1511 }
1512
1513 if (fp == NULL) {
1514 StringCat (buf, ";1");
1515 p = endpath;
1516 while (*p != '\0') {
1517 *p = TO_UPPER (*p);
1518 p++;
1519 }
1520 upperCaseIt = TRUE;
1521 p -= 2; /* point to the semi-colon */
1522 *p = '\0'; /* wipe-out the semi-colon */
1523 if ((fp = FileOpen (buf, "rb")) == NULL) {
1524 *p = ';'; /* put back the semi-colon */
1525 if ((fp = FileOpen(buf, "rb")) != NULL)
1526 bAppendVer = TRUE;
1527 }
1528 }
1529
1530 if (fp != NULL)
1531 aip = AsnIoNew(ASNIO_BIN_IN, fp, NULL, NULL, NULL);
1532 else
1533 ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], (CharPtr) "cdromdat.val");
1534 MemFree(buf);
1535 return aip;
1536 }
1537
1538
1539 #ifdef IS_LITTLE_ENDIAN
1540
SwapInt2(Int2 k)1541 static Int2 NEAR SwapInt2 (Int2 k)
1542
1543 {
1544 Uint2 j, l;
1545 Int2 m;
1546
1547 l = (Uint2)k;
1548 j = ((l & (Uint2)0xFF00) >> 8);
1549 j |= ((l & (Uint2)0x00FF) << 8);
1550 m = (Int2)j;
1551 return m;
1552 }
1553
SwapInt4(Int4 k)1554 static Int4 NEAR SwapInt4 (Int4 k)
1555
1556 {
1557 Uint4 j, l;
1558 Int4 m;
1559
1560 l = (Uint4)k;
1561 j = ((l & (Uint4)0xFF000000) >> 24);
1562 j |= ((l & (Uint4)0x00FF0000) >> 8);
1563 j |= ((l & (Uint4)0x0000FF00) << 8);
1564 j |= ((l & (Uint4)0x000000FF) << 24);
1565 m = (Int4)j;
1566 return m;
1567 }
1568
1569 #endif
1570
1571 /****** not used in reading cdrom **********************
1572 static Int4 NEAR MergeSegOffset (Int2 seg, Int4 offset)
1573
1574 {
1575 Int4 value;
1576
1577 value = (seg - 1) << 25;
1578 value += offset;
1579 return value;
1580 }
1581 ******************************************************/
1582 /***
1583 bit 31 = if 1, is a protein, else is not
1584 bits 30-25 = segment (file number)
1585 bits 24-0 = offset into file up to 32 mbytes big
1586 ****************/
SplitSegOffset(Int4 value,Int2Ptr segptr,Int4Ptr offsetptr)1587 static Boolean NEAR SplitSegOffset (Int4 value, Int2Ptr segptr, Int4Ptr offsetptr)
1588
1589 {
1590 *segptr = (Int2)(((value >> 25) & 0x0000003F) + 1);
1591 *offsetptr = value & 0x01FFFFFF;
1592 return TRUE;
1593 }
1594
/*****************************************************************************
*
*   FILE * CdDocFil (type, uid, idx)
*   	opens the binary asn file that holds a document and seeks to the
*   	start of the document; returns the open FILE *, or NULL on failure
*   	if idx is NULL, the index entry is looked up from type and uid
*
*****************************************************************************/
CdDocFil(DocType type,DocUid uid,UidIdxPtr idx)1601 static FILE * NEAR CdDocFil (DocType type, DocUid uid, UidIdxPtr idx)
1602
1603 {
1604 Int4 offset;
1605 Int2 seg, dir, db;
1606 CharPtr path;
1607 FILE *fd2;
1608 UidIdx ui;
1609
1610 if (idx == NULL)
1611 {
1612 idx = UidIdxGet(type, uid, &ui);
1613 if (idx == NULL)
1614 return NULL;
1615 if ((type == TYP_AA || type == TYP_NT) && idx->type != type)
1616 return NULL;
1617 }
1618
1619 SplitSegOffset(idx->entry_offset, &seg, &offset);
1620
1621 dir = (idx->type==TYP_ML) ? DIR_MED : DIR_SEQ;
1622 db = (idx->type==TYP_ML) ? PREF_MED : PREF_SEQ;
1623
1624 path = MakePath (dir, db, SUF_ASN, (Int2) (-seg));
1625 if ((fd2=FileOpen (path, "rb")) == NULL)
1626 {
1627 ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1628 return NULL;
1629 }
1630 fseek (fd2, offset, SEEK_SET);
1631 return fd2;
1632 }
1633
1634
1635 /*****************************************************************************
1636 *
1637 * CdTrmFind(type, field, term)
1638 * returns a pointer to a CdTerm structure
1639 *
1640 *****************************************************************************/
1641
/*
 * CdTrmDup -- returns a newly allocated copy of *trmptr with its own copy
 * of the term string and next set to NULL.  Returns NULL if trmptr is
 * NULL or the structure cannot be allocated.
 */
static CdTermPtr CdTrmDup (CdTermPtr trmptr)

{
    CdTermPtr copy;

    if (trmptr == NULL)
        return NULL;

    copy = (CdTermPtr) MemNew (sizeof (CdTerm));
    if (copy == NULL)
        return NULL;

    copy->type          = trmptr->type;
    copy->field         = trmptr->field;
    copy->term          = StringSave (trmptr->term);
    copy->special_count = trmptr->special_count;
    copy->total_count   = trmptr->total_count;
    copy->offset        = trmptr->offset;
    copy->page          = trmptr->page;
    copy->next          = NULL;
    return copy;
}
1663
/*
 * CdTrmCache -- puts a copy of *trmptr at the front of the ten-entry
 * cdtrmcache[].  The entry in the last slot is freed and all others move
 * one slot towards the end.  Returns trmptr itself; the cache only ever
 * holds its own copy (which is NULL when trmptr is NULL).
 */
static CdTermPtr CdTrmCache (CdTermPtr trmptr)

{
    CdTermPtr oldest;
    Int2      slot;

    /* drop the entry that is about to fall off the end */
    oldest = cdtrmcache [9];
    if (oldest != NULL)
    {
        if (oldest->term != NULL)
            MemFree (oldest->term);
        MemFree (oldest);
        cdtrmcache [9] = NULL;
    }

    /* make room in slot 0 */
    slot = 9;
    while (slot > 0)
    {
        cdtrmcache [slot] = cdtrmcache [slot - 1];
        slot--;
    }

    cdtrmcache [0] = CdTrmDup (trmptr);
    return trmptr;
}
1685
CdTrmFind(DocType type,DocField field,CharPtr term)1686 CdTermPtr CdTrmFind (DocType type, DocField field, CharPtr term)
1687
1688 {
1689 Int2 i;
1690 Int2 termpage;
1691 CdTermPtr ctp = NULL;
1692 CdTermPtr trmptr;
1693
1694 for (i = 0; i < 10; i++) {
1695 trmptr = cdtrmcache [i];
1696 if (trmptr != NULL && trmptr->type == type && trmptr->field == field &&
1697 StringICmp (trmptr->term, term) == 0) {
1698 return CdTrmDup (trmptr);
1699 }
1700 }
1701 termpage = CdTrmLookup(type,field,term);
1702 if (termpage < 0)
1703 return NULL;
1704
1705 /** could it already be cached? ***/
1706 if ((trmtype == type) && (trmfield == field) && (trmpages > 0))
1707 {
1708 if ((termpage <= (trmpage + trmpages - 1)) &&
1709 ((termpage + 3) >= trmpage)) /* overlapping range */
1710 {
1711 ctp = CdTrmLocate(term, termpage);
1712 if (ctp != NULL) /* found it */
1713 return CdTrmCache (ctp);
1714 if (termpage == trmpage) /* not possible to find it */
1715 return NULL;
1716 }
1717 }
1718
1719 /** Load term pages from disk ***/
1720
1721 termpage = CdTrmPages(type, field, termpage);
1722 if (termpage == 0)
1723 return NULL;
1724
1725 ctp = CdTrmLocate(term, termpage);
1726 return CdTrmCache(ctp);
1727 }
1728
1729 /*****************************************************************************
1730 *
1731 * CdTrmLocate(term, page)
1732 * locates a term in a term list already in cache
1733 *
1734 *****************************************************************************/
CdTrmLocate(CharPtr term,Int2 page)1735 static CdTermPtr NEAR CdTrmLocate (CharPtr term, Int2 page)
1736
1737 {
1738 Int2 size, ctr, cmpval;
1739 CharPtr ret;
1740
1741 size = trmpages * BLKSIZE; /* bytes in term cache */
1742 ctr = 0;
1743 ret = trmbuf;
1744 size--; /* have to have at least one space for test below */
1745 while (ctr < size)
1746 {
1747 while (*ret != '\n' && *ret != '\r')
1748 {
1749 ret++;
1750 ctr++;
1751 if (ctr >= size)
1752 return NULL;
1753 }
1754 ret++;
1755 ctr++;
1756
1757 cmpval = MeshStringICmp(ret, term);
1758 if (! cmpval) /* found it */
1759 return CdTermRead(trmtype, trmfield, ret, trmbuf, page);
1760 else if (cmpval > 0) /* gone past */
1761 return NULL;
1762 }
1763 return NULL;
1764 }
1765
1766 /*****************************************************************************
1767 *
1768 * CdTermRead(type, field, ptr, bufr, page)
1769 * creates and returns a CdTermPtr from a CdTermPage
1770 * ptr should point at the start of a record (the term)
1771 *
1772 *****************************************************************************/
CdTermRead(Int2 type,Int2 field,CharPtr ptr,CharPtr bufr,Int2 page)1773 static CdTermPtr CdTermRead (Int2 type, Int2 field, CharPtr ptr, CharPtr bufr, Int2 page)
1774
1775 {
1776 CdTermPtr trmptr;
1777 CharPtr tmp, tmp2;
1778 Char localbuf[10];
1779 Int4 vals[3];
1780 Int2 i;
1781
1782 if (ptr == NULL)
1783 return NULL;
1784 if (*ptr == '\0')
1785 return NULL;
1786 trmptr = (CdTermPtr) MemNew(sizeof(CdTerm));
1787 trmptr->type = type;
1788 trmptr->field = field;
1789 tmp = ptr;
1790 tmp2 = tmp;
1791 while (*tmp2 != '\t')
1792 tmp2++;
1793 *tmp2 = '\0';
1794 trmptr->term = StringSave(tmp);
1795 *tmp2 = '\t';
1796 tmp2++;
1797 for (i = 0; i < 3; i++)
1798 {
1799 tmp = &localbuf[0];
1800 while (*tmp2 >= ' ')
1801 {
1802 *tmp = *tmp2;
1803 tmp++; tmp2++;
1804 }
1805 *tmp = '\0';
1806 vals[i] = atol(localbuf);
1807 tmp2++;
1808 }
1809 trmptr->special_count = vals[0];
1810 trmptr->total_count = vals[1];
1811 trmptr->offset = vals[2];
1812 trmptr->page = page + (Int2) (((size_t) (ptr - bufr - 1)) / (size_t) BLKSIZE);
1813 return trmptr;
1814 }
1815
1816 /*****************************************************************************
1817 *
1818 * CdTermScan(type, field, page, numpage, proc)
1819 * returns terms found to proc until
1820 * 1) no more pages
1821 * 2) numpage pages have been read
1822 * 3) proc returns FALSE
1823 * returns number of complete pages read
1824 * if numpage=0, scans until EOF or proc returns FALSE
1825 *
1826 *****************************************************************************/
Int2 CdTermScan (DocType type, DocField field, Int2 page, Int2 numpage, CdTermProc proc)

{
    /*
     * Loads term pages with CdTrmPages, turns every record that follows a
     * line terminator into a CdTerm (CdTermRead) and passes it to proc.
     * NOTE(review): nothing here frees the CdTerm after the call, so proc
     * presumably takes ownership of it -- confirm.
     *
     * The byte counters are Int4, and separate from the page count, so
     * that they cannot overflow when the loaded pages add up to more than
     * 32767 bytes.
     */
    Boolean    goOn;
    CharPtr    ptr;
    Int2       pages, pagectr, startpage;
    Int4       size;             /* bytes of the loaded pages not yet scanned */
    Int4       inpage;           /* bytes left before the next page boundary */
    CdTermPtr  trmptr;

    pagectr = 0;
    if (proc == NULL)
        return pagectr;

    goOn = TRUE;
    while (goOn)
    {
        startpage = page;        /* page number of the start of trmbuf */
        pages = CdTrmPages (type, field, page);
        if (pages == 0)
            return pagectr;
        ptr = trmbuf;
        size = (Int4) pages * BLKSIZE;   /* bytes available */
        inpage = BLKSIZE;                /* bytes per page */
        while ((size > 0) && (goOn))
        {
            /* advance to the end of the current line */
            while (*ptr != '\n' && *ptr != '\r' && *ptr != '\0')
            {
                size--;
                inpage--;
                ptr++;
            }
            if (*ptr == '\0')
                return (Int2) (pagectr + 1);  /* last page */

            /* step over the line terminator to the start of a record */
            size--;
            inpage--;
            ptr++;
            if (size > 0)
            {
                trmptr = CdTermRead(type, field, ptr, trmbuf, startpage);
                if (trmptr != NULL) {
                    goOn = proc (trmptr);
                }
            }

            /* skip the record that was just handed out */
            while (*ptr != '\n' && *ptr != '\r' && *ptr != '\0')
            {
                size--;
                inpage--;
                ptr++;
            }
            if (inpage < 0)      /* crossed a page boundary */
            {
                inpage = BLKSIZE + inpage;
                numpage--;
                pagectr++;
                page++;
                if (! numpage)   /* never true when numpage started at 0 */
                    goOn = FALSE;
            }
        }
    }
    return pagectr;
}
1889
/*****************************************************************************
*
*   CdLinkUidGet(result, type, link_to_type, numuid, uids, mark_missing, maxlink)
*   	returns count of input uids processed
*       returns -1 on error
*       the links found are returned as a LinkSet in *result
*       if neighbors (type == link_to_type)
*   		sums weights for same uids
*       if mark_missing, input uids that cannot be found are negated in place
*   	if more than maxlink uids are found, frees uids and weights, but
*   		leaves num set
*
*****************************************************************************/
Int2 CdLinkUidGet (LinkSetPtr PNTR result, DocType type, DocType link_to_type, Int2 numuid, Int4Ptr uids, Boolean mark_missing, Int4 maxlink)

{
    /*
     * Outline:
     *   1) find the first input uid that has an index entry; its type
     *      decides which link file is opened;
     *   2) for that uid and every later one, read its link list (plus one
     *      byte of weight per link when linking within the same type);
     *   3) with more than one input uid, merge the lists into one list
     *      kept in ascending uid order, adding up the weights of uids that
     *      occur more than once;
     *   4) build the LinkSet: merged neighbor lists are sorted by
     *      descending weight, MEDLINE links from another type are put into
     *      descending uid order.
     * The merged list is limited to 16000 entries; exceeding that is an
     * error (-1).
     */
    UidIdxPtr   query;
    UidIdx      local;
    DocType     querytype;
    LinkSetPtr  lsp = NULL;
    Int2        counts[NTYPE];
    FILE       *fp;
    Int4        offset;
    CharPtr     path;
    Uint1Ptr    ptr1;
    Int2        numfound = 0;
    Int4        j = 0, l, r, k;
    Boolean     first = TRUE;
    Boolean     sorted;
    Int4        cursize = 0, finalsize = 0, finalcount = 0, count = 0, i;
    Int4Ptr     newuids = NULL,
                newwts = NULL,
                finaluids = NULL,
                finalwts = NULL,
                tmp;

    *result = NULL;

    if (! ValidateType(link_to_type))
        return -1;

    /* look for the first uid with an index entry; j remembers where it is */
    for (i = 0, query = NULL; i < numuid && query == NULL; i++)
    {
        query = UidIdxGet(type, uids[i], &local);
        if ((mark_missing) && (query == NULL))
            uids[i] *= -1;
        if (query != NULL)
            j = i;
    }
    if ((i == numuid) && (query == NULL)) {    /* none found */
        lsp = (LinkSetPtr) MemNew(sizeof(LinkSet));
        if (lsp == NULL)
            return -1;
        lsp->uids = NULL;
        lsp->weights = NULL;
        *result = lsp;
        return 0;
    }

    querytype = query->type;    /* record, to allow for TYP_SEQ */
    if (link_to_type == TYP_SEQ)
    {
        if (type != TYP_SEQ)
            return -1;             /* can't do it */
        else
            link_to_type = querytype;    /* neighbors */
    }

    path = MakePath (DIR_LNK, query->type, SUF_REC, EXT_LNK);
    if ((fp = FileOpen(path, "rb")) == NULL)
    {
        ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
        return -1;
    }

    if (numuid > 1)
    {
        if (numuid > 320) {
            finalsize = 16000;
        } else {
            finalsize = MIN((numuid * 50), 16000);   /* make a guess */
        }
        finaluids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));  /* make a guess */
        if (link_to_type == querytype)
            finalwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
    }

    for (; j < numuid; j++)
    {
        /* the index entry of the first uid is already in query */
        if (! first)
        {
            if ((query = UidIdxGet(type, uids[j], &local)) == NULL)
            {
                if (mark_missing)
                    uids[j] *= -1;
                continue;    /* must examine remaining UIDs */
            }
        }
        else
            first = FALSE;

        numfound++;          /* count how many uids we process */

        /* read the link counts for all types */

        fseek (fp, query->link_offset, SEEK_SET);
        FileRead((CharPtr)&counts[0], sizeof(Int2), NTYPE, fp);
        for (i = 0; i < NTYPE; i++)
            counts[i] = SwapInt2(counts[i]);

        offset = 0;
        for (i = 0; i < link_to_type; i++)
        {
            offset += counts[i] * sizeof(DocUid);
            if (i == query->type) {      /* has weights */
                offset += counts[i] * sizeof(Uint1);
            }
        }
        if (offset)          /* skip preceding link types */
            fseek(fp, offset, SEEK_CUR);

        count = (Int4)counts[link_to_type];

        /* the per-uid arrays only ever grow */
        if (count > cursize)
        {
            MemFree(newuids);
            newuids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (count + 1)));
            if (querytype == link_to_type)
            {
                MemFree(newwts);
                newwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (count + 1)));
            }
            cursize = count;
        }
        FileRead((CharPtr)newuids, sizeof(DocUid), (size_t)count, fp);
        for (i = 0; i < count; i++)
            newuids[i] = SwapInt4(newuids[i]);
        if (link_to_type == querytype)   /* get the weights */
        {
            /*
             * The weights are single bytes on disk.  They are read into
             * the front of newwts and widened in place from the back, so
             * that no byte is overwritten before it has been used.
             */
            ptr1 = (Uint1Ptr) newwts;
            FileRead((CharPtr)ptr1, sizeof(Uint1), (size_t)count, fp);
            for (i = count - 1; i >= 0; i--) {
                newwts[i] = (Int4) (ptr1[i]);
            }
        }
        if (numuid > 1)      /* merging lists */
        {
            if ((finalcount + count) > finalsize)
            {
                finalsize += count;
                if (finalsize > 16000)
                {
                    /* give up: release everything, the link file included */
                    FileClose(fp);
                    MemFree(newuids);
                    MemFree(newwts);
                    MemFree(finaluids);
                    MemFree(finalwts);
                    ErrPostEx(SEV_WARNING, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
                    return -1;
                }
                tmp = finaluids;
                finaluids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
                MemCopy(finaluids, tmp, (size_t)(finalcount * sizeof(Int4)));
                MemFree(tmp);
                if (querytype == link_to_type)
                {
                    tmp = finalwts;
                    finalwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
                    MemCopy(finalwts, tmp, (size_t)(finalcount * sizeof(Int4)));
                    MemFree(tmp);
                }
            }
            for (i = 0; i < count; i++)
            {
                l = 0;                   /* binary search */
                r = (finalcount - 1);
                k = 0;
                while ((l <= r) && (finaluids[k] != newuids[i]))
                {
                    k = (l + r)/ 2;
                    if (newuids[i] < finaluids[k])
                        r = k - 1;
                    else
                        l = k + 1;
                }
                if (finaluids[k] == newuids[i])  /* merge */
                {
                    if (querytype == link_to_type)
                        finalwts[k] += newwts[i];
                }
                else
                {
                    if (finalcount)
                    {
                        /* open a gap at k by moving the tail up one slot */
                        if (finaluids[k] < newuids[i])
                            k++;
                        l = (finalcount - k);
                        r = l;
                        tmp = &finaluids[finalcount];
                        while (r)
                        {
                            *tmp = *(tmp-1);
                            tmp--; r--;
                        }
                        if (querytype == link_to_type)
                        {
                            r = l;
                            tmp = &finalwts[finalcount];
                            while (r)
                            {
                                *tmp = *(tmp-1);
                                tmp--; r--;
                            }
                        }
                    }
                    finaluids[k] = newuids[i];
                    if (querytype == link_to_type)
                        finalwts[k] = newwts[i];
                    finalcount++;
                }
            }
        }
    }

    FileClose(fp);

    lsp = (LinkSetPtr) MemNew(sizeof(LinkSet));
    if (lsp == NULL)
    {
        MemFree(newuids);
        MemFree(newwts);
        MemFree(finaluids);
        MemFree(finalwts);
        return -1;
    }
    if (maxlink <= 0)
        maxlink = 16000;     /* default */

    if (numuid == 1)
    {
        /* a single list is handed over as it is */
        lsp->num = count;
        if (lsp->num <= maxlink)
        {
            lsp->uids = newuids;
            lsp->weights = newwts;
        }
        else
        {
            MemFree(newuids);
            MemFree(newwts);
        }
    }
    else
    {
        MemFree(newuids);
        MemFree(newwts);
        lsp->num = finalcount;
        if (lsp->num <= maxlink)
        {
            lsp->uids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (finalcount + 1)));
            MemCopy(lsp->uids, finaluids, (size_t)(finalcount * sizeof(Int4)));
            MemFree(finaluids);
            if (querytype == link_to_type)
            {
                lsp->weights = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (finalcount + 1)));
                MemCopy(lsp->weights, finalwts, (size_t)(finalcount * sizeof(Int4)));
                MemFree(finalwts);
                linksort(lsp->uids, lsp->weights, lsp->num);
            }
        }
        else
        {
            MemFree(finaluids);
            MemFree(finalwts);
        }
    }

    /* lsp->uids is NULL when the list was dropped for exceeding maxlink */
    if (lsp->uids != NULL && lsp->num > 1 &&
        querytype != link_to_type && link_to_type == TYP_ML)
    {
        /* try to sort MEDLINE uids in descending order */

        for (sorted = TRUE, k = 1; k < lsp->num; k++)
        {
            if (lsp->uids[k-1] < lsp->uids[k])
            {
                sorted = FALSE;
                break;
            }
        }

        if (! sorted)
        { /* assume that the existing order is reversed */
            for (k = (lsp->num / 2) - 1; k >= 0; k--)
            {
                j = lsp->uids[k];
                lsp->uids[k] = lsp->uids[lsp->num - 1 - k];
                lsp->uids[lsp->num - 1 - k] = j;
            }

            /* now check that it's sorted */
            for (sorted = TRUE, k = 1; k < lsp->num; k++)
            {
                if (lsp->uids[k-1] < lsp->uids[k])
                {
                    sorted = FALSE;
                    break;
                }
            }

            if (! sorted)
            { /* as a last resort, sort them using quicksort */
                /* the uids themselves serve as sort keys; the copy is a dummy array */
                finaluids = (Int4Ptr) MemDup(lsp->uids, (size_t) (sizeof(Int4) * lsp->num));
                linksort(finaluids, lsp->uids, lsp->num);
                MemFree(finaluids);
            }
        }
    }

    *result = lsp;
    return numfound;
}
2199
2200 /*****************************************************************************
2201 *
2202 * linksort(uids, wts, n)
2203 * quicksort into descending wts order
2204 *
2205 *****************************************************************************/
linksort(Int4Ptr uids,Int4Ptr wts,Int4 n)2206 static void NEAR linksort (Int4Ptr uids, Int4Ptr wts, Int4 n)
2207
2208 {
2209 Int4 tp, tp2;
2210 Int4 l, r, i, j, m, scnt;
2211 Int4 pstack[100];
2212 Int4Ptr p;
2213
2214 if (n < 2)
2215 return;
2216
2217 scnt = 2;
2218 l = 0; r = n - 1; p = pstack + 2;
2219
2220 do
2221 {
2222 if ((r - l) > 15)
2223 {
2224 i = l; j = r;
2225 /* median of three */
2226
2227 m = ((j - i) / 2) + i; /* get middle element */
2228 /* partitioning operation */
2229 do
2230 {
2231 while((j > i) && (wts[j] <= wts[i]))
2232 j--;
2233 if(j != i)
2234 {
2235 tp = wts[j]; wts[j] = wts[i]; wts[i] = tp;
2236 tp = uids[j]; uids[j] = uids[i]; uids[i] = tp;
2237 while((i < j) && (wts[i] >= wts[j]))
2238 i++;
2239 if(i != j)
2240 {tp = wts[j]; wts[j] = wts[i]; wts[i] = tp;
2241 tp = uids[j]; uids[j] = uids[i]; uids[i] = tp;}
2242 }
2243 }while(i != j); /* end do */
2244
2245 /* recursion elimination */
2246 if(i)
2247 {
2248 if((i - l) > (r - i)) /* put long segment on "stack" */
2249 {*p = l; p++; *p = i - 1; p++; l = i + 1;}
2250 else
2251 {*p = i + 1; p++; *p = r; p++; r = i - 1;}
2252 scnt += 2;
2253 if (scnt >= 100)
2254 {
2255 ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, "linksort > 100");
2256 return;
2257 }
2258 }
2259 else
2260 {
2261 l = i + 1;
2262 }
2263 }
2264 /* if done with this segment, "pop" next */
2265 else
2266 {
2267 p--; r = *p; p--; l = *p; scnt -= 2;
2268 }
2269 }
2270 while (p > pstack); /* end do */
2271
2272
2273 /* do the final insertion sort */
2274
2275 for(i = 1; i < n; i++)
2276 {
2277 tp = wts[i]; tp2 = uids[i]; j = i; m = j - 1;
2278 while ((j > 0) && (wts[m] < tp))
2279 {wts[j] = wts[m]; uids[j] = uids[m]; j--; m--;}
2280 wts[j] = tp;
2281 uids[j] = tp2;
2282 }
2283 return;
2284 }
2285
2286 /*****************************************************************************
2287 *
2288 * DecompReadFunc:
2289 * substituted read function for compressed data sources (for Sequence
2290 * and Medline data).
2291 *
2292 *****************************************************************************/
DecompReadFunc(Pointer p,CharPtr buff,Uint2 count)2293 static Int2 LIBCALLBACK DecompReadFunc (Pointer p, CharPtr buff, Uint2 count)
2294 {
2295 DecompInfoPtr dcp = (DecompInfoPtr) p;
2296 Uint1 loc_buff[3];
2297 int bytes_to_request;
2298 int bytes_read;
2299
2300 if (dcp->compr == COMPR_DONT_KNOW)
2301 {
2302 int c;
2303
2304 /* read the "decompression protocol identifier" */
2305 if ((c = fgetc(dcp->fp)) == EOF)
2306 return 0;
2307 dcp->compr = (Uint1) c;
2308
2309 if (dcp->compr == COMPR_NONE)
2310 {
2311 /* for no decompression, we still have 4 bytes of overhead; */
2312 /* 1 byte for the protocol identifier, and 3 bytes for a length */
2313 /* field of what follows */
2314 if (FileRead((CharPtr) loc_buff,1,3,dcp->fp) != 3)
2315 {
2316 ErrPostEx(SEV_ERROR, ERR_CD_BADDECOMP, 0,
2317 "No length field detected for uncompressed data");
2318 return 0;
2319 }
2320
2321 /* interpret the 3-byte length in a machine-independant order; */
2322 /* BIG ENDIAN (first byte is most significant) */
2323 dcp->bytes_left = (((int) loc_buff[0]) * 256 + loc_buff[1]) * 256 +
2324 loc_buff[2];
2325 }
2326 }
2327
2328 switch (dcp->compr)
2329 {
2330 case COMPR_NONE :
2331 /* based on knowledge of how many bytes are in this compressed */
2332 /* ASN.1 object, return only as many bytes as the caller really */
2333 /* needs */
2334 bytes_to_request = (int) MIN((Uint4) count, dcp->bytes_left);
2335 bytes_read = FileRead(buff,1,bytes_to_request,dcp->fp);
2336 dcp->bytes_left -= bytes_read;
2337 if (dcp->bytes_left <= 0)
2338 {
2339 /* reset for stream read of next entry */
2340 dcp->compr = COMPR_DONT_KNOW;
2341 }
2342 return bytes_read;
2343
2344 case COMPR_HUFFMAN :
2345 return HuffmanRead(dcp,buff,count);
2346
2347 /* others ?? */
2348
2349 default:
2350 ErrPostEx(SEV_ERROR, ERR_CD_BADDECOMP, 0,
2351 "Invalid decompression code detected <%d>", dcp->compr);
2352 return 0;
2353 }
2354 }
2355
2356 /*****************************************************************************
2357 *
2358 * HuffmanRead:
2359 * read Huffman compressed data
2360 *
2361 *****************************************************************************/
HuffmanRead(DecompInfoPtr dcp,CharPtr buff,Uint2 count)2362 static Int2 HuffmanRead (DecompInfoPtr dcp, CharPtr buff, Uint2 count)
2363 {
2364 register unsigned int mask = dcp->mask;
2365 register unsigned int byte = dcp->byte;
2366 CharPtr p = buff;
2367 int i, cnt = 0;
2368 int c;
2369 int k;
2370 FILE *fd1 = dcp->fp;
2371
2372
2373 while (cnt < (int) count)
2374 {
2375 for (i=0; i>=0; )
2376 {
2377 if (mask == 0)
2378 {
2379 if ((c = fgetc(fd1)) == EOF)
2380 {
2381 /* should never reach this point */
2382 i = HUFFMAN_SENTINEL - 257;
2383 break;
2384 }
2385 else
2386 {
2387 byte = (unsigned int) c;
2388 mask = 0x80;
2389 }
2390 }
2391
2392 if (byte & mask)
2393 i = vi->huff_left[i];
2394 else
2395 i = vi->huff_right[i];
2396
2397 mask >>= 1;
2398 }
2399
2400 if ((k = i + 257) == HUFFMAN_SENTINEL)
2401 {
2402 mask = 0; /* to skip remaining bits in current byte */
2403 dcp->compr = COMPR_DONT_KNOW; /* reset for next record */
2404 break;
2405 }
2406
2407 *p++ = (char) k;
2408 cnt++;
2409 }
2410
2411 dcp->mask = mask;
2412 dcp->byte = byte;
2413 return cnt;
2414 }
2415
2416 /*****************************************************************************
2417 *
2418 * DecompInit:
2419 * Create a data structure to be used in decompression; the data structures
2420 * are stored in a linked list. While no mutual exclusion is provided on
2421 * list access, each decompression is independent ... therefore, many
2422 * compressed ASN.1 data streams may be open and used simultaneously
2423 *
2424 *****************************************************************************/
DecompInit(FILE * fp)2425 static DecompInfoPtr NEAR DecompInit (FILE *fp)
2426 {
2427 DecompInfoPtr dcp;
2428
2429 dcp = (DecompInfoPtr) MemNew(sizeof(DecompInfo));
2430
2431 if (dcp == NULL)
2432 return NULL;
2433
2434 dcp->fp = fp;
2435 dcp->compr = COMPR_DONT_KNOW;
2436 dcp->mask = 0;
2437 dcp->bytes_left = 0;
2438
2439 /* insert it */
2440 dcp->next = DecompInfoList;
2441 DecompInfoList = dcp;
2442
2443 return dcp;
2444 }
2445
2446 /*****************************************************************************
2447 *
2448 * DecompFini:
2449 * Find and destroy the specified decompression data structure. Each
2450 * data structure, in addition to having an address known to its user,
2451 * also contains a copy of the AsnIoPtr for that data stream. This
2452 * enables the Fini() operation to be performed using either the address
2453 * of this structure as a key, or the address of the AsnIoPtr as a key.
2454 *
2455 *****************************************************************************/
DecompFini(AsnIoPtr aip,DecompInfoPtr dcp)2456 static Boolean NEAR DecompFini (AsnIoPtr aip, DecompInfoPtr dcp)
2457 {
2458 DecompInfoPtr dtrail;
2459 DecompInfoPtr temp;
2460
2461 if (DecompInfoList == NULL)
2462 return FALSE; /* not found */
2463
2464 /* check for first element in list */
2465 if ((DecompInfoList == dcp && dcp != NULL) ||
2466 (DecompInfoList->aip == aip && aip != NULL))
2467 { /* unlink and delete */
2468 temp = DecompInfoList->next;
2469 DecompInfoFree(DecompInfoList);
2470 DecompInfoList = temp;
2471 return TRUE;
2472 }
2473
2474 if (DecompInfoList->next == NULL)
2475 { /* single-element list, and it's not the first element in list */
2476 return FALSE;
2477 }
2478
2479 for (dtrail = DecompInfoList; dtrail->next != NULL;
2480 dtrail = dtrail->next)
2481 { /* search remainder of list */
2482 if ((dtrail->next == dcp && dcp != NULL) ||
2483 (dtrail->next->aip == aip && aip != NULL))
2484 {
2485 temp = dtrail->next->next;
2486 DecompInfoFree(dtrail->next);
2487 dtrail->next = temp;
2488 return TRUE;
2489 }
2490 }
2491
2492 return FALSE;
2493 }
2494
2495
2496 /*****************************************************************************
2497 *
2498 * DecompInfoFree:
2499 * Free a decompression data structure
2500 *****************************************************************************/
DecompInfoFree(DecompInfoPtr dcp)2501 static void NEAR DecompInfoFree(DecompInfoPtr dcp)
2502 {
2503 MemFree(dcp);
2504 }
2505
2506
2507 /*****************************************************************************
2508 *
2509 * IsOKMagic:
2510 * Validate the magic number for a file
2511 *****************************************************************************/
IsOKMagic(Uint4 magic,CharPtr volume_label)2512 static Boolean NEAR IsOKMagic(Uint4 magic, CharPtr volume_label)
2513 {
2514 /* check for a match with the "base" magic number; supported for */
2515 /* backwards compatability */
2516 if (magic == CD_MAGIC_BASE)
2517 return TRUE;
2518
2519 /* now check if the magic number equals the "base" plus the checksum of */
2520 /* the volume-label (so as to be able to distinguish between index */
2521 /* files associated with different CDs) */
2522 while (*volume_label)
2523 {
2524 magic -= (int) (*volume_label++);
2525 }
2526 return (magic == CD_MAGIC_BASE);
2527 }
2528
2529
2530 #endif
2531
2532