1 /*
2  * ===========================================================================
3  *
4  *                             COPYRIGHT NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a Government employee and thus cannot
10  *  be copyrighted.  This software/database is freely available to the
11  *  public for use without a copyright notice.  Restrictions cannot be
12  *  placed on its present or future use.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the National Library of
16  *  Medicine (NLM) and the U. S. Government do not and cannot warrant the
17  *  performance or results that may be obtained by using this software or
18  *  data.  The NLM and the U. S. Government disclaim all warranties as to
19  *  performance, merchantability or fitness for any particular purpose.
20  *
21  *  Please see that the author is suitably cited in any work or product
22  *  based on this material.
23  *
24  * ===========================================================================
25  *
26  * RCS $Id: cdromlib.c,v 6.1 2004/04/01 13:43:05 lavr Exp $
27  *
28  * Authors: Greg Schuler, Jim Ostell, Jonathan Kans, Jonathan Epstein
29  *
30  * Original Creation Date:   9-91
31  *
32  * File Description:
33  *  Functions in this file are the I/O primitives needed to retrieve data
34  *  from the Entrez CD-ROMs, releases 12.0 and earlier.  For releases 13.0
35  *  and later, use the functions in cdnewlib.c
36  *
37  *
38  * Modifications:
39  * --------------------------------------------------------------------------
40  * Date     Name        Description of modification
41  * -------  ----------  -----------------------------------------------------
42  * 06-29-94 Schuler     AsnRead/AsnWrite functions moved to objentr.c
43  * 07-11-94 Schuler     Removed #include <cdrom.h> (no longer needed!)
44  * 07-13-94 Schuler     Moved CdTermFree to cdentrez.c
45  * 08-04-94 Kans        Fixed bug resulting in reading too many term pages
46  * 11-16-94 Schuler     Typecasts for picky compilers
47  *
48  * 05-19-95 Schuler     Added rcs Log directive for automatic insertion of
49  *                      modification comments.
50  *
51  * Revision $Log: cdromlib.c,v $
52  * Revision Revision 6.1  2004/04/01 13:43:05  lavr
53  * Revision Spell "occurred", "occurrence", and "occurring"
54  * Revision
55  * Revision Revision 6.0  1997/08/25 18:13:10  madden
56  * Revision Revision changed to 6.0
57  * Revision
58  * Revision Revision 5.0  1996/05/28 13:55:34  ostell
59  * Revision Set to revision 5.0
60  * Revision
61  * Revision 4.0  1995/07/26  13:50:32  ostell
62  * force revision to 4.0
63  *
64  * Revision 2.48  1995/05/16  14:36:20  schuler
65  * Automatic comment insertion enabled
66  *
67  *
68  * ==========================================================================
69  */
70 
71 #define REVISION_STR "$Revision: 6.1 $"
72 
73 #include <cdromlib.h>
74 
75 #ifdef _OLD_CdEntrez_
76 
/*
 * Module and file names for this translation unit.  THIS_MODULE and
 * THIS_FILE are redirected to file-local strings (presumably so that the
 * toolkit's error-posting macros share a single copy of each string --
 * confirm against the toolkit error headers).
 */
static char * _this_module = "CdEntrez";
static char * _this_file = __FILE__;

#undef  THIS_MODULE
#define THIS_MODULE _this_module

#undef  THIS_FILE
#define THIS_FILE _this_file
83 
84 /*  =========================================================================
85  *      CONSTANTS & MACROS
86  */
87 
/* The only data format this reader accepts; CdInitialize() compares it
 * against the `format` member of the EntrezInfo it reads. */
#define CURRENT_FORMAT_VERSION 0

/* Size in bytes of one term index block on the CD-ROM.  The macro expands
 * to a dereference of the global `vi`, so `vi` must be loaded before use. */
#define BLKSIZE   ((size_t)vi->field_bucket_size)

/* File name prefix selectors: the three document type codes, followed by
 * two extra codes placed after the last type. */
#define PREF_ML     TYP_ML
#define PREF_AA     TYP_AA
#define PREF_NT     TYP_NT
#define PREF_MED    (NTYPE+0)
#define PREF_SEQ    (NTYPE+1)

/* File name suffix selectors: a document type code, a field code offset by
 * NTYPE, or one of three extra codes placed after the last field. */
#define SUF_ML      TYP_ML
#define SUF_AA      TYP_AA
#define SUF_NT      TYP_NT
#define SUF_WORD    (NTYPE+FLD_WORD)
#define SUF_MESH    (NTYPE+FLD_MESH)
#define SUF_KYWD    (NTYPE+FLD_KYWD)
#define SUF_AUTH    (NTYPE+FLD_AUTH)
#define SUF_JOUR    (NTYPE+FLD_JOUR)
#define SUF_ORGN    (NTYPE+FLD_ORGN)
#define SUF_ACCN    (NTYPE+FLD_ACCN)
#define SUF_GENE    (NTYPE+FLD_GENE)
#define SUF_PROT    (NTYPE+FLD_PROT)
#define SUF_ECNO    (NTYPE+FLD_ECNO)
#define SUF_HIER    (NTYPE+FLD_ORGN_HIER)
#define SUF_DATE    (NTYPE+FLD_DATE)
#define SUF_ASN     (NTYPE+NFLD)
#define SUF_REC     (NTYPE+NFLD+1)
#define SUF_UID     (NTYPE+NFLD+2)

/* File name extension selectors, listed in the same order as sExtn[]
 * ("dat", "idx", "lst", "pst", "lnk"). */
#define EXT_DAT     0
#define EXT_IDX     1
#define EXT_LST     2
#define EXT_PST     3
#define EXT_LNK     4

/* Compression protocol codes (presumably the values stored in
 * DecompInfo.compr -- confirm against the decompression code). */
#define COMPR_NONE      0
#define COMPR_HUFFMAN   1
#define COMPR_LZW1      2
/* etc...*/
#define COMPR_DONT_KNOW 0xFF

/* NOTE(review): looks like the end-of-data symbol for the Huffman decoder,
 * one past the largest byte value -- confirm against HuffmanRead(). */
#define HUFFMAN_SENTINEL 256
131 
132 
133 typedef struct {
134     DocUid  uid;                  /* MEDLINE UI or seq-id of Bioseq       */
135     DocType type;                  /* document type code (ml/nt/aa)        */
136     Int4  entry_offset ,        /* offset into entry file(s)            */
137         sum_offset ,            /* offset into summary file    */
138         link_offset ;            /* offset into link file       */
139 } UidIdx,  PNTR UidIdxPtr;
140 
141 typedef struct decompinfo {
142     AsnIoPtr aip;
143     FILE *fp;
144     Uint1 compr;	/* compression protocol */
145     unsigned int mask;		/* used internally for Huffman */
146     unsigned int byte;		/* used internally for Huffman */
147     Uint4 bytes_left;   /* count of remaining bytes for uncompressed protocol */
148     struct decompinfo PNTR next;
149 } DecompInfo, PNTR DecompInfoPtr;
150 
151 
152 /*  =========================================================================
153  *      VARIABLES
154  */
155 
156 static DecompInfoPtr DecompInfoList = NULL;
157 static Int4 numinits;
158 static CharPtr buffer;
159 
160 static CharPtr sPath [NDIR];
161 
162 static char  *sSdir[] = { "", "data", "sequence", "medline", "terms",
163                           "index", "links", "" };
164 static char  *sPref[] = { "ml", "aa", "nt", "med", "seq" };
165 static char  *sSuff[] = {  "word", "mesh", "kywd", "auth", "jour", "orgn",
166                            "accn", "gene", "prot", "ecno", "hier", "date",
167                            "fkey", "prop", "subs", "mloc",
168                            "ml", "aa", "nt",
169                            "asn", "rec", "uid" };
170 static char  *sExtn[] = { "dat", "idx", "lst", "pst", "lnk" };
171 
172 static Boolean bAppendVer = FALSE;
173 static Boolean upperCaseIt = FALSE;
174 
175 static EntrezInfoPtr vi = NULL;
176 static Int4Ptr type_bucket_index[NTYPE];  /* from the .idx files */
177 
178 static size_t detInfoCharCount;
179 static CharPtr CdDetailedBuf = NULL;
180 static Boolean countOnly;
181 
182                     /* for saving the last term.idx file used */
183 
184 static DocType term_idx_type = -1;
185 static DocField term_idx_field = -1;
186 static Int2 term_idx_count = 0;
187 static CharPtr PNTR term_idx_str;
188 static FILE *IdxFilePtr[NTYPE+2];
189 static Boolean HoldIdxOpen = FALSE;
190 
191 
192 #ifdef IS_BIG_ENDIAN
193 /* no swapping needed:  define do-nothing macros */
194 #define SwapInt2(X)  (X)
195 #define SwapInt4(X)  (X)
196 #else
197 /* give prototypes for byte swapping functions */
198 static Int2 NEAR  SwapInt2  PROTO((Int2));
199 static Int4 NEAR  SwapInt4  PROTO((Int4));
200 #endif
201 
202 
203 /*****************************************************************************
204 *
205 *   Private Function Prototypes
206 *
207 *****************************************************************************/
208 static Boolean NEAR CdInitialize PROTO((CharPtr,CharPtr,CharPtr,Int2Ptr));
209 static Boolean NEAR CdSetPath    PROTO((Int2,CharPtr));
210 static Boolean NEAR SaveCdMediaContext PROTO((CharPtr media_name));
211 static void    NEAR ExtraInitWork PROTO((void));
212 static Boolean NEAR ValidateType PROTO((DocType type));
213 static Boolean NEAR ValidateField PROTO((DocType type, DocField field));
214 static Boolean NEAR ValidateUid PROTO((DocType type, DocUid uid));
215 static CharPtr NEAR MakePath PROTO((Int2 nSdir,Int2 nPref,Int2 nSuf, Int2 nExtn));
216 static Boolean NEAR LoadUidIndex PROTO((DocType type));
217 static Int2    NEAR LoadTrmIndex PROTO((DocType type, DocField field));
218 static void    NEAR FreeTrmIndex PROTO((void));
219 /**** not used in reading cdrom ******
220 static Int4 NEAR MergeSegOffset PROTO((Int2 seg, Int4 offset));
221 *************************************/
222 static Boolean NEAR SplitSegOffset PROTO((Int4 value, Int2Ptr segptr, Int4Ptr offsetptr));
223 static FILE * NEAR CdDocFil PROTO((DocType type, DocUid uid, UidIdxPtr idx));
224 
225 static Boolean SwapOutCd PROTO((VoidPtr med));
226 static Boolean SwapInCd PROTO((VoidPtr med));
227 static void NEAR ForceCdFini PROTO((void));
228 static Boolean CdInitMedia PROTO((VoidPtr med));
229 static Boolean CdFmtInfo PROTO((VoidPtr medName));
230 
231 static CdTermPtr NEAR CdTrmLocate PROTO((CharPtr term, Int2 page));
232 static UidIdxPtr NEAR UidIdxGet PROTO((DocType type, DocUid uid, UidIdxPtr idx));
233 static void NEAR linksort PROTO((Int4Ptr uids, Int4Ptr wts, Int4 n));
234 static DecompInfoPtr NEAR DecompInit PROTO((FILE *fp));
235 static Boolean NEAR DecompFini PROTO((AsnIoPtr aip, DecompInfoPtr dip));
236 static void NEAR DecompInfoFree PROTO((DecompInfoPtr dcp));
237 static Int2 LIBCALLBACK DecompReadFunc PROTO((Pointer p, CharPtr buff, Uint2 count));
238 static Int2 HuffmanRead PROTO((DecompInfoPtr dcp, CharPtr buff, Uint2 count));
239 static Boolean NEAR IsOKMagic PROTO((Uint4 magic, CharPtr volume_label));
240 static CdTermPtr  CdTermRead PROTO((Int2 type, Int2 field, CharPtr ptr, CharPtr bufr, Int2 page));
241 
242 /*****************************************************************************
243 *
244 *   General purpose public functions
245 *
246 *****************************************************************************/
247 
248 /*****************************************************************************
249 *
250 *   CdInit()
251 *
252 *****************************************************************************/
253 static CharPtr trmbuf;   /* for term pages */
254 static DocType trmtype;  /* type of last term used in trmbuf */
255 static DocField trmfield;  /* field of last term used in trmbuf */
256 static Int2 trmpage,       /* page # of first page in trmbuf */
257 			trmpages; 			/* number of pages in memory */
258 static size_t trmpagesrequest;     /* how bytes to read (5 * BLKSIZE) */
259 
260 static Boolean oldStyleCfgFile;
261 
262 
263 static Int2 nCdVer;
264 static char *sCdError [] = {
265     "",
266     "Memory allocation error",
267     "File create error",
268 #ifdef WIN_MSWIN
269 	"File open error on %Fs",
270 #else
271     "File open error on %s",
272 #endif
273     "File seek error",
274     "File read error",
275     "File write error",
276     "Bad database type code [%d]",
277     "Bad field code [%d]",
278     "No terms for type/field [%d/%d]",
279     "Bad uid number [%ld]",
280     "Bad directory number [%d]",
281     "Cannot read new data format",
282     "Index files out of date",
283     "Data decompression error",
284     "Programmer error"
285 };
286 
287 static CdTermPtr cdtrmcache [10]; /* cache of most recent CdTrmFind results */
288 
289 /*****************************************************************************
290 *
291 *   CdInit()
292 *     uses environment variables to configure initialization
293 *
294 *****************************************************************************/
295 
CdInit(void)296 Boolean  CdInit (void)
297 
298 {
299     char media[64];
300 
301 	ConfigInit();
302 
303     if (nCdVer) {
304         numinits++;
305         return TRUE;   /* already setup */
306     }
307 
308 	oldStyleCfgFile = FALSE;
309 
310     GetAppParam ("ncbi", "NCBI", "MEDIA", "", media, sizeof media);
311 
312 	/* This is a work-around to provide backwards compatibility for old       */
313 	/* config files which do not specify MEDIA                                */
314 	if (media[0] == '\0')
315 	{
316 		StrCpy(media, "NCBI");
317 		SetSoleMedia();
318 		oldStyleCfgFile = TRUE;
319 	}
320 
321     return (ParseMedia(CdInitMedia, MEDIUM_CD | MEDIUM_DISK) != 0);
322 }
323 
324 
CdInitMedia(VoidPtr med)325 static Boolean CdInitMedia(VoidPtr med)
326 
327 {
328     char CdRootPath[PATH_MAX];
329     char sVol[32];
330 	char datvalpath[PATH_MAX];
331 	CharPtr mediaName = (CharPtr) med;
332 
333     GetAppParam ("ncbi", mediaName, "ROOT", "", CdRootPath, sizeof CdRootPath);
334 
335     vi = NULL;
336 
337     bAppendVer = FALSE;
338     upperCaseIt = FALSE;
339 
340     /* "VAL" overrides "ROOT" for purposes of finding first copy of .val */
341     if (GetAppParam ("ncbi", mediaName, "VAL", CdRootPath, datvalpath, sizeof datvalpath))
342         CdSetPath (DIR_VAL, CdRootPath);
343 
344     FileBuildPath(datvalpath, NULL, NULL);
345     if (! CdInitialize (CdRootPath, sVol, datvalpath, &nCdVer)) {
346         return  FALSE;
347     }
348 
349     trmpagesrequest =(size_t)(5 * BLKSIZE);   /* number of termpages to request */
350 
351     if (GetAppParam ("ncbi", mediaName, "IDX", "", CdRootPath, sizeof CdRootPath))
352         CdSetPath (DIR_IDX, CdRootPath);
353 
354     /* work-around to find alternate index files when using old-style    */
355     /* configuration file                                                */
356     if (oldStyleCfgFile)
357     {
358         if (StrICmp(sVol, "SeqData") == 0 &&
359             GetAppParam ("ncbi", mediaName, "SEQIDX", "", CdRootPath,
360             sizeof CdRootPath))
361         {
362             CdSetPath (DIR_IDX, CdRootPath);
363         }
364         if (StrICmp(sVol, "MedData") == 0 &&
365             GetAppParam ("ncbi", mediaName, "MEDIDX", "", CdRootPath,
366             sizeof CdRootPath))
367         {
368             CdSetPath (DIR_IDX, CdRootPath);
369         }
370     }
371 
372     if (GetAppParam ("ncbi", mediaName, "LNK", "", CdRootPath, sizeof CdRootPath))
373         CdSetPath (DIR_LNK, CdRootPath);
374     if (GetAppParam ("ncbi", mediaName, "MED", "", CdRootPath, sizeof CdRootPath))
375         CdSetPath (DIR_MED, CdRootPath);
376     if (GetAppParam ("ncbi", mediaName, "SEQ", "", CdRootPath, sizeof CdRootPath))
377         CdSetPath (DIR_SEQ, CdRootPath);
378     if (GetAppParam ("ncbi", mediaName, "TRM", "", CdRootPath, sizeof CdRootPath))
379         CdSetPath (DIR_TRM, CdRootPath);
380     SaveCdMediaContext(mediaName);
381 
382     return TRUE;
383 }
384 
385 
SaveCdMediaContext(CharPtr media_name)386 static Boolean NEAR SaveCdMediaContext(CharPtr media_name)
387 
388 {
389 	MediaPtr media;
390 	CdMediaInfoPtr cdm;
391 	int i;
392 	char ejectable[10];
393 	char buffer[100];
394 
395 	media = PreInitMedia(media_name);
396 
397 	if (media == NULL)
398 		return FALSE;
399 
400 	if (media->inited_partial || (media->media_type != MEDIUM_CD &&
401 		media->media_type != MEDIUM_DISK))
402 		return TRUE;
403 
404 	media->swapOutMedia = SwapOutCd;
405 	media->swapInMedia = SwapInCd;
406     GetAppParam ("ncbi", media_name, "EJECTABLE", "0", ejectable, sizeof ejectable);
407 
408 	cdm = (CdMediaInfoPtr) MemNew(sizeof(CdMediaInfo));
409 	cdm->ejectable = atoi(ejectable);
410 	cdm->device_name = NULL;
411 	cdm->raw_device_name = NULL;
412 	cdm->mount_point = NULL;
413 	cdm->mount_cmd = NULL;
414 
415     if (GetAppParam ("ncbi", media_name, "DEVICE_NAME", "", buffer, sizeof buffer))
416 	{
417     	cdm->device_name = StringSave(buffer);
418 	}
419     if (GetAppParam ("ncbi", media_name, "RAW_DEVICE_NAME", "", buffer, sizeof buffer))
420 	{
421     	cdm->raw_device_name = StringSave(buffer);
422 	}
423     if (GetAppParam ("ncbi", media_name, "MOUNT_POINT", "", buffer, sizeof buffer))
424 	{
425     	cdm->mount_point = StringSave(buffer);
426 	}
427     if (GetAppParam ("ncbi", media_name, "MOUNT_CMD", "", buffer, sizeof buffer))
428 	{
429     	cdm->mount_cmd = StringSave(buffer);
430 	}
431 	cdm->hold_idx_open = FALSE;
432     if (GetAppParam ("ncbi", media_name, "HOLD_IDX_OPEN", "", buffer, sizeof buffer))
433 	{
434     	cdm->hold_idx_open = StringICmp(buffer, "TRUE") == 0;
435 	}
436 
437 	media->media_info = (VoidPtr) cdm;
438 
439 	for (i = 0; i < NDIR; i++)
440 	{
441 		cdm->sPath[i] = sPath[i];
442 		sPath[i] = NULL;
443 	}
444 
445 	media->entrez_info = vi;
446 	cdm->bAppendVer = bAppendVer;
447 	cdm->upperCaseIt = upperCaseIt;
448 
449 	media->inited_partial = TRUE;
450 
451 	return TRUE;
452 }
453 
454 
SwapOutCd(VoidPtr curm)455 static Boolean SwapOutCd(VoidPtr curm)
456 {
457 	int i;
458 	MediaPtr CurMedia = (MediaPtr) curm;
459 	CdMediaInfoPtr cmip;
460 
461 	if (CurMedia != NULL)
462 	{
463 		cmip = (CdMediaInfoPtr) CurMedia->media_info;
464 		CurMedia->entrez_info = vi;
465 		vi = NULL; /* avoid freeing it */
466 
467 		for (i = 0; i < NDIR; i++)
468 		{ /* copy and avoid freeing */
469 			cmip->sPath[i] = sPath[i];
470 			sPath[i] = NULL;
471 		}
472 
473 		ForceCdFini();
474 	}
475 
476 	return TRUE;
477 }
478 
479 
SwapInCd(VoidPtr med)480 static Boolean SwapInCd(VoidPtr med)
481 {
482 	MediaPtr newMedia = (MediaPtr) med;
483 	int i;
484 	CdMediaInfoPtr cmip;
485 
486 	cmip = (CdMediaInfoPtr) newMedia->media_info;
487 
488 	for (i = 0; i < NDIR; i++)
489 	{ /* load up sPath */
490 		sPath[i] = cmip->sPath[i];
491 	}
492 	vi = newMedia->entrez_info;
493 
494 	bAppendVer = cmip->bAppendVer;
495 	upperCaseIt = cmip->upperCaseIt;
496     HoldIdxOpen = cmip->hold_idx_open;
497 
498 	ExtraInitWork();
499 
500 	return TRUE;
501 }
502 
503 
ExtraInitWork()504 static void NEAR ExtraInitWork()
505 
506 {
507     size_t bufsize;
508 	int i;
509 
510     /* initialize cached CdTermPtr array */
511     for (i = 0; i < 10; i++) {
512       cdtrmcache [i] = NULL;
513     }
514 
515     term_idx_type = -1;
516     term_idx_field = -1;
517 
518     if (buffer == NULL) {
519         bufsize = (size_t) MAX (MAX ((size_t) vi->type_bucket_size, (size_t) vi->field_bucket_size), sizeof (Int4) * 512);
520         buffer = (CharPtr) MemNew(bufsize);
521     }
522 }
523 
524 
ForceCdFini(void)525 static void NEAR ForceCdFini(void)
526 
527 {
528 	Int4 sav_numinits = numinits;
529 	CharPtr savDetailedBuf;
530 
531 	ConfigInit(); /* simulate Init() to balance Fini() */
532     savDetailedBuf = CdDetailedBuf;
533 	CdDetailedBuf = NULL; /* avoid freeing in Fini() */
534 	numinits = 1;
535 	CdFini();
536 	numinits = sav_numinits;
537     CdDetailedBuf = savDetailedBuf;
538 }
539 
540 
541 /*****************************************************************************
542 *
543 *   CdFini()
544 *      closes cdromlib session
545 *
546 *****************************************************************************/
CdFini(void)547 Boolean  CdFini (void)
548 
549 {
550 	Int2 i;
551 	CdTermPtr trmptr;
552 
553 	ConfigFini();
554 	numinits--;
555 	if (numinits)          /* haven't fixed all initializations yet */
556 		return TRUE;
557 
558     /* free cached CdTermPtr array */
559     for (i = 0; i < 10; i++) {
560       trmptr = cdtrmcache [i];
561       if (trmptr != NULL) {
562         if (trmptr->term != NULL) {
563           MemFree (trmptr->term);
564         }
565         MemFree (trmptr);
566       }
567       cdtrmcache [i] = NULL;
568     }
569 
570     buffer = (CharPtr) MemFree(buffer);
571 	FreeTrmIndex();
572 	for (i = 0; i < NDIR; i++)
573 		sPath[i] = (CharPtr) MemFree(sPath[i]);
574 	for (i = 0; i < NTYPE; i++)
575 		if (i != TYP_NT)
576 			type_bucket_index[i] = (Int4Ptr) MemFree(type_bucket_index[i]);
577 		else
578 			type_bucket_index[i] = NULL;  /* NT and AA use same index */
579 	vi = EntrezInfoFree(vi);
580 	trmbuf = (CharPtr) MemFree(trmbuf);
581 	trmpages = 0;
582 	nCdVer = 0;
583 	bAppendVer = FALSE;
584 	upperCaseIt = FALSE;
585 
586     for (i = 0; i < NTYPE+2; i++)
587 	{
588         if (IdxFilePtr[i] != NULL)
589 		{
590 			FileClose(IdxFilePtr[i]);
591 			IdxFilePtr[i] = NULL;
592 		}
593 	}
594 
595     CdDetailedBuf = (CharPtr) MemFree(CdDetailedBuf);
596 
597 	return TRUE;
598 }
599 
600 
601 /*  =========================================================================
602  *      PUBLIC FUNCTION BODIES
603  */
604 
605 
606 /*  -------------------- CdInitialize() --------------------------------
607  *  CdInitialize -- Initializes the library
608  *
609  *  Parameters:    sCdRoot:   CD-ROM root path
610  *                 sVolume:   pointer to volume name buffer (VOLUME_MAX)
611  *                 ver:       pointer to version number buffer
612  *
613  *  Return value:  TRUE:      Success.
614  *                 FALSE:     Failure;  refer to error code.
615  *
616  *  Notes:  1. The file cdromdat.val must be in the specified root path.
617  *          2. Default paths strings for various subdirectories are
618  *             created by this function below the specified root path.
619  *             Use CdSetPath() to override the defaults.
620  */
621 
CdInitialize(CharPtr sCdRoot,CharPtr sVolume,CharPtr datvalpath,Int2Ptr ver)622 static Boolean NEAR CdInitialize (CharPtr sCdRoot, CharPtr sVolume, CharPtr datvalpath, Int2Ptr ver)
623 
624 {
625     Int2   i;
626     AsnIoPtr aip;
627     Char drctry [16];
628     CharPtr p;
629     size_t bufsize;
630 
631 
632     *sVolume = '\0';
633     *ver = 0;
634 
635 	numinits++;     /* count the number of initialization calls */
636 
637     if (vi != NULL) {          /* already initialized ! */
638         StringCpy (sVolume, vi->volume_label);
639         *ver = vi->version;
640         return  TRUE;
641     }
642 
643     /* initialize cached CdTermPtr array */
644     for (i = 0; i < 10; i++) {
645       cdtrmcache [i] = NULL;
646     }
647 
648     term_idx_type = -1;
649     term_idx_field = -1;
650 
651     for (i = 0; i < NTYPE+2; i++)
652         IdxFilePtr[i] = NULL;
653 
654     /* initialize storage for path names */
655     for (i = 0; i < NDIR; i++)
656         if (sPath[i] == NULL)
657             sPath[i] = (CharPtr) MemNew(PATH_MAX + 1);
658 
659     /* initialize root path string variable */
660     StringNCpy (sPath[DIR_ROOT], sCdRoot, PATH_MAX);
661     FileBuildPath(sPath[DIR_ROOT], NULL, NULL);
662 
663     /* read the CDROMLIB.INF file */
664     if ((aip = EntrezInfoOpen (datvalpath)) == NULL)
665         return FALSE;
666 
667     /* set default paths for subdirectories */
668     for (i=1; i<NDIR; i++) {
669         StringCpy (sPath[i], sPath[DIR_ROOT]);
670         StringCpy (drctry, sSdir[i]);
671         if (upperCaseIt) {
672           p = drctry;
673           while (*p != '\0') {
674             *p = TO_UPPER (*p);
675             p++;
676           }
677         }
678         FileBuildPath(sPath[i], drctry, NULL);
679     }
680 
681     vi = EntrezInfoAsnRead(aip, NULL);
682     AsnIoClose(aip);
683     if (vi == NULL)
684         return FALSE;
685 
686     /* check for incompatible format */
687     if (vi->format != CURRENT_FORMAT_VERSION) {
688         ErrPostEx(SEV_ERROR, ERR_CD_BADFORMAT, 0, sCdError[ERR_CD_BADFORMAT]);
689         return FALSE;
690     }
691 
692     if (buffer == NULL) {
693         bufsize = (size_t) MAX (MAX ((size_t) vi->type_bucket_size, (size_t) vi->field_bucket_size), sizeof (Int4) * 512);
694         buffer = (CharPtr) MemNew(bufsize);
695     }
696 
697     StringCpy (sVolume, vi->volume_label);
698     *ver = vi->version;
699     return  TRUE;
700 }
701 
702 /*****************************************************************************
703 *
704 *   CdGetInfo()
705 *   	Gets Entrez info pointer
706 *
707 *****************************************************************************/
CdGetInfo(void)708 EntrezInfoPtr CdGetInfo (void)
709 
710 {
711 	return vi;
712 }
713 
714 
715 /*****************************************************************************
716 *
717 *   CdFmtInfo()
718 *   	Formats CD-ROM specific "detailed info" and either stores the number
719 *   	of characters required to format the text, or concatentates the
720 *   	formatted string to a global string
721 *
722 *****************************************************************************/
CdFmtInfo(VoidPtr medName)723 static Boolean CdFmtInfo(VoidPtr medName)
724 {
725   char buf[256];
726   MediaPtr media;
727   CharPtr mediaName = (CharPtr) medName;
728   CdMediaInfoPtr cdm;
729 
730 
731   if ((media = PreInitMedia(mediaName)) == NULL || media->invalid ||
732 	  (cdm = (CdMediaInfoPtr) media->media_info) == NULL)
733   {
734     return FALSE;
735   }
736 
737   if (media->media_type == MEDIUM_CD)
738     StrCpy(buf, "\n  CD-ROM image from ");
739   else
740     StrCpy(buf, "\n  Hard disk image from ");
741   if (cdm->sPath[DIR_ROOT] == NULL)
742   {
743 	StrCat(buf, "<location unknown>");
744   } else {
745     StrCat(buf, cdm->sPath[DIR_ROOT]);
746   }
747   if (media->entrez_info != NULL && media->entrez_info->volume_label != NULL)
748   {
749     StrCat(buf, "\n    Volume label is ");
750     StrCat(buf, media->entrez_info->volume_label);
751   }
752   if (media->formal_name == NULL)
753   {
754     StrCat(buf, "\n    [ this medium has no formal name ]");
755   }
756   else {
757     StrCat(buf, "\n    Formal name is ");
758     StrCat(buf, media->formal_name);
759   }
760   StrCat(buf, "\n");
761 
762   if (countOnly)
763   {
764 	detInfoCharCount += StringLen(buf);
765   } else {
766 	StrCat(CdDetailedBuf, buf);
767   }
768 
769   /* always return FALSE, so that ParseMedia() will refrain from setting */
770   /* validity flags                                                      */
771   return FALSE;
772 }
773 
774 
775 /*****************************************************************************
776 *
777 *   CdDetailedInfo()
778 *   	Gets formatted text information about the current status, or returns
779 *   	NULL; the text (if any) is stored in a statically allocated buffer
780 *
781 *****************************************************************************/
782 
CdDetailedInfo(void)783 CharPtr CdDetailedInfo (void)
784 
785 {
786   if (numinits == 0) /* not yet initialized */
787   {
788 	if (CdDetailedBuf == NULL)
789 	{
790 	  CdDetailedBuf = StringSave("CD-ROM and HARD DISK access information is not currently available\n");
791 	}
792 	return CdDetailedBuf;
793   }
794   detInfoCharCount = 0;
795   countOnly = TRUE;
796   ParseMedia(CdFmtInfo, MEDIUM_CD | MEDIUM_DISK);
797   countOnly = FALSE;
798   if (detInfoCharCount == 0)
799     return NULL;
800   if (CdDetailedBuf != NULL)
801   {
802     CdDetailedBuf = (CharPtr) MemFree(CdDetailedBuf);
803   }
804   CdDetailedBuf = (CharPtr) MemNew(detInfoCharCount + 200);
805   StrCpy(CdDetailedBuf, "CD-ROM and HARD DISK ACCESS\n");
806   if (CurMediaType() == MEDIUM_CD || CurMediaType() == MEDIUM_DISK)
807   {
808     StrCat(CdDetailedBuf, "  Currently active medium is ");
809     StrCat(CdDetailedBuf, (GetCurMedia())->formal_name);
810     StrCat(CdDetailedBuf, "\n");
811   }
812   ParseMedia(CdFmtInfo, MEDIUM_CD | MEDIUM_DISK);
813   return CdDetailedBuf;
814 }
815 
816 /*  -------------------- CdSetPath() ---------------------------------
817  */
CdSetPath(Int2 dir,CharPtr path)818 static Boolean NEAR CdSetPath (Int2 dir, CharPtr path)
819 
820 {
821     int  k = 0;
822 
823 	if (path != NULL)
824 		k = StringLen(path);
825 
826     if ((dir<2) || (dir>=NDIR) || (k==0)) {
827 		ErrPostEx(SEV_ERROR, ERR_CD_BADDIR, 0, sCdError[ERR_CD_BADDIR], dir);
828         return FALSE;
829     }
830     StringCpy (sPath[dir], path);
831     FileBuildPath(sPath[dir], NULL, NULL);
832     return TRUE;
833 }
834 
835 /*****************************************************************************
836 *
837 *   UidIdxGet(type, uid, idx)
838 *
839 *****************************************************************************/
UidIdxGet(DocType type,DocUid uid,UidIdxPtr idx)840 static UidIdxPtr NEAR UidIdxGet (DocType type, DocUid uid, UidIdxPtr idx)
841 
842 {
843 	Int4Ptr ip;
844 	Int2 i, j, l, r;
845     FILE * fp;
846     CharPtr path;
847 	struct idxrec {
848 		DocUid uid;
849 		Int4 entry_offset,
850 			link_offset;
851 	} PNTR idxptr;
852 
853 	if (! ValidateUid(type, uid))
854 		return NULL;
855 
856 	if (type == TYP_SEQ)    /* AA, NT, SEQ all the same */
857 		type = TYP_AA;
858 
859     if (type_bucket_index[type] == NULL)
860 	{
861         if (! LoadUidIndex(type))
862 			return NULL;
863 	}
864 
865 	ip = type_bucket_index[type];
866 	r = vi->types[type].num_bucket - 1;
867 	l = 0;
868 	j = 0;
869 	while ((l <= r) && (! ((ip[j] <= uid) && (ip[j+1] > uid))))
870 	{
871 		j = (l + r) / 2;
872 		if (uid > ip[j])
873 			l = j + 1;
874 		else
875 			r = j - 1;
876 	}
877 
878 	if (type != TYP_ML)
879 		type = TYP_SEQ;
880 
881 	if ((fp = IdxFilePtr[type]) == NULL)
882 	{
883 		path = MakePath (DIR_IDX, type, SUF_UID, EXT_LST);
884 		if ((fp = FileOpen(path, "rb")) == NULL)
885 		{
886 			ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
887 			return NULL;
888 		}
889 	}
890 
891 	fseek(fp, (long)j * (long)vi->type_bucket_size, SEEK_SET);
892 
893 	j = FileRead(buffer, 1, vi->type_bucket_size, fp);
894 
895 	if (HoldIdxOpen)
896 	{
897 		IdxFilePtr[type] = fp;
898 	} else {
899 	    FileClose(fp);
900 	}
901 
902 	if (j == 0)
903 	{
904 		ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
905 		return NULL;
906 	}
907 
908 	idxptr = (struct idxrec PNTR) buffer;
909 	j = vi->type_bucket_size / sizeof(struct idxrec);
910 	for (i = 0; i < j; i++, idxptr++)
911 	{
912 		if (uid == SwapInt4(idxptr->uid))
913 		{
914 	 	    if (idx == NULL)
915        		    idx = (UidIdxPtr) MemNew(sizeof(UidIdx));
916 	  	    else
917         		MemFill(idx, '\0', sizeof(UidIdx));
918 
919 		   	idx->type = type;
920 		    idx->uid = uid;
921 			idx->entry_offset = SwapInt4(idxptr->entry_offset);
922 			idx->sum_offset = 0;
923 			idx->link_offset = SwapInt4(idxptr->link_offset);
924 			if (type == TYP_SEQ)
925 			{
926 				if (idx->entry_offset & 0x80000000)
927 					idx->type = TYP_AA;
928 				else
929 					idx->type = TYP_NT;
930 			}
931 			return idx;
932 		}
933 	}
934 
935 	return NULL;
936 }
937 
938 /*  -------------------- CdTrmPageCt() --------------------------------
939  *  CdTrmPageCt -- returns the number of term pages for a type/field pair.
940  *
941  *  Parameters:     type:     database code.
942  *                  field:    field code.
943  *
944  *  Return value:   non-zero:   Success;  page count.
945  *                  zero:       Failure;  refer to error code.
946  */
947 
CdTrmPageCt(DocType type,DocField field)948 Int2    CdTrmPageCt (DocType type, DocField field)
949 
950 {
951     if (!ValidateType (type))  return  0;
952     if (!ValidateField (type, field))  return 0;
953     return (Int2)  vi->types[type].fields[field].num_bucket;
954 }
955 
956 
957 /*  -------------------- CdTrmLookup() --------------------------------
958  *  CdTrmLookup -- returns the first page that COULD contain a term.
959  *
960  *  Parameters:     type:     database code.
961  *                  field:    field code.
962  *                  term:   term (or term fragment) to lookup.
963  *
964  *  Return value:   non-negative:   Success;  page number. (zero-based)
965  *                  negative:       Failure;  refer to error code.
966  */
967 
CdTrmLookup(DocType type,DocField field,CharPtr term)968 Int2    CdTrmLookup (DocType type, DocField field, CharPtr term)
969 
970 {
971     int  i;
972 
973     if (!LoadTrmIndex (type, field))
974 		return(-1);
975 
976     for (i=0; i< term_idx_count; i++) {
977         if (MeshStringICmp (term_idx_str[i], term) >= 0)
978             return  MAX (0,i-2);
979     }
980     return  MAX (0,term_idx_count-2);
981 }
982 
983 
984 /*  -------------------- CdTrmPages() ---------------------------------
985  *  CdTrmPages -- fetches a range of term pages from the CD-ROM.
986  *
987  *  Parameters:     type:     database code.
988  *                  field:    field code.
989  *                  pg:     page number of first page to read.
990  *                  ct:     number of pages to read.
991  *                  buffer: buffer to receive the data.
992  *
993  *  Return value:   non-zero:   Success;  number of pages read.
994  *                  zero:       Failure;  refer to error code.
995  *
996  *  Notes:  The term pages contain a series of variable-length term records,
997  *      each of which is an ASCII string with the following structure:
998  *
999  *      <term>\t<c1>\t<c2>\t<offset>\n
1000  *
1001  *      term:       term
1002  *      c1:         count of 'special' occurrences.
1003  *      c2:         count of total occurrences.  ** NOTE **
1004  *      offset:     offset in postings file of list of document numbers.
1005  *      \t:         tab character  (?).
1006  *      \n:         newline character  ('\x0A').
1007  *
1008  *      A term record may cross a page boundary.
1009  */
1010 
CdTrmPages(DocType type,DocField field,Int2 pg)1011 Int2    CdTrmPages (DocType type, DocField field, Int2 pg)
1012 
1013 {
1014     CharPtr path, buff;
1015     FILE   *fd;
1016     Int4    offset;
1017     size_t    bytes;
1018 
1019 	if ((type == trmtype) && (field == trmfield) && (pg == trmpage) && (trmpages))
1020 		return trmpages;
1021 
1022 	if (trmbuf == NULL)
1023 		trmbuf = (CharPtr) MemNew(trmpagesrequest + 2); /* allow terminating 00 */
1024 	buff = trmbuf;   /* use local static buffer */
1025     /* need to fill buffer with NULL's */
1026     MemFill(buff, 0, trmpagesrequest + 2);
1027 	trmpages = 0;    /* no pages loaded */
1028 
1029     if (!ValidateType (type))  return 0;
1030     if (!ValidateField (type, field))  return 0;
1031 
1032     path = MakePath (DIR_TRM, type, field, EXT_LST);
1033     if ((fd = FileOpen(path, "rb")) ==NULL)  {
1034 		ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1035         return  0;
1036     }
1037     offset = (long) pg * BLKSIZE;
1038     fseek (fd, offset, SEEK_SET);
1039 	bytes = FileRead(buff, 1, trmpagesrequest, fd);
1040     FileClose (fd);
1041 
1042 	if (bytes == trmpagesrequest)   /* got the extra page */
1043 		bytes -= BLKSIZE;
1044 	trmpages = (Int2)(bytes/BLKSIZE);
1045 	if (bytes % BLKSIZE)   /* got a partial last page */
1046 		trmpages++;
1047 	trmtype = type;
1048 	trmfield = field;
1049 	trmpage = pg;
1050 	                          /* may have to switch \n for \r */
1051     return  trmpages;
1052 }
1053 
1054 
1055 /*  -------------------- CdTrmUidsFil () --------------------------------
1056  *  CdTrmUids -- retrieves a list of uids for a term.
1057  *
1058  *  Parameters:     type:         database code.
1059  *                  field:        field code.
1060  *                  offset:     offset into postings file.
1061  *                  count:      number of uids.
1062  *                  filename:   name of file to receive the results.
1063  *
1064  *  Return value:   non-zero:   Success; number of documents (same as count).
1065  *                  zero:       Failure;  refer to error code.
1066  *
1067  *  Notes:   the offset value is obtained by:
1068  *      1)  looking up a term (using CdTrmLookup()).
1069  *      2)  loading term pages (using CdTrmPages()).
1070  *      3)  finding the term in the loaded pages.
1071  */
1072 
CdTrmUidsFil(DocType type,DocField field,Int4 offset,Int4 count,CharPtr filename,Boolean append)1073 Int4    CdTrmUidsFil (DocType type, DocField field, Int4 offset, Int4 count, CharPtr filename, Boolean append)
1074 
1075 {
1076     Int4  i;
1077     FILE *fd1;
1078     FILE *fd2;
1079     Char mode [4];
1080     CharPtr path;
1081     Int4Ptr ptr;
1082     size_t cnt;
1083     Int4 cntr;
1084 
1085     if (!ValidateType (type))   return  0;
1086     if (!ValidateField (type, field))  return  0;
1087 
1088     path = MakePath (DIR_TRM, type, field, EXT_PST);
1089     if ((fd1 = FileOpen(path, "rb")) == NULL)
1090 	{
1091 		ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1092         return 0;
1093     }
1094 
1095     if (append) {
1096       StringCpy (mode, "ab");
1097     } else {
1098       StringCpy (mode, "wb");
1099     }
1100     if ((fd2 = FileOpen(filename, mode)) == NULL)
1101 	{
1102         FileClose (fd1);
1103 		ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], filename);
1104         return  0;
1105     }
1106 
1107     fseek (fd1, offset, SEEK_SET);
1108 
1109     cntr = count;
1110     cnt = (size_t) MIN (cntr, (Int4)(BLKSIZE / sizeof(Int4)));
1111 	ptr = (Int4Ptr) buffer;
1112     while (cnt > 0)
1113 	{
1114         FileRead (buffer, sizeof (Int4), cnt, fd1);
1115         for (i = 0; i < (Int4) cnt; i++)
1116 			ptr[i] = SwapInt4(ptr[i]);
1117         if (! FileWrite (buffer, sizeof(Int4), cnt, fd2))
1118 		{
1119 			ErrPostEx(SEV_ERROR, ERR_CD_FILEWRITE, 0, sCdError[ERR_CD_FILEWRITE]);
1120             break;
1121         }
1122         cntr -= cnt;
1123 	    cnt = (size_t) MIN (cntr, (Int4)(BLKSIZE / sizeof(Int4)));
1124     }
1125 
1126     FileClose (fd1);
1127     FileClose (fd2);
1128 	if (cntr)    /* didn't finish */
1129 		return 0;
1130 	else
1131 		return count;
1132 }
1133 
1134 /*  -------------------- CdTrmUidsMem () --------------------------------
1135  *  CdTrmUidsMem -- retrieves a list of uids for a term.
1136  *
1137  *  Parameters:     type:         database code.
1138  *                  field:        field code.
1139  *                  offset:     offset into postings file.
1140  *                  count:      number of uids.
1141  *                  mem:       storage to receive the results.
1142  *
1143  *  Return value:   non-zero:   Success; number of documents (same as count).
1144  *                  zero:       Failure;  refer to error code.
1145  *
1146  *  Notes:   the offset value is obtained by:
1147  *      1)  looking up a term (using CdTrmLookup()).
1148  *      2)  loading term pages (using CdTrmPages()).
1149  *      3)  finding the term in the loaded pages.
1150  */
1151 
CdTrmUidsMem(DocType type,DocField field,Int4 offset,Int4 count,DocUidPtr mem)1152 Int4    CdTrmUidsMem (DocType type, DocField field, Int4 offset, Int4 count, DocUidPtr mem)
1153 
1154 {
1155     Int4  i;
1156     FILE *fd1;
1157     CharPtr path;
1158     size_t cnt;
1159 
1160     if (!ValidateField (type, field))
1161 		return  0;
1162 	if (mem == NULL)
1163 		return 0;
1164 
1165     path = MakePath (DIR_TRM, type, field, EXT_PST);
1166     if ((fd1 = FileOpen(path, "rb")) == NULL)
1167 	{
1168 		ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1169         return 0;
1170     }
1171 
1172     fseek (fd1, offset, SEEK_SET);
1173 
1174     cnt = FileRead ((VoidPtr)mem, sizeof (Int4), (size_t) count, fd1);
1175     for (i = 0; i < (Int4) cnt; i++)
1176 		mem[i] = SwapInt4(mem[i]);
1177 
1178     FileClose (fd1);
1179 	return (Int4) cnt;
1180 }
1181 
1182 /*  -------------------- CdDocAsnOpen() -----------------------------------
1183  *  CdDocAsnOpen -- returns an active AsnIoPtr for a document.
1184  *
1185  *  Parameters:     type:        class code (ML/AA/NT).
1186 *   				uid:         unique identifier
1187  *
1188  *  Return value:   non-null:   Success;  active asnioptr
1189  *                  null:       Failure;  refer to error code.
1190  *
1191  *  For TYP_ML, the value is a Medline-entry
1192 *   For TYP_AA or TYP_NT it is a Bioseq-set.
1193  */
1194 
CdDocAsnOpen(DocType type,DocUid uid)1195 AsnIoPtr  CdDocAsnOpen (DocType type, DocUid uid)
1196 
1197 {
1198     FILE * fd2;
1199     AsnIoPtr aip;
1200     DecompInfoPtr decomp;
1201 
1202     fd2 = CdDocFil (type, uid, NULL);
1203     if (fd2 == NULL)
1204         return NULL;
1205 
1206 	if (vi->no_compression)
1207 	{ /* no compression on this data source */
1208 		aip = AsnIoNew(ASNIO_BIN_IN, fd2, NULL, NULL, NULL);
1209 	}
1210 	else { /* use alternate read function for compressed data sources */
1211     	decomp = DecompInit(fd2);
1212     	aip = AsnIoNew(ASNIO_BIN_IN, fd2, decomp, DecompReadFunc, NULL);
1213     	if (aip == NULL)
1214 			DecompFini(NULL, decomp);
1215     	decomp->aip = aip;
1216 	}
1217 
1218     return aip;
1219 }
1220 
1221 /*****************************************************************************
1222 *
1223 *   CdDocAsnClose(aip)
1224 *       closes an aip opened by CdDocAsnOpen
1225 *
1226 *****************************************************************************/
CdDocAsnClose(AsnIoPtr aip)1227 AsnIoPtr  CdDocAsnClose (AsnIoPtr aip)
1228 
1229 {
1230 	if (!vi->no_compression)
1231 	{
1232     	DecompFini(aip, NULL);
1233 	}
1234 
1235     AsnIoClose(aip);
1236 
1237     return NULL;
1238 }
1239 
1240 /*  =========================================================================
1241  *      PRIVATE FUNCTION BODIES
1242  */
1243 
ValidateUid(DocType type,DocUid uid)1244 static Boolean NEAR  ValidateUid (DocType type, DocUid uid)
1245 
1246 {
1247 	EntrezTypeDataPtr tdp;
1248 	DocType tmp;
1249 
1250 	if (! ValidateType(type))
1251 		return FALSE;
1252 
1253 	tmp = type;
1254 	if (tmp == TYP_SEQ)
1255 		tmp = TYP_AA;
1256 
1257 	tdp = &vi->types[tmp];
1258 	if ((uid >= tdp->minuid) && (uid <= tdp->maxuid))
1259 		return TRUE;
1260 
1261 	if (type == TYP_SEQ)
1262 	{
1263 		tdp = &vi->types[TYP_NT];
1264 		if ((uid >= tdp->minuid) && (uid <= tdp->maxuid))
1265 			return TRUE;
1266 	}
1267 
1268 	return FALSE;
1269 }
1270 
ValidateType(DocType type)1271 static Boolean NEAR  ValidateType (DocType type)
1272 
1273 {
1274     if (((type < 0) || (type >= NTYPE)) && (type != TYP_SEQ)) {
1275 		ErrPostEx(SEV_ERROR, ERR_CD_BADTYPE, 0, sCdError[ERR_CD_BADTYPE], type);
1276         return  FALSE;
1277     }
1278     return  TRUE;
1279 }
1280 
ValidateField(DocType type,DocField field)1281 static Boolean NEAR  ValidateField (DocType type, DocField field)
1282 
1283 {
1284     if (type<0 || type>=NTYPE) {
1285 		ErrPostEx(SEV_ERROR, ERR_CD_BADTYPE, 0, sCdError[ERR_CD_BADTYPE], type);
1286         return  FALSE;
1287     }
1288     if (field<0 || field>=NFLD) {
1289 		ErrPostEx(SEV_ERROR, ERR_CD_BADFIELD, 0, sCdError[ERR_CD_BADFIELD], field);
1290         return  FALSE;
1291     }
1292     if (vi->types[type].fields[field].num_bucket == 0) {
1293 		ErrPostEx(SEV_ERROR, ERR_CD_NOTERMS, 0, sCdError[ERR_CD_NOTERMS], type, field);
1294         return  FALSE;
1295     }
1296     return  TRUE;
1297 }
1298 
MakePath(Int2 nSdir,Int2 nPref,Int2 nSuff,Int2 nExtn)1299 static CharPtr NEAR  MakePath (Int2 nSdir, Int2 nPref, Int2 nSuff, Int2 nExtn)
1300 
1301 {
1302     Char ltemp[8], filename[60];
1303     Char   c;
1304     CharPtr p;
1305 
1306     StringCpy (buffer, sPath[nSdir]);
1307 
1308     StringCpy (filename, sPref[nPref]);
1309     StringCat (filename, sSuff[nSuff]);
1310     StringCat (filename, ".");
1311         if (nExtn <0) {
1312         c = (char) -nExtn;
1313         ltemp[0] = (char) ('0' + (c/100));
1314         ltemp[1] = (char) ('0' + ((c%100)/10));
1315         ltemp[2] = (char) ('0' + (c%10));
1316         ltemp[3] = '\0';
1317         StringCat (filename, ltemp);
1318     }
1319     else
1320         StringCat (filename, sExtn[nExtn]);
1321 
1322     if (bAppendVer)
1323         StringCat (filename, ";1");
1324 
1325 	if (upperCaseIt) {
1326       p = filename;
1327 	  while (*p != '\0') {
1328 	    *p = TO_UPPER (*p);
1329 	    p++;
1330 	  }
1331 	}
1332 
1333     FileBuildPath(buffer, NULL, filename);
1334     return  buffer;
1335 }
1336 
LoadUidIndex(DocType type)1337 static Boolean NEAR  LoadUidIndex (DocType type)
1338 
1339 {
1340     Int2  i;
1341     size_t n;
1342     Int4Ptr p;
1343     CharPtr path;
1344     FILE   *fd;
1345     Int4 header [3];
1346     Int4 version;
1347     Int4 issue;
1348 
1349     if (!ValidateType (type))
1350 		return FALSE;
1351 
1352 	if ((type == TYP_SEQ) || (type == TYP_NT))
1353 		type = TYP_AA;
1354 
1355     n = (size_t) vi->types[type].num_bucket + 1;
1356     p = type_bucket_index[type];
1357     if (p != NULL)
1358         return TRUE;
1359 
1360 	p = (Int4Ptr) MemNew(sizeof(Int4) * n);
1361 	p[n-1] = INT4_MAX;            /* put sentinel at end */
1362 	n--;
1363 
1364 	if (type == TYP_AA)
1365 	{
1366 		type = TYP_SEQ;
1367 	}
1368 
1369     path = MakePath (DIR_IDX, type, SUF_UID, EXT_IDX);
1370     if ((fd = FileOpen (path, "rb")) ==NULL)  {
1371 		MemFree (p);
1372 		ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1373         return FALSE;
1374     }
1375     if (vi->version != 0 || vi->issue != 0) { /* for compatibility with pre-release 6 data */
1376       if (FileRead ((CharPtr)header, sizeof(Int4), 3, fd) != 3) {
1377         FileClose (fd);
1378         MemFree (p);
1379         ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
1380         return FALSE;
1381       }
1382 	  if (! IsOKMagic((Uint4) SwapInt4(header[1]), vi->volume_label))
1383 	  {
1384         ErrPostEx(SEV_ERROR,  ERR_CD_BADINDEX, 0, sCdError[ERR_CD_BADINDEX]);
1385 		return FALSE;
1386       }
1387       header [2] = SwapInt4 (header [2]);
1388       version = (Int4) vi->version;
1389       issue = (Int4) vi->issue;
1390       if (header [2] != ((version << 16) | issue)) {
1391         ErrPostEx(SEV_ERROR,  ERR_CD_BADINDEX, 0, sCdError[ERR_CD_BADINDEX]);
1392 		return FALSE;
1393       }
1394     }
1395     if (FileRead ((CharPtr)p, sizeof(Int4), n, fd) !=n) {
1396         FileClose (fd);
1397 		MemFree (p);
1398 		ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
1399         return FALSE;
1400     }
1401     FileClose (fd);
1402 
1403 	if (type == TYP_SEQ)
1404 	{
1405 		type_bucket_index[TYP_NT] = p;
1406 		type_bucket_index[TYP_AA] = p;
1407 	} else {
1408 		type_bucket_index[type] = p;
1409 	}
1410     for (i=0; i< (Int2) n; i++, p++)
1411         *p = SwapInt4 (*p);
1412     return TRUE;
1413 }
1414 
LoadTrmIndex(DocType type,DocField field)1415 static Int2 NEAR  LoadTrmIndex (DocType type, DocField field)
1416 
1417 {
1418     Int2   i, k, c, buckets;
1419     CharPtr path, p;
1420     Int4  bytes;
1421     FILE   *fd;
1422 
1423     if (!ValidateType(type))  return 0;
1424     if (!ValidateField(type, field))  return  0;
1425 
1426     if (type == term_idx_type && field == term_idx_field)
1427         return  term_idx_count;
1428 
1429     path = MakePath (DIR_TRM, type, field, EXT_IDX);
1430     if ((fd = FileOpen (path, "r")) ==NULL)  {
1431 		ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1432         return  0;
1433     }
1434 
1435     if (term_idx_count > 0)  FreeTrmIndex();
1436 
1437     buckets = (Int2)vi->types[type].fields[field].num_bucket;
1438     bytes = (buckets + 1) * sizeof(CharPtr);
1439     if ((term_idx_str = (CharPtr PNTR) MemNew((size_t)bytes)) ==NULL) {
1440         FileClose (fd);
1441 		ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
1442         return(0);
1443     }
1444 
1445     for (i=0,c=0; c!=EOF; ) {
1446         for (p=buffer, k=0; k<128; k++) {
1447             c = fgetc(fd);
1448             if (c == EOF)  break;
1449             if (c == '\n' || c == '\r') {
1450                 *p = '\0';
1451                 break;
1452             }
1453             *p++ = (char) TO_LOWER(c);
1454         }
1455         while (c != '\n' && c != '\r' && c != EOF) {
1456             c = fgetc(fd);
1457         }
1458         *p = '\0';
1459         if (c != EOF && i < buckets) {
1460             if ((term_idx_str[i] = StringSave(buffer)) ==NULL)  {
1461                 FileClose(fd);
1462                 term_idx_count = i;
1463                 FreeTrmIndex();
1464 				ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
1465                 return  0;
1466             }
1467 			i++;
1468         }
1469     }
1470     FileClose (fd);
1471     term_idx_count = i;
1472     term_idx_type = type;
1473     term_idx_field = field;
1474     return  term_idx_count;
1475 }
1476 
FreeTrmIndex(void)1477 static void NEAR  FreeTrmIndex (void)
1478 
1479 {
1480     int  i;
1481 
1482     for (i=0; i<term_idx_count; i++)
1483 	{
1484 		MemFree(term_idx_str[i]);
1485 	}
1486     term_idx_str = (CharPtr PNTR) MemFree(term_idx_str);
1487     term_idx_count = 0;
1488     term_idx_type = -1;
1489     term_idx_field = -1;
1490 }
1491 
EntrezInfoOpen(CharPtr dirname)1492 extern AsnIoPtr   EntrezInfoOpen (CharPtr dirname)
1493 
1494 {
1495     CharPtr p, buf, endpath;
1496     AsnIoPtr aip = NULL;
1497     FILE * fp;
1498 
1499 
1500 	buf = (CharPtr) MemNew(PATH_MAX);
1501     p = StringMove(buf, dirname);
1502     endpath = buf + StringLen (buf);
1503     p = StringMove(p , "cdromdat.val;1");
1504     p -= 2;           /* point to the semi-colon */
1505     *p = '\0';        /* wipe-out the semi-colon */
1506     if ((fp = FileOpen(buf, "rb")) == NULL)
1507     {
1508         *p = ';';     /* put back the semi-colon */
1509         if ((fp = FileOpen(buf, "rb")) != NULL)
1510             bAppendVer = TRUE;
1511     }
1512 
1513     if (fp == NULL) {
1514     	StringCat (buf, ";1");
1515     	p = endpath;
1516     	while (*p != '\0') {
1517     	  *p = TO_UPPER (*p);
1518     	  p++;
1519     	}
1520     	upperCaseIt = TRUE;
1521         p -= 2;           /* point to the semi-colon */
1522         *p = '\0';        /* wipe-out the semi-colon */
1523         if ((fp = FileOpen (buf, "rb")) == NULL) {
1524           *p = ';';     /* put back the semi-colon */
1525           if ((fp = FileOpen(buf, "rb")) != NULL)
1526             bAppendVer = TRUE;
1527         }
1528     }
1529 
1530     if (fp != NULL)
1531         aip = AsnIoNew(ASNIO_BIN_IN, fp, NULL, NULL, NULL);
1532 	else
1533 		ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], (CharPtr) "cdromdat.val");
1534 	MemFree(buf);
1535     return aip;
1536 }
1537 
1538 
1539 #ifdef IS_LITTLE_ENDIAN
1540 
SwapInt2(Int2 k)1541 static Int2 NEAR  SwapInt2 (Int2 k)
1542 
1543 {
1544     Uint2  j, l;
1545 	Int2 m;
1546 
1547 	l = (Uint2)k;
1548     j  = ((l & (Uint2)0xFF00) >> 8);
1549     j |= ((l & (Uint2)0x00FF) << 8);
1550 	m = (Int2)j;
1551     return  m;
1552 }
1553 
SwapInt4(Int4 k)1554 static Int4 NEAR  SwapInt4 (Int4 k)
1555 
1556 {
1557     Uint4  j, l;
1558 	Int4 m;
1559 
1560 	l = (Uint4)k;
1561     j  = ((l & (Uint4)0xFF000000) >> 24);
1562     j |= ((l & (Uint4)0x00FF0000) >> 8);
1563     j |= ((l & (Uint4)0x0000FF00) << 8);
1564     j |= ((l & (Uint4)0x000000FF) << 24);
1565 	m = (Int4)j;
1566     return  m;
1567 }
1568 
1569 #endif
1570 
1571 /****** not used in reading cdrom **********************
1572 static Int4 NEAR MergeSegOffset (Int2 seg, Int4 offset)
1573 
1574 {
1575 	Int4 value;
1576 
1577 	value = (seg - 1) << 25;
1578 	value += offset;
1579 	return value;
1580 }
1581 ******************************************************/
1582 /***
1583 bit 31 = if 1, is a protein, else is not
1584 bits 30-25 = segment (file number)
1585 bits 24-0  = offset into file up to 32 mbytes big
1586 ****************/
SplitSegOffset(Int4 value,Int2Ptr segptr,Int4Ptr offsetptr)1587 static Boolean NEAR SplitSegOffset (Int4 value, Int2Ptr segptr, Int4Ptr offsetptr)
1588 
1589 {
1590 	*segptr = (Int2)(((value >> 25) & 0x0000003F) + 1);
1591 	*offsetptr = value & 0x01FFFFFF;
1592 	return TRUE;
1593 }
1594 
1595 /*****************************************************************************
1596 *
1597 *   FILE * CdDocFil (type, uid, dat, &size)
1598 *       opens a binary asn file, seeks to doc, returns a FILE * and size
1599 *
1600 *****************************************************************************/
CdDocFil(DocType type,DocUid uid,UidIdxPtr idx)1601 static FILE * NEAR CdDocFil (DocType type, DocUid uid, UidIdxPtr idx)
1602 
1603 {
1604     Int4 offset;
1605 	Int2 seg, dir, db;
1606     CharPtr path;
1607     FILE   *fd2;
1608     UidIdx ui;
1609 
1610 	if (idx == NULL)
1611 	{
1612 		idx = UidIdxGet(type, uid, &ui);
1613 		if (idx == NULL)
1614 			return NULL;
1615 		if ((type == TYP_AA || type == TYP_NT) && idx->type != type)
1616 			return NULL;
1617 	}
1618 
1619 	SplitSegOffset(idx->entry_offset, &seg, &offset);
1620 
1621     dir = (idx->type==TYP_ML) ? DIR_MED : DIR_SEQ;
1622     db = (idx->type==TYP_ML) ? PREF_MED : PREF_SEQ;
1623 
1624     path = MakePath (dir, db, SUF_ASN, (Int2) (-seg));
1625     if ((fd2=FileOpen (path, "rb")) == NULL)
1626 	{
1627 		ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1628         return  NULL;
1629     }
1630     fseek (fd2, offset, SEEK_SET);
1631     return fd2;
1632 }
1633 
1634 
1635 /*****************************************************************************
1636 *
1637 *   CdTrmFind(type, field, term)
1638 *      returns a pointer to a CdTerm structure
1639 *
1640 *****************************************************************************/
1641 
/* CdTrmDup -- returns a newly allocated copy of trmptr with its own copy of
 * the term string and a NULL next link; NULL if trmptr is NULL or the
 * CdTerm itself cannot be allocated.  The caller owns the copy.
 */
static CdTermPtr  CdTrmDup (CdTermPtr trmptr)

{
	CdTermPtr copy;

	if (trmptr == NULL)
		return NULL;

	copy = (CdTermPtr) MemNew (sizeof (CdTerm));
	if (copy == NULL)
		return NULL;

	copy->type = trmptr->type;
	copy->field = trmptr->field;
	copy->term = StringSave (trmptr->term);     /* private string, not shared */
	copy->special_count = trmptr->special_count;
	copy->total_count = trmptr->total_count;
	copy->offset = trmptr->offset;
	copy->page = trmptr->page;
	copy->next = NULL;                          /* the copy is not chained */

	return copy;
}
1663 
/* CdTrmCache -- remembers a private copy of trmptr in the first slot of the
 * ten-entry cdtrmcache, moving the other entries down one slot and freeing
 * the entry that falls off the end.
 *
 * Always returns trmptr itself, which still belongs to the caller.
 *
 * Nothing is cached and nothing is evicted when trmptr is NULL or the copy
 * cannot be made: an empty slot can never satisfy a lookup, so storing one
 * would only throw away a usable entry.
 */
static CdTermPtr  CdTrmCache (CdTermPtr trmptr)

{
	CdTermPtr copy;
	CdTermPtr oldest;
	Int2 i;

	if (trmptr == NULL)
		return trmptr;

	/* make the copy first, so a failure leaves the cache untouched */
	copy = CdTrmDup (trmptr);
	if (copy == NULL)
		return trmptr;

	oldest = cdtrmcache [9];
	if (oldest != NULL) {
		if (oldest->term != NULL) {
			MemFree (oldest->term);
		}
		MemFree (oldest);
		cdtrmcache [9] = NULL;
	}
	for (i = 9; i > 0; i--) {
		cdtrmcache [i] = cdtrmcache [i - 1];
	}
	cdtrmcache [0] = copy;
	return trmptr;
}
1685 
CdTrmFind(DocType type,DocField field,CharPtr term)1686 CdTermPtr  CdTrmFind (DocType type, DocField field, CharPtr term)
1687 
1688 {
1689     Int2 i;
1690 	Int2 termpage;
1691 	CdTermPtr ctp = NULL;
1692 	CdTermPtr trmptr;
1693 
1694     for (i = 0; i < 10; i++) {
1695       trmptr = cdtrmcache [i];
1696       if (trmptr != NULL && trmptr->type == type && trmptr->field == field &&
1697           StringICmp (trmptr->term, term) == 0) {
1698             return CdTrmDup (trmptr);
1699       }
1700     }
1701 	termpage = CdTrmLookup(type,field,term);
1702 	if (termpage < 0)
1703 		return NULL;
1704 
1705 				/** could it already be cached? ***/
1706 	if ((trmtype == type) && (trmfield == field) && (trmpages > 0))
1707 	{
1708 	 	if ((termpage <= (trmpage + trmpages - 1)) &&
1709 			((termpage + 3) >= trmpage))     /* overlapping range */
1710 		{
1711 			ctp = CdTrmLocate(term, termpage);
1712 			if (ctp != NULL)	/* found it */
1713 				return CdTrmCache (ctp);
1714 			if (termpage == trmpage)   /* not possible to find it */
1715 				return NULL;
1716 		}
1717 	}
1718 
1719 				/** Load term pages from disk ***/
1720 
1721 	termpage = CdTrmPages(type, field, termpage);
1722 	if (termpage == 0)
1723 		return NULL;
1724 
1725 	ctp = CdTrmLocate(term, termpage);
1726 	return CdTrmCache(ctp);
1727 }
1728 
1729 /*****************************************************************************
1730 *
1731 *   CdTrmLocate(term, page)
1732 *   	locates a term in a term list already in cache
1733 *
1734 *****************************************************************************/
CdTrmLocate(CharPtr term,Int2 page)1735 static CdTermPtr NEAR CdTrmLocate (CharPtr term, Int2 page)
1736 
1737 {
1738 	Int2 size, ctr, cmpval;
1739 	CharPtr ret;
1740 
1741     size = trmpages * BLKSIZE;     /* bytes in term cache */
1742     ctr = 0;
1743     ret = trmbuf;
1744 	size--;    /* have to have at least one space for test below */
1745     while (ctr < size)
1746     {
1747 	    while (*ret != '\n' && *ret != '\r')
1748 		{
1749     	  ret++;
1750 	      ctr++;
1751 		  if (ctr >= size)
1752 			return NULL;
1753     	}
1754 	    ret++;
1755     	ctr++;
1756 
1757 		cmpval = MeshStringICmp(ret, term);
1758 		if (! cmpval)     /* found it */
1759 			return CdTermRead(trmtype, trmfield, ret, trmbuf, page);
1760 		else if (cmpval > 0)   /* gone past */
1761 			return NULL;
1762     }
1763 	return NULL;
1764 }
1765 
1766 /*****************************************************************************
1767 *
1768 *   CdTermRead(type, field, ptr, bufr, page)
1769 *   	creates and returns a CdTermPtr from a CdTermPage
1770 *   	ptr should point at the start of a record (the term)
1771 *
1772 *****************************************************************************/
CdTermRead(Int2 type,Int2 field,CharPtr ptr,CharPtr bufr,Int2 page)1773 static CdTermPtr  CdTermRead (Int2 type, Int2 field, CharPtr ptr, CharPtr bufr, Int2 page)
1774 
1775 {
1776 	CdTermPtr trmptr;
1777 	CharPtr tmp, tmp2;
1778 	Char localbuf[10];
1779 	Int4 vals[3];
1780 	Int2 i;
1781 
1782 	if (ptr == NULL)
1783 		return NULL;
1784 	if (*ptr == '\0')
1785 		return NULL;
1786 	trmptr = (CdTermPtr) MemNew(sizeof(CdTerm));
1787 	trmptr->type = type;
1788 	trmptr->field = field;
1789 	tmp = ptr;
1790 	tmp2 = tmp;
1791 	while (*tmp2 != '\t')
1792 		tmp2++;
1793 	*tmp2 = '\0';
1794 	trmptr->term = StringSave(tmp);
1795 	*tmp2 = '\t';
1796 	tmp2++;
1797 	for (i = 0; i < 3; i++)
1798 	{
1799 		tmp = &localbuf[0];
1800 		while (*tmp2 >= ' ')
1801 		{
1802 			*tmp = *tmp2;
1803 			tmp++; tmp2++;
1804 		}
1805 		*tmp = '\0';
1806 		vals[i] = atol(localbuf);
1807 		tmp2++;
1808 	}
1809 	trmptr->special_count = vals[0];
1810 	trmptr->total_count = vals[1];
1811 	trmptr->offset = vals[2];
1812 	trmptr->page = page + (Int2) (((size_t) (ptr - bufr - 1)) / (size_t) BLKSIZE);
1813 	return trmptr;
1814 }
1815 
/*****************************************************************************
*
*   CdTermScan(type, field, page, numpage, proc)
*   	returns terms found to proc until
*   	1) no more pages
*   	2) numpage pages have been read
*   	3) proc returns FALSE
*   returns number of complete pages read
*   if numpage=0, scans until EOF or proc returns FALSE
*
*   	type, field:  term list to scan
*   	page:         page at which to start
*   	proc:         called once per term with a new CdTerm built by
*   	              CdTermRead(); the CdTerm is not freed here.  If proc
*   	              is NULL nothing is read and 0 is returned.
*
*   	Pages are fetched through CdTrmPages(), so the scan works on the
*   	shared term-page buffer (trmbuf).  Whatever precedes the first
*   	line end in each freshly loaded window is skipped.
*
*****************************************************************************/
Int2  CdTermScan (DocType type, DocField field, Int2 page, Int2 numpage, CdTermProc proc)

{
	Boolean    goOn;
	CharPtr    ptr;
	Int2       pages, size, pagectr, startpage;
	CdTermPtr  trmptr;

	startpage = page;
	pagectr = 0;
	if (proc == NULL)
		return pagectr;

	goOn = TRUE;
    while (goOn)
	{
		/* load the next window; startpage is the page at trmbuf[0] */
		startpage = page;
		pages = CdTrmPages (type, field, page);
		if (pages == 0)
			return pagectr;
		ptr = trmbuf;
		/* NOTE(review): size is an Int2; confirm pages * BLKSIZE always fits */
		size = pages * BLKSIZE;     /* bytes available */
		/* from here on "pages" counts down the bytes left in the current page */
		pages = BLKSIZE;      /* bytes per page */
		while ((size > 0) && (goOn))
		{
			/* advance to the line end that closes the current record */
			while (*ptr != '\n' && *ptr != '\r' && *ptr != '\0')
			{
				size--;
				pages--;
				ptr++;
			}
			/* CdTrmPages zero-fills the buffer, so a NUL marks the end of the data */
			if (*ptr == '\0')
				return (Int2) (pagectr + 1);   /* last page */
			/* step over the line end; ptr is now at the start of a record */
			size--;
			pages--;
			ptr++;
			if (size > 0)
			{
				trmptr = CdTermRead(type, field, ptr, trmbuf, startpage);
				if (trmptr != NULL) {
					goOn = proc (trmptr);
				}
			}
			/* move to the end of the record just reported */
			while (*ptr != '\n' && *ptr != '\r' && *ptr != '\0')
			{
				size--;
				pages--;
				ptr++;
			}
			if (pages < 0)   /* crossed a page boundary */
			{
				/* carry the overshoot into the byte count of the new page */
				pages = BLKSIZE + pages;
				numpage--;
				pagectr++;
				page++;
				/* numpage reaches zero only if it started positive */
				if (! numpage)
					goOn = FALSE;
			}
		}
	}
	return pagectr;
}
1889 
1890 /*****************************************************************************
1891 *
1892 *   CdLinkUidGet(type, link_to_type, numuid, uids, max)
1893 *   	returns count of input uids processed
1894 *       returns -1 on error
1895 *       if neighbors (type == link_to_type)
1896 *   		sums weights for same uids
1897 *   	if (more than max uids, frees uids and weights, but leaves num set)
1898 *
1899 *****************************************************************************/
CdLinkUidGet(LinkSetPtr PNTR result,DocType type,DocType link_to_type,Int2 numuid,Int4Ptr uids,Boolean mark_missing,Int4 maxlink)1900 Int2  CdLinkUidGet (LinkSetPtr PNTR result, DocType type, DocType link_to_type, Int2 numuid, Int4Ptr uids, Boolean mark_missing, Int4 maxlink)
1901 
1902 {
1903 	UidIdxPtr query;
1904     UidIdx local;
1905 	DocType querytype;
1906 	LinkSetPtr lsp = NULL;
1907 	Int2 counts[NTYPE];
1908 	FILE * fp;
1909 	Int4 offset;
1910 	CharPtr path;
1911 	Uint1Ptr ptr1;
1912 	Int2 numfound = 0;
1913 	Int4 j, l, r, k;
1914 	Boolean first = TRUE;
1915 	Boolean sorted;
1916 	Int4 cursize = 0, finalsize, finalcount = 0, count, i;
1917 	Int4Ptr newuids = NULL,
1918 			newwts = NULL,
1919 			finaluids = NULL,
1920 			finalwts = NULL,
1921 			tmp;
1922 
1923 	*result = NULL;
1924 
1925 	if (! ValidateType(link_to_type))
1926 		return -1;
1927 
1928 	for (i = 0, query = NULL; i < numuid && query == NULL; i++)
1929 	{
1930 		query = UidIdxGet(type, uids[i], &local);
1931 		if ((mark_missing) && (query == NULL))
1932 			uids[i] *= -1;
1933 		if (query != NULL)
1934 			j = i;
1935 	}
1936 	if ((i == numuid) && (query == NULL)) { /* none found */
1937 		lsp = (LinkSetPtr) MemNew(sizeof(LinkSet));
1938 		lsp->uids = NULL;
1939 		lsp->weights = NULL;
1940 		*result = lsp;
1941 		return 0;
1942 	}
1943 
1944 	querytype = query->type;   /* record, to allow for TYP_SEQ */
1945 	if (link_to_type == TYP_SEQ)
1946 	{
1947 		if (type != TYP_SEQ)
1948 			return -1;           /* can't do it */
1949 		else
1950 			link_to_type = querytype;   /* neighbors */
1951 	}
1952 
1953 	path = MakePath (DIR_LNK, query->type, SUF_REC, EXT_LNK);
1954     if ((fp = FileOpen(path, "rb")) == NULL)
1955 	{
1956 		ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1957         return -1;
1958 	}
1959 
1960 	if (numuid > 1)
1961 	{
1962 		if (numuid > 320) {
1963 			finalsize = 16000;
1964 		} else {
1965 			finalsize = MIN((numuid * 50), 16000);     /* make a guess */
1966 		}
1967 		finaluids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));  /* make a guess */
1968 		if (link_to_type == querytype)
1969 			finalwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
1970 	}
1971 
1972 	for (; j < numuid; j++)
1973 	{
1974 
1975 		if (! first)
1976 		{
1977 		    if ((query = UidIdxGet(type, uids[j], &local)) == NULL)
1978 			{
1979 				if (mark_missing)
1980 					uids[j] *= -1;
1981 				continue; /* must examine remaining UIDs */
1982 			}
1983 		}
1984 		else
1985 			first = FALSE;
1986 
1987 		numfound++;         /* count how many uids we process */
1988 
1989 	                            /* read the link counts for all types */
1990 
1991 	    fseek (fp, query->link_offset, SEEK_SET);
1992 		FileRead((CharPtr)&counts[0], sizeof(Int2), NTYPE, fp);
1993 		for (i = 0; i < NTYPE; i++)
1994 			counts[i] = SwapInt2(counts[i]);
1995 
1996 		offset = 0;
1997 		for (i = 0; i < link_to_type; i++)
1998 		{
1999 			offset += counts[i] * sizeof(DocUid);
2000 			if (i == query->type) {  /* has weights */
2001 				offset += counts[i] * sizeof(Uint1);
2002 			}
2003 		}
2004 		if (offset)						   /* skip preceeding link types */
2005 			fseek(fp, offset, SEEK_CUR);
2006 
2007 		count = (Int4)counts[link_to_type];
2008 
2009 		if (count > cursize)
2010 		{
2011 			MemFree(newuids);
2012 			newuids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (count + 1)));
2013 			if (querytype == link_to_type)
2014 			{
2015 				MemFree(newwts);
2016 				newwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (count + 1)));
2017 			}
2018 			cursize = count;
2019 		}
2020 		FileRead((CharPtr)newuids, sizeof(DocUid), (size_t)count, fp);
2021 		for (i = 0; i < count; i++)
2022 			newuids[i] = SwapInt4(newuids[i]);
2023 		if (link_to_type == querytype)    /* get the weights */
2024 		{
2025 			ptr1 = (Uint1Ptr) newwts;
2026 			FileRead((CharPtr)ptr1, sizeof(Uint1), (size_t)count, fp);
2027 			for (i = count - 1; i >= 0; i--) {
2028 				newwts[i] = (Int4) (ptr1[i]);
2029 			}
2030 		}
2031 		if (numuid > 1)           /* merging lists */
2032 		{
2033 			if ((finalcount + count) > finalsize)
2034 			{
2035 				finalsize += count;
2036 				if (finalsize > 16000)
2037 				{
2038 					MemFree(newuids);
2039 					MemFree(newwts);
2040 					MemFree(finaluids);
2041 					MemFree(finalwts);
2042 					ErrPostEx(SEV_WARNING, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
2043 					return -1;
2044 				}
2045 				tmp = finaluids;
2046 				finaluids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
2047 				MemCopy(finaluids, tmp, (size_t)(finalcount * sizeof(Int4)));
2048 				MemFree(tmp);
2049 				if (querytype == link_to_type)
2050 				{
2051 				    tmp = finalwts;
2052 					finalwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
2053 					MemCopy(finalwts, tmp, (size_t)(finalcount * sizeof(Int4)));
2054 					MemFree(tmp);
2055 				}
2056 			}
2057 			for (i = 0; i < count; i++)
2058 			{
2059 				l = 0;               /* binary search */
2060 				r = (finalcount - 1);
2061 				k = 0;
2062 				while ((l <= r) && (finaluids[k] != newuids[i]))
2063 				{
2064 					k = (l + r)/ 2;
2065 					if (newuids[i] < finaluids[k])
2066 						r = k - 1;
2067 					else
2068 						l = k + 1;
2069 				}
2070 				if (finaluids[k] == newuids[i])   /* merge */
2071 				{
2072 					if (querytype == link_to_type)
2073 						finalwts[k] += newwts[i];
2074 				}
2075 				else
2076 				{
2077 					if (finalcount)
2078 					{
2079 						if (finaluids[k] < newuids[i])
2080 							k++;
2081 						l = (finalcount - k);
2082 						r = l;
2083 						tmp = &finaluids[finalcount];
2084 						while (r)
2085 						{
2086 							*tmp = *(tmp-1);
2087 							tmp--; r--;
2088 						}
2089 						if (querytype == link_to_type)
2090 						{
2091 							r = l;
2092 							tmp = &finalwts[finalcount];
2093 							while (r)
2094 							{
2095 								*tmp = *(tmp-1);
2096 								tmp--; r--;
2097 							}
2098 						}
2099 					}
2100 					finaluids[k] = newuids[i];
2101 					if (querytype == link_to_type)
2102 						finalwts[k] = newwts[i];
2103 					finalcount++;
2104 				}
2105 			}
2106 		}
2107 	}
2108 
2109 	FileClose(fp);
2110 
2111 	lsp = (LinkSetPtr) MemNew(sizeof(LinkSet));
2112 	if (maxlink <= 0)
2113 		maxlink = 16000;    /* default */
2114 
2115  	if (numuid == 1)
2116 	{
2117 		lsp->num = count;
2118 		if (lsp->num <= maxlink)
2119 		{
2120 			lsp->uids = newuids;
2121 			lsp->weights = newwts;
2122 		}
2123 		else
2124 		{
2125 			MemFree(newuids);
2126 			MemFree(newwts);
2127 		}
2128 	}
2129 	else
2130 	{
2131 		MemFree(newuids);
2132 		MemFree(newwts);
2133 		lsp->num = finalcount;
2134 		if (lsp->num <= maxlink)
2135 		{
2136 			lsp->uids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (finalcount + 1)));
2137 			MemCopy(lsp->uids, finaluids, (size_t)(finalcount * sizeof(Int4)));
2138 			MemFree(finaluids);
2139 			if (querytype == link_to_type)
2140 			{
2141 				lsp->weights = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (finalcount + 1)));
2142 				MemCopy(lsp->weights, finalwts, (size_t)(finalcount * sizeof(Int4)));
2143 				MemFree(finalwts);
2144 				linksort(lsp->uids, lsp->weights, lsp->num);
2145 			}
2146 		}
2147 		else
2148 		{
2149 			MemFree(finaluids);
2150 			MemFree(finalwts);
2151 		}
2152 	}
2153 
2154 	if (lsp->num > 1 && querytype != link_to_type && link_to_type == TYP_ML)
2155 	{
2156 		/* try to sort MEDLINE uids in descending order */
2157 
2158 		for (sorted = TRUE, k = 1; k < lsp->num; k++)
2159 		{
2160 			if (lsp->uids[k-1] < lsp->uids[k])
2161 			{
2162 				sorted = FALSE;
2163 				break;
2164 			}
2165 		}
2166 
2167 		if (! sorted)
2168 		{   /* assume that the existing order is reversed */
2169 		    for (k = (lsp->num / 2) - 1; k >= 0; k--)
2170 		    {
2171 		    	j = lsp->uids[k];
2172 		    	lsp->uids[k] = lsp->uids[lsp->num - 1 - k];
2173 		    	lsp->uids[lsp->num - 1 - k] = j;
2174 		    }
2175 
2176 			/* now check that it's sorted */
2177 	    	for (sorted = TRUE, k = 1; k < lsp->num; k++)
2178 	    	{
2179 	    		if (lsp->uids[k-1] < lsp->uids[k])
2180 	    		{
2181 	    			sorted = FALSE;
2182 	    			break;
2183 	    		}
2184 	    	}
2185 
2186 			if (! sorted)
2187 			{ /* as a last resort, sort them using quicksort */
2188 				/* dummy array */
2189 				finaluids = (Int4Ptr) MemDup(lsp->uids, (size_t) (sizeof(Int4) * lsp->num));
2190 				linksort(finaluids, lsp->uids, lsp->num);
2191 				MemFree(finaluids);
2192 			}
2193 		}
2194 	}
2195 
2196 	*result = lsp;
2197 	return numfound;
2198 }
2199 
2200 /*****************************************************************************
2201 *
2202 *   linksort(uids, wts, n)
2203 *   	quicksort into descending wts order
2204 *
2205 *****************************************************************************/
linksort(Int4Ptr uids,Int4Ptr wts,Int4 n)2206 static void NEAR linksort (Int4Ptr uids, Int4Ptr wts, Int4 n)
2207 
2208 {
2209    Int4 tp, tp2;
2210    Int4 l, r, i, j, m, scnt;
2211    Int4 pstack[100];
2212    Int4Ptr p;
2213 
2214    if (n < 2)
2215        return;
2216 
2217    scnt = 2;
2218    l = 0; r = n - 1; p = pstack + 2;
2219 
2220    do
2221    {
2222        if ((r - l) > 15)
2223        {
2224            i = l; j = r;
2225                                      /* median of three */
2226 
2227            m = ((j - i) / 2) + i;    /* get middle element */
2228            /* partitioning operation */
2229            do
2230            {
2231                while((j > i) && (wts[j] <= wts[i]))
2232                    j--;
2233                if(j != i)
2234                {
2235                    tp = wts[j]; wts[j] = wts[i]; wts[i] = tp;
2236                    tp = uids[j]; uids[j] = uids[i]; uids[i] = tp;
2237                    while((i < j) && (wts[i] >= wts[j]))
2238                        i++;
2239                    if(i != j)
2240                       {tp = wts[j]; wts[j] = wts[i]; wts[i] = tp;
2241                        tp = uids[j]; uids[j] = uids[i]; uids[i] = tp;}
2242                }
2243            }while(i != j); /* end do */
2244 
2245            /* recursion elimination */
2246            if(i)
2247            {
2248                if((i - l) > (r - i))  /* put long segment on "stack" */
2249                    {*p = l; p++; *p = i - 1; p++; l = i + 1;}
2250                else
2251                    {*p = i + 1; p++; *p = r; p++; r = i - 1;}
2252                scnt += 2;
2253                if (scnt >= 100)
2254 				{
2255 					ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, "linksort > 100");
2256 					return;
2257 				}
2258            }
2259            else
2260            {
2261                l = i + 1;
2262            }
2263        }
2264                      /* if done with this segment, "pop" next */
2265        else
2266        {
2267             p--; r = *p; p--; l = *p; scnt -= 2;
2268        }
2269    }
2270    while (p > pstack);      /* end do */
2271 
2272 
2273    /* do the final insertion sort */
2274 
2275    for(i = 1; i < n; i++)
2276    {
2277        tp = wts[i]; tp2 = uids[i]; j = i; m = j - 1;
2278        while ((j > 0) && (wts[m] < tp))
2279            {wts[j] = wts[m]; uids[j] = uids[m]; j--; m--;}
2280        wts[j] = tp;
2281 	   uids[j] = tp2;
2282    }
2283    return;
2284 }
2285 
2286 /*****************************************************************************
2287 *
2288 *   DecompReadFunc:
2289 *   	substituted read function for compressed data sources (for Sequence
2290 *   	and Medline data).
2291 *
2292 *****************************************************************************/
DecompReadFunc(Pointer p,CharPtr buff,Uint2 count)2293 static Int2 LIBCALLBACK DecompReadFunc (Pointer p, CharPtr buff, Uint2 count)
2294 {
2295 	DecompInfoPtr dcp = (DecompInfoPtr) p;
2296 	Uint1 loc_buff[3];
2297 	int bytes_to_request;
2298 	int bytes_read;
2299 
2300 	if (dcp->compr == COMPR_DONT_KNOW)
2301 	{
2302 		int c;
2303 
2304 		/* read the "decompression protocol identifier" */
2305 		if ((c = fgetc(dcp->fp)) == EOF)
2306 			return 0;
2307 		dcp->compr = (Uint1) c;
2308 
2309 		if (dcp->compr == COMPR_NONE)
2310 		{
2311 			/* for no decompression, we still have 4 bytes of overhead;     */
2312 			/* 1 byte for the protocol identifier, and 3 bytes for a length */
2313 			/* field of what follows                                        */
2314 			if (FileRead((CharPtr) loc_buff,1,3,dcp->fp) != 3)
2315 			{
2316 				ErrPostEx(SEV_ERROR, ERR_CD_BADDECOMP, 0,
2317 				    	"No length field detected for uncompressed data");
2318 				return 0;
2319 			}
2320 
2321 			/* interpret the 3-byte length in a machine-independant order;  */
2322 			/* BIG ENDIAN (first byte is most significant)                  */
2323 			dcp->bytes_left = (((int) loc_buff[0]) * 256 + loc_buff[1]) * 256 +
2324 					          loc_buff[2];
2325 		}
2326 	}
2327 
2328 	switch (dcp->compr)
2329 	{
2330 		case COMPR_NONE :
2331 			/* based on knowledge of how many bytes are in this compressed  */
2332 			/* ASN.1 object, return only as many bytes as the caller really */
2333 			/* needs                                                        */
2334 			bytes_to_request = (int) MIN((Uint4) count, dcp->bytes_left);
2335 			bytes_read = FileRead(buff,1,bytes_to_request,dcp->fp);
2336 			dcp->bytes_left -= bytes_read;
2337 			if (dcp->bytes_left <= 0)
2338 			{
2339 				/* reset for stream read of next entry */
2340 				dcp->compr = COMPR_DONT_KNOW;
2341 			}
2342 			return bytes_read;
2343 
2344 		case COMPR_HUFFMAN :
2345 			return HuffmanRead(dcp,buff,count);
2346 
2347 		/* others ?? */
2348 
2349 		default:
2350 			ErrPostEx(SEV_ERROR, ERR_CD_BADDECOMP, 0,
2351 				    "Invalid decompression code detected <%d>", dcp->compr);
2352 			return 0;
2353 	}
2354 }
2355 
2356 /*****************************************************************************
2357 *
2358 *   HuffmanRead:
2359 *   	read Huffman compressed data
2360 *
2361 *****************************************************************************/
HuffmanRead(DecompInfoPtr dcp,CharPtr buff,Uint2 count)2362 static Int2 HuffmanRead (DecompInfoPtr dcp, CharPtr buff, Uint2 count)
2363 {
2364 	register unsigned int mask = dcp->mask;
2365 	register unsigned int byte = dcp->byte;
2366 	CharPtr p = buff;
2367 	int i, cnt = 0;
2368 	int c;
2369 	int k;
2370 	FILE *fd1 = dcp->fp;
2371 
2372 
2373 	while (cnt < (int) count)
2374 	{
2375 		for (i=0; i>=0; )
2376 		{
2377 			if (mask == 0)
2378 			{
2379 				if ((c = fgetc(fd1)) == EOF)
2380 				{
2381 					/* should never reach this point */
2382 					i = HUFFMAN_SENTINEL - 257;
2383 					break;
2384 				}
2385 				else
2386 				{
2387 					byte = (unsigned int) c;
2388 					mask = 0x80;
2389 				}
2390 			}
2391 
2392 			if (byte & mask)
2393 				i = vi->huff_left[i];
2394 			else
2395 				i = vi->huff_right[i];
2396 
2397 			mask >>= 1;
2398 		}
2399 
2400 		if ((k = i + 257) == HUFFMAN_SENTINEL)
2401 		{
2402 			mask = 0; /* to skip remaining bits in current byte */
2403 			dcp->compr = COMPR_DONT_KNOW; /* reset for next record */
2404 			break;
2405 		}
2406 
2407 		*p++ = (char) k;
2408 		cnt++;
2409 	}
2410 
2411 	dcp->mask = mask;
2412 	dcp->byte = byte;
2413 	return cnt;
2414 }
2415 
2416 /*****************************************************************************
2417 *
2418 *   DecompInit:
2419 *   	Create a data structure to be used in decompression; the data structures
2420 *   	are stored in a linked list. While no mutual exclusion is provided on
2421 *   	list access, each decompression is independent ... therefore, many
2422 *   	compressed ASN.1 data streams may be open and used simultaneously
2423 *
2424 *****************************************************************************/
DecompInit(FILE * fp)2425 static DecompInfoPtr NEAR DecompInit (FILE *fp)
2426 {
2427 	DecompInfoPtr dcp;
2428 
2429 	dcp = (DecompInfoPtr) MemNew(sizeof(DecompInfo));
2430 
2431 	if (dcp == NULL)
2432 		return NULL;
2433 
2434 	dcp->fp = fp;
2435 	dcp->compr = COMPR_DONT_KNOW;
2436 	dcp->mask = 0;
2437 	dcp->bytes_left = 0;
2438 
2439 	/* insert it */
2440 	dcp->next = DecompInfoList;
2441 	DecompInfoList = dcp;
2442 
2443 	return dcp;
2444 }
2445 
2446 /*****************************************************************************
2447 *
2448 *   DecompFini:
2449 *   	Find and destroy the specified decompression data structure. The
2450 *   	data structures, in addition to having an address known to its user,
2451 *   	also contains a copy of the AsnIoPtr for that data stream. This
2452 *   	enables the Fini() operation to be performed using either the address
2453 *   	of this structure as a key, or the address of the AsnIoPtr as a key.
2454 *
2455 *****************************************************************************/
DecompFini(AsnIoPtr aip,DecompInfoPtr dcp)2456 static Boolean NEAR DecompFini (AsnIoPtr aip, DecompInfoPtr dcp)
2457 {
2458 	DecompInfoPtr dtrail;
2459 	DecompInfoPtr temp;
2460 
2461 	if (DecompInfoList == NULL)
2462 		return FALSE; /* not found */
2463 
2464 	/* check for first element in list */
2465 	if ((DecompInfoList == dcp && dcp != NULL) ||
2466 	    (DecompInfoList->aip == aip && aip != NULL))
2467 	{ /* unlink and delete */
2468 		temp = DecompInfoList->next;
2469 		DecompInfoFree(DecompInfoList);
2470 		DecompInfoList = temp;
2471 		return TRUE;
2472 	}
2473 
2474 	if (DecompInfoList->next == NULL)
2475 	{ /* single-element list, and it's not the first element in list */
2476 		return FALSE;
2477 	}
2478 
2479 	for (dtrail = DecompInfoList; dtrail->next != NULL;
2480 	     dtrail = dtrail->next)
2481 	{ /* search remainder of list */
2482 		if ((dtrail->next == dcp && dcp != NULL) ||
2483 		    (dtrail->next->aip == aip && aip != NULL))
2484 		{
2485 			temp = dtrail->next->next;
2486 			DecompInfoFree(dtrail->next);
2487 			dtrail->next = temp;
2488 			return TRUE;
2489 		}
2490 	}
2491 
2492 	return FALSE;
2493 }
2494 
2495 
2496 /*****************************************************************************
2497 *
2498 *   DecompInfoFree:
2499 *   	Free a decompression data structure
2500 *****************************************************************************/
DecompInfoFree(DecompInfoPtr dcp)2501 static void NEAR DecompInfoFree(DecompInfoPtr dcp)
2502 {
2503 	MemFree(dcp);
2504 }
2505 
2506 
2507 /*****************************************************************************
2508 *
2509 *   IsOKMagic:
2510 *   	Validate the magic number for a file
2511 *****************************************************************************/
IsOKMagic(Uint4 magic,CharPtr volume_label)2512 static Boolean NEAR IsOKMagic(Uint4 magic, CharPtr volume_label)
2513 {
2514 	/* check for a match with the "base" magic number; supported for        */
2515 	/* backwards compatability                                              */
2516 	if (magic == CD_MAGIC_BASE)
2517 		return TRUE;
2518 
2519 	/* now check if the magic number equals the "base" plus the checksum of */
2520     /* the volume-label (so as to be able to distinguish between index      */
2521 	/* files associated with different CDs)                                 */
2522 	while (*volume_label)
2523 	{
2524 		magic -= (int) (*volume_label++);
2525 	}
2526 	return (magic == CD_MAGIC_BASE);
2527 }
2528 
2529 
2530 #endif
2531 
2532