1 /*   cdentrez.c
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * RCS $Id: cdentrez.c,v 6.2 1999/03/11 23:20:07 kans Exp $
27  *
28  * Author:  Ostell, Kans
29  *
30  * Version Creation Date:   10/15/91
31  *
32  * File Description:
33  *   	entrez index access library for Entrez CDROM
34  *
35  * Modifications:
36  * --------------------------------------------------------------------------
37  * Date     Name        Description of modification
38  * -------  ----------  -----------------------------------------------------
39  * 07-07-94 Schuler     Added CdEntrezGetInfo function
40  * 07-12-94 Schuler     Added #ifdef _NEW_CdEntrez_/_OLD_CdEntrez
41  * 07-13-94 Schuler     Moved CdTermFree here from cdromlib.c
42  * 09-22-94 Schuler     CdEntrezFini: set _nouveau to FALSE
43  * 11-22-94 Schuler     Cleaned up some integer size problems
44  *
45  * 05-19-95 Schuler     Added rcs Log directive for automatic insertion of
46  *                      modification comments.
47  *
48  * Revision $Log: cdentrez.c,v $
49  * Revision Revision 6.2  1999/03/11 23:20:07  kans
50  * Revision sprintf cast
51  * Revision
52  * Revision Revision 6.1  1998/08/24 18:42:15  kans
53  * Revision fixed -v -fd warnings
54  * Revision
55  * Revision Revision 6.0  1997/08/25 18:12:52  madden
56  * Revision Revision changed to 6.0
57  * Revision
58  * Revision Revision 5.5  1997/06/26 21:55:31  vakatov
59  * Revision [PC] DLL'd "ncbicdr.lib", "ncbiacc.lib", "ncbinacc.lib" and "ncbicacc.lib"
60  * Revision
61  * Revision Revision 5.4  1997/03/07 17:16:10  epstein
62  * Revision always choose the highest GI in EntrezFindSeqId
63  * Revision
64  * Revision 5.3  1997/01/14  21:26:07  epstein
65  * plug memory leak when performing Entrez set-difference operations
66  *
67  * Revision 5.2  1996/11/22  18:02:19  epstein
68  * change algorithm for looking up PDB accessions
69  *
70  * Revision 5.1  1996/08/14  19:56:41  epstein
71  * add APIs for fetching pieces of biostruc annots (mostly written by Chris Hogue)
72  *
73  * Revision 5.0  1996/05/28  13:55:34  ostell
74  * Set to revision 5.0
75  *
76  * Revision 4.18  1996/05/14  21:01:12  epstein
77  * use the SQID index and docsum to convert back-and-forth between GIs and SeqIds, per Jim Ostell
78  *
79  * Revision 4.17  1996/04/01  21:02:31  epstein
80  * remove dead CdEntrezBiostrucAnnotSetGet() code
81  *
82  * Revision 4.16  1996/04/01  20:59:38  epstein
83  * Schuler/Epstein changes for cleaner CdEntrezBiostrucAnnotSetGet retrieval
84  *
85  * Revision 4.15  1996/03/29  18:52:12  epstein
86  * add support for structure alignments (includes kludge for now)
87  *
88  * Revision 4.14  1995/10/23  21:39:56  epstein
89  * another tweak for PC 16-bit addressing
90  *
91  * Revision 4.13  1995/10/23  14:17:52  epstein
92  * fix 16/32-bit portability problems
93  *
94  * Revision 4.12  1995/10/03  14:12:40  epstein
95  * repair term-checking logic to avoid erroneous caching
96  *
97  * Revision 4.11  1995/10/02  15:25:56  epstein
98  * correct range-checking logic due to semantics of StringXCmp()
99  *
100  * Revision 4.10  1995/10/02  12:51:23  epstein
101  * fix endpoints for range scanning
102  *
103  * Revision 4.9  1995/10/02  12:49:44  epstein
104  * add memory-based performance enhancements
105  *
106  * Revision 4.8  1995/10/02  02:35:33  epstein
107  * add range-checking
108  *
109  * Revision 4.7  1995/09/19  13:27:51  epstein
110  * add truncation limit
111  *
112  * Revision 4.6  1995/08/30  20:54:53  epstein
113  * search TYP_CH database if retcode is -1
114  *
115  * Revision 4.5  1995/08/28  23:20:47  kans
116  * includes new mmdbapi headers
117  *
118  * Revision 4.4  1995/08/28  17:44:01  epstein
119  * add code so that when retcode is -1, we perform less validation on the retrieved Seq-entry
120  *
121  * Revision 4.3  1995/08/24  20:44:10  epstein
122  * add more stuff for genomes
123  *
124  * Revision 4.2  1995/08/18  17:41:17  epstein
125  * fix (?) parsing of PDB accession per Brandon's observation
126  *
127  * Revision 4.1  1995/08/11  20:26:18  epstein
128  * add max-models support for biostrucs
129  *
130  * Revision 4.0  1995/07/26  13:50:32  ostell
131  * force revision to 4.0
132  *
133  * Revision 2.62  1995/07/19  22:07:00  kans
134  * added (probably superfluous) casts to some sprintf calls
135  *
136  * Revision 2.61  1995/06/29  15:57:03  epstein
137  * added Complexity argument when fetching structures
138  *
139  * Revision 2.60  95/06/27  11:54:35  kans
140  * replaced _OLD_CDEntrez_ with _OLD_CdEntrez_
141  *
142  * Revision 2.59  1995/06/23  16:02:43  kans
143  * support for accmmdbs.c stub to resolve symbols without MMDB link
144  *
145  * Revision 2.58  1995/06/23  13:22:25  kans
146  * Biostruc_CD_supported symbol needed for local MMDB access
147  *
148  * Revision 2.57  1995/05/16  14:36:20  schuler
149  * Automatic comment insertion enabled
150  *
151  *
152  * ==========================================================================
153  */
154 
/* RCS revision keyword for this file, expanded by the version-control system. */
155 #define REVISION_STR  "$Revision: 6.2 $"
156 
/* Module and file identification.  These are defined before the toolkit
   headers are included; presumably the toolkit's error-reporting macros
   pick up THIS_MODULE / THIS_FILE -- confirm against the error header. */
157 static char * _this_module = "CdEntrez";
158 #define THIS_MODULE _this_module
159 static char * _this_file = __FILE__;
160 #define THIS_FILE _this_file
161 
162 #include <accentr.h>
163 #include <cdentrez.h>
164 #include <sequtil.h>
165 #include <objall.h>
166 
/* State record handled by the NewPost / FreePost / ReadItem / WriteItem /
   FlushItems / RewindPost helpers declared further down (their bodies are
   not part of this section of the file).
   NOTE(review): the field layout suggests buffered access to a ByteStore
   of DocUids (uids = backing store, buffer/bufsize = in-memory window,
   index/count = cursor and fill level) -- confirm against NewPost and
   ReadItem before relying on this. */
167 typedef struct posting {
168   ByteStorePtr  uids;
169   DocUidPtr     buffer;
170   Int4          bufsize;
171   Int4          index;
172   Int4          count;
173 } Posting, PNTR PostingPtr;
174 
/* Limit used by the term-scanning code; its use is not visible in this
   section of the file. */
175 #define SCAN_MAX 200
176 
177 /* the 1023 below is not a typo; it's needed to avoid overflowing 16-bit
178    addressing on PCs */
179 #define DEF_CDENTREZ_MEMUSAGE (64 * 1023L)
/* The limits below are not constants: each expands to an expression on the
   file-scope variable cdMemUsage, which CdEntrezInit sets from the
   "CdEntrezMemUsage" application property (or DEF_CDENTREZ_MEMUSAGE) and
   caps at MAXALLOC.  They therefore change when cdMemUsage changes. */
180 #define MAX_CDENTREZ_UID_LIST_SIZE (cdMemUsage)
181 #define CDENTREZ_TERM_MAX (cdMemUsage / 4)
182 #define MAX_CDENTREZ_BYTESTORE (cdMemUsage / 4)
183 #define MAX_CDENTREZ_SMALL_LIST (cdMemUsage / 2)
184 
/* Per-term record used by the scan/merge helpers (see the scanPtr variable
   and the Merge*Lists prototypes below); the code that fills it is not in
   this section of the file.
   NOTE(review): presumably specialCount/totalCount are posting counts and
   specialPtr/remainderPtr hold the loaded postings for one scanned term --
   confirm against the scan procedures. */
185 typedef struct scanData {
186   Int4          specialCount;
187   Int4          totalCount;
188   Int4          offset;
189   ByteStorePtr  specialPtr;
190   ByteStorePtr  remainderPtr;
191 } ScanData, PNTR ScanPtr;
192 
/* File-scope working state.  Of these, only cdMemUsage, eset, cachedExpr
   and cachedBsp are touched by the functions in this section of the file;
   the rest are not referenced here (presumably they belong to the scan and
   merge helpers declared below -- confirm). */
193 static Int2          db;
194 static Int2          fld;
195 
196 static DocUidPtr     uidPtr;
197 
198 static Int2          searchTermLen;
/* Memory budget; this initial value is overwritten by CdEntrezInit. */
199 static Int4          cdMemUsage = 32768;
200 
201 static ByteStorePtr  specialPost;
202 static ByteStorePtr  remainPost;
203 
204 static Char          selection [256];
205 static Char          wildcard [256];
206 static Char          topOfRange [256];
207 static Boolean       rangeScanning = FALSE;
208 
209 static ScanPtr       scanPtr;
210 static Int4          scanCount;
211 static Boolean       scanOk;
212 static CdTermProc    userScanProc;
213 
/* Head of the CdTerm list.  Allocated by CdEntrezInit (its term field holds
   the temporary file name) and released by CdEntrezFini; NULL means the
   module is not initialized. */
214 static CdTermPtr     eset;
215 
/* One-entry result cache used by CdEntTLEvalX: a private copy of the last
   evaluated expression and a copy of its resulting UID ByteStore.  Both are
   released by CdEntrezFini. */
216 static ValNodePtr    cachedExpr = NULL;
217 static ByteStorePtr  cachedBsp = NULL;
218 
/* Forward declarations of file-local helpers.  Apart from CdTLExprFree,
   their definitions are not in this section of the file.  NextNode and
   Expression are the entry points CdEntTLEvalX uses to evaluate a query. */
219 static void NEAR NextNode PROTO((void));
220 static ByteStorePtr NEAR Factor PROTO((void));
221 static ByteStorePtr NEAR Term PROTO((void));
222 static ByteStorePtr NEAR Diff PROTO((void));
223 static ByteStorePtr NEAR Expression PROTO((void));
224 static CdTermPtr NEAR FindTermNode PROTO((CharPtr term, DocType type, DocField field, CharPtr highRange));
225 static ValNodePtr CdTLExprFree PROTO((ValNodePtr elst));
226 
/* Helpers operating on the Posting record declared above. */
227 static PostingPtr NEAR NewPost PROTO((ByteStorePtr lst, Int4 defsize));
228 static PostingPtr NEAR FreePost PROTO((PostingPtr pst));
229 static Int4 NEAR PostLength PROTO((PostingPtr pst));
230 static void NEAR RewindPost PROTO((PostingPtr pst));
231 static DocUid NEAR ReadItem PROTO((PostingPtr pst));
232 static void NEAR WriteItem PROTO((PostingPtr pst, DocUid value));
233 static void NEAR FlushItems PROTO((PostingPtr pst));
234 static void NEAR SavePostingList PROTO((FILE *f, ByteStorePtr bsp));
235 
/* Term merging, small-list sorting and scan callbacks.  QuickSortSmall,
   CompressSmall and UniqueSmall are applied in that order by the
   CdEntrezCreateNamedUidList* functions. */
236 static Boolean NEAR CdEntrezMergeTerm PROTO((DocType type, DocField field, CharPtr term, Int4Ptr spcl, Int4Ptr totl, CdTermProc userProc));
237 static void NEAR SingleSpaces PROTO((CharPtr str));
238 static void NEAR TermTruncate PROTO((CharPtr str));
239 static void NEAR QuickSortSmall PROTO((DocUidPtr uids, Int4 l, Int4 r));
240 static Int4 NEAR CompressSmall PROTO((DocUidPtr uids, Int4 count));
241 static Int4 NEAR UniqueSmall PROTO((DocUidPtr uids, Int4 count));
242 static ByteStorePtr NEAR MergeSmallLists PROTO((ByteStorePtr bsp, ByteStorePtr small));
243 static Boolean NEAR MergeSeveralLists PROTO((Int4 i, Int4 count));
244 static Boolean NEAR MergeSeveralOrderedLists PROTO((Int4 i, Int4 count));
245 static Boolean NEAR MergeUnorderedLists PROTO((Int4 i, Int4 count));
246 static Boolean NEAR ProcessScanResults PROTO((void));
247 static Boolean  WildCardProc PROTO((CdTermPtr trmp));
248 static Boolean  ScanOnlyProc PROTO((CdTermPtr trmp));
249 static Boolean  ScanAndFreeProc PROTO((CdTermPtr trmp));
250 
251 /**** Moved from cdentrez.h ********************/
252 
253 static CdTermPtr NEAR CdEntrezCreateTerm PROTO((CharPtr term, DocType type, DocField field, ByteStorePtr special, ByteStorePtr remainder, CharPtr highRange));
254 static ByteStorePtr NEAR LoadPostingList PROTO((FILE *f, Int4 special, Int4 total));
255 static ByteStorePtr NEAR FreePostingList PROTO((ByteStorePtr lst));
256 static ByteStorePtr NEAR MergePostingLists PROTO((ByteStorePtr lst1, ByteStorePtr lst2));
257 static ByteStorePtr NEAR IntersectPostingLists PROTO((ByteStorePtr lst1, ByteStorePtr lst2));
258 static ByteStorePtr NEAR DifferencePostingLists PROTO((ByteStorePtr lst1, ByteStorePtr lst2));
259 
/* Cursor of the expression evaluator: CdEntTLEvalX points currNode at the
   query list and resets currChoice to NULLSYM before calling NextNode. */
260 static ValNodePtr currNode;
261 static Uint1 currChoice;
262 
263 /************************* moved from old cdml.c ****************************/
/* AsnTypePtr handles for Medline-entry fields, all initially NULL.
   NOTE(review): presumably resolved on first use through FindAsnType
   (declared below) by the Medline docsum reader -- that code is not in this
   section of the file; confirm. */
264 static AsnTypePtr  MEDLINE_ENTRY = NULL;
265 static AsnTypePtr  MEDLINE_ENTRY_cit = NULL;
266 static AsnTypePtr  MEDLINE_ENTRY_abstract = NULL;
267 static AsnTypePtr  TITLE_E_trans = NULL;
268 static AsnTypePtr  AUTH_LIST_names_ml_E = NULL;
269 static AsnTypePtr  AUTH_LIST_names_str_E = NULL;
270 static AsnTypePtr  DATE_STD_year = NULL;
271 static AsnTypePtr  DATE_str = NULL;
272 static AsnTypePtr  TITLE_E_name = NULL;
273 static AsnTypePtr  MEDLINE_ENTRY_mesh = NULL;
274 static AsnTypePtr  MEDLINE_ENTRY_substance = NULL;
275 static AsnTypePtr  MEDLINE_ENTRY_xref = NULL;
276 static AsnTypePtr  MEDLINE_ENTRY_idnum = NULL;
277 static AsnTypePtr  MEDLINE_ENTRY_gene = NULL;
278 
/* Helpers for building a Medline document summary (defined later). */
279 static DocSumPtr NEAR MedSumAsnRead PROTO((AsnIoPtr aip, DocUid uid));
280 static void NEAR StripAuthor PROTO((CharPtr author));
281 static void NEAR FindAsnType PROTO((AsnTypePtr PNTR atp, AsnModulePtr amp, CharPtr str));
282 
/* Old-style Medline docsum fetch; CdDocSum calls it for TYP_ML. */
283 static DocSumPtr NEAR CdEntMlSumGet PROTO((Int4 uid));
284 
285 /************************* moved from old cdseq.c ****************************/
/* AsnTypePtr handles for Seq-entry fields, all initially NULL.
   NOTE(review): presumably resolved on first use by the sequence docsum
   reader, which is not in this section of the file; confirm. */
286 static AsnTypePtr  SEQ_ENTRY = NULL;
287 static AsnTypePtr  SEQ_ENTRY_seq = NULL;
288 static AsnTypePtr  SEQ_ENTRY_set = NULL;
289 static AsnTypePtr  TEXTSEQ_ID_name = NULL;
290 static AsnTypePtr  TEXTSEQ_ID_accession = NULL;
291 static AsnTypePtr  SEQ_DESCR_E_title = NULL;
292 static AsnTypePtr  GIIMPORT_ID_id = NULL;
293 static AsnTypePtr  BIOSEQ_inst = NULL;
294 static AsnTypePtr  SEQ_INST_mol = NULL;
295 static AsnTypePtr  SEQ_INST_repr = NULL;
296 static AsnTypePtr  SEQ_ID_gibbsq = NULL;
297 static AsnTypePtr  SEQ_ID_gibbmt = NULL;
298 static AsnTypePtr  SEQ_ID_genbank = NULL;
299 static AsnTypePtr  SEQ_ID_gi = NULL;
300 static AsnTypePtr  SEQ_ID_embl = NULL;
301 static AsnTypePtr  SEQ_ID_ddbj = NULL;
302 static AsnTypePtr  SEQ_ID_pir = NULL;
303 static AsnTypePtr  SEQ_ID_swissprot = NULL;
304 static AsnTypePtr  PDB_BLOCK_compound_E = NULL;
305 static AsnTypePtr  PDB_SEQ_ID_MOL = NULL;
306 static AsnTypePtr  BIOSEQ_id = NULL;
307 static AsnTypePtr  BIOSEQ_id_E = NULL;
308 static AsnTypePtr  CIT_PAT_title = NULL;
309 
/* Old-style sequence docsum fetch; CdDocSum calls it for every type other
   than TYP_ML. */
310 static DocSumPtr NEAR CdEntSeqSumGet PROTO((Int4 uid, DocType type));
311 
/* Defined elsewhere.  Non-zero selects the new-style (cd3_*) backend in the
   dispatch functions below; set by CdEntrezInit, cleared by CdEntrezFini. */
312 extern int _nouveau;
313 
314 /*****************************************************************************
315 *
316 *   CdEntrezInit ()
317 *       Creates linked list of CdTerm nodes, creates temporary file for
318 *       postings lists, saves file name in first node.  When creating new
319 *       nodes, posting file is appended to temporary file, node offset then
320 *       points to temporary file location of posting information.
321 *
322 *****************************************************************************/
323 
CdEntrezInit(Boolean no_warnings)324 NLM_EXTERN Boolean  CdEntrezInit (Boolean no_warnings)
325 
326 {
327   FILE      *fp;
328   Char      str [PATH_MAX];
329   Boolean inited = FALSE;
330   CharPtr   prop;
331 
332 #ifdef Biostruc_supported
333 	objmmdb1AsnLoad ();
334 	objmmdb2AsnLoad ();
335 	objmmdb3AsnLoad ();
336 #endif
337 #ifdef _NEW_CdEntrez_
338 	_nouveau = GetAppParamBoolean("ncbi","CdEntrez","NewStyle",TRUE);
339 	if (_nouveau)
340 	{
341 		if (cd3_CdInit())
342 			inited = TRUE;
343 		else
344 			ErrLogPrintf("cd3_CdInit() failed\n");
345 	}
346 #endif
347 	/* In the dual OLD/NEW case, go on to try CdInit if cd3_CdInit failed */
348 #ifdef _OLD_CdEntrez_
349 	if (!inited)
350 	{
351 		if (CdInit())
352 		{
353 			inited = TRUE;
354 			_nouveau = FALSE;
355 		}
356 	}
357 #endif
358 	if (!inited)
359 		return FALSE;
360 
361 	eset = MemNew (sizeof (CdTerm));
362 	if (eset == NULL)
363 		return FALSE;
364 	eset->type = 255;   /* set to not used */
365     TmpNam (str);
366     eset->term = StringSave (str);
367 #ifdef WIN_MAC
368     FileCreate (str, "????", "NCBI");
369 #endif
370     fp = FileOpen (str, "wb");
371 	if (fp == NULL) {
372 		ErrPostEx (SEV_ERROR, ERR_CD_FILEOPEN, 0, "Unable to open temporary file %s", str);
373 		return FALSE;
374 	}
375     FileClose (fp);
376   if ((prop = (CharPtr) GetAppProperty("CdEntrezMemUsage")) != NULL)
377   {
378       long tmplong;
379 
380       sscanf(prop, "%ld", &tmplong);
381 
382       cdMemUsage = tmplong;
383   } else {
384       cdMemUsage = DEF_CDENTREZ_MEMUSAGE;
385   }
386       cdMemUsage = MIN(cdMemUsage, MAXALLOC);
387 	return TRUE;
388 }
389 
390 /*****************************************************************************
391 *
392 *   CdEntrezFini ()
393 *       Frees linked list of CdTerm nodes and removes temporary posting file.
394 *
395 *****************************************************************************/
396 
CdEntrezFini(void)397 NLM_EXTERN void  CdEntrezFini (void)
398 
399 {
400   CdTermPtr nxt;
401   Char      temp [PATH_MAX];
402 
403   if (eset != NULL) {
404     if (eset->term != NULL) {
405       StringCpy (temp, eset->term);
406       FileRemove (temp);
407     }
408     while (eset != NULL) {
409       nxt = eset->next;
410       CdTermFree (eset);
411       eset = nxt;
412     }
413   }
414   eset = NULL;
415 
416   cachedExpr = CdTLExprFree(cachedExpr);
417   cachedBsp = BSFree(cachedBsp);
418 
419 #ifdef _NEW_CdEntrez_
420 	if (_nouveau)
421 		cd3_CdFini();
422 #endif
423 #ifdef _OLD_CdEntrez_
424 	if (!_nouveau)
425 		CdFini();
426 #endif
427 	_nouveau = FALSE;
428 }
429 
430 
431 /*****************************************************************************
432 *
433 *   CdEntrezGetInfo ()
434 *
435 *****************************************************************************/
436 
CdEntrezGetInfo(void)437 NLM_EXTERN EntrezInfo* CdEntrezGetInfo (void)
438 {
439 	EntrezInfo *info = NULL;
440 
441 #ifdef _NEW_CdEntrez_
442 	if (_nouveau)
443 		info = cd3_CdGetInfo();
444 #endif
445 
446 #ifdef _OLD_CdEntrez_
447 	if (!_nouveau)
448 		info = CdGetInfo();
449 #endif
450 
451 	return info;
452 }
453 
454 /*****************************************************************************
455 *
456 *   CdEntrezDetailedInfo ()
457 *
458 *****************************************************************************/
459 
CdEntrezDetailedInfo(void)460 NLM_EXTERN char* CdEntrezDetailedInfo (void)
461 {
462 	char *info = NULL;
463 
464 #ifdef _NEW_CdEntrez_
465 	if (_nouveau)
466 		info = cd3_CdDetailedInfo();
467 #endif
468 
469 #ifdef _OLD_CdEntrez_
470 	if (!_nouveau)
471 		info = CdDetailedInfo();
472 #endif
473 
474 	return info;
475 }
476 
477 
478 /*****************************************************************************
479 *
480 *   CdEntGetMaxLinks()
481 *   	returns max links in link set allowed by system
482 *
483 *****************************************************************************/
CdEntGetMaxLinks(void)484 NLM_EXTERN Int4 CdEntGetMaxLinks (void)
485 
486 {
487 	return (Int4)(INT_MAX / sizeof(DocUid));
488 }
489 
490 /*****************************************************************************
491 *
492 *   CdEntrezCreateNamedUidList(term, type, field, num, uids)
493 *       Creates a term node in the entrez set structure if one does not
494 *       yet exist, and loads the posting file from the uid parameter.
495 *
496 *****************************************************************************/
CdEntrezCreateNamedUidList(CharPtr term,DocType type,DocField field,Int4 num,DocUidPtr uids)497 NLM_EXTERN void  CdEntrezCreateNamedUidList (CharPtr term, DocType type, DocField field, Int4 num, DocUidPtr uids)
498 
499 {
500   Int4          count;
501   ByteStorePtr  post;
502   Char          str [256];
503 
504   if (term != NULL && uids != NULL && num > 0 && num <= 16383) {
505     StringNCpy (str, term, sizeof (str) - 1);
506     post = BSNew (0);
507     if (post != NULL) {
508       count = (Int4) num;
509       QuickSortSmall (uids, 0, (Int4) (count - 1));
510       count = CompressSmall (uids, count);
511       count = UniqueSmall (uids, count);
512       BSWrite (post, uids, (Int4) (count * sizeof (DocUid)));
513       CdEntrezCreateTerm (str, type, field, NULL, post, NULL);
514       BSFree (post);
515     }
516   }
517 }
518 
519 /*****************************************************************************
520 *
521 *   CdEntrezCreateNamedUidListX(term, type, field, post)
522 *       Creates a term node in the entrez set structure if one does not
523 *       yet exist, and loads the posting file from the uid parameter.
524 *
525 *****************************************************************************/
CdEntrezCreateNamedUidListX(CharPtr term,DocType type,DocField field,ByteStorePtr bsp)526 NLM_EXTERN void  CdEntrezCreateNamedUidListX (CharPtr term, DocType type, DocField field, ByteStorePtr bsp)
527 
528 {
529   Int4          actual;
530   Int4          count;
531   ByteStorePtr  post;
532   ByteStorePtr  small;
533   Char          str [256];
534   DocUidPtr     uids;
535 
536   if (term != NULL && bsp != NULL) {
537     StringNCpy (str, term, sizeof (str) - 1);
538     post = BSNew (0);
539     if (post != NULL) {
540       uids = MemNew (4096 * sizeof (DocUid));
541       BSSeek (bsp, 0L, 0);
542       actual = BSRead (bsp, uids, (Int4) (4096 * sizeof (DocUid)));
543       while (actual > 0) {
544         count = (Int4) actual;
545         QuickSortSmall (uids, 0, (Int4) (count - 1));
546         count = CompressSmall (uids, count);
547         count = UniqueSmall (uids, count);
548         if (count > 0) {
549           small = BSNew (0L);
550           if (small != NULL) {
551             BSWrite (small, uids, count * sizeof (DocUid));
552             post = MergePostingLists (post, small);
553           }
554         }
555         actual = BSRead (bsp, uids, (Int4) (4096 * sizeof (DocUid)));
556       }
557       CdEntrezCreateTerm (str, type, field, NULL, post, NULL);
558       MemFree (uids);
559       BSFree (post);
560     }
561   }
562 }
563 
564 /*****************************************************************************
565 *
566 *   CdEntTLNew (type)
567 *       Creates linked list of asn nodes for constructing boolean query on
568 *       terms.  First node points to the EntrezSetNew-created structure that
569 *       maps terms to posting lists.  Remaining nodes contain symbols for AND,
570 *       OR, LEFT PARENTHESIS, RIGHT PARENTHESIS, or a SPECIAL or TOTAL term
571 *       specification.  The term specification nodes point to a CdTerm node
572 *       within the entrez set structure.
573 *
574 *****************************************************************************/
575 
CdEntTLNew(DocType type)576 NLM_EXTERN ValNodePtr  CdEntTLNew (DocType type)
577 
578 {
579   ValNodePtr anp;
580 
581   anp = NULL;
582   if (eset != NULL) {
583     anp = ValNodeNew (NULL);
584     if (anp != NULL) {
585       anp->choice = NULLSYM;
586       anp->data.ptrvalue = (Pointer) eset;
587 	  eset->type = type;
588     }
589   }
590   return anp;
591 }
592 
593 /*****************************************************************************
594 *
595 *   CdEntTLAddTerm (elst, term, type, field, special, highRange)
596 *       Adds a term node to a boolean algebraic term query.
597 *
598 *****************************************************************************/
599 
CdEntTLAddTerm(ValNodePtr elst,CharPtr term,DocType type,DocField field,Boolean special,CharPtr highRange)600 NLM_EXTERN ValNodePtr  CdEntTLAddTerm (ValNodePtr elst, CharPtr term, DocType type, DocField field, Boolean special, CharPtr highRange)
601 
602 {
603   ValNodePtr anp;
604   CdTermPtr  trmp;
605 
606   anp = NULL;
607   if (eset != NULL && elst != NULL) {
608 	if (type != eset->type)   /* mixed databases */
609 		return NULL;
610     anp = ValNodeNew (elst);
611     if (anp != NULL) {
612       if (special) {
613         anp->choice = SPECIALTERM;
614       } else {
615         anp->choice = TOTALTERM;
616       }
617       trmp = FindTermNode (term, type, field, highRange);
618       anp->data.ptrvalue = (Pointer) trmp;
619     }
620   }
621   return anp;
622 }
623 
624 /*****************************************************************************
625 *
626 *   CdEntTLFree (elst)
627 *       Frees a boolean algebraic term query list.
628 *
629 *****************************************************************************/
630 
CdEntTLFree(ValNodePtr elst)631 NLM_EXTERN ValNodePtr  CdEntTLFree (ValNodePtr elst)
632 
633 {
634   if (elst != NULL) {
635     ValNodeFree (elst);
636 	eset->type = 255;   /* set to nothing */
637   }
638   return NULL;
639 }
640 
641 /*****************************************************************************
642 *
643 *   CdTLExprFree(elst)
644 *
645 *   Free the CdEntrez-style expression, including all of its subordinate terms
646 ****************************************************************************/
CdTLExprFree(ValNodePtr elst)647 static ValNodePtr CdTLExprFree(ValNodePtr elst)
648 {
649   ValNodePtr np;
650   CdTermPtr tp;
651 
652   for (np = elst; np != NULL; np = np->next) {
653     switch (np->choice) {
654     case SPECIALTERM:
655     case TOTALTERM:
656       if ((tp = np->data.ptrvalue) != NULL) {
657         MemFree (tp->term);
658         MemFree (tp->highRange);
659         MemFree (tp);
660       }
661       break;
662     default:
663       break;
664     }
665   }
666 
667   ValNodeFree(elst);
668 
669   return NULL;
670 }
671 
672 
673 /*****************************************************************************
674 *
675 *   CdDupExpr(elst)
676 *
677 *   Duplicate the input CdEntrez-style expression
678 ****************************************************************************/
CdDupExpr(ValNodePtr elst)679 static ValNodePtr CdDupExpr(ValNodePtr elst)
680 {
681   ValNodePtr dup = NULL;
682   ValNodePtr trailing = NULL;
683   ValNodePtr np;
684   CdTermPtr tp1, tp2;
685 
686   for (; elst != NULL; elst = elst->next) {
687     np = ValNodeNew(NULL);
688     if (dup == NULL)
689       dup = np;
690     if (trailing != NULL)
691       trailing->next = np;
692     trailing = np;
693     np->choice = elst->choice;
694     switch (elst->choice) {
695     case SPECIALTERM:
696     case TOTALTERM:
697       tp2 = elst->data.ptrvalue;
698       if (tp2 != NULL)
699       {
700         tp1 = MemNew(sizeof(*tp1));
701         np->data.ptrvalue = tp1;
702         tp1->type = tp2->type;
703         tp1->field = tp2->field;
704         tp1->term = StringSave(tp2->term);
705         tp1->highRange = StringSave(tp2->highRange);
706       }
707       break;
708     default:
709       break;
710     }
711   }
712 
713   return dup;
714 }
715 
716 static Boolean
EqualTerms(CharPtr x,CharPtr y)717 EqualTerms (CharPtr x, CharPtr y)
718 {
719   if (x == NULL && y == NULL)
720     return TRUE;
721   if (x == NULL || y == NULL)
722     return FALSE;
723   return (StringICmp(x,y) == 0);
724 }
725 
726 
727 /*****************************************************************************
728 *
729 *   CdEntTLExprEqual (elst1, elst2)
730 *
731 *   Determine whether two CdEntrez-style boolean expressions are equal
732 ****************************************************************************/
733 
734 static Boolean
CdTLExprEqual(ValNodePtr elst1,ValNodePtr elst2)735 CdTLExprEqual (ValNodePtr elst1, ValNodePtr elst2)
736 {
737   Boolean equal = TRUE;
738   CdTermPtr c1, c2;
739 
740   for (; elst1 != NULL && elst2 != NULL && equal; elst1 = elst1->next,
741        elst2 = elst2->next) {
742     if (elst1->choice == elst2->choice) {
743       switch (elst1->choice) {
744         case SPECIALTERM:
745         case TOTALTERM:
746           c1 = elst1->data.ptrvalue;
747           c2 = elst2->data.ptrvalue;
748           equal = c1 != NULL && c2 != NULL && c1->type == c2->type &&
749                   c1->field == c2->field && EqualTerms(c1->term, c2->term) &&
750                   EqualTerms(c1->highRange, c2->highRange);
751           break;
752         default:
753           break;
754       }
755     } else {
756       equal = FALSE;
757     }
758   }
759 
760   return elst1 == NULL && elst2 == NULL && equal;
761 }
762 
763 
764 /*****************************************************************************
765 *
766 *   CdEntTLEvalCount (elst)
767 *       Evaluates a boolean algebraic term query list, returning the
768 *       count of resulting UIDs.
769 *
770 *****************************************************************************/
771 
CdEntTLEvalCount(ValNodePtr elst)772 NLM_EXTERN Int4 CdEntTLEvalCount (ValNodePtr elst)
773 {
774   ByteStorePtr bsp;
775   Int4         len;
776 
777   len = 0;
778   bsp = CdEntTLEvalX(elst);
779   if (bsp != NULL) {
780     len = BSLen(bsp) / sizeof(DocUid);
781     BSFree (bsp);
782   }
783   return len;
784 }
785 
786 
787 /*****************************************************************************
788 *
789 *   CdEntTLEvalX (elst)
790 *       Evaluates a boolean algebraic term query list, returning a pointer to
791 *       a ByteStore containing the resultant unique identifiers.  The number
792 *       of UIDs is calculated as BSLen (bsp) / sizeof (DocUid).
793 *
794 *****************************************************************************/
795 
CdEntTLEvalX(ValNodePtr elst)796 NLM_EXTERN ByteStorePtr  CdEntTLEvalX (ValNodePtr elst)
797 
798 {
799   ByteStorePtr bsp;
800 
801   bsp = NULL;
802   if (eset != NULL && elst != NULL) {
803     if (cachedExpr != NULL && CdTLExprEqual(elst, cachedExpr)) {
804       BSSeek(cachedBsp, 0L, SEEK_SET);
805       bsp = BSDup (cachedBsp);
806     } else {
807       cachedExpr = CdTLExprFree(cachedExpr);
808       cachedExpr = CdDupExpr(elst);
809       cachedBsp = BSFree(cachedBsp);
810 
811       currNode = elst;
812       currChoice = NULLSYM;
813       NextNode ();
814       if (eset->term != NULL && currNode != NULL) {
815         bsp = Expression ();
816         BSSeek(bsp, 0L, SEEK_SET);
817         cachedBsp = BSDup(bsp);
818       }
819 
820     }
821   }
822   return bsp;
823 }
824 
825 /*****************************************************************************
826 *
827 *   CdEntTLEval (elst)
828 *       Evaluates a boolean algebraic term query list, returning a pointer to
829 *       a LinkSet containing the resultant unique identifiers.
830 *
831 *****************************************************************************/
832 
CdEntTLEval(ValNodePtr elst)833 NLM_EXTERN LinkSetPtr  CdEntTLEval (ValNodePtr elst)
834 
835 {
836   ByteStorePtr bsp;
837   LinkSetPtr lsp = NULL;
838   Int4 numlinks;
839 
840   bsp = CdEntTLEvalX (elst);
841   if (bsp != NULL)
842 	{
843 		numlinks = BSLen(bsp) / sizeof(DocUid);
844 		lsp = LinkSetNew();
845 		lsp->num = numlinks;
846 		if (numlinks <= CdEntGetMaxLinks())
847 		{
848 			lsp->uids = MemNew((size_t)(numlinks * sizeof(DocUid)));
849 			BSSeek (bsp, 0L, 0);
850 			BSRead(bsp, lsp->uids, (numlinks * sizeof(DocUid)));
851 		}
852 		BSFree(bsp);
853 	}
854   return lsp;
855 }
856 
857 /*****************************************************************************
858 *
859 *   DocSumPtr CdDocSum(type, uid)
860 *
861 *****************************************************************************/
CdDocSum(DocType type,DocUid uid)862 NLM_EXTERN DocSumPtr  CdDocSum (DocType type, DocUid uid)
863 
864 {
865 	DocSum *sum = NULL;
866 
867 #ifdef _NEW_CdEntrez_
868 	if (_nouveau)
869 	{
870 		sum = CdGetDocSum(type,uid);
871 	}
872 #endif
873 
874 #ifdef _OLD_CdEntrez_
875 	if (!_nouveau)
876 	{
877 		if (type == TYP_ML)
878 			sum = CdEntMlSumGet(uid);
879 		else
880 			sum = CdEntSeqSumGet(uid, type);
881 	}
882 #endif
883 
884 	return sum;
885 }
886 
887 
#ifdef _NEW_CdEntrez_
/* Fetches document summaries for numuid uids of the given type.  Summaries
   that could be fetched are stored contiguously from the start of result
   (failed fetches leave no gap); the return value is how many were stored.
   result and uids must not be NULL. */
NLM_EXTERN int  CdDocSumListGet PROTO((DocSum **result, int numuid, DocType type, const DocUid *uids))
{
	int idx;
	int stored = 0;

	ASSERT(result != NULL);
	ASSERT(uids != NULL);

	for (idx = 0; idx < numuid; idx++)
	{
		/* the slot is only kept (stored advanced) when the fetch succeeded */
		result[stored] = CdGetDocSum(type, uids[idx]);
		if (result[stored] != NULL)
		{
			stored++;
		}
	}
	return stored;
}
#endif
909 
910 
911 
912 /*****************************************************************************
913 *
914 *   CdLinkUidList(type, link_to_type, numuid, uids)
915 *   	returns count of input uids processed
916 *       returns -1 on error
917 *       if neighbors (type == link_to_type)
918 *   		sums weights for same uids
919 *   	if (more than EntrezUserMaxLinks() uids, frees uids and weights,
920 *           but leaves num set)
921 *
922 *****************************************************************************/
CdLinkUidList(LinkSetPtr PNTR result,DocType type,DocType link_to_type,Int2 numuid,Int4Ptr uids,Boolean mark_missing)923 NLM_EXTERN Int2  CdLinkUidList (LinkSetPtr PNTR result, DocType type, DocType link_to_type, Int2 numuid, Int4Ptr uids, Boolean mark_missing)
924 {
925 	Int4 max_links = CdEntGetMaxLinks();
926 	Int4 count;
927 
928 #ifdef _NEW_CdEntrez_
929 	if (_nouveau)
930 		count = cd3_CdLinkUidGet(result,type,link_to_type,numuid,uids,mark_missing,max_links);
931 #endif
932 
933 #ifdef _OLD_CdEntrez_
934 	if (!_nouveau)
935 		count = CdLinkUidGet(result,type,link_to_type,numuid,uids,mark_missing,max_links);
936 #endif
937 
938 	return count;
939 }
940 
941 /*****************************************************************************
942 *
943 *   CdUidLinks()
944 *   	retrieves links to other uids
945 *
946 *****************************************************************************/
CdUidLinks(DocType type,DocUid uid,DocType link_to_type)947 NLM_EXTERN LinkSetPtr  CdUidLinks (DocType type, DocUid uid, DocType link_to_type)
948 {
949 	LinkSetPtr lsp = NULL;
950 	DocUid u = uid;
951 
952 #ifdef _NEW_CdEntrez_
953 	if (_nouveau)
954 	  cd3_CdLinkUidGet(&lsp,type,link_to_type,1,&u,FALSE,CdEntGetMaxLinks());
955 #endif
956 #ifdef _OLD_CdEntrez_
957 	if (!_nouveau)
958 	  CdLinkUidGet(&lsp,type,link_to_type,1,&u,FALSE,CdEntGetMaxLinks());
959 #endif
960 
961 	return lsp;
962 }
963 
964 static Boolean  TermListPageScanProc PROTO((CdTermPtr trmptr));
965 static Boolean  TermListTermScanProc PROTO((CdTermPtr trmptr));
966 static TermListProc trmproc;
967 static Int4 trmcount;
968 static Int4 trmmax;
969 static Boolean trmfound;
970 static Char trmfirst [80];
971 static Int4 the_first_page;
972 
973 /*****************************************************************************
974 *
975 *   CdTermListByPage (type, field, page, numpage, proc)
976 *   	Gets terms starting at page, for numpage, by calling proc
977 *   	returns number of complete pages read
978 *
979 *****************************************************************************/
CdTermListByPage(DocType type,DocField field,Int2 page,Int2 numpage,TermListProc proc)980 NLM_EXTERN Int2  CdTermListByPage (DocType type, DocField field, Int2 page, Int2 numpage, TermListProc proc)
981 
982 {
983 	trmproc = proc;
984 	if (trmproc != NULL) {
985 #ifdef _NEW_CdEntrez_
986 	if (_nouveau)
987 		return cd3_CdTermScan(type, field, page, numpage, TermListPageScanProc);
988 #endif
989 #ifdef _OLD_CdEntrez_
990 	if (!_nouveau)
991 		return CdTermScan(type, field, page, numpage, TermListPageScanProc);
992 #endif
993 	} else {
994 		return 0;
995 	}
996 
997 	return 0;
998 }
999 
1000 /*****************************************************************************
1001 *
1002 *   CdTermListByTerm (type, field, term, numterms, proc, first_page)
1003 *   	Gets Terms starting with at term
1004 *   	returns number of complete pages read
1005 *   	sets first_page to first page read
1006 *
1007 *****************************************************************************/
CdTermListByTerm(DocType type,DocField field,CharPtr term,Int2 numterms,TermListProc proc,Int2Ptr first_page)1008 NLM_EXTERN Int2  CdTermListByTerm (DocType type, DocField field, CharPtr term, Int2 numterms, TermListProc proc, Int2Ptr first_page)
1009 
1010 {
1011 	Int4  first;
1012 	Int4  rsult;
1013 
1014 	rsult = 0;
1015 #ifdef _NEW_CdEntrez_
1016 	if (_nouveau)
1017 		first = cd3_CdTrmLookup(type, field, term);
1018 #endif
1019 #ifdef _OLD_CdEntrez_
1020 	if (!_nouveau)
1021 		first = CdTrmLookup(type, field, term);
1022 #endif
1023 	the_first_page = first;
1024 	trmproc = proc;
1025 	trmcount = 0;
1026 	if (numterms > 0) {
1027 		trmmax = numterms;
1028 	} else {
1029 		trmmax = INT2_MAX;
1030 	}
1031 	trmfound = FALSE;
1032 	StringNCpy (trmfirst, term, sizeof (trmfirst) - 1);
1033 	if (trmproc != NULL) {
1034 #ifdef _NEW_CdEntrez_
1035 		if (_nouveau)
1036 			rsult = cd3_CdTermScan(type,field,first,0,TermListTermScanProc);
1037 #endif
1038 #ifdef _OLD_CdEntrez_
1039 		if (!_nouveau)
1040 			rsult = CdTermScan(type,field,first,0,TermListTermScanProc);
1041 #endif
1042 	}
1043 	if (first_page != NULL) {
1044 	  *first_page = the_first_page;
1045 	}
1046 	return rsult;
1047 }
1048 
1049 /*****************************************************************************
1050 *
1051 *   TermListPageScanProc(trmptr)
1052 *   	Callback for CdTermListByPage
1053 *
1054 *****************************************************************************/
TermListPageScanProc(CdTermPtr trmptr)1055 static Boolean  TermListPageScanProc(CdTermPtr trmptr)
1056 {
1057   Boolean ret = trmproc(trmptr->term,
1058                         trmptr->special_count, trmptr->total_count);
1059   MemFree(trmptr);
1060   return ret;
1061 }
1062 
1063 /*****************************************************************************
1064 *
1065 *   TermListTermScanProc(trmptr)
1066 *   	Callback for CdTermListByTerm
1067 *
1068 *****************************************************************************/
TermListTermScanProc(CdTermPtr trmptr)1069 static Boolean  TermListTermScanProc(CdTermPtr trmptr)
1070 {
1071   Boolean ret = TRUE;
1072   if (! trmfound) {
1073     if (MeshStringICmp (trmptr->term, trmfirst) >= 0) {
1074       trmfound = TRUE;
1075       the_first_page = trmptr->page;
1076     }
1077   }
1078   if (trmfound) {
1079     ret = trmproc(trmptr->term, trmptr->special_count, trmptr->total_count);
1080     trmcount++;
1081   } else {
1082     MemFree (trmptr->term);
1083   }
1084   MemFree(trmptr);
1085   return (ret && trmcount < trmmax);
1086 }
1087 
1088 /*****************************************************************************
1089 *
1090 *   CdEntrezFindTerm(type, field, term, spec, total)
1091 *   	returns count of special and total for a term
1092 *   	if term ends with  "...", does a truncated merge of the term
1093 *   	if term contains '*' or '?', does a wild card merge
1094 *
1095 *****************************************************************************/
CdEntrezFindTerm(DocType type,DocField field,CharPtr term,Int4Ptr spcl,Int4Ptr totl)1096 NLM_EXTERN Boolean  CdEntrezFindTerm (DocType type, DocField field, CharPtr term, Int4Ptr spcl, Int4Ptr totl)
1097 
1098 {
1099 	CharPtr tmp;
1100 	CdTermPtr ctp;
1101 
1102 	tmp = term;
1103 	while (*tmp != '\0')
1104 		tmp++;
1105 	tmp -= 3;
1106 	if ((*tmp == '.') && (*(tmp+1) == '.') && (*(tmp+2) == '.')) {
1107 		return CdEntrezMergeTerm (type, field, term, spcl, totl, NULL);
1108 	} else if (StringChr (term, '*') != NULL || StringChr (term, '?') != NULL) {
1109 		return CdEntrezMergeTerm (type, field, term, spcl, totl, WildCardProc);
1110 	} else {
1111 #ifdef _NEW_CdEntrez_
1112 		if (_nouveau)
1113 			ctp = cd3_CdTrmFind(type,field,term);
1114 #endif
1115 #ifdef _OLD_CdEntrez_
1116 		if (!_nouveau)
1117 			ctp = CdTrmFind(type,field,term);
1118 #endif
1119 		if (ctp == NULL)
1120 			return FALSE;
1121 		*spcl = ctp->special_count;
1122 		*totl = ctp->total_count;
1123 		CdTermFree(ctp);
1124 		return TRUE;
1125 	}
1126 }
1127 
1128 
1129 /*****************************************************************************
1130 *
1131 *   CdTermFree(trmp)
1132 *      frees a CdTerm structure
1133 *
1134 *****************************************************************************/
1135 
CdTermFree(CdTermPtr trmp)1136 NLM_EXTERN CdTermPtr  CdTermFree (CdTermPtr trmp)
1137 
1138 {
1139 	if (trmp == NULL)
1140 		return NULL;
1141 	if (trmp->term != NULL)
1142 		MemFree (trmp->term);
1143 	if (trmp->highRange != NULL)
1144 		MemFree (trmp->highRange);
1145 	return (CdTermPtr) MemFree(trmp);
1146 }
1147 
1148 
1149 
1150 
1151 /*****************************************************************************
1152 *
1153 *   Below are static functions local to this module
1154 *   ===============================================
1155 *
1156 *****************************************************************************/
1157 
1158 /*****************************************************************************
1159 *
1160 *   Functions to manipulate Boolean lists
1161 *
1162 *****************************************************************************/
1163 
1164 /*****************************************************************************
1165 *
1166 *   NextNode ()
1167 *       Advances to the next node in a term query list.
1168 *
1169 *****************************************************************************/
1170 
NextNode(void)1171 static void NEAR NextNode (void)
1172 
1173 {
1174   if (currNode != NULL) {
1175     currNode = currNode->next;
1176     if (currNode != NULL) {
1177       currChoice = currNode->choice;
1178     } else {
1179       currChoice = NULLSYM;
1180     }
1181   } else {
1182     currChoice = NULLSYM;
1183   }
1184 }
1185 
1186 /*****************************************************************************
1187 *
1188 *   Factor ()
1189 *       Processes individual term nodes or parenthetical expressions in a
1190 *       term query list.
1191 *
1192 *****************************************************************************/
1193 
Factor(void)1194 static ByteStorePtr NEAR Factor (void)
1195 
1196 {
1197   ByteStorePtr bsp;
1198   FILE         *fp;
1199   CdTermPtr    trmp;
1200 
1201   bsp = NULL;
1202   if (currChoice == LPAREN) {
1203     NextNode ();
1204     bsp = Expression ();
1205     if (currChoice != RPAREN) {
1206       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "Expected right parenthesis");
1207     } else {
1208       NextNode ();
1209     }
1210   } else if (currChoice == SPECIALTERM || currChoice == TOTALTERM) {
1211     if (currNode != NULL) {
1212       trmp = currNode->data.ptrvalue;
1213       if (trmp != NULL) {
1214         fp = FileOpen (eset->term, "rb");
1215         if (fp != NULL) {
1216           fseek (fp, trmp->offset, SEEK_SET);
1217           if (currChoice == SPECIALTERM) {
1218             bsp = LoadPostingList (fp, trmp->special_count, trmp->special_count);
1219           } else if (currChoice == TOTALTERM) {
1220             bsp = LoadPostingList (fp, trmp->special_count, trmp->total_count);
1221           }
1222           FileClose (fp);
1223         }
1224       }
1225     }
1226     NextNode ();
1227   } else {
1228     NextNode ();
1229   }
1230   return bsp;
1231 }
1232 
1233 /*****************************************************************************
1234 *
1235 *   Term ()
1236 *       Processes strings of ANDed term nodes in a term query list.
1237 *
1238 *****************************************************************************/
1239 
Term(void)1240 static ByteStorePtr NEAR Term (void)
1241 
1242 {
1243   ByteStorePtr bsp;
1244   ByteStorePtr fct;
1245 
1246   bsp = Factor ();
1247   while (currChoice == ANDSYMBL) {
1248     NextNode ();
1249     fct = Factor ();
1250     bsp = IntersectPostingLists (bsp, fct);
1251   }
1252   return bsp;
1253 }
1254 
1255 /*****************************************************************************
1256 *
1257 *   Diff ()
1258 *       Processes strings of ORed term nodes in a term query list.
1259 *
1260 *****************************************************************************/
1261 
Diff(void)1262 static ByteStorePtr NEAR Diff (void)
1263 
1264 {
1265   ByteStorePtr bsp;
1266   ByteStorePtr trm;
1267 
1268   bsp = Term ();
1269   while (currChoice == ORSYMBL) {
1270     NextNode ();
1271     trm = Term ();
1272     bsp = MergePostingLists (bsp, trm);
1273   }
1274   return bsp;
1275 }
1276 
1277 
1278 /*****************************************************************************
1279 *
1280 *   Expression ()
1281 *       Processes strings of BUTNOTed term nodes in a term query list.
1282 *
1283 *****************************************************************************/
1284 
Expression(void)1285 static ByteStorePtr NEAR Expression (void)
1286 
1287 {
1288   ByteStorePtr bsp;
1289   ByteStorePtr trm;
1290 
1291   bsp = Diff ();
1292   while (currChoice == BUTNOTSYMBL) {
1293     NextNode ();
1294     trm = Diff ();
1295     bsp = DifferencePostingLists (bsp, trm);
1296   }
1297   return bsp;
1298 }
1299 
1300 
1301 /*****************************************************************************
1302 *
1303 *   Low level functions to manipulate postings lists.
1304 *
1305 *****************************************************************************/
1306 
NewPost(ByteStorePtr lst,Int4 defsize)1307 static PostingPtr NEAR NewPost (ByteStorePtr lst, Int4 defsize)
1308 
1309 {
1310   PostingPtr  pst;
1311 
1312   pst = NULL;
1313   if (lst != NULL) {
1314     pst = MemNew (sizeof (Posting));
1315     if (pst != NULL) {
1316       pst->uids = lst;
1317       pst->buffer = NULL;
1318       if (defsize == 0) {
1319         pst->bufsize = (Int4) MIN (16384L, BSLen (lst));
1320       } else {
1321         pst->bufsize = (Int4) MIN (16384L, defsize);
1322       }
1323       pst->count = 0;
1324       pst->index = 0;
1325     }
1326   }
1327   return pst;
1328 }
1329 
FreePost(PostingPtr pst)1330 static PostingPtr NEAR FreePost (PostingPtr pst)
1331 
1332 {
1333   if (pst != NULL) {
1334     if (pst->uids != NULL) {
1335       BSFree (pst->uids);
1336     }
1337     if (pst->buffer != NULL) {
1338       MemFree (pst->buffer);
1339     }
1340     MemFree (pst);
1341   }
1342   return NULL;
1343 }
1344 
PostLength(PostingPtr pst)1345 static Int4 NEAR PostLength (PostingPtr pst)
1346 
1347 {
1348   Int4  k;
1349 
1350   k = 0;
1351   if (pst != NULL) {
1352     k = (Int4) (BSLen (pst->uids) / (Int4) sizeof (DocUid));
1353   }
1354   return k;
1355 }
1356 
RewindPost(PostingPtr pst)1357 static void NEAR RewindPost (PostingPtr pst)
1358 
1359 {
1360   if (pst != NULL) {
1361     if (pst->uids != NULL) {
1362       BSSeek (pst->uids, 0L, 0);
1363     }
1364     pst->count = 0;
1365     pst->index = 0;
1366   }
1367 }
1368 
ReadItem(PostingPtr pst)1369 static DocUid NEAR ReadItem (PostingPtr pst)
1370 
1371 {
1372   DocUid  rsult;
1373 
1374   rsult = INT4_MAX;
1375   if (pst != NULL && pst->uids != NULL) {
1376     if (pst->buffer == NULL) {
1377       pst->buffer = MemNew ((size_t) pst->bufsize);
1378       pst->count = 0;
1379       pst->index = 0;
1380     }
1381     if (pst->count <= 0) {
1382       pst->count = (Int4) BSRead (pst->uids, pst->buffer, pst->bufsize);
1383       pst->index = 0;
1384     }
1385     if (pst->count > 0) {
1386       rsult = pst->buffer [pst->index];
1387       (pst->index)++;
1388       (pst->count) -= sizeof (DocUid);
1389     }
1390   }
1391   return rsult;
1392 }
1393 
WriteItem(PostingPtr pst,DocUid value)1394 static void NEAR WriteItem (PostingPtr pst, DocUid value)
1395 
1396 {
1397   if (pst != NULL && pst->uids != NULL) {
1398     if (pst->buffer == NULL) {
1399       pst->buffer = MemNew ((size_t) pst->bufsize);
1400       pst->count = 0;
1401       pst->index = 0;
1402     }
1403     pst->buffer [pst->index] = value;
1404     (pst->index)++;
1405     (pst->count) += sizeof (DocUid);
1406     if (pst->count >= pst->bufsize) {
1407       BSWrite (pst->uids, pst->buffer, pst->count);
1408       pst->count = 0;
1409       pst->index = 0;
1410     }
1411   }
1412 }
1413 
FlushItems(PostingPtr pst)1414 static void NEAR FlushItems (PostingPtr pst)
1415 
1416 {
1417   if (pst != NULL && pst->uids != NULL && pst->buffer != NULL) {
1418     BSWrite (pst->uids, pst->buffer, pst->count);
1419     if (pst->buffer != NULL) {
1420       pst->buffer = MemFree (pst->buffer);
1421     }
1422     pst->count = 0;
1423     pst->index = 0;
1424   }
1425 }
1426 
MergePostingLists(ByteStorePtr lst1,ByteStorePtr lst2)1427 static ByteStorePtr NEAR MergePostingLists (ByteStorePtr lst1, ByteStorePtr lst2)
1428 
1429 {
1430   PostingPtr    buf1;
1431   PostingPtr    buf2;
1432   PostingPtr    buf3;
1433   Int4          k;
1434   Int4          k1;
1435   Int4          k2;
1436   DocUid        pstar;
1437   DocUid        qstar;
1438   ByteStorePtr  rsult;
1439 
1440   ProgMon ("MergePostingLists");
1441   rsult = NULL;
1442   if (lst1 != NULL && lst2 != NULL) {
1443     buf1 = NewPost (lst1, 0);
1444     buf2 = NewPost (lst2, 0);
1445     k1 = PostLength (buf1);
1446     k2 = PostLength (buf2);
1447     k = k1 + k2;
1448     rsult = BSNew (k * sizeof (DocUid));
1449     buf3 = NewPost (rsult, k * (Int4) sizeof (DocUid));
1450     if (rsult != NULL && buf1 != NULL && buf2 != NULL && buf3 != NULL) {
1451       RewindPost (buf1);
1452       RewindPost (buf2);
1453       pstar = ReadItem (buf1);
1454       qstar = ReadItem (buf2);
1455       while (k > 0) {
1456         if (pstar < qstar) {
1457           WriteItem (buf3, pstar);
1458           k--;
1459           pstar = ReadItem (buf1);
1460         } else if (qstar < pstar) {
1461           WriteItem (buf3, qstar);
1462           k--;
1463           qstar = ReadItem (buf2);
1464         } else {
1465           WriteItem (buf3, pstar);
1466           k -= 2;
1467           pstar = ReadItem (buf1);
1468           qstar = ReadItem (buf2);
1469         }
1470       }
1471       FlushItems (buf3);
1472     } else {
1473       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to merge");
1474     }
1475     if (buf1 != NULL) {
1476       FreePost (buf1);
1477     }
1478     if (buf2 != NULL) {
1479       FreePost (buf2);
1480     }
1481     if (buf3 != NULL) {
1482       buf3->uids = NULL;
1483       FreePost (buf3);
1484     }
1485   } else if (lst1 != NULL) {
1486     rsult = lst1;
1487   } else if (lst2 != NULL) {
1488     rsult = lst2;
1489   }
1490   return rsult;
1491 }
1492 
IntersectPostingLists(ByteStorePtr lst1,ByteStorePtr lst2)1493 static ByteStorePtr NEAR IntersectPostingLists (ByteStorePtr lst1, ByteStorePtr lst2)
1494 
1495 {
1496   PostingPtr    buf1;
1497   PostingPtr    buf2;
1498   PostingPtr    buf3;
1499   Int4          k;
1500   Int4          k1;
1501   Int4          k2;
1502   DocUid        pstar;
1503   DocUid        qstar;
1504   ByteStorePtr  rsult;
1505 
1506   ProgMon ("UnionPostingLists");
1507   rsult = NULL;
1508   if (lst1 != NULL && lst2 != NULL) {
1509     buf1 = NewPost (lst1, 0);
1510     buf2 = NewPost (lst2, 0);
1511     k1 = PostLength (buf1);
1512     k2 = PostLength (buf2);
1513     k = MIN (k1, k2);
1514     rsult = BSNew (k * sizeof (DocUid));
1515     buf3 = NewPost (rsult, k * (Int4) sizeof (DocUid));
1516     if (rsult != NULL && buf1 != NULL && buf2 != NULL && buf3 != NULL) {
1517       RewindPost (buf1);
1518       RewindPost (buf2);
1519       pstar = ReadItem (buf1);
1520       qstar = ReadItem (buf2);
1521       while (k1 > 0 && k2 > 0) {
1522         if (pstar < qstar) {
1523           k1--;
1524           pstar = ReadItem (buf1);
1525         } else if (qstar < pstar) {
1526           k2--;
1527           qstar = ReadItem (buf2);
1528         } else {
1529           WriteItem (buf3, pstar);
1530           k1--;
1531           k2--;
1532           pstar = ReadItem (buf1);
1533           qstar = ReadItem (buf2);
1534         }
1535       }
1536       FlushItems (buf3);
1537     } else {
1538       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to intersect");
1539     }
1540     if (buf1 != NULL) {
1541       FreePost (buf1);
1542     }
1543     if (buf2 != NULL) {
1544       FreePost (buf2);
1545     }
1546     if (buf3 != NULL) {
1547       buf3->uids = NULL;
1548       FreePost (buf3);
1549     }
1550   }
1551   return rsult;
1552 }
1553 
DifferencePostingLists(ByteStorePtr lst1,ByteStorePtr lst2)1554 static ByteStorePtr NEAR DifferencePostingLists (ByteStorePtr lst1, ByteStorePtr lst2)
1555 
1556 {
1557   PostingPtr    buf1;
1558   PostingPtr    buf2;
1559   PostingPtr    buf3;
1560   Int4          k;
1561   Int4          k1;
1562   Int4          k2;
1563   DocUid        pstar;
1564   DocUid        qstar;
1565   ByteStorePtr  rsult;
1566 
1567   ProgMon ("DiffPostingLists");
1568   rsult = NULL;
1569   if (lst1 != NULL && lst2 != NULL) {
1570     buf1 = NewPost (lst1, 0);
1571     buf2 = NewPost (lst2, 0);
1572     k1 = PostLength (buf1);
1573     k2 = PostLength (buf2);
1574     k = k1 + k2;
1575     rsult = BSNew (k * sizeof (DocUid));
1576     buf3 = NewPost (rsult, k * (Int4) sizeof (DocUid));
1577     if (rsult != NULL && buf1 != NULL && buf2 != NULL && buf3 != NULL) {
1578       RewindPost (buf1);
1579       RewindPost (buf2);
1580       pstar = ReadItem (buf1);
1581       qstar = ReadItem (buf2);
1582       while (k > 0) {
1583         if (pstar < qstar) {
1584           WriteItem (buf3, pstar);
1585           k--;
1586           pstar = ReadItem (buf1);
1587         } else if (qstar < pstar) {
1588           k--;
1589           qstar = ReadItem (buf2);
1590         } else {
1591           k -= 2;
1592           pstar = ReadItem (buf1);
1593           qstar = ReadItem (buf2);
1594         }
1595       }
1596       FlushItems (buf3);
1597     } else {
1598       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to difference");
1599     }
1600     if (buf1 != NULL) {
1601       FreePost (buf1);
1602     }
1603     if (buf2 != NULL) {
1604       FreePost (buf2);
1605     }
1606     if (buf3 != NULL) {
1607       buf3->uids = NULL;
1608       FreePost (buf3);
1609     }
1610   } else if (lst1 != NULL) {
1611     rsult = lst1;
1612   }
1613   return rsult;
1614 }
1615 
FreePostingList(ByteStorePtr lst)1616 static ByteStorePtr NEAR FreePostingList (ByteStorePtr lst)
1617 
1618 {
1619   if (lst != NULL) {
1620     BSFree (lst);
1621   }
1622   return NULL;
1623 }
1624 
LoadPostingList(FILE * f,Int4 special,Int4 total)1625 static ByteStorePtr NEAR LoadPostingList (FILE *f, Int4 special, Int4 total)
1626 
1627 {
1628   VoidPtr       bufr;
1629   Int4          cnt;
1630   Int4          cntr;
1631   Int4          k1;
1632   Int4          k2;
1633   ByteStorePtr  lst1;
1634   ByteStorePtr  lst2;
1635   ByteStorePtr  rsult;
1636 
1637   rsult = NULL;
1638   if (f != NULL && special >= 0 && total >= 0) {
1639     bufr = MemNew (MAX_CDENTREZ_BYTESTORE * sizeof (DocUid));
1640     if (bufr != NULL) {
1641       k1 = special;
1642       k2 = total - special;
1643       lst1 = BSNew (k1 * sizeof (DocUid));
1644       if (lst1 != NULL) {
1645         cntr = k1;
1646         cnt = MIN (k1, (long) MAX_CDENTREZ_BYTESTORE);
1647         while (cnt > 0) {
1648           FileRead (bufr, sizeof (DocUid), (size_t) cnt, f);
1649           BSWrite (lst1, bufr, cnt * sizeof (DocUid));
1650           cntr -= cnt;
1651           cnt = MIN (cntr, (long) MAX_CDENTREZ_BYTESTORE);
1652         }
1653       } else {
1654         ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to load");
1655       }
1656       lst2 = BSNew (k2 * sizeof (DocUid));
1657       if (lst2 != NULL) {
1658         cntr = k2;
1659         cnt = MIN (k2, (long) MAX_CDENTREZ_BYTESTORE);
1660         while (cnt > 0) {
1661           FileRead (bufr, sizeof (DocUid), (size_t) cnt, f);
1662           BSWrite (lst2, bufr, cnt * sizeof (DocUid));
1663           cntr -= cnt;
1664           cnt = MIN (cntr, (long) MAX_CDENTREZ_BYTESTORE);
1665         }
1666       } else {
1667         ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to load");
1668       }
1669       rsult = MergePostingLists (lst1, lst2);
1670     }
1671     MemFree (bufr);
1672   }
1673   return rsult;
1674 }
1675 
1676 /*****************************************************************************
1677 *
1678 *   CdEntrezCreateTerm (term, type, field, special, remainder)
1679 *       Creates a CdTerm node in the entrez set structure if one does not yet
1680 *       exist, and loads the posting file from two ByteStorePtr posting lists.
1681 *
1682 *****************************************************************************/
1683 
SavePostingList(FILE * f,ByteStorePtr bsp)1684 static void NEAR SavePostingList (FILE *f, ByteStorePtr bsp)
1685 
1686 {
1687   VoidPtr  bufr;
1688   Int4     cnt;
1689   Int4     cntr;
1690 
1691   if (f != NULL && bsp != NULL) {
1692     bufr = MemNew (MAX_CDENTREZ_BYTESTORE * sizeof (DocUid));
1693     if (bufr != NULL) {
1694       cntr = (BSLen (bsp) / (Int4) sizeof (DocUid));
1695       cnt = MIN (cntr, (long) MAX_CDENTREZ_BYTESTORE);
1696       BSSeek (bsp, 0L, 0);
1697       while (cnt > 0) {
1698         BSRead (bsp, bufr, cnt * sizeof (DocUid));
1699         FileWrite (bufr, sizeof (DocUid), (size_t) cnt, f);
1700         cntr -= cnt;
1701         cnt = MIN (cntr, (long) MAX_CDENTREZ_BYTESTORE);
1702       }
1703     }
1704     MemFree (bufr);
1705   }
1706 }
1707 
CdEntrezCreateTerm(CharPtr term,DocType type,DocField field,ByteStorePtr special,ByteStorePtr remainder,CharPtr highRange)1708 static CdTermPtr NEAR CdEntrezCreateTerm (CharPtr term, DocType type, DocField field, ByteStorePtr special, ByteStorePtr remainder, CharPtr highRange)
1709 
1710 {
1711   FILE      *fp;
1712   Boolean   goOn;
1713   CdTermPtr last;
1714   Int4      remainderCount;
1715   Int4      specialCount;
1716   CdTermPtr trmp;
1717 
1718   trmp = NULL;
1719   if (eset != NULL && term != NULL) {
1720     trmp = eset->next;
1721     last = eset;
1722     goOn = TRUE;
1723     while (trmp != NULL && goOn) {
1724       if (trmp->type == type && trmp->field == field &&
1725           EqualTerms (trmp->term, term) &&
1726           EqualTerms (trmp->highRange, highRange)) {
1727         goOn = FALSE;
1728       } else {
1729         last = trmp;
1730         trmp = trmp->next;
1731       }
1732     }
1733     if (goOn) {
1734       trmp = MemNew (sizeof (CdTerm));
1735       if (trmp != NULL) {
1736         specialCount = 0;
1737         remainderCount = 0;
1738         if (special != NULL) {
1739           specialCount = (BSLen (special) / (Int4) sizeof (DocUid));
1740         }
1741         if (remainder != NULL) {
1742           remainderCount = (BSLen (remainder) / (Int4) sizeof (DocUid));
1743         }
1744         trmp->type = type;
1745         trmp->field = field;
1746         trmp->term = StringSave (term);
1747         trmp->special_count = specialCount;
1748         trmp->total_count = specialCount + remainderCount;
1749         trmp->highRange = StringSave(highRange);
1750         trmp->next = NULL;
1751         last->next = trmp;
1752         fp = FileOpen (eset->term, "ab");
1753         if (fp != NULL) {
1754           fseek (fp, 0, SEEK_END);
1755           trmp->offset = ftell (fp);
1756           SavePostingList (fp, special);
1757           SavePostingList (fp, remainder);
1758           FileClose (fp);
1759         } else {
1760           trmp->offset = 0;
1761         }
1762       }
1763     }
1764   }
1765   return trmp;
1766 }
1767 
1768 /*****************************************************************************
1769 *
1770 *   FindTermNode (term, type, field, highRange)
1771 *       Returns a pointer to a CdTerm node in the entrez set structure,
1772 *       creating the node and loading the posting file, if necessary.  The
1773 *       value of the offset field becomes the offset into the temporary file.
1774 *
1775 *****************************************************************************/
1776 
FindTermNode(CharPtr term,DocType type,DocField field,CharPtr highRange)1777 static CdTermPtr NEAR FindTermNode (CharPtr term, DocType type, DocField field, CharPtr highRange)
1778 
1779 {
1780   FILE      *fp;
1781   Boolean   goOn;
1782   CdTermPtr last;
1783   Int4      offset;
1784   Int4      remain;
1785   Int4      special;
1786   CharPtr   tmp;
1787   Int4      total;
1788   CdTermPtr trmp;
1789 
1790   trmp = NULL;
1791   if (eset != NULL && term != NULL) {
1792     trmp = eset->next;
1793     last = eset;
1794     goOn = TRUE;
1795     while (trmp != NULL && goOn) {
1796       if (trmp->type == type && trmp->field == field &&
1797           EqualTerms (trmp->term, term) &&
1798           EqualTerms (trmp->highRange, highRange)) {
1799         goOn = FALSE;
1800       } else {
1801         last = trmp;
1802         trmp = trmp->next;
1803       }
1804     }
1805     if (goOn) {
1806       tmp = term;
1807       while (*tmp != '\0')
1808         tmp++;
1809       tmp -= 3;
1810       rangeScanning = FALSE;
1811       if (highRange != NULL) {
1812           rangeScanning = TRUE;
1813           StrNCpy(topOfRange, highRange, sizeof(topOfRange));
1814           CdEntrezMergeTerm (type, field, term, NULL, NULL, NULL);
1815       } else {
1816         if ((*tmp == '.') && (*(tmp+1) == '.') && (*(tmp+2) == '.')) {
1817           CdEntrezMergeTerm (type, field, term, NULL, NULL, NULL);
1818         } else if (StringChr (term, '*') != NULL || StringChr (term, '?') != NULL) {
1819           CdEntrezMergeTerm (type, field, term, NULL, NULL, WildCardProc);
1820         }
1821       }
1822       trmp = eset->next;
1823       last = eset;
1824       goOn = TRUE;
1825       while (trmp != NULL && goOn) {
1826         if (trmp->type == type && trmp->field == field &&
1827             EqualTerms (trmp->term, term) &&
1828             EqualTerms (trmp->highRange, highRange)) {
1829           goOn = FALSE;
1830         } else {
1831           last = trmp;
1832           trmp = trmp->next;
1833         }
1834       }
1835     }
1836     if (goOn) {
1837 #ifdef _NEW_CdEntrez_
1838 		if (_nouveau)
1839 			trmp = cd3_CdTrmFind(type,field,term);
1840 #endif
1841 #ifdef _OLD_CdEntrez_
1842 		if (!_nouveau)
1843 			trmp = CdTrmFind(type,field,term);
1844 #endif
1845       if (trmp != NULL) {
1846         if (field != FLD_ORGN) {
1847           last->next = trmp;
1848           fp = FileOpen (eset->term, "rb");
1849           if (fp != NULL) {
1850             fseek (fp, 0, SEEK_END);
1851             offset = ftell (fp);
1852             FileClose (fp);
1853           } else {
1854             offset = 0;
1855           }
1856 #ifdef _NEW_CdEntrez_
1857           if (_nouveau)
1858             cd3_CdTrmUidsFil (type, field, trmp->offset, trmp->total_count, eset->term, TRUE);
1859 #endif
1860 #ifdef _OLD_CdEntrez_
1861           if (!_nouveau)
1862             CdTrmUidsFil (type, field, trmp->offset, trmp->total_count, eset->term, TRUE);
1863 #endif
1864           trmp->offset = offset;
1865         } else {
1866           db = type;
1867           fld = field;
1868           uidPtr = MemNew ((size_t) MAX_CDENTREZ_UID_LIST_SIZE);
1869           if (uidPtr != NULL) {
1870             scanPtr = MemNew (SCAN_MAX * sizeof (ScanData));
1871             if (scanPtr != NULL) {
1872               scanOk = TRUE;
1873               scanCount = 0;
1874               specialPost = NULL;
1875               remainPost = NULL;
1876               ScanOnlyProc (trmp);
1877               if (scanCount > 0) {
1878                 ProcessScanResults ();
1879               }
1880               if (specialPost != NULL && remainPost != NULL) {
1881                 remainPost = DifferencePostingLists (remainPost, specialPost);
1882               }
1883               if (specialPost == NULL) {
1884                 specialPost = BSNew (0);
1885               }
1886               if (remainPost == NULL) {
1887                 remainPost = BSNew (0);
1888               }
1889               special = BSLen (specialPost) / sizeof (DocUid);
1890               remain = BSLen (remainPost) / sizeof (DocUid);
1891               total = special + remain;
1892               scanPtr = MemFree (scanPtr);
1893             }
1894             uidPtr = MemFree (uidPtr);
1895             if (scanOk && total > 0) {
1896               trmp = CdTermFree (trmp);
1897               trmp = CdEntrezCreateTerm (term, db, fld, specialPost, remainPost, highRange);
1898             }
1899             specialPost = BSFree (specialPost);
1900             remainPost = BSFree (remainPost);
1901           }
1902         }
1903       }
1904     }
1905   }
1906   return trmp;
1907 }
1908 
1909 /*****************************************************************************
1910 *
1911 *   CdEntrezPreloadMerge (term, type, field, spcl, totl)
1912 *       Creates a CdTerm node in the entrez set structure if one does not yet
1913 *       exist, and loads the posting file by merging multiple postings files.
1914 *
1915 *****************************************************************************/
1916 
SingleSpaces(CharPtr str)1917 static void NEAR SingleSpaces (CharPtr str)
1918 
1919 {
1920   Char  ch;
1921   Int2  i;
1922   Int2  j;
1923   Int2  k;
1924 
1925   i = 0;
1926   j = 0;
1927   k = 0;
1928   ch = str [i];
1929   while (ch != '\0') {
1930     if (ch == ' ') {
1931       if (k == 0) {
1932         str [j] = ch;
1933         j++;
1934       }
1935       k++;
1936       i++;
1937     } else {
1938       k = 0;
1939       str [j] = ch;
1940       i++;
1941       j++;
1942     }
1943     ch = str [i];
1944   }
1945   str [j] = '\0';
1946 }
1947 
TermTruncate(CharPtr str)1948 static void NEAR TermTruncate (CharPtr str)
1949 
1950 {
1951   if (str != NULL && str [0] != '\0') {
1952     SingleSpaces (str);
1953     if (searchTermLen < (Int2) StringLen (str)) {
1954       str [searchTermLen] = '\0';
1955     }
1956   }
1957 }
1958 
HeapCompare(VoidPtr ptr1,VoidPtr ptr2)1959 static int LIBCALLBACK HeapCompare (VoidPtr ptr1, VoidPtr ptr2)
1960 
1961 {
1962   DocUidPtr  uid1;
1963   DocUidPtr  uid2;
1964 
1965   if (ptr1 != NULL && ptr2 != NULL) {
1966     uid1 = (DocUidPtr) ptr1;
1967     uid2 = (DocUidPtr) ptr2;
1968     if (*uid1 > *uid2) {
1969       return 1;
1970     } else if (*uid1 < *uid2) {
1971       return -1;
1972     } else {
1973       return 0;
1974     }
1975   } else {
1976     return 0;
1977   }
1978 }
1979 
QuickSortSmall(DocUidPtr uids,Int4 l,Int4 r)1980 static void NEAR QuickSortSmall (DocUidPtr uids, Int4 l, Int4 r)
1981 
1982 {
1983   HeapSort (uids + l, (size_t) (r - l + 1), sizeof (DocUid), HeapCompare);
1984 }
1985 
1986 /*
1987 static Boolean NEAR AlreadyInOrder (DocUidPtr uids, Int4 l, Int4 r)
1988 
1989 {
1990   DocUid   last;
1991   Boolean  rsult;
1992 
1993   rsult = TRUE;
1994   if (l < r) {
1995     last = 0;
1996     while (l <= r) {
1997       if (uids [l] < last) {
1998         rsult = FALSE;
1999       }
2000       last = uids [l];
2001       l++;
2002     }
2003   }
2004   return rsult;
2005 }
2006 
2007 static void NEAR QuickSortSmall (DocUidPtr uids, Int4 l, Int4 r)
2008 
2009 {
2010   DocUid  a;
2011   DocUid  b;
2012   DocUid  c;
2013   Int4    i;
2014   Int4    j;
2015   DocUid  temp;
2016   DocUid  x;
2017 
2018   if (AlreadyInOrder (uids, l, r)) {
2019     return;
2020   }
2021   i = l;
2022   j = r;
2023   a = uids [l];
2024   b = uids [(l + r) / 2];
2025   c = uids [r];
2026   if (a > b) {
2027     if (c > a) {
2028       x = a;
2029     } else if (c < b) {
2030       x = b;
2031     } else {
2032       x = c;
2033     }
2034   } else {
2035     if (c < a) {
2036       x = a;
2037     } else if (c > b) {
2038       x = b;
2039     } else {
2040       x = c;
2041     }
2042   }
2043   do {
2044     while (uids [i] < x) {
2045       i++;
2046     }
2047     while (x < uids [j]) {
2048       j--;
2049     }
2050     if (i <= j) {
2051       temp = uids [i];
2052       uids [i] = uids [j];
2053       uids [j] = temp;
2054       i++;
2055       j--;
2056     }
2057   } while (i <= j);
2058   if (i - l < r - j) {
2059     if (l < j) {
2060       QuickSortSmall (uids, l, j);
2061     }
2062     if (i < r) {
2063       QuickSortSmall (uids, i, r);
2064     }
2065   } else {
2066     if (i < r) {
2067       QuickSortSmall (uids, i, r);
2068     }
2069     if (l < j) {
2070       QuickSortSmall (uids, l, j);
2071     }
2072   }
2073 }
2074 */
2075 
CompressSmall(DocUidPtr uids,Int4 count)2076 static Int4 NEAR CompressSmall (DocUidPtr uids, Int4 count)
2077 
2078 {
2079   Int4  i;
2080   Int4  j;
2081 
2082   i = 0;
2083   j = 0;
2084   while (i < count) {
2085     if (uids [i] > 0) {
2086       uids [j] = uids [i];
2087       i++;
2088       j++;
2089     } else {
2090       i++;
2091     }
2092   }
2093   i = j;
2094   while (j < count) {
2095     uids [j] = 0;
2096     j++;
2097   }
2098   return i;
2099 }
2100 
UniqueSmall(DocUidPtr uids,Int4 count)2101 static Int4 NEAR UniqueSmall (DocUidPtr uids, Int4 count)
2102 
2103 {
2104   Int4    i;
2105   Int4    j;
2106   DocUid  last;
2107 
2108   i = 0;
2109   if (count <= 1) {
2110     i = count;
2111   } else {
2112     i = 0;
2113     j = 0;
2114     last = 0;
2115     while (i < count) {
2116       if (uids [i] != last) {
2117         uids [j] = uids [i];
2118         last = uids [i];
2119         i++;
2120         j++;
2121       } else {
2122         i++;
2123       }
2124     }
2125     i = j;
2126     while (j < count) {
2127       uids [j] = 0;
2128       j++;
2129     }
2130   }
2131   return i;
2132 }
2133 
MergeSmallLists(ByteStorePtr bsp,ByteStorePtr small)2134 static ByteStorePtr NEAR MergeSmallLists (ByteStorePtr bsp, ByteStorePtr small)
2135 
2136 {
2137   Int4       count;
2138   Int4       len;
2139   DocUidPtr  uids;
2140 
2141   if (small != NULL) {
2142     len = BSLen (small) / (Int4) sizeof (DocUid);
2143     if (len <= (long) MAX_CDENTREZ_SMALL_LIST && len > 0) {
2144       count = (Int4) len;
2145       uids = MemNew ((size_t) count * sizeof (DocUid));
2146       if (uids != NULL) {
2147         BSMerge (small, (VoidPtr) uids);
2148         small = BSFree (small);
2149         QuickSortSmall (uids, 0, (Int4) (count - 1));
2150         count = CompressSmall (uids, count);
2151         count = UniqueSmall (uids, count);
2152         if (count > 0) {
2153           small = BSNew (0L);
2154           BSWrite (small, uids, count * sizeof (DocUid));
2155         }
2156         uids = MemFree (uids);
2157         if (small != NULL) {
2158           bsp = MergePostingLists (bsp, small);
2159         }
2160       } else {
2161         ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "MergeSmallLists memory failure");
2162       }
2163     } else if (len > (long) MAX_CDENTREZ_SMALL_LIST) {
2164       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "MergeSmallLists > %d", MAX_CDENTREZ_SMALL_LIST);
2165     }
2166   }
2167   return bsp;
2168 }
2169 
MergeUnorderedLists(Int4 i,Int4 count)2170 static Boolean NEAR MergeUnorderedLists (Int4 i, Int4 count)
2171 
2172 {
2173   BytePtr       bptr;
2174   Int4          finish;
2175   Boolean       goOn;
2176   Int4          j;
2177   Int4          len;
2178   Int4          max;
2179   DocUidPtr     mptr;
2180   Int4          number;
2181   Int4          offset;
2182   ByteStorePtr  remainLarge;
2183   ByteStorePtr  remainSmall;
2184   Int4          smallCount;
2185   Int4          start;
2186   Int4          total;
2187 
2188   goOn = TRUE;
2189   j = i + count - 1;
2190   max = scanPtr [j].offset + scanPtr [j].totalCount *
2191         (Int4) sizeof (DocUid) - scanPtr [i].offset;
2192   if (max <= MAX_CDENTREZ_UID_LIST_SIZE) {
2193     offset = scanPtr [i].offset;
2194     len = (Int4) (max / (Int4) sizeof (DocUid));
2195 #ifdef _NEW_CdEntrez_
2196     if (_nouveau)
2197       cd3_CdTrmUidsMem (db, fld, offset, (Int4) len, uidPtr);
2198 #endif
2199 #ifdef _OLD_CdEntrez_
2200     if (!_nouveau)
2201       CdTrmUidsMem (db, fld, offset, (Int4) len, uidPtr);
2202 #endif
2203     remainSmall = NULL;
2204     smallCount = 0;
2205     for (j = i; j < i + count; j++) {
2206       scanPtr [j].offset -= offset;
2207       total = scanPtr [j].totalCount;
2208       bptr = ((BytePtr) uidPtr) + scanPtr [j].offset;
2209       mptr = (DocUidPtr) bptr;
2210       if (smallCount + total > MAX_CDENTREZ_SMALL_LIST) {
2211         if (remainSmall != NULL) {
2212           remainPost = MergeSmallLists (remainPost, remainSmall);
2213           remainSmall = NULL;
2214         }
2215         smallCount = 0;
2216       }
2217       if (total > 100) {
2218         start = 0;
2219         number = 0;
2220         while (start < total) {
2221           finish = start + 1;
2222           while (finish < total && mptr [finish - 1] < mptr [finish]) {
2223             finish++;
2224           }
2225           number = finish - start;
2226           if (number > 100) {
2227             remainLarge = BSNew (number * sizeof (DocUid));
2228             BSWrite (remainLarge, (mptr + start), number * sizeof (DocUid));
2229             remainPost = MergePostingLists (remainPost, remainLarge);
2230           } else {
2231             smallCount += number;
2232             if (number > 0) {
2233               if (remainSmall == NULL) {
2234                 remainSmall = BSNew (0L);
2235               }
2236               BSWrite (remainSmall, (mptr + start), number * sizeof (DocUid));
2237             }
2238             if (smallCount > MAX_CDENTREZ_SMALL_LIST) {
2239               if (remainSmall != NULL) {
2240                 remainPost = MergeSmallLists (remainPost, remainSmall);
2241                 remainSmall = NULL;
2242               }
2243               smallCount = 0;
2244             }
2245           }
2246           start = finish;
2247         }
2248       } else {
2249         smallCount += total;
2250         if (total > 0) {
2251           if (remainSmall == NULL) {
2252             remainSmall = BSNew (0L);
2253           }
2254           BSWrite (remainSmall, mptr, total * sizeof (DocUid));
2255         }
2256       }
2257     }
2258     if (remainSmall != NULL) {
2259       remainPost = MergeSmallLists (remainPost, remainSmall);
2260       remainSmall = NULL;
2261     }
2262   } else {
2263     ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "Cannot merge > 32 K element");
2264     scanOk = FALSE;
2265     goOn = FALSE;
2266   }
2267   return goOn;
2268 }
2269 
MergeSeveralOrderedLists(Int4 i,Int4 count)2270 static Boolean NEAR MergeSeveralOrderedLists (Int4 i, Int4 count)
2271 
2272 {
2273   BytePtr       bptr;
2274   Boolean       goOn;
2275   Int4          j;
2276   Int4          len;
2277   Int4          max;
2278   DocUidPtr     mptr;
2279   Int4          offset;
2280   Int4          remainder;
2281   ByteStorePtr  remainLarge;
2282   ByteStorePtr  remainSmall;
2283   Int4          smallCount;
2284   Int4          special;
2285   ByteStorePtr  specialLarge;
2286   ByteStorePtr  specialSmall;
2287   Int4          total;
2288 
2289   goOn = TRUE;
2290   j = i + count - 1;
2291   max = scanPtr [j].offset + scanPtr [j].totalCount *
2292         (Int4) sizeof (DocUid) - scanPtr [i].offset;
2293   if (max <= MAX_CDENTREZ_UID_LIST_SIZE) {
2294     offset = scanPtr [i].offset;
2295     len = (Int4) (max / (Int4) sizeof (DocUid));
2296 #ifdef _NEW_CdEntrez_
2297     if (_nouveau)
2298       cd3_CdTrmUidsMem (db, fld, offset, (Int4) len, uidPtr);
2299 #endif
2300 #ifdef _OLD_CdEntrez_
2301     if (!_nouveau)
2302       CdTrmUidsMem (db, fld, offset, (Int4) len, uidPtr);
2303 #endif
2304     specialSmall = NULL;
2305     remainSmall = NULL;
2306     smallCount = 0;
2307     for (j = i; j < i + count; j++) {
2308       scanPtr [j].offset -= offset;
2309       special = scanPtr [j].specialCount;
2310       total = scanPtr [j].totalCount;
2311       remainder = total - special;
2312       bptr = ((BytePtr) uidPtr) + scanPtr [j].offset;
2313       mptr = (DocUidPtr) bptr;
2314       if (smallCount + total > MAX_CDENTREZ_SMALL_LIST) {
2315         if (specialSmall != NULL) {
2316           specialPost = MergeSmallLists (specialPost, specialSmall);
2317           specialSmall = NULL;
2318         }
2319         if (remainSmall != NULL) {
2320           remainPost = MergeSmallLists (remainPost, remainSmall);
2321           remainSmall = NULL;
2322         }
2323         smallCount = 0;
2324       }
2325       if (total > 100) {
2326         specialLarge = BSNew (special * sizeof (DocUid));
2327         BSWrite (specialLarge, mptr, special * sizeof (DocUid));
2328         specialPost = MergePostingLists (specialPost, specialLarge);
2329         remainLarge = BSNew (remainder * sizeof (DocUid));
2330         BSWrite (remainLarge, (mptr + special),
2331                  remainder * sizeof (DocUid));
2332         remainPost = MergePostingLists (remainPost, remainLarge);
2333       } else {
2334         smallCount += total;
2335         if (special > 0) {
2336           if (specialSmall == NULL) {
2337             specialSmall = BSNew (0L);
2338           }
2339           BSWrite (specialSmall, mptr, special * sizeof (DocUid));
2340         }
2341         if (remainder > 0) {
2342           if (remainSmall == NULL) {
2343             remainSmall = BSNew (0L);
2344           }
2345           BSWrite (remainSmall, (mptr + special), remainder * sizeof (DocUid));
2346         }
2347       }
2348     }
2349     if (specialSmall != NULL) {
2350       specialPost = MergeSmallLists (specialPost, specialSmall);
2351       specialSmall = NULL;
2352     }
2353     if (remainSmall != NULL) {
2354       remainPost = MergeSmallLists (remainPost, remainSmall);
2355       remainSmall = NULL;
2356     }
2357   } else {
2358     ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "Cannot merge > %ld element", (long) MAX_CDENTREZ_UID_LIST_SIZE);
2359     scanOk = FALSE;
2360     goOn = FALSE;
2361   }
2362   return goOn;
2363 }
2364 
MergeSeveralLists(Int4 i,Int4 count)2365 static Boolean NEAR MergeSeveralLists (Int4 i, Int4 count)
2366 
2367 {
2368   if (fld != FLD_ORGN) {
2369     return MergeSeveralOrderedLists (i, count);
2370   } else {
2371     return MergeUnorderedLists (i, count);
2372   }
2373 }
2374 
ProcessScanResults(void)2375 static Boolean NEAR ProcessScanResults (void)
2376 
2377 {
2378   Boolean  goOn;
2379   Int4     i;
2380   Int4     j;
2381   Int4     max;
2382 
2383   ProgMon ("ProcessScanResults");
2384   goOn = TRUE;
2385   i = 0;
2386   j = 0;
2387   max = 0;
2388   while (j < scanCount) {
2389     if (scanPtr [j].offset < scanPtr [i].offset) {
2390       goOn = MergeSeveralLists (i, (Int4) (j - i));
2391       max = 0;
2392       i = j;
2393     } else {
2394       max = scanPtr [j].offset + scanPtr [j].totalCount *
2395             (Int4) sizeof (DocUid) - scanPtr [i].offset;
2396       if (max >= MAX_CDENTREZ_UID_LIST_SIZE) {
2397         if (j == i) {
2398           goOn = MergeSeveralLists (i, 1);
2399           j++;
2400           i = j;
2401           max = 0;
2402         } else {
2403           goOn = MergeSeveralLists (i, (Int4) (j - i));
2404           i = j;
2405           max = 0;
2406         }
2407       } else {
2408         j++;
2409       }
2410     }
2411   }
2412   if (max > 0) {
2413     goOn = MergeSeveralLists (i, (Int4) (j - i));
2414   }
2415   scanCount = 0;
2416   return goOn;
2417 }
2418 
ScanOnlyProc(CdTermPtr trmp)2419 static Boolean  ScanOnlyProc (CdTermPtr trmp)
2420 
2421 {
2422   Int4     count;
2423   Boolean  goOn;
2424 
2425   goOn = TRUE;
2426   if (scanCount >= SCAN_MAX) {
2427     goOn = ProcessScanResults ();
2428   }
2429   if (scanCount < SCAN_MAX) {
2430     if (trmp->total_count >= CDENTREZ_TERM_MAX) {
2431       while (trmp->special_count > 0) {
2432         if (scanCount >= SCAN_MAX) {
2433           goOn = ProcessScanResults ();
2434         }
2435         count = MIN (trmp->special_count, (long) CDENTREZ_TERM_MAX);
2436         scanPtr [scanCount].specialCount = count;
2437         scanPtr [scanCount].totalCount = count;
2438         scanPtr [scanCount].offset = trmp->offset;
2439         scanPtr [scanCount].specialPtr = NULL;
2440         scanPtr [scanCount].remainderPtr = NULL;
2441         scanCount++;
2442         trmp->special_count -= count;
2443         trmp->total_count -= count;
2444         trmp->offset += count * sizeof (DocUid);
2445       }
2446       while (trmp->total_count > 0) {
2447         if (scanCount >= SCAN_MAX) {
2448           goOn = ProcessScanResults ();
2449         }
2450         count = MIN (trmp->total_count, (long) CDENTREZ_TERM_MAX);
2451         scanPtr [scanCount].specialCount = 0;
2452         scanPtr [scanCount].totalCount = count;
2453         scanPtr [scanCount].offset = trmp->offset;
2454         scanPtr [scanCount].specialPtr = NULL;
2455         scanPtr [scanCount].remainderPtr = NULL;
2456         scanCount++;
2457         trmp->total_count -= count;
2458         trmp->offset += count * sizeof (DocUid);
2459       }
2460     } else {
2461       if (scanCount >= SCAN_MAX) {
2462         goOn = ProcessScanResults ();
2463       }
2464       scanPtr [scanCount].specialCount = trmp->special_count;
2465       scanPtr [scanCount].totalCount = trmp->total_count;
2466       scanPtr [scanCount].offset = trmp->offset;
2467       scanPtr [scanCount].specialPtr = NULL;
2468       scanPtr [scanCount].remainderPtr = NULL;
2469       scanCount++;
2470     }
2471   }
2472   return goOn;
2473 }
2474 
WildCardProc(CdTermPtr trmp)2475 static Boolean  WildCardProc (CdTermPtr trmp)
2476 
2477 {
2478   Int4     diff;
2479   Boolean  goOn;
2480   CharPtr  src;
2481   CharPtr  tgt;
2482 
2483   goOn = FALSE;
2484   src = selection;
2485   tgt = trmp->term;
2486   diff = 0;
2487   while (*src != '\0' && *tgt != '\0' && diff == 0) {
2488     if (*src != '?') {
2489       diff = TO_UPPER (*src) - TO_UPPER (*tgt);
2490     }
2491     if (diff == 0) {
2492       src++;
2493       tgt++;
2494     }
2495   }
2496   if (diff != 0) {
2497     if (*src == '*') {
2498       goOn = TRUE;
2499     }
2500   } else if (*src == '*') {
2501     goOn = TRUE;
2502   } else if (*src == '\0' && *tgt == '\0') {
2503     goOn = TRUE;
2504   } else {
2505     goOn = FALSE;
2506   }
2507   return goOn;
2508 }
2509 
ScanAndFreeProc(CdTermPtr trmp)2510 static Boolean  ScanAndFreeProc (CdTermPtr trmp)
2511 
2512 {
2513   Int4     compare;
2514   Boolean  goOn;
2515   Char     str [256];
2516 
2517   goOn = TRUE;
2518   if (trmp != NULL && trmp->term != NULL) {
2519     if (rangeScanning) {
2520       compare = MeshStringICmp (trmp->term, selection);
2521       if (compare >= 0) {
2522         if (topOfRange[0] == '\0')
2523           compare = -1;
2524         else
2525           compare = MeshStringICmp (trmp->term, topOfRange);
2526         if (compare > 0)
2527           goOn = FALSE;
2528         else
2529           goOn = ScanOnlyProc (trmp);
2530       }
2531     } else {
2532       StringNCpy (str, trmp->term, sizeof (str));
2533       TermTruncate (str);
2534       if (userScanProc != NULL) {
2535         compare = MeshStringICmp (str, wildcard);
2536       } else {
2537         compare = MeshStringICmp (str, selection);
2538       }
2539       if (compare > 0) {
2540         str [searchTermLen] = '\0';
2541         if (userScanProc != NULL) {
2542           compare = MeshStringICmp (str, wildcard);
2543         } else {
2544           compare = MeshStringICmp (str, selection);
2545         }
2546         if (compare > 0) {
2547           goOn = FALSE;
2548         }
2549       } else if (compare == 0) {
2550         if (userScanProc != NULL) {
2551           if (userScanProc (trmp)) {
2552             goOn = ScanOnlyProc (trmp);
2553           }
2554         } else {
2555           goOn = ScanOnlyProc (trmp);
2556         }
2557       }
2558     }
2559   }
2560   trmp = CdTermFree (trmp);
2561   return goOn;
2562 }
2563 
CdEntrezMergeTerm(DocType type,DocField field,CharPtr term,Int4Ptr spcl,Int4Ptr totl,CdTermProc userProc)2564 static Boolean NEAR CdEntrezMergeTerm (DocType type, DocField field, CharPtr term,
2565                                         Int4Ptr spcl, Int4Ptr totl, CdTermProc userProc)
2566 
2567 {
2568   Char  ch;
2569   Int4  remain;
2570   Int4  special;
2571   Char  str [256];
2572   Int4  total;
2573   Int4  termpage;
2574   CharPtr tmp;
2575   Int4  limit = 0;
2576   CharPtr prop;
2577   Boolean retval = FALSE;
2578 
2579   if (spcl != NULL) {
2580     *spcl = 0;
2581   }
2582   if (totl != NULL) {
2583     *totl = 0;
2584   }
2585   db = type;
2586   fld = field;
2587   userScanProc = userProc;
2588   StringNCpy (str, term, sizeof (str));
2589   tmp = str;
2590   while (*tmp != '\0') {
2591     tmp++;
2592   }
2593   tmp -= 3;
2594   if ((*tmp == '.') && (*(tmp+1) == '.') && (*(tmp+2) == '.')) {
2595     *tmp = '\0';
2596   }
2597   SingleSpaces (str);
2598   if (userProc != NULL) {
2599     searchTermLen = 0;
2600     ch = str [searchTermLen];
2601     while (ch != '\0' && ch != '*' && ch != '?') {
2602       searchTermLen++;
2603       ch = str [searchTermLen];
2604     }
2605   } else {
2606     searchTermLen = (Int4) StringLen (str);
2607   }
2608   if ((prop = (CharPtr) GetAppProperty("CdEntrezTruncLimit")) != NULL)
2609   {
2610     limit = atoi(prop);
2611   }
2612   if (searchTermLen > limit || str [0] == '?' || str [0] == '*' ||
2613       rangeScanning) {
2614     scanOk = TRUE;
2615     uidPtr = MemNew ((size_t) MAX_CDENTREZ_UID_LIST_SIZE);
2616     if (uidPtr != NULL) {
2617       scanPtr = MemNew (SCAN_MAX * sizeof (ScanData));
2618       if (scanPtr != NULL) {
2619         scanCount = 0;
2620         specialPost = NULL;
2621         remainPost = NULL;
2622         StringNCpy (selection, str, sizeof (selection));
2623         StringNCpy (wildcard, str, sizeof (wildcard));
2624         wildcard [searchTermLen] = '\0';
2625 #ifdef _NEW_CdEntrez_
2626 		if (_nouveau)
2627           termpage = cd3_CdTrmLookup (db, fld, wildcard);
2628 #endif
2629 #ifdef _OLD_CdEntrez_
2630 		if (!_nouveau)
2631           termpage = CdTrmLookup (db, fld, wildcard);
2632 #endif
2633         if (fld == FLD_MESH) {
2634           ch = str [0];
2635           str [0] = TO_UPPER (ch);
2636         }
2637         if (termpage >= 0) {
2638 #ifdef _NEW_CdEntrez_
2639 			if (_nouveau)
2640 				cd3_CdTermScan (db, fld, termpage, (Int4)0, ScanAndFreeProc);
2641 #endif
2642 #ifdef _OLD_CdEntrez_
2643 			if (!_nouveau)
2644 				CdTermScan (db, fld, termpage, (Int4)0, ScanAndFreeProc);
2645 #endif
2646         }
2647         if (scanCount > 0) {
2648           ProcessScanResults ();
2649         }
2650         if (specialPost != NULL && remainPost != NULL) {
2651           remainPost = DifferencePostingLists (remainPost, specialPost);
2652         }
2653         if (specialPost == NULL) {
2654           specialPost = BSNew (0);
2655         }
2656         if (remainPost == NULL) {
2657           remainPost = BSNew (0);
2658         }
2659         special = BSLen (specialPost) / sizeof (DocUid);
2660         remain = BSLen (remainPost) / sizeof (DocUid);
2661         total = special + remain;
2662         scanPtr = MemFree (scanPtr);
2663       }
2664       uidPtr = MemFree (uidPtr);
2665       if (scanOk && total > 0) {
2666 		retval = TRUE;
2667 		if (userProc == NULL && !rangeScanning) {
2668 			StringCat (str, "...");
2669 		}
2670         CdEntrezCreateTerm (str, db, fld, specialPost, remainPost, rangeScanning ? topOfRange : NULL);
2671         if (spcl != NULL) {
2672           *spcl = special;
2673         }
2674         if (totl != NULL) {
2675           *totl = total;
2676         }
2677       }
2678       specialPost = BSFree (specialPost);
2679       remainPost = BSFree (remainPost);
2680     }
2681   }
2682   return retval;
2683 }
2684 
2685 /*****************************************************************************
2686 *
2687 *   CdEntMedlineEntryListGet (result, numuid, uids, mark_missing)
2688 *   	returns a count of entries read
2689 *   	if (mark_missing) ids which could not be located are made negative
2690 *
2691 *****************************************************************************/
CdEntMedlineEntryListGet(MedlineEntryPtr PNTR result,Int2 numuid,Int4Ptr uids,Boolean mark_missing)2692 NLM_EXTERN Int2  CdEntMedlineEntryListGet (MedlineEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Boolean mark_missing)
2693 
2694 {
2695 	MedlineEntryPtr mep;
2696 	Int2 count = 0, ctr;
2697 	AsnIoPtr aip;
2698 	DocType db = TYP_ML;
2699 
2700 	if (! MedlineAsnLoad())
2701 		return 0;
2702 
2703 	for (ctr = 0; ctr < numuid; ctr++)
2704 	{
2705 		mep = NULL;
2706 
2707 #ifdef _NEW_CdEntrez_
2708 		if (_nouveau)
2709 			aip = cd3_CdDocAsnOpen(db, uids[ctr]);
2710 #endif
2711 #ifdef _OLD_CdEntrez_
2712 		if (!_nouveau)
2713 			aip = CdDocAsnOpen(db, uids[ctr]);
2714 #endif
2715 		if (aip != NULL)
2716 		{
2717 		 	mep = MedlineEntryAsnRead(aip, NULL);
2718 #ifdef _NEW_CdEntrez_
2719 		 	if (_nouveau)
2720 	 			cd3_CdDocAsnClose(aip);
2721 #endif
2722 #ifdef _OLD_CdEntrez_
2723 		 	if (!_nouveau)
2724 	 			CdDocAsnClose(aip);
2725 #endif
2726 		}
2727 		if (mep == NULL)    /* didn't get it */
2728 		{
2729 			if (mark_missing)
2730 				uids[ctr] *= -1;
2731 		}
2732 		else
2733 		{
2734 			count++;
2735 			result[ctr] = mep;
2736 		}
2737 	}
2738 
2739 	return count;
2740 }
2741 
2742 /*****************************************************************************
2743 *
2744 *   CdEntSeqEntryListGet (result, numuid, uids, retcode, mark_missing)
2745 *   	returns a count of entries read
2746 *   	if (mark_missing) ids which could not be located are made negative
2747 *       retcode is defined in objsset.h
2748 *
2749 *****************************************************************************/
2750 static AsnIo* CdSeqAsnOpen (DocType *type, DocUid uid, Boolean isGenome);
2751 
CdEntSeqEntryListGet(SeqEntryPtr PNTR result,Int2 numuid,Int4Ptr uids,Int2 retcode,Boolean mark_missing)2752 NLM_EXTERN Int2  CdEntSeqEntryListGet (SeqEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Int2 retcode, Boolean mark_missing)
2753 {
2754 	SeqEntryPtr sep;
2755 	Int2 count = 0, ctr;
2756 	AsnIoPtr aip;
2757 	DocType db = TYP_SEQ;
2758 	ValNode an;
2759 
2760 	if (! SeqSetAsnLoad())
2761 		return 0;
2762 
2763     an.data.intvalue = 0;
2764     an.choice = SEQID_GI;
2765 
2766 	for (ctr = 0; ctr < numuid; ctr++)
2767 	{
2768 		sep = NULL;
2769 		aip = CdSeqAsnOpen(&db, uids[ctr], retcode == -1);
2770 		if (aip != NULL)
2771 		{
2772 			an.data.intvalue = uids[ctr];
2773 			if (retcode == -1)
2774 		 		sep = SeqEntryAsnRead(aip, NULL);
2775 			else
2776 		 		sep = SeqEntryAsnGet(aip, NULL, &an, retcode);
2777 #ifdef _NEW_CdEntrez_
2778 			if (_nouveau)
2779 	 			cd3_CdDocAsnClose(aip);
2780 #endif
2781 #ifdef _OLD_CdEntrez_
2782 			if (!_nouveau)
2783 	 			CdDocAsnClose(aip);
2784 #endif
2785 		}
2786 		if (sep == NULL)    /* didn't get it */
2787 		{
2788 			if (mark_missing)
2789 				uids[ctr] *= -1;
2790 		}
2791 		else
2792 		{
2793 			count++;
2794 			result[ctr] = sep;
2795 		}
2796 	}
2797 
2798 	return count;
2799 }
2800 
2801 
CdSeqAsnOpen(DocType * type,DocUid uid,Boolean isGenome)2802 static AsnIo* CdSeqAsnOpen (DocType *type, DocUid uid, Boolean isGenome)
2803 {
2804 	AsnIo *aio = NULL;
2805 
2806 #ifdef _NEW_CdEntrez_
2807 	if (_nouveau)
2808 	{
2809 		if (isGenome) {
2810 			if ((aio = cd3_CdDocAsnOpen(TYP_CH,uid)) != NULL)
2811 				*type = TYP_CH;
2812 		} else {
2813 			if (*type != TYP_SEQ)
2814 			{
2815 				aio = cd3_CdDocAsnOpen(*type,uid);
2816 			}
2817 			else
2818 			{
2819 				if ((aio = cd3_CdDocAsnOpen(TYP_AA,uid)) != NULL)
2820 					*type = TYP_AA;
2821 				else if ((aio = cd3_CdDocAsnOpen(TYP_NT,uid)) != NULL)
2822 						*type = TYP_NT;
2823 			}
2824 		}
2825 	}
2826 #endif
2827 
2828 #ifdef _OLD_CdEntrez_
2829 	if (!_nouveau)
2830 		aio = CdDocAsnOpen(*type,uid);
2831 #endif
2832 
2833 	return aio;
2834 }
2835 
2836 /*****************************************************************************
2837 *
2838 *   CdEntMlSumListGet (result, numuid, uids)
2839 *   	returns a count of entries read
2840 *   	head of linked list is in result
2841 *
2842 *****************************************************************************/
2843 
CdEntMlSumListGet(DocSumPtr PNTR result,Int2 numuid,Int4Ptr uids)2844 NLM_EXTERN Int2  CdEntMlSumListGet (DocSumPtr PNTR result, Int2 numuid, Int4Ptr uids)          /* Gi numbers */
2845 {
2846 	Int2 count = 0;
2847 
2848 #ifdef _NEW_CdEntrez_
2849 	if (_nouveau)
2850 		count = CdDocSumListGet(result,numuid,TYP_ML,uids);
2851 #endif
2852 
2853 #ifdef _OLD_CdEntrez_
2854 	if (!_nouveau)
2855 	{
2856 		Int2 ctr;
2857 		DocType db = TYP_ML;
2858 		AsnIoPtr aip;
2859 
2860 		for (ctr = 0; ctr < numuid; ctr++)
2861 		{
2862 			result[ctr] = NULL;
2863 			aip = CdDocAsnOpen (db, uids[ctr]);
2864 		    if (aip != NULL)
2865 			{
2866 				result[ctr] = MedSumAsnRead(aip, uids[ctr]);
2867 				CdDocAsnClose(aip);
2868 				if (result[ctr] != NULL)
2869 					count++;
2870 			}
2871 		}
2872 	}
2873 #endif
2874 
2875 	return count;
2876 }
2877 
2878 
2879 /*****************************************************************************
2880 *
2881 *   CdEntMlSumGet(uid)
2882 *   	get one MlSummary
2883 *
2884 *****************************************************************************/
2885 #ifdef _OLD_CdEntrez_
2886 
CdEntMlSumGet(Int4 uid)2887 static DocSumPtr NEAR CdEntMlSumGet (Int4 uid)
2888 {
2889 	DocSumPtr dsp = NULL;
2890 
2891 	CdEntMlSumListGet(&dsp, 1, &uid);
2892 	return dsp;
2893 }
2894 
2895 #endif
2896 
2897 /*****************************************************************************
2898 *
2899 *   void StripAuthor(author)
2900 *
2901 *****************************************************************************/
StripAuthor(CharPtr author)2902 static void NEAR StripAuthor (CharPtr author)
2903 
2904 {
2905   CharPtr  p1, p2;
2906 
2907   p1 = author;
2908   while ((p1 = StringChr (p1, ' ')) != NULL) {
2909     for (p2 = p1 + 1; *p2 != '\0'; p2++) {
2910       if (*p2 == ' ') break;
2911       if (IS_ALPHA (*p2) && IS_LOWER (*p2)) break;
2912     }
2913     if (*p2 == '\0' || *p2 == ' ') {
2914       *p1 = '\0';
2915       return;
2916     }
2917     p1++;
2918   }
2919 }
2920 
2921 /*****************************************************************************
2922 *
2923 *   MedSumAsnRead(aip, uid)
2924 *
2925 *****************************************************************************/
FindAsnType(AsnTypePtr PNTR atp,AsnModulePtr amp,CharPtr str)2926 static void NEAR FindAsnType (AsnTypePtr PNTR atp, AsnModulePtr amp, CharPtr str)
2927 
2928 {
2929   if (atp != NULL && (*atp) == NULL) {
2930     *atp = AsnTypeFind (amp, str);
2931   }
2932 }
2933 
2934 
MedSumAsnRead(AsnIoPtr aip,DocUid uid)2935 static DocSumPtr NEAR MedSumAsnRead (AsnIoPtr aip, DocUid uid)
2936 
2937 {
2938   DataVal       av;
2939   AsnModulePtr  amp;
2940   AsnTypePtr    atp;
2941   Boolean       citFound;
2942   DocSumPtr     dsp;
2943   Boolean       goOn;
2944   Int2          i;
2945   CharPtr       ptr;
2946   Char          caption [50];
2947   Char          author [40];
2948   Char          year [10];
2949 
2950   if ((aip == NULL) || (! AllObjLoad ()))
2951     return NULL;
2952 
2953 	  amp = AsnAllModPtr ();
2954 
2955 	  FindAsnType (&MEDLINE_ENTRY, amp, "Medline-entry");
2956 	  FindAsnType (&MEDLINE_ENTRY_cit, amp, "Medline-entry.cit");
2957 	  FindAsnType (&MEDLINE_ENTRY_abstract, amp, "Medline-entry.abstract");
2958 	  FindAsnType (&TITLE_E_trans, amp, "Title.E.trans");
2959 	  FindAsnType (&AUTH_LIST_names_ml_E, amp, "Auth-list.names.ml.E");
2960 	  FindAsnType (&AUTH_LIST_names_str_E, amp, "Auth-list.names.str.E");
2961 	  FindAsnType (&DATE_STD_year, amp, "Date-std.year");
2962 	  FindAsnType (&DATE_str, amp, "Date.str");
2963 	  FindAsnType (&TITLE_E_name, amp, "Title.E.name");
2964 	  FindAsnType (&MEDLINE_ENTRY_mesh, amp, "Medline-entry.mesh");
2965 	  FindAsnType (&MEDLINE_ENTRY_substance, amp, "Medline-entry.substance");
2966 	  FindAsnType (&MEDLINE_ENTRY_xref, amp, "Medline-entry.xref");
2967 	  FindAsnType (&MEDLINE_ENTRY_idnum, amp, "Medline-entry.idnum");
2968 	  FindAsnType (&MEDLINE_ENTRY_gene, amp, "Medline-entry.gene");
2969 
2970   atp = AsnReadId (aip, amp, MEDLINE_ENTRY);
2971   AsnReadVal (aip, atp, &av);
2972 
2973   dsp = MemNew (sizeof (DocSum));
2974   if (dsp != NULL) {
2975     dsp->no_abstract = TRUE;
2976     dsp->translated_title = FALSE;
2977     dsp->no_authors = TRUE;
2978     author [0] = '\0';
2979     year [0] = '\0';
2980     citFound = FALSE;
2981     goOn = TRUE;
2982     while (goOn) {
2983       atp = AsnReadId (aip, amp, atp);
2984       if (atp == MEDLINE_ENTRY) {
2985         AsnReadVal (aip, atp, NULL);
2986         goOn = FALSE;
2987       } else if (atp == MEDLINE_ENTRY_cit) {
2988         AsnReadVal (aip, atp, NULL);
2989         citFound = TRUE;
2990       } else if (atp == MEDLINE_ENTRY_abstract) {
2991         AsnReadVal (aip, atp, NULL);
2992         dsp->no_abstract = FALSE;
2993         goOn = FALSE;
2994       } else if (atp == TITLE_E_trans) {
2995         AsnReadVal (aip, atp, &av);
2996         dsp->translated_title = TRUE;
2997         if (dsp->title != NULL) {
2998           dsp->title = MemFree (dsp->title);
2999         }
3000         dsp->title = MemNew ((size_t) StringLen ((CharPtr) av.ptrvalue) + 3);
3001         ptr = dsp->title;
3002         *ptr = '[';
3003         ptr++;
3004         ptr = StringMove (ptr, (CharPtr) av.ptrvalue);
3005         *ptr = ']';
3006         ptr++;
3007         *ptr = '\0';
3008         AsnKillValue (atp, &av);
3009       } else if (atp == AUTH_LIST_names_ml_E) {
3010         AsnReadVal (aip, atp, &av);
3011         dsp->no_authors = FALSE;
3012         if (author [0] == '\0') {
3013           StringNCpy (author, (CharPtr) av.ptrvalue, sizeof (author));
3014         }
3015         AsnKillValue (atp, &av);
3016       } else if (atp == AUTH_LIST_names_str_E) {
3017         AsnReadVal (aip, atp, &av);
3018         dsp->no_authors = FALSE;
3019         if (author [0] == '\0') {
3020           StringNCpy (author, (CharPtr) av.ptrvalue, sizeof (author));
3021         }
3022         AsnKillValue (atp, &av);
3023       } else if (atp == DATE_STD_year) {
3024         AsnReadVal (aip, atp, &av);
3025         if (citFound) {
3026           sprintf (year, "%ld", (long) av.intvalue);
3027         }
3028       } else if (atp == DATE_str) {
3029         AsnReadVal (aip, atp, &av);
3030         if (citFound) {
3031           i = 0;
3032           ptr = av.ptrvalue;
3033           while (ptr [i] != '\0' && ptr [i] != ' ' && i < sizeof (year) - 1) {
3034             year [i] = ptr [i];
3035             i++;
3036           }
3037           year [i] = '\0';
3038         }
3039         AsnKillValue (atp, &av);
3040       } else if (atp == TITLE_E_name) {
3041         AsnReadVal (aip, atp, &av);
3042         if (dsp->title == NULL) {
3043           dsp->title = StringSave ((CharPtr) av.ptrvalue);
3044         }
3045         AsnKillValue (atp, &av);
3046       } else if (atp == MEDLINE_ENTRY_mesh || atp == MEDLINE_ENTRY_substance ||
3047                  atp == MEDLINE_ENTRY_xref || atp == MEDLINE_ENTRY_idnum ||
3048                  atp == MEDLINE_ENTRY_gene) {
3049         AsnReadVal (aip, atp, NULL);
3050         goOn = FALSE;
3051       } else {
3052         AsnReadVal (aip, atp, NULL);
3053       }
3054     }
3055     if (dsp->no_authors) {
3056       sprintf (caption, "[%ld], %s", (long) uid, year);
3057     } else if (author [0] != '\0') {
3058       StripAuthor (author);
3059       author [12] = '.';
3060       author [12] = '\0';
3061       sprintf (caption, "%s, %s", author, year);
3062     } else {
3063       sprintf (caption, "[%ld], %s", (long) uid, year);
3064     }
3065     dsp->caption = StringSave (caption);
3066     dsp->uid = uid;
3067   }
3068   AsnIoReset (aip);
3069   return dsp;
3070 }
3071 
3072 
3073 /*****************************************************************************
3074 *
3075 *   CdSeqIdForGI(Int4 gi)
3076 *
3077 *****************************************************************************/
CdSeqIdForGI(Int4 gi)3078 NLM_EXTERN SeqIdPtr CdSeqIdForGI (Int4 gi)
3079 {
3080 #ifdef _NEW_CdEntrez_
3081 	DocSum* dsp;
3082 	SeqIdPtr sip = NULL, tmp, next;
3083 
3084 	dsp = cd3_CdGetDocSum (TYP_NT, gi);   /* nucleic acid? */
3085 	if (dsp == NULL)
3086 		dsp = cd3_CdGetDocSum (TYP_AA, gi);  /* protein? */
3087 	if (dsp != NULL)
3088 	{
3089 		tmp = SeqIdParse(dsp->extra);
3090 		DocSumFree(dsp);
3091 
3092 		while (tmp != NULL)
3093 		{
3094 			next = tmp->next;
3095 			tmp->next = NULL;
3096 			if (tmp->choice == SEQID_GI)
3097 				SeqIdFree(tmp);
3098 			else
3099 				sip = tmp;
3100 			tmp = next;
3101 		}
3102 	}
3103 	return sip;
3104 
3105 #else
3106 
3107 	SeqIdPtr sip = NULL, ids, curr, best;
3108 	AsnIoPtr aip;
3109 	AsnModulePtr amp;
3110 	AsnTypePtr atp;
3111 	Boolean gotit;
3112 	DocType db = TYP_SEQ;
3113 	GiimPtr gip;
3114 
3115 	static Uint1 pick_order[20] = {
3116  	83, /* 0 = not set */
3117 	65, /* 1 = local Object-id */
3118 	65,  /* 2 = gibbsq */
3119 	65,  /* 3 = gibbmt */
3120 	70, /* 4 = giim Giimport-id */
3121 	60, /* 5 = genbank */
3122 	60, /* 6 = embl */
3123 	60, /* 7 = pir */
3124 	60, /* 8 = swissprot */
3125 	65,  /* 9 = patent */
3126 	65, /* 10 = other TextSeqId */
3127 	65, /* 11 = general Dbtag */
3128 	90,  /* 12 = gi */
3129 	60, /* 13 = ddbj */
3130 	60, /* 14 = prf */
3131 	60,  /* 15 = pdb */
3132 	0,	/* extras for new ids */
3133 	0,
3134 	0,
3135 	0
3136     };
3137 
3138 	if (! AllObjLoad()) return sip;
3139 	amp = AsnAllModPtr();
3140 	FindAsnType (&SEQ_ENTRY, amp, "Seq-entry");
3141     FindAsnType (&BIOSEQ_id, amp, "Bioseq.id");
3142     FindAsnType (&BIOSEQ_id_E, amp, "Bioseq.id.E");
3143 
3144 	aip = CdSeqAsnOpen (&db, gi, FALSE);
3145 	if (aip == NULL) return sip;
3146 
3147 	atp = SEQ_ENTRY;
3148 	while ((atp = AsnReadId(aip, amp, atp)) != NULL)
3149 	{
3150 		if (atp == BIOSEQ_id)
3151 		{
3152 			gotit = FALSE;
3153 		    ids = SeqIdSetAsnRead(aip, atp, BIOSEQ_id_E);
3154 			for (curr = ids; curr != NULL; curr = curr->next)
3155 			{
3156 				if (curr->choice == SEQID_GIIM)
3157 				{
3158 					gip = (GiimPtr)(curr->data.ptrvalue);
3159 					if (gip->id == gi)
3160 					{
3161 						gotit = TRUE;
3162 						break;
3163 					}
3164 				}
3165 				else if (curr->choice == SEQID_GI)
3166 				{
3167 					if (curr->data.intvalue == gi)
3168 					{
3169 						gotit = TRUE;
3170 						break;
3171 					}
3172 				}
3173 			}
3174 			if (gotit)
3175 			{
3176 				best = SeqIdSelect(ids, pick_order, 20);
3177 				sip = ValNodeExtract(&ids, (Int2)(best->choice));
3178 			}
3179 			SeqIdSetFree(ids);
3180 			if (gotit)
3181 				break;
3182 
3183 		}
3184 		else
3185 			AsnReadVal(aip, atp, NULL);
3186 		if (! AsnGetLevel(aip))       /* finished reading a Seq-entry */
3187 			break;                    /* failed */
3188 	}
3189 
3190 #ifdef _NEW_CdEntrez_
3191 	if (_nouveau)
3192 	  cd3_CdDocAsnClose(aip);
3193 #endif
3194 #ifdef _OLD_CdEntrez_
3195 	if (!_nouveau)
3196 	  CdDocAsnClose(aip);
3197 #endif
3198 
3199 	return sip;
3200 #endif
3201 }
3202 
3203 
3204 
3205 /*****************************************************************************
3206 *
3207 *   CdEntSeqSumListGet (result, numuid, db, uids)
3208 *   	returns a count of entries read
3209 *   	head of linked list is in result
3210 *
3211 *****************************************************************************/
3212 
CdEntSeqSumListGet(DocSumPtr PNTR result,Int2 numuid,DocType db,Int4Ptr uids)3213 NLM_EXTERN Int2  CdEntSeqSumListGet (DocSumPtr PNTR result, Int2 numuid, DocType db, Int4Ptr uids)          /* Gi numbers */
3214 {
3215 	Int2 count = 0;
3216 
3217 #ifdef _NEW_CdEntrez_
3218 	if (_nouveau)
3219 	{
3220 		ASSERT(db != TYP_SEQ);
3221 		count = CdDocSumListGet(result,numuid,db,uids);
3222 	}
3223 #endif
3224 
3225 #ifdef _OLD_CdEntrez_
3226 	if (!_nouveau)
3227 	{
3228 		Int2 ctr;
3229 		AsnIoPtr aip;
3230 
3231 		for (ctr = 0; ctr < numuid; ctr++)
3232 		{
3233 			result[ctr] = NULL;
3234 			aip = CdDocAsnOpen (db, uids[ctr]);
3235 		    if (aip != NULL)
3236 			{
3237 				result[ctr] = CdSeqSumAsnRead(aip, uids[ctr]);
3238 				CdDocAsnClose(aip);
3239 				if (result[ctr] != NULL)
3240 					count++;
3241 			}
3242 		}
3243 	}
3244 #endif
3245 
3246 	return count;
3247 }
3248 
3249 /*****************************************************************************
3250 *
3251 *   CdEntSeqSumGet(uid, type)
3252 *   	get one SeqSummary
3253 *
3254 *****************************************************************************/
3255 #ifdef _OLD_CdEntrez_
3256 
CdEntSeqSumGet(Int4 uid,DocType type)3257 static DocSumPtr NEAR CdEntSeqSumGet (Int4 uid, DocType type)
3258 {
3259 	DocSumPtr dsp = NULL;
3260 
3261 	CdEntSeqSumListGet(&dsp, 1, type, &uid);
3262 	return dsp;
3263 }
3264 
3265 #endif
3266 
CdSeqSumAsnRead(AsnIoPtr aip,DocUid uid)3267 NLM_EXTERN DocSumPtr CdSeqSumAsnRead (AsnIoPtr aip, DocUid uid)
3268 
3269 {
3270   DataVal       av;
3271   AsnModulePtr  amp;
3272   AsnTypePtr    atp;
3273   DocSumPtr     dsp;
3274   Boolean       goOn;
3275   Char          caption [50];
3276   Char          author [40];
3277   Char          year [10];
3278   Char          locus [40];
3279   Char          cds [10];
3280   CharPtr       chptr;
3281   Int2          proteins;
3282   CharPtr       recentTitle;
3283   Boolean       backbone;
3284   Boolean       genBank;
3285   Boolean       embl;
3286   Boolean       ddbj;
3287   Boolean       pir;
3288   Boolean       swissprot;
3289   Boolean       isaNA;
3290   Boolean       isaAA;
3291   Boolean       isaSEG;
3292   Boolean		in_id;
3293   Int2          level;
3294 
3295   if ((aip == NULL) || (! AllObjLoad ()))
3296     return NULL;
3297 
3298   amp = AsnAllModPtr ();
3299 
3300   FindAsnType (&SEQ_ENTRY, amp, "Seq-entry");
3301   FindAsnType (&SEQ_ENTRY_seq, amp, "Seq-entry.seq");
3302   FindAsnType (&SEQ_ENTRY_set, amp, "Seq-entry.set");
3303   FindAsnType (&TEXTSEQ_ID_name, amp, "Textseq-id.name");
3304   FindAsnType (&TEXTSEQ_ID_accession, amp, "Textseq-id.accession");
3305   FindAsnType (&AUTH_LIST_names_str_E, amp, "Auth-list.names.str.E");
3306   FindAsnType (&DATE_STD_year, amp, "Date-std.year");
3307   FindAsnType (&DATE_str, amp, "Date.str");
3308   FindAsnType (&SEQ_DESCR_E_title, amp, "Seq-descr.E.title");
3309   FindAsnType (&GIIMPORT_ID_id, amp, "Giimport-id.id");
3310   FindAsnType (&BIOSEQ_inst, amp, "Bioseq.inst");
3311   FindAsnType (&SEQ_INST_mol, amp, "Seq-inst.mol");
3312   FindAsnType (&SEQ_INST_repr, amp, "Seq-inst.repr");
3313   FindAsnType (&SEQ_ID_gibbsq, amp, "Seq-id.gibbsq");
3314   FindAsnType (&SEQ_ID_gibbmt, amp, "Seq-id.gibbmt");
3315   FindAsnType (&SEQ_ID_genbank, amp, "Seq-id.genbank");
3316   FindAsnType (&SEQ_ID_gi, amp, "Seq-id.gi");
3317   FindAsnType (&SEQ_ID_embl, amp, "Seq-id.embl");
3318   FindAsnType (&SEQ_ID_ddbj, amp, "Seq-id.ddbj");
3319   FindAsnType (&SEQ_ID_pir, amp, "Seq-id.pir");
3320   FindAsnType (&SEQ_ID_swissprot, amp, "Seq-id.swissprot");
3321   FindAsnType (&PDB_BLOCK_compound_E, amp, "PDB-block.compound.E");
3322   FindAsnType (&PDB_SEQ_ID_MOL, amp, "PDB-seq-id.mol");
3323   FindAsnType (&BIOSEQ_id, amp, "Bioseq.id");
3324   FindAsnType (&CIT_PAT_title, amp, "Cit-pat.title");
3325 
3326   atp = AsnReadId (aip, amp, SEQ_ENTRY);
3327   AsnReadVal (aip, atp, &av);
3328 
3329   atp = AsnReadId (aip, amp, atp);
3330   AsnReadVal (aip, atp, &av);
3331 
3332   dsp = MemNew (sizeof (DocSum));
3333   if (dsp != NULL) {
3334     dsp->no_abstract = TRUE;
3335     dsp->translated_title = FALSE;
3336     dsp->no_authors = TRUE;
3337     author [0] = '\0';
3338     year [0] = '\0';
3339     locus [0] = '\0';
3340     cds [0] = '\0';
3341     proteins = 1;
3342     recentTitle = NULL;
3343     backbone = FALSE;
3344     genBank = FALSE;
3345     embl = FALSE;
3346 	ddbj = FALSE;
3347     pir = FALSE;
3348     swissprot = FALSE;
3349     isaNA = FALSE;
3350     isaAA = FALSE;
3351     isaSEG = FALSE;
3352 	in_id = FALSE;
3353     goOn = TRUE;
3354     level = AsnGetLevel (aip);
3355     while (goOn) {
3356       atp = AsnReadId (aip, amp, atp);
3357       if (atp == SEQ_ENTRY_seq || atp == SEQ_ENTRY_set) {
3358         AsnReadVal (aip, atp, NULL);
3359         if (AsnGetLevel (aip) <= level) {
3360           goOn = FALSE;
3361 	    }
3362       } else if (atp == BIOSEQ_id) {
3363         AsnReadVal (aip, atp, &av);
3364 		if (in_id) {
3365 			in_id = FALSE;
3366 		} else {
3367 			in_id = TRUE;
3368         }
3369       } else if (in_id && ((atp == TEXTSEQ_ID_name) ||
3370  	  		(atp == PDB_SEQ_ID_MOL))) {
3371         AsnReadVal (aip, atp, &av);
3372         if (locus [0] == '\0') {
3373           StringNCpy (locus, (CharPtr) av.ptrvalue, sizeof (locus));
3374         }
3375         AsnKillValue (atp, &av);
3376       } else if (in_id && (atp == TEXTSEQ_ID_accession)) {
3377         AsnReadVal (aip, atp, &av);
3378         if (locus [0] == '\0') {
3379           StringNCpy (locus, (CharPtr) av.ptrvalue, sizeof (locus));
3380         }
3381         AsnKillValue (atp, &av);
3382       } else if (atp == AUTH_LIST_names_str_E) {
3383         AsnReadVal (aip, atp, &av);
3384         if (author [0] == '\0') {
3385           StringNCpy (author, (CharPtr) av.ptrvalue, sizeof (author));
3386         }
3387         AsnKillValue (atp, &av);
3388       } else if (atp == DATE_STD_year) {
3389         AsnReadVal (aip, atp, &av);
3390         sprintf (year, "%ld", (long) av.intvalue);
3391       } else if (atp == DATE_str) {
3392         AsnReadVal (aip, atp, &av);
3393         StringNCpy (year, (CharPtr) av.ptrvalue, sizeof (year));
3394         AsnKillValue (atp, &av);
3395       } else if ((atp == SEQ_DESCR_E_title) ||
3396  	  	(atp == PDB_BLOCK_compound_E) || (atp == CIT_PAT_title)) {
3397         AsnReadVal (aip, atp, &av);
3398 		if (*((CharPtr)av.ptrvalue) != '\0')
3399 		{
3400 	        if (recentTitle != NULL) {
3401     	      recentTitle = MemFree (recentTitle);
3402         	}
3403 
3404 	        if (dsp->uid == uid && dsp->title == NULL &&
3405 				atp != CIT_PAT_title) {
3406     	      dsp->title = (CharPtr)av.ptrvalue;
3407 	        }
3408 			else
3409 				recentTitle = (CharPtr)av.ptrvalue;
3410 		}
3411 		else
3412 	        AsnKillValue (atp, &av);
3413       } else if (atp == GIIMPORT_ID_id || atp == SEQ_ID_gi) {
3414         AsnReadVal (aip, atp, &av);
3415         if (av.intvalue == uid) {
3416           dsp->uid = uid;
3417         }
3418       } else if (atp == SEQ_INST_mol) {
3419         AsnReadVal (aip, atp, &av);
3420         if ((! isaNA) && (! isaAA) && dsp->uid == uid) {
3421           isaNA = (Boolean) ISA_na (av.intvalue);
3422           isaAA = (Boolean) ISA_aa (av.intvalue);
3423           if (isaAA && cds [0] == '\0') {
3424             sprintf (cds, " cds%d", (int) proteins);
3425           }
3426         }
3427         if (ISA_aa (av.intvalue)) {
3428           proteins++;
3429         }
3430       } else if (atp == SEQ_INST_repr) {
3431         AsnReadVal (aip, atp, &av);
3432         if (av.intvalue == Seq_repr_seg) {
3433           isaSEG = TRUE;
3434         }
3435       } else if (atp == BIOSEQ_inst) {
3436         AsnReadVal (aip, atp, NULL);
3437         if (dsp->uid == uid && dsp->title == NULL) {
3438           dsp->title = recentTitle;
3439           recentTitle = NULL;
3440         }
3441       } else if (atp == SEQ_ID_gibbsq || atp == SEQ_ID_gibbmt) {
3442         AsnReadVal (aip, atp, NULL);
3443         backbone = TRUE;
3444       } else if (atp == SEQ_ID_genbank) {
3445         AsnReadVal (aip, atp, NULL);
3446 		if (in_id)
3447 	        genBank = TRUE;
3448       } else if (atp == SEQ_ID_embl) {
3449         AsnReadVal (aip, atp, NULL);
3450 		if (in_id)
3451 	        embl = TRUE;
3452       } else if (atp == SEQ_ID_ddbj) {
3453         AsnReadVal (aip, atp, NULL);
3454 		if (in_id)
3455 	        ddbj = TRUE;
3456       } else if (atp == SEQ_ID_pir) {
3457         AsnReadVal (aip, atp, NULL);
3458 		if (in_id)
3459 	        pir = TRUE;
3460       } else if (atp == SEQ_ID_swissprot) {
3461         AsnReadVal (aip, atp, NULL);
3462 		if (in_id)
3463 	        swissprot = TRUE;
3464       } else {
3465         AsnReadVal (aip, atp, NULL);
3466       }
3467       if (dsp->title != NULL && dsp->uid == uid) {
3468         if (backbone) {
3469           if (author [0] != '\0' && year [0] != '\0') {
3470             goOn = FALSE;
3471           }
3472         } else if (genBank || embl || ddbj) {
3473           if (locus [0] != '\0') {
3474             if (isaAA && cds [0] != '\0') {
3475               goOn = FALSE;
3476             } else if (isaNA) {
3477               goOn = FALSE;
3478             }
3479           }
3480         } else if (pir) {
3481           if (locus [0] != '\0') {
3482             goOn = FALSE;
3483           }
3484         } else if (swissprot) {
3485           if (locus [0] != '\0') {
3486             goOn = FALSE;
3487           }
3488         } else if (embl) {
3489         }
3490       }
3491     }
3492     if (backbone) {
3493       chptr = StringChr (author, ',');
3494       if (chptr != NULL) {
3495         *chptr = '\0';
3496       }
3497       chptr = StringChr (year, ' ');
3498       if (chptr != NULL) {
3499         *chptr = '\0';
3500       }
3501       author [12] = '.';
3502       author [12] = '\0';
3503       sprintf (caption, "%s, %s", author, year);
3504       dsp->caption = StringSave (caption);
3505     } else if (genBank || embl || ddbj) {
3506       if (isaAA) {
3507         sprintf (caption, "%s%s", locus, cds);
3508       } else if (isaSEG) {
3509         sprintf (caption, "%s segs", locus);
3510       } else {
3511         sprintf (caption, "%s", locus);
3512       }
3513       dsp->caption = StringSave (caption);
3514     } else {
3515       sprintf (caption, "%s", locus);
3516       dsp->caption = StringSave (caption);
3517     }
3518     dsp->uid = uid;
3519     if (recentTitle != NULL) {
3520       recentTitle = MemFree (recentTitle);
3521     }
3522   }
3523   AsnIoReset (aip);
3524   return dsp;
3525 }
3526 
3527 /*****************************************************************************
3528 *
3529 *   CdEntrezFindSeqId(sip)
3530 *       given a Seq-id, get the uid.
3531 *       returns 0 on failure
3532 *
3533 *****************************************************************************/
CdEntrezFindSeqId(SeqIdPtr sip)3534 NLM_EXTERN Int4 CdEntrezFindSeqId (SeqIdPtr sip)
3535 {
3536     Int4 uid = 0;
3537     DocType db = -1;
3538     TextSeqIdPtr tsip;
3539     PDBSeqIdPtr psip;
3540     PatentSeqIdPtr patsip;
3541     CharPtr locus = NULL;
3542     Char localbuf[40];
3543     ValNodePtr lst;
3544     LinkSetPtr lsp;
3545     Boolean check_both, done;
3546     EntrezInfoPtr eip;
3547     Int4 index;
3548 
3549     if ((eip = CdEntrezGetInfo()) != NULL && eip->field_count > FLD_SQID &&
3550 	eip->types[TYP_NT].fields[FLD_SQID].num_terms > 0)
3551     {
3552         done = FALSE;
3553         check_both = TRUE;
3554 	db = TYP_NT;
3555 	SeqIdWrite(sip, localbuf, PRINTID_FASTA_LONG, sizeof(localbuf));
3556         while (! done)     /* might need to check 2 types */
3557         {
3558             lst = CdEntTLNew(db);
3559             if (lst == NULL) return uid;
3560             CdEntTLAddTerm(lst, localbuf, db, FLD_SQID, TRUE, NULL);
3561             lsp = CdEntTLEval(lst);
3562             CdEntTLFree(lst);
3563             if (lsp != NULL)
3564             {
3565                 for (index = 0; index < lsp->num; index++)
3566 		{ /* choose the highest one */
3567 		    if (lsp->uids[index] > uid)
3568                         uid = lsp->uids[index];
3569 		}
3570                 LinkSetFree(lsp);
3571             }
3572             if ((! check_both) || (uid > 0))
3573                 done = TRUE;
3574             else
3575             {
3576                 if (db == TYP_AA)
3577                     db = TYP_NT;
3578                 else
3579                     db = TYP_AA;
3580                 check_both = FALSE;
3581             }
3582         }
3583     }
3584 
3585     if (uid > 0)
3586     {
3587 	return uid;
3588     }
3589 
3590     check_both = FALSE;
3591     switch (sip->choice)
3592     {
3593         case SEQID_NOT_SET:           /* not set */
3594         case SEQID_LOCAL:           /* local */
3595             break;
3596         case SEQID_GIBBSQ:           /* gibbsq */
3597         case SEQID_GIBBMT:           /* gibbmt */
3598             sprintf(localbuf, "B%ld", (long)(sip->data.intvalue));
3599             locus = (CharPtr)localbuf;
3600             db = TYP_AA;   /* guess it's a protein */
3601             check_both = TRUE;
3602             break;             /* not on cdrom */
3603         case SEQID_GIIM:           /* giim */
3604             uid = ((GiimPtr)sip->data.ptrvalue)->id;
3605             break;
3606         case SEQID_GI:
3607             uid = sip->data.intvalue;
3608             break;
3609         case SEQID_GENBANK:             /* genbank */
3610         case SEQID_EMBL:                /* embl */
3611         case SEQID_DDBJ:
3612             db = TYP_NT;   /* guess it's a nucleic acid */
3613             check_both = TRUE;
3614         case SEQID_PIR:             /* pir */
3615         case SEQID_SWISSPROT:
3616         case SEQID_PRF:
3617             if (db < 0)
3618                 db = TYP_AA;
3619             tsip = (TextSeqIdPtr)sip->data.ptrvalue;
3620             if (tsip->accession != NULL)
3621                 locus = tsip->accession;
3622             else
3623                 locus = tsip->name;
3624             break;
3625         case SEQID_PDB:
3626             psip = (PDBSeqIdPtr)(sip->data.ptrvalue);
3627             if (psip->chain == '\0' || psip->chain == ' ')
3628                 StrCpy (localbuf, psip->mol);
3629 	    else
3630                 sprintf(localbuf, "%s-%c", psip->mol, (Char)psip->chain);
3631             locus = localbuf;
3632             db = TYP_AA;   /* guess protein */
3633             check_both = TRUE;
3634             break;
3635         case SEQID_PATENT:
3636             patsip = (PatentSeqIdPtr)(sip->data.ptrvalue);
3637             sprintf(localbuf, "%s%s %d", patsip->cit->country, patsip->cit->number,
3638                 (int)patsip->seqid);
3639             locus = localbuf;
3640             db = TYP_AA;   /* guess protein */
3641             check_both = TRUE;
3642             break;
3643         default:
3644             break;
3645     }
3646 
3647     if ((! uid) && (locus != NULL))   /* got a term to find */
3648     {
3649         done = FALSE;
3650         while (! done)     /* might need to check 2 types */
3651         {
3652             lst = CdEntTLNew(db);
3653             if (lst == NULL) return uid;
3654             CdEntTLAddTerm(lst, locus, db, FLD_ACCN, TRUE, NULL);
3655             lsp = CdEntTLEval(lst);
3656             CdEntTLFree(lst);
3657             if (lsp != NULL)
3658             {
3659                 for (index = 0; index < lsp->num; index++)
3660 		{ /* choose the highest one */
3661 		    if (lsp->uids[index] > uid)
3662                         uid = lsp->uids[index];
3663 		}
3664                 LinkSetFree(lsp);
3665             }
3666             if ((! check_both) || (uid > 0))
3667                 done = TRUE;
3668             else
3669             {
3670                 if (db == TYP_AA)
3671                     db = TYP_NT;
3672                 else
3673                     db = TYP_AA;
3674                 check_both = FALSE;
3675             }
3676         }
3677     }
3678 
3679     return uid;
3680 }
3681 
3682 #ifdef Biostruc_supported
CdEntrezBiostrucGet(DocUid uid,Int4 mdlLvl,Int4 maxModels)3683 NLM_EXTERN BiostrucPtr CdEntrezBiostrucGet (DocUid uid, Int4 mdlLvl, Int4 maxModels)
3684 {
3685 	Biostruc *struc = NULL;
3686 	AsnIo *stream = NULL;
3687 
3688 	if (! BiostrucAvail ()) return NULL;
3689 	stream = cd3_CdDocAsnOpen(TYP_ST,uid);
3690 	if (stream != NULL)
3691 	{
3692 		struc = BiostrucAsnGet(stream,NULL, mdlLvl, maxModels);
3693 		cd3_CdDocAsnClose(stream);
3694 	}
3695 	return struc;
3696 }
3697 
3698 
3699 #ifdef OS_UNIX
3700 
CdEntrezBiostrucAnnotSetGet(DocUid uid)3701 NLM_EXTERN BiostrucAnnotSetPtr CdEntrezBiostrucAnnotSetGet (DocUid uid)
3702 {
3703 	BiostrucAnnotSetPtr retval = NULL;
3704 	AsnIoPtr  aip;
3705 	FILE *pipe;
3706 	char command[PATH_MAX+5];
3707 	char fname[PATH_MAX];
3708 
3709 	if (CdMountEntrezVolume(1,fname,PATH_MAX-32))
3710 	{
3711 		sprintf(strchr(fname,0), "/vast/%ld.bas.Z", (long) uid);
3712 		if (FileLength(fname) <= 0)
3713 		{
3714 			return NULL;
3715 		}
3716 		sprintf(command,"zcat %s", fname);
3717 		if ((pipe=popen(command,"r")) ==NULL)
3718 		{
3719 			ErrPostEx(SEV_ERROR,0,0,"Unable to open pipe [%s]",command);
3720 			return NULL;
3721 		}
3722 		aip = AsnIoNew(ASNIO_TEXT_IN, pipe, NULL, NULL, NULL);
3723 		if (aip != NULL)
3724 		{
3725 			retval = BiostrucAnnotSetAsnRead(aip, NULL);
3726 		}
3727 		AsnIoFree(aip,FALSE);
3728 		pclose(pipe);
3729 	}
3730 	return retval;
3731 }
3732 
3733 #else
3734 
CdEntrezBiostrucAnnotSetGet(DocUid uid)3735 NLM_EXTERN BiostrucAnnotSetPtr CdEntrezBiostrucAnnotSetGet (DocUid uid)
3736 {
3737 	return NULL;
3738 }
3739 
3740 #endif
3741 
3742 
CdEntrezBiostrucAnnotSetGetByFid(DocUid mmdbid,Int4 feature_id,Int4 feature_set_id)3743 NLM_EXTERN BiostrucAnnotSetPtr LIBCALL CdEntrezBiostrucAnnotSetGetByFid (DocUid mmdbid, Int4 feature_id, Int4 feature_set_id)
3744 {
3745     BiostrucAnnotSetPtr basp = CdEntrezBiostrucAnnotSetGet (mmdbid);
3746     BiostrucAnnotSetPtr basp2 = NULL;
3747     BiostrucFeatureSetPtr pbsfs = NULL;
3748     BiostrucFeaturePtr pbsf = NULL;
3749 
3750     if (basp == NULL)
3751 	return NULL;
3752 
3753     pbsfs = basp->features;
3754     while (pbsfs)
3755      {
3756        if (pbsfs->id == feature_set_id)
3757         {
3758           pbsf =  pbsfs->features;
3759           while(pbsf)
3760             {
3761               if (pbsf->id == feature_id)
3762                 {  /* found it */
3763                      basp2 = BiostrucAnnotSetNew();
3764      		     basp2->id = basp->id;
3765     		     basp2->descr = basp->descr;
3766     		     basp->descr = NULL;  /* unlink the descr from basp object */
3767     		     basp2->features = BiostrucFeatureSetNew();
3768                      basp2->features->id = pbsfs->id;
3769                      basp2->features->descr = pbsfs->descr;
3770                      pbsfs->descr = NULL; /* unlink the feature-set descr from basp  object */
3771                      basp2->features->features = BiostrucFeatureNew();
3772                      basp2->features->features->id = pbsf->id;
3773                      basp2->features->features->name = StringSave(pbsf->name);
3774 		     basp2->features->features->type = pbsf->type;
3775 		     basp2->features->features->Property_property = pbsf->Property_property;
3776 		     pbsf->Property_property = NULL; /* unlink the property from basp  object */
3777 		     basp2->features->features->Location_location = pbsf->Location_location;
3778 		     pbsf->Location_location = NULL; /* unlink the location from basp  object */
3779 		     BiostrucAnnotSetFree(basp);
3780                      return basp2;
3781                 }
3782                pbsf = pbsf->next;
3783             }
3784         }
3785        pbsfs = pbsfs->next;
3786      }
3787 
3788     BiostrucAnnotSetFree(basp);
3789     return basp2;
3790 }
3791 
3792 
CdEntrezBiostrucFeatIds(DocUid mmdbid,Int2 feature_type,Int4 feature_set_id)3793 NLM_EXTERN LinkSetPtr LIBCALL CdEntrezBiostrucFeatIds(DocUid mmdbid, Int2 feature_type, Int4 feature_set_id)
3794 {
3795     BiostrucAnnotSetPtr basp = CdEntrezBiostrucAnnotSetGet (mmdbid);
3796     LinkSetPtr retval = NULL;
3797     Int4Ptr ids = NULL;
3798     Int4Ptr scores = NULL;
3799     Int4 count = 0;
3800     BiostrucFeatureSetPtr pbsfs = NULL;
3801     BiostrucFeaturePtr pbsf = NULL;
3802     ChemGraphAlignmentPtr  pcga = NULL;
3803 
3804 
3805     if (basp == NULL)
3806 	return NULL;
3807 
3808     /* count the number of features of type feature_type */
3809     pbsfs = basp->features;
3810     while (pbsfs)
3811      {
3812        if (pbsfs->id == feature_set_id)
3813         {
3814           pbsf =  pbsfs->features;
3815           while(pbsf)
3816             {
3817               if (pbsf->type == feature_type)
3818                 {
3819 		   count++;
3820                 }
3821                pbsf = pbsf->next;
3822             }
3823         }
3824        pbsfs = pbsfs->next;
3825      }
3826 
3827      /* allocate vectors for ids, scores iff alignment data */
3828 
3829     ids = (Int4Ptr) MemNew(sizeof(Int4) * count);
3830     if (feature_type == 200) /* NCBI alignments */
3831       scores = (Int4Ptr) MemNew(sizeof(Int4) * count);
3832 
3833     count = 0;
3834     /* collect the feature-id's and scores  */
3835     pbsfs = basp->features;
3836     while (pbsfs)
3837      {
3838        if (pbsfs->id == feature_set_id)
3839         {
3840           pbsf =  pbsfs->features;
3841           while(pbsf)
3842             {
3843               if (pbsf->type == feature_type)
3844                 {
3845 
3846 		   ids[count] = pbsf->id;
3847 		   if (feature_type == 200) /* alignment type id */
3848 		    {
3849 		     pcga = (ChemGraphAlignmentPtr) pbsf->Location_location->data.ptrvalue;
3850 		     scores[count] = pcga->aligndata->vast_mlogp;  /* an Int4 already */
3851 		    }
3852 		   count++;
3853                 }
3854                pbsf = pbsf->next;
3855             }  /* while feature */
3856           retval = LinkSetNew();
3857           retval->num = count;
3858           retval->uids = ids;
3859           retval->weights = scores;
3860           MemFree(basp);
3861           return retval;
3862         }  /* if feature_set_id */
3863        pbsfs = pbsfs->next;
3864      }  /* while feature_set */
3865   MemFree(basp);
3866   return NULL;
3867 }
3868 #endif /* Biostruc_supported */
3869