1 /*   accentr.h
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * RCS $Id: accentr.h,v 6.1 2000/01/12 20:17:13 vakatov Exp $
27 *
28 * Author:  Ostell
29 *
30 * Version Creation Date:   4/23/92
31 *
32 * File Description:
33 *       entrez index access library for Entrez
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * Date     Name        Description of modification
38 * -------  ----------  -----------------------------------------------------
39 * 06-29-94 Schuler     Structure definitions and prototypes for EntrezInfo
40 *                      AsnRead/AsnWrite functions moved to objentr.h
41 * 08-16-94 Brylawski   Prototypes for EntrezHierarchyGet, on-the-fly
42 *                      text neighboring, and enabling of medline features
43 * 09-08-94 Schuler     Define for TYP_ST
44 * 10-06-94 Schuler     Added #include <objmmdb1.h>
45 * 10-06-94 Schuler     Added EntrezBiostrucGet()
46 * 11-20-94 Brylawski   Moved EntrezNeighborText structure to here from
47 *                      objneten.h .
48 * 11-25-94 Schuler     Moved CdTerm structure to here from cdromlib.h
49 * 04-11-95 Schuler     Defined TYP_ES, TYP_CH; changed value of TYP_SEQ
50 * 05-16-94 Schuler     Removed TYP_ES (decided not to use it)
51 * 05-16-94 Schuler     Added RCS Log directive.  Comments will henceforth
52 *                      be inserted automatically into the source at the
53 *                      time it is checked into the system.
54 * $Log: accentr.h,v $
55 * Revision 6.1  2000/01/12 20:17:13  vakatov
56 * Get rid of the LIBCALL specifier at EntrezSeqEntryGet()
57 *
58 * Revision 5.10  1997/07/28 13:30:41  ostell
59 * Moved GetUniGeneIDForSeqId() to seqmgr.c
60 *
61 * Revision 5.9  1997/06/26 21:55:14  vakatov
62 * [PC] DLL'd "ncbicdr.lib", "ncbiacc.lib", "ncbinacc.lib" and "ncbicacc.lib"
63 *
64 * Revision 5.5  1996/09/23  21:21:33  shavirin
65 * Added definition for the function AccessionToFasta
66 *
67 * Revision 5.4  1996/08/14  19:56:09  epstein
68 * add APIs for fetching pieces of biostruc annots
69 *
70 * Revision 5.1  1996/08/14  15:15:05  brandon
71 * added date parameter to tleval functions
72 *
73 * Revision 4.8  1996/05/14  21:02:10  epstein
74 * adjust field descriptions to reflect current reality
75 *
76 * Revision 4.7  1996/03/29  18:52:51  epstein
77 * add support for structure alignments
78 *
79 * Revision 4.6  1996/03/11  21:51:03  ostell
80 * made GetUniGeneIDForSeqId() externally visible
81 *
82 * Revision 4.5  1995/10/11  13:39:29  epstein
83 * add EntrezIsInited() function
84 *
85 * Revision 4.4  1995/10/02  02:35:50  epstein
86 * add range-checking
87 *
88 * Revision 4.3  1995/08/28  23:20:47  kans
89 * Biostruc_supported restored
90 *
91 * Revision 4.2  1995/08/21  19:42:16  epstein
92 * add cluster analysis
93 *
94 * Revision 4.1  1995/08/11  20:25:53  epstein
95 * add max-models support for biostrucs
96 *
97 * Revision 2.44  1995/07/25  18:47:57  kans
98 * revert to no Biostruc_supported
99 *
100 * Revision 2.43  1995/07/05  19:15:46  ostell
101 * changed prototype for BiostrucAvail to be included only once
102 *
103 * Revision 2.42  1995/06/29  15:57:46  epstein
104 * added Complexity argument when fetching structures
105 *
106 * Revision 2.41  95/06/26  18:07:13  kans
107 * restored #define Biostruc_supported
108 *
109 * Revision 2.39  1995/06/23  16:02:43  kans
110 * support for accmmdbs.c stub to resolve symbols without MMDB link
111 *
112 * Revision 2.38  1995/06/22  21:21:22  kans
113 * #define Biostruc_supported
114 * ==========================================================================
115 */
116 
117 #ifndef _ACCENTR_
118 #define _ACCENTR_
119 
/* Define Biostruc_supported unless the including code already defined it. */
#ifndef Biostruc_supported
#define Biostruc_supported
#endif
123 
124 #ifndef _NCBI_Seqset_
125 #include <objsset.h>
126 #endif
127 
128 #ifndef _NCBI_Medline_
129 #include <objmedli.h>
130 #endif
131 
132 #ifndef _NCBI_Access_
133 #include <objacces.h>
134 #endif
135 
136 #include <objentr.h>
137 
138 #ifdef _PMENTREZ_
139 #include <pmaccs.h>
140 #endif
141 
142 #ifdef Biostruc_supported
143 #include <mmdbapi1.h>
144 #include <mmdbapi2.h>
145 #include <mmdbapi3.h>
146 #endif
147 
148 
/*
 * Redefine NLM_EXTERN for the declarations that follow: it expands to
 * NLM_IMPORT when that macro is defined, and to plain `extern` otherwise.
 */
#undef NLM_EXTERN
#ifdef NLM_IMPORT
#define NLM_EXTERN NLM_IMPORT
#else
#define NLM_EXTERN extern
#endif
155 
156 #ifdef __cplusplus
157 extern "C" {
158 #endif
159 
160 
/*
 * Fallback declarations, compiled only when Biostruc_supported is NOT defined:
 * a placeholder Biostruc type plus the BiostrucAvail() prototype.
 * NOTE(review): this header defines Biostruc_supported near its top whenever
 * it is not already defined, so this branch looks unreachable as written.
 */
#ifndef Biostruc_supported
typedef struct { int bogus; } Biostruc, *BiostrucPtr;

/*****************************************************************************
*
*   BiostrucAvail is in the accmmdbs.c stub (returning FALSE) and in
*      bios2ff7.c (returning TRUE).  The stub file also has BiostrucAsnRead
*      and BiostrucAsnWrite empty functions for proper linking without MMDB.
*
*****************************************************************************/

NLM_EXTERN Boolean LIBCALL BiostrucAvail(void);
#endif
174 
175 
176 /* --- Type Definitions --- */
177 
/* DocUid itself is not declared in this header; it comes from an included one. */
typedef DocUid *DocUidPtr;   /* pointer to a DocUid (used below for uid arrays) */
typedef Int2   DocType;      /* document type code; see the TYP_* constants */
typedef Int2   DocField;     /* field code; see the FLD_* constants */
181 
182 typedef struct CdTerm {
183 	DocType type;                 /* class of term */
184 	DocField field;               /* field of term */
185 	CharPtr term;                 /* the term */
186 	CharPtr highRange;            /* the top end of a "range" */
187 	Int4 special_count ,
188 		total_count;
189 	Int4 offset;                  /* offset into the postings file */
190 	Int2 page;
191 	struct CdTerm *next;
192 } CdTerm, *CdTermPtr;
193 
194 typedef struct FastaSeq {
195   Int4 gi;
196   CharPtr label;
197   CharPtr seq;
198 } FastaSeq, PNTR FastaSeqPtr;
199 
200 
201 /*****************************************************************************
202 *
203 *   PreDefined Entrez types and fields
204 *
205 *****************************************************************************/
206 
207 /*--- doc type codes  ---
208  *
209  *	Although NTYPE is _NOT_ the correct number of document types, its
210  *	value must not be changed or the code in cdromlib.c will break.
211  *	Since the number of types is really determined at run-time, not
212  *	compile-time, a more appropriate way to determine this value is to
213  *	examine the field in the EntrezInfo structure.
214  */
215 
/* NOTE(review): 3 matches the count of TYP_ML, TYP_AA and TYP_NT, i.e. the
   type codes below that are not marked "not supported by cdromlib.c". */
#define NTYPE       3   /* DO NOT CHANGE !! */

#define TYP_ML      0   /*  MEDLINE */
#define TYP_AA      1   /*  Amino Acid sequence */
#define TYP_NT      2   /*  Nucleotide sequence */
#define TYP_ST      3   /*  3-D Structure (not supported by cdromlib.c) */
#define TYP_CH      4   /*  Chromosome (not supported by cdromlib.c) */

/* Pseudo-type: stands for "TYP_AA or TYP_NT" rather than a real document type. */
#define TYP_SEQ     127   /*  either aa or na used only for uid lookups */
225 
226 
/*--- field codes ---*/
#define NFLD        19  /* number of fields == 19 */
#define FLD_WORD    0   /*  Words           */
#define FLD_MESH    1   /*  MeSH terms      */
#define FLD_KYWD    2   /*  Keyword         */
#define FLD_AUTH    3   /*  Authors         */
#define FLD_JOUR    4   /*  Journal title   */
#define FLD_ORGN    5   /*  Organism        */
#define FLD_ACCN    6   /*  Accession number */
#define FLD_GENE    7   /*  Gene Symbol     */
#define FLD_PROT    8   /*  Protein name    */
#define FLD_ECNO    9   /*  E.C. number     */
#define FLD_ORGN_HIER 10 /* Organism hierarchy */
#define FLD_DATE   11   /* date of entry*/
#define FLD_FKEY   12   /* Feature key  */
#define FLD_PROP   13   /* Properties */
#define FLD_SUBS   14   /* Substance */
#define FLD_DATM   15   /* Modification Date */
#define FLD_SLEN   16   /* Sequence Length */
#define FLD_AFFL   17   /* Affiliation */
#define FLD_SQID   18   /* SeqId */

/* Deliberately shares the value 10 with FLD_ORGN_HIER. */
#define FLD_MESH_HIER 10 /* Overload for MeSH */

/* NOTE(review): flag value; its consumer is not visible in this header. */
#define TERM__EXPLODE 1
252 
253 
/*
 * Callback type accepted by EntrezDocSumListGet(): receives a document
 * summary pointer and a uid.
 * NOTE(review): the meaning of the Boolean return value and the ownership
 * of `dsp` are not visible in this header -- confirm in the implementation.
 */
typedef Boolean (*DocSumListCallBack)(DocSumPtr dsp, DocUid uid);
255 
256 
257 typedef struct EntrezHierarchyChild {
258     CharPtr name;
259     Boolean isLeafNode;
260     Int4 special;
261     Int4 total;
262 } EntrezHierarchyChild, *EntrezHierarchyChildPtr;
263 
264 typedef struct EntrezHierarchy {
265     Int2 numInLineage;
266     Int2 numChildren;
267     DocType db;
268     DocField fld;
269     CharPtr term;
270     CharPtr PNTR lineage;
271     EntrezHierarchyChildPtr children;
272     CharPtr canonicalForm;
273 } EntrezHierarchy, *EntrezHierarchyPtr;
274 
275 typedef struct EntrezNeighborText {
276    Int4   fld;
277    Int4   percent_terms_to_use;
278    Int4   max_neighbors;
279    Int4   min_score;
280    CharPtr   normalText;
281    CharPtr   specialText;
282 } EntrezNeighborText, PNTR EntrezNeighborTextPtr;
283 
284 
/*
 * Symbol codes used when building boolean expressions.
 * NOTE(review): presumably these are the node kinds stored in the ValNode
 * lists built by the EntrezTL* functions -- confirm in the implementation.
 */
#define NULLSYM     0     /* for building booleans */
#define LPAREN      1     /* left parenthesis */
#define RPAREN      2     /* right parenthesis */
#define ANDSYMBL    3     /* AND operator */
#define ORSYMBL     4     /* OR operator */
#define BUTNOTSYMBL 5     /* BUTNOT operator */
#define SPECIALTERM 6     /* term; presumably one added with special == TRUE (TODO confirm) */
#define TOTALTERM   7     /* term; presumably one added with special == FALSE (TODO confirm) */
293 
294 
295 /**** Initialize and close session *********************/
296 
297 /* Note:                                                                     */
298 /*   The EntrezInitWithExtras() capability is provided for backwards         */
/*   compatibility until May, 1994.  This name was introduced as a temporary */
300 /*   interface to EntrezInit(), to smooth the transition for EntrezInit()'s  */
301 /*   use of three parameters; previously it took no parameters.              */
302 /*                                                                           */
303 /*                                  - J. Epstein, 17 Feb 1994                */
304 
/* Legacy alias: expands to an EntrezInit() call with the same three arguments. */
#define EntrezInitWithExtras(a,b,c) EntrezInit(a,b,c)
/*
 * EntrezInit: initialize the session.
 *   appl_id     - application identifier string
 *   no_warnings - presumably suppresses warnings when TRUE (TODO confirm)
 *   is_network  - pointer to a Boolean; presumably filled in to report
 *                 whether network access is in use (TODO confirm)
 * Returns a Boolean; presumably TRUE on success.
 */
NLM_EXTERN Boolean LIBCALL EntrezInit(CharPtr appl_id, Boolean no_warnings, BoolPtr is_network);
/* Presumably reports whether EntrezInit() has already been called successfully. */
NLM_EXTERN Boolean LIBCALL EntrezIsInited(void);
/* Close the session. */
NLM_EXTERN void LIBCALL EntrezFini(void);
309 
/**** Get names and numbers of fields and types ********/

/* Returns an EntrezInfoPtr.  NOTE(review): ownership of the result is not
   stated here -- confirm before freeing it. */
NLM_EXTERN EntrezInfoPtr LIBCALL EntrezGetInfo(void);

/**** Creates a term node from the uid parameter ********/
/* Array form: `uids` points to the uids; `num` is presumably their count. */
NLM_EXTERN void LIBCALL EntrezCreateNamedUidList(CharPtr term, DocType type, DocField field, Int4 num, DocUidPtr uids);
/* ByteStore form: takes a ByteStorePtr in place of the (num, uids) pair. */
NLM_EXTERN void LIBCALL EntrezCreateNamedUidListX(CharPtr term, DocType type, DocField field, ByteStorePtr bsp);

/**** Get detailed text information about the current status *****/
/* NOTE(review): ownership of the returned string is not stated here. */
NLM_EXTERN CharPtr LIBCALL EntrezDetailedInfo(void);
320 
/**** Get Links and Neighbors **************************/

/* Link-count limits.  NOTE(review): the return value of
   EntrezSetUserMaxLinks() is presumably the limit actually applied --
   confirm in the implementation. */
NLM_EXTERN Int4 LIBCALL EntrezGetMaxLinks(void);
NLM_EXTERN Int4 LIBCALL EntrezSetUserMaxLinks(Int4 usermax);
NLM_EXTERN Int4 LIBCALL EntrezGetUserMaxLinks(void);

/* Links for a single uid of `type`, pointing to documents of `link_to_type`. */
NLM_EXTERN LinkSetPtr LIBCALL EntrezUidLinks(DocType type, DocUid uid, DocType link_to_type);
/* List form: `uids` holds the input uids (presumably `numuid` of them) and
   results are delivered through `result`.  The Int2 return value is
   presumably a count (TODO confirm). */
NLM_EXTERN Int2 LIBCALL EntrezLinkUidList(LinkSetPtr PNTR result, DocType type, DocType link_to_type, Int2 numuid, Int4Ptr uids, Boolean mark_missing);
329 
/**** Get Summaries ************************************/

/* Summary for one uid of the given document type. */
NLM_EXTERN DocSumPtr LIBCALL EntrezDocSum(DocType type, DocUid uid);
/* Frees a DocSum; returns a DocSumPtr (presumably NULL -- TODO confirm). */
NLM_EXTERN DocSumPtr LIBCALL DocSumFree(DocSumPtr dsp);
/* Callback form: takes a DocSumListCallBack instead of a result array. */
NLM_EXTERN Int2 LIBCALL EntrezDocSumListGet(Int2 numuid, DocType type, DocUidPtr uids, DocSumListCallBack callback);
/* Array forms: summaries are delivered through `result`.
   NOTE(review): `result` presumably needs room for `numuid` entries. */
NLM_EXTERN Int2 LIBCALL EntrezMlSumListGet(DocSumPtr PNTR result, Int2 numuid, Int4Ptr uids);
NLM_EXTERN Int2 LIBCALL EntrezSeqSumListGet(DocSumPtr PNTR result, Int2 numuid, DocType type, Int4Ptr uids);
337 
/**** Get Term Lists ***********************************/

/* Callback type taken by the EntrezTermListBy* functions: receives a term
   string and two Int4 values named `special` and `total`.
   NOTE(review): the meaning of the Boolean return value is not shown here. */
typedef Boolean (*TermListProc)(CharPtr term, Int4 special, Int4 total);

/* List terms of (type, field) by page; `proc` is the TermListProc callback. */
NLM_EXTERN Int2 LIBCALL EntrezTermListByPage(DocType type, DocField field, Int2 page, Int2 numpage, TermListProc proc);
/* List terms of (type, field) relative to `term`; `first_page` is a pointer
   to an Int2, presumably an output (TODO confirm). */
NLM_EXTERN Int2 LIBCALL EntrezTermListByTerm(DocType type, DocField field, CharPtr term, Int2 numterms, TermListProc proc, Int2Ptr first_page);

/* Look up one term; `spcl` and `totl` are pointers to Int4, presumably
   receiving the special and total counts (TODO confirm). */
NLM_EXTERN Boolean LIBCALL EntrezFindTerm(DocType type, DocField field, CharPtr term, Int4Ptr spcl, Int4Ptr totl);
346 
/**** Look up terms with Boolean operations ************/

/* Builders: every EntrezTLAdd* function takes the list `elst` and returns a
   ValNodePtr (presumably the updated list -- TODO confirm). */
NLM_EXTERN ValNodePtr LIBCALL EntrezTLNew(DocType type);
NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddTerm(ValNodePtr elst, CharPtr term, DocType type, DocField field, Boolean special);
NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddTermWithRange(ValNodePtr elst, CharPtr term, DocType type, DocField field, Boolean special, CharPtr highRange);
NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddLParen(ValNodePtr elst);
NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddRParen(ValNodePtr elst);
NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddAND(ValNodePtr elst);
NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddOR(ValNodePtr elst);
NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddBUTNOT(ValNodePtr elst);
NLM_EXTERN ValNodePtr LIBCALL EntrezTLFree(ValNodePtr elst);
/* Evaluators taking an extra untyped `edc` argument.
   NOTE(review): the type and meaning of `edc` are not visible in this header. */
NLM_EXTERN LinkSetPtr LIBCALL EntrezPMTLEval(ValNodePtr elst, void * edc);
NLM_EXTERN Int4 LIBCALL EntrezPMTLEvalCount(ValNodePtr elst, void * edc);
NLM_EXTERN ByteStorePtr LIBCALL EntrezPMTLEvalX(ValNodePtr elst, void * edc);
/* Evaluators: the three variants differ in result type (LinkSetPtr, Int4,
   ByteStorePtr). */
NLM_EXTERN LinkSetPtr LIBCALL EntrezTLEval(ValNodePtr elst);
NLM_EXTERN Int4 LIBCALL EntrezTLEvalCount(ValNodePtr elst);
NLM_EXTERN ByteStorePtr LIBCALL EntrezTLEvalX(ValNodePtr elst);
364 
365 
/**** Look Up a Uid from a SeqId using the Terms list ****/

/* Returns the uid as an Int4.  NOTE(review): the value returned when nothing
   is found is not stated here. */
NLM_EXTERN Int4 LIBCALL EntrezFindSeqId(SeqIdPtr sip);

/**** Look Up the source SeqId given a GI ****************/

/* NOTE(review): ownership of the returned SeqId is not stated here. */
NLM_EXTERN SeqIdPtr LIBCALL EntrezSeqIdForGI(Int4 gi);

/**** Look Up the Fasta entry given a GI or Accession ****/

/* `string` carries the GI or accession as text; returns a FastaSeqPtr. */
NLM_EXTERN FastaSeqPtr LIBCALL AccessionToFasta(CharPtr string);
377 
/**** Get Sequence or MEDLINE data **********************/

/* List form: entries are delivered through `result`. */
NLM_EXTERN Int2 LIBCALL EntrezSeqEntryListGet(SeqEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Int2 retcode, Boolean mark_missing);
/* Declared without LIBCALL on purpose: the revision 6.1 log entry at the top
   of this file records removing the specifier from this function. */
NLM_EXTERN SeqEntryPtr EntrezSeqEntryGet(Int4 uid, Int2 retcode);

/* List form: entries are delivered through `result`. */
NLM_EXTERN Int2 LIBCALL EntrezMedlineEntryListGet(MedlineEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Boolean mark_missing);
/* Single-uid form. */
NLM_EXTERN MedlineEntryPtr LIBCALL EntrezMedlineEntryGet(Int4 uid);
385 
/* PubMed variants, declared only when _PMENTREZ_ is defined.
   NOTE(review): unlike the other prototypes in this header, these two are
   declared without NLM_EXTERN -- confirm whether that is intentional. */
#ifdef _PMENTREZ_
Int2 LIBCALL EntrezPubmedEntryListGet(PubmedEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Boolean mark_missing);
PubmedEntryPtr LIBCALL EntrezPubmedEntryGet(Int4 uid);
#endif
390 
/* Biostruc access.  NOTE(review): accepted values for `mdlLvl`, `maxModels`
   and `feature_type` are not documented in this header. */
NLM_EXTERN BiostrucPtr LIBCALL EntrezBiostrucGet(DocUid uid, Int4 mdlLvl, Int4 maxModels);
NLM_EXTERN BiostrucAnnotSetPtr LIBCALL EntrezBiostrucAnnotSetGet(DocUid uid);
NLM_EXTERN LinkSetPtr LIBCALL EntrezBiostrucFeatIds(DocUid mmdbid, Int2 feature_type, Int4 feature_set_id);
NLM_EXTERN BiostrucAnnotSetPtr LIBCALL EntrezBiostrucAnnotSetGetByFid(DocUid mmdbid, Int4 feature_id, Int4 feature_set_id);


/* Hierarchy access for `term` in the given type and field.  EntrezHierarchyFree
   returns an EntrezHierarchyPtr (presumably NULL -- TODO confirm). */
NLM_EXTERN EntrezHierarchyPtr LIBCALL EntrezHierarchyGet(CharPtr term,DocType type, DocField fld);
NLM_EXTERN EntrezHierarchyPtr LIBCALL EntrezHierarchyFree(EntrezHierarchyPtr ehp);
399 
400 /***** Neighbor text on-the-fly ***********/
401 NLM_EXTERN EntrezNeighborTextPtr LIBCALL EntrezNeighborTextFree(EntrezNeighborTextPtr );
402 NLM_EXTERN EntrezNeighborTextPtr LIBCALL EntrezNeighborTextNew(void);
403 NLM_EXTERN LinkSetPtr LIBCALL EntrezDoNeighborText(EntrezNeighborTextPtr entp);
404 NLM_EXTERN Boolean LIBCALL EntrezCanNeighborText(void);
405 
406 
/* Presumably reports whether expanded MEDLINE features are enabled (TODO confirm). */
NLM_EXTERN Boolean LIBCALL EntrezExpandedMedlineFeatures(void);
/* Cluster analysis over `numuids` uids for field `fld`.
   NOTE(review): `terms` and `termTotals` are presumably caller-supplied
   output arrays sized by `maxTerms` -- confirm in the implementation. */
NLM_EXTERN Int4 LIBCALL EntrezClusterAnalysis(DocUidPtr uids, Int4 numuids, DocField fld, Int4 minCluster, Int4 maxCluster, Int4 maxTerms, CharPtr *terms, Int4Ptr termTotals);

/***** on-the-fly BLAST *****/
/* BLAST the given Bioseq; returns a LinkSetPtr. */
NLM_EXTERN LinkSetPtr LIBCALL EntrezBlastBioseq(BioseqPtr bsp, DocType db, CharPtr program, CharPtr database, CharPtr options, Boolean usemonitor);
/* Presumably reports whether on-the-fly BLAST is available. */
NLM_EXTERN Boolean LIBCALL EntrezCanBlast(void);
413 
414 /*****************************************************************************
415 *
416 *   The Following two functions allow access by BioseqFetch using the
417 *   SeqMgr.  The application should call EntrezBioseqFetchEnable() at the start
418 *   of the application and EntrezBioseqFetchDisable() at the end; This
419 *   will make EntrezBioseqFetch() the "remote" access procedure for the
420 *   SeqMgr. EntrezInit() will only be called on the first fetch unless "now"
421 *   is true;
422 *
423 *   If you add your own fetch function after calling EntrezBioseqFetchEnable,
424 *     it will be called BEFORE EntrezBioseqFetchEnable. Add yours after this
425 *     call, and yours will be call AFTER entrez.
426 *
427 *****************************************************************************/
428 NLM_EXTERN Boolean LIBCALL EntrezBioseqFetchEnable(CharPtr progname, Boolean now);
429 NLM_EXTERN void LIBCALL EntrezBioseqFetchDisable(void);
430 
431 
432 #ifdef __cplusplus
433 }  /* extern "C" */
434 #endif
435 
/*
 * Reset NLM_EXTERN for whatever follows this header: it expands to
 * NLM_EXPORT when that macro is defined, and to nothing otherwise.
 */
#undef NLM_EXTERN
#ifdef NLM_EXPORT
#define NLM_EXTERN NLM_EXPORT
#else
#define NLM_EXTERN
#endif
442 
443 #endif
444 
445 
446