1 /* accentr.h 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * RCS $Id: accentr.h,v 6.1 2000/01/12 20:17:13 vakatov Exp $ 27 * 28 * Author: Ostell 29 * 30 * Version Creation Date: 4/23/92 31 * 32 * File Description: 33 * entrez index access library for Entrez 34 * 35 * Modifications: 36 * -------------------------------------------------------------------------- 37 * Date Name Description of modification 38 * ------- ---------- ----------------------------------------------------- 39 * 06-29-94 Schuler Structure definitions and prototypes for EntrezInfo 40 * AsnRead/AsnWrite functions moved to objentr.h 41 * 08-16-94 Brylawski Prototypes for EntrezHierarchyGet, on-the-fly 42 * text neighboring, and enabling of medline features 43 * 09-08-94 Schuler Define for TYP_ST 44 * 10-06-94 Schuler Added #include <objmmdb1.h> 45 * 10-06-94 Schuler Added EntrezBiostrucGet() 46 * 11-20-94 Brylawski Moved EntrezNeighborText structure to here from 47 * objneten.h . 48 * 11-25-94 Schuler Moved CdTerm structure to here from cdromlib.h 49 * 04-11-95 Schuler Defined TYP_ES, TYP_CH; changed value of TYP_SEQ 50 * 05-16-94 Schuler Removed TYP_ES (decided not to use it) 51 * 05-16-94 Schuler Added RCS Log directive. Comments will henceforth 52 * be inserted automatically into the source at the 53 * time it is checked into the system. 54 * $Log: accentr.h,v $ 55 * Revision 6.1 2000/01/12 20:17:13 vakatov 56 * Get rid of the LIBCALL specifier at EntrezSeqEntryGet() 57 * 58 * Revision 5.10 1997/07/28 13:30:41 ostell 59 * Moved GetUniGeneIDForSeqId() to seqmgr.c 60 * 61 * Revision 5.9 1997/06/26 21:55:14 vakatov 62 * [PC] DLL'd "ncbicdr.lib", "ncbiacc.lib", "ncbinacc.lib" and "ncbicacc.lib" 63 * 64 * Revision 5.5 1996/09/23 21:21:33 shavirin 65 * Added definition for the function AccessionToFasta 66 * 67 * Revision 5.4 1996/08/14 19:56:09 epstein 68 * add APIs for fetching pieces of biostruc annots 69 * 70 * Revision 5.1 1996/08/14 15:15:05 brandon 71 * added date parameter to tleval functions 72 * 73 * Revision 4.8 1996/05/14 21:02:10 epstein 74 * adjust field descriptions to reflect current reality 75 * 76 * Revision 4.7 1996/03/29 18:52:51 epstein 77 * add support for structure alignments 78 * 79 * Revision 4.6 1996/03/11 21:51:03 ostell 80 * made GetUniGeneIDForSeqId() externally visible 81 * 82 * Revision 4.5 1995/10/11 13:39:29 epstein 83 * add EntrezIsInited() function 84 * 85 * Revision 4.4 1995/10/02 02:35:50 epstein 86 * add range-checking 87 * 88 * Revision 4.3 1995/08/28 23:20:47 kans 89 * Biostruc_supported restored 90 * 91 * Revision 4.2 1995/08/21 19:42:16 epstein 92 * add cluster analysis 93 * 94 * Revision 4.1 1995/08/11 20:25:53 epstein 95 * add max-models support for biostrucs 96 * 97 * Revision 2.44 1995/07/25 18:47:57 kans 98 * revert to no Biostruc_supported 99 * 100 * Revision 2.43 1995/07/05 19:15:46 ostell 101 * changed prototype for BiostrucAvail to be included only once 102 * 103 * Revision 2.42 1995/06/29 15:57:46 epstein 104 * added Complexity argument when fetching structures 105 * 106 * Revision 2.41 95/06/26 18:07:13 kans 107 * restored #define Biostruc_supported 108 * 109 * Revision 2.39 1995/06/23 16:02:43 kans 110 * support for accmmdbs.c stub to resolve symbols without MMDB link 111 * 112 * Revision 2.38 1995/06/22 21:21:22 kans 113 * #define Biostruc_supported 114 * ========================================================================== 115 */ 116 117 #ifndef _ACCENTR_ 118 #define _ACCENTR_ 119 120 #ifndef Biostruc_supported 121 #define Biostruc_supported 122 #endif 123 124 #ifndef _NCBI_Seqset_ 125 #include <objsset.h> 126 #endif 127 128 #ifndef _NCBI_Medline_ 129 #include <objmedli.h> 130 #endif 131 132 #ifndef _NCBI_Access_ 133 #include <objacces.h> 134 #endif 135 136 #include <objentr.h> 137 138 #ifdef _PMENTREZ_ 139 #include <pmaccs.h> 140 #endif 141 142 #ifdef Biostruc_supported 143 #include <mmdbapi1.h> 144 #include <mmdbapi2.h> 145 #include <mmdbapi3.h> 146 #endif 147 148 149 #undef NLM_EXTERN 150 #ifdef NLM_IMPORT 151 #define NLM_EXTERN NLM_IMPORT 152 #else 153 #define NLM_EXTERN extern 154 #endif 155 156 #ifdef __cplusplus 157 extern "C" { 158 #endif 159 160 161 #ifndef Biostruc_supported 162 typedef struct { int bogus; } Biostruc, *BiostrucPtr; 163 164 /***************************************************************************** 165 * 166 * BiostrucAvail is in the accmmdbs.c stub (returning FALSE) and in 167 * bios2ff7.c (returning TRUE). The stub file also has BiostrucAsnRead 168 * and BiostrucAsnWrite empty functions for proper linking without MMDB. 169 * 170 *****************************************************************************/ 171 172 NLM_EXTERN Boolean LIBCALL BiostrucAvail(void); 173 #endif 174 175 176 /* --- Type Definitions --- */ 177 178 typedef DocUid *DocUidPtr; 179 typedef Int2 DocType; 180 typedef Int2 DocField; 181 182 typedef struct CdTerm { 183 DocType type; /* class of term */ 184 DocField field; /* field of term */ 185 CharPtr term; /* the term */ 186 CharPtr highRange; /* the top end of a "range" */ 187 Int4 special_count , 188 total_count; 189 Int4 offset; /* offset into the postings file */ 190 Int2 page; 191 struct CdTerm *next; 192 } CdTerm, *CdTermPtr; 193 194 typedef struct FastaSeq { 195 Int4 gi; 196 CharPtr label; 197 CharPtr seq; 198 } FastaSeq, PNTR FastaSeqPtr; 199 200 201 /***************************************************************************** 202 * 203 * PreDefined Entrez types and fields 204 * 205 *****************************************************************************/ 206 207 /*--- doc type codes --- 208 * 209 * Although NTYPE is _NOT_ the correct number of document types, its 210 * value must not be changed or the code in cdromlib.c will break. 211 * Since the number of types is really determined at run-time, not 212 * compile-time, a more appropriate way to determine this value is to 213 * examine the field in the EntrezInfo structure. 214 */ 215 216 #define NTYPE 3 /* DO NOT CHANGE !! */ 217 218 #define TYP_ML 0 /* MEDLINE */ 219 #define TYP_AA 1 /* Amino Acid sequence */ 220 #define TYP_NT 2 /* Nucleotide sequence */ 221 #define TYP_ST 3 /* 3-D Structure (not supported by cdromlib.c) */ 222 #define TYP_CH 4 /* Chromosome (not supported by cdromlib.c) */ 223 224 #define TYP_SEQ 127 /* either aa or na used only for uid lookups */ 225 226 227 /*--- field codes ---*/ 228 #define NFLD 19 /* number of fields == 19 */ 229 #define FLD_WORD 0 /* Words */ 230 #define FLD_MESH 1 /* MeSH terms */ 231 #define FLD_KYWD 2 /* Keyword */ 232 #define FLD_AUTH 3 /* Authors */ 233 #define FLD_JOUR 4 /* Journal title */ 234 #define FLD_ORGN 5 /* Organism */ 235 #define FLD_ACCN 6 /* Accession number */ 236 #define FLD_GENE 7 /* Gene Symbol */ 237 #define FLD_PROT 8 /* Protein name */ 238 #define FLD_ECNO 9 /* E.C. number */ 239 #define FLD_ORGN_HIER 10 /* Organism hierarchy */ 240 #define FLD_DATE 11 /* date of entry*/ 241 #define FLD_FKEY 12 /* Feature key */ 242 #define FLD_PROP 13 /* Properties */ 243 #define FLD_SUBS 14 /* Substance */ 244 #define FLD_DATM 15 /* Modification Date */ 245 #define FLD_SLEN 16 /* Sequence Length */ 246 #define FLD_AFFL 17 /* Affilication */ 247 #define FLD_SQID 18 /* SeqId */ 248 249 #define FLD_MESH_HIER 10 /* Overload for MeSH */ 250 251 #define TERM__EXPLODE 1 252 253 254 typedef Boolean (*DocSumListCallBack)(DocSumPtr dsp, DocUid uid); 255 256 257 typedef struct EntrezHierarchyChild { 258 CharPtr name; 259 Boolean isLeafNode; 260 Int4 special; 261 Int4 total; 262 } EntrezHierarchyChild, *EntrezHierarchyChildPtr; 263 264 typedef struct EntrezHierarchy { 265 Int2 numInLineage; 266 Int2 numChildren; 267 DocType db; 268 DocField fld; 269 CharPtr term; 270 CharPtr PNTR lineage; 271 EntrezHierarchyChildPtr children; 272 CharPtr canonicalForm; 273 } EntrezHierarchy, *EntrezHierarchyPtr; 274 275 typedef struct EntrezNeighborText { 276 Int4 fld; 277 Int4 percent_terms_to_use; 278 Int4 max_neighbors; 279 Int4 min_score; 280 CharPtr normalText; 281 CharPtr specialText; 282 } EntrezNeighborText, PNTR EntrezNeighborTextPtr; 283 284 285 #define NULLSYM 0 /* for building booleans */ 286 #define LPAREN 1 287 #define RPAREN 2 288 #define ANDSYMBL 3 289 #define ORSYMBL 4 290 #define BUTNOTSYMBL 5 291 #define SPECIALTERM 6 292 #define TOTALTERM 7 293 294 295 /**** Initialize and close session *********************/ 296 297 /* Note: */ 298 /* The EntrezInitWithExtras() capability is provided for backwards */ 299 /* compatability until May, 1994. This name was introduced as a temporary */ 300 /* interface to EntrezInit(), to smooth the transition for EntrezInit()'s */ 301 /* use of three parameters; previously it took no parameters. */ 302 /* */ 303 /* - J. Epstein, 17 Feb 1994 */ 304 305 #define EntrezInitWithExtras(a,b,c) EntrezInit(a,b,c) 306 NLM_EXTERN Boolean LIBCALL EntrezInit(CharPtr appl_id, Boolean no_warnings, BoolPtr is_network); 307 NLM_EXTERN Boolean LIBCALL EntrezIsInited(void); 308 NLM_EXTERN void LIBCALL EntrezFini(void); 309 310 /**** Get names and numbers of fields and types ********/ 311 312 NLM_EXTERN EntrezInfoPtr LIBCALL EntrezGetInfo(void); 313 314 /**** Creates a term node from the uid parameter ********/ 315 NLM_EXTERN void LIBCALL EntrezCreateNamedUidList(CharPtr term, DocType type, DocField field, Int4 num, DocUidPtr uids); 316 NLM_EXTERN void LIBCALL EntrezCreateNamedUidListX(CharPtr term, DocType type, DocField field, ByteStorePtr bsp); 317 318 /**** Get detailed text information about the current status *****/ 319 NLM_EXTERN CharPtr LIBCALL EntrezDetailedInfo(void); 320 321 /**** Get Links and Neighbors **************************/ 322 323 NLM_EXTERN Int4 LIBCALL EntrezGetMaxLinks(void); 324 NLM_EXTERN Int4 LIBCALL EntrezSetUserMaxLinks(Int4 usermax); 325 NLM_EXTERN Int4 LIBCALL EntrezGetUserMaxLinks(void); 326 327 NLM_EXTERN LinkSetPtr LIBCALL EntrezUidLinks(DocType type, DocUid uid, DocType link_to_type); 328 NLM_EXTERN Int2 LIBCALL EntrezLinkUidList(LinkSetPtr PNTR result, DocType type, DocType link_to_type, Int2 numuid, Int4Ptr uids, Boolean mark_missing); 329 330 /**** Get Summaries ************************************/ 331 332 NLM_EXTERN DocSumPtr LIBCALL EntrezDocSum(DocType type, DocUid uid); 333 NLM_EXTERN DocSumPtr LIBCALL DocSumFree(DocSumPtr dsp); 334 NLM_EXTERN Int2 LIBCALL EntrezDocSumListGet(Int2 numuid, DocType type, DocUidPtr uids, DocSumListCallBack callback); 335 NLM_EXTERN Int2 LIBCALL EntrezMlSumListGet(DocSumPtr PNTR result, Int2 numuid, Int4Ptr uids); 336 NLM_EXTERN Int2 LIBCALL EntrezSeqSumListGet(DocSumPtr PNTR result, Int2 numuid, DocType type, Int4Ptr uids); 337 338 /**** Get Term Lists ***********************************/ 339 340 typedef Boolean (*TermListProc)(CharPtr term, Int4 special, Int4 total); 341 342 NLM_EXTERN Int2 LIBCALL EntrezTermListByPage(DocType type, DocField field, Int2 page, Int2 numpage, TermListProc proc); 343 NLM_EXTERN Int2 LIBCALL EntrezTermListByTerm(DocType type, DocField field, CharPtr term, Int2 numterms, TermListProc proc, Int2Ptr first_page); 344 345 NLM_EXTERN Boolean LIBCALL EntrezFindTerm(DocType type, DocField field, CharPtr term, Int4Ptr spcl, Int4Ptr totl); 346 347 /**** Look up terms with Boolean operations ************/ 348 349 NLM_EXTERN ValNodePtr LIBCALL EntrezTLNew(DocType type); 350 NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddTerm(ValNodePtr elst, CharPtr term, DocType type, DocField field, Boolean special); 351 NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddTermWithRange(ValNodePtr elst, CharPtr term, DocType type, DocField field, Boolean special, CharPtr highRange); 352 NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddLParen(ValNodePtr elst); 353 NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddRParen(ValNodePtr elst); 354 NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddAND(ValNodePtr elst); 355 NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddOR(ValNodePtr elst); 356 NLM_EXTERN ValNodePtr LIBCALL EntrezTLAddBUTNOT(ValNodePtr elst); 357 NLM_EXTERN ValNodePtr LIBCALL EntrezTLFree(ValNodePtr elst); 358 NLM_EXTERN LinkSetPtr LIBCALL EntrezPMTLEval(ValNodePtr elst, void * edc); 359 NLM_EXTERN Int4 LIBCALL EntrezPMTLEvalCount(ValNodePtr elst, void * edc); 360 NLM_EXTERN ByteStorePtr LIBCALL EntrezPMTLEvalX(ValNodePtr elst, void * edc); 361 NLM_EXTERN LinkSetPtr LIBCALL EntrezTLEval(ValNodePtr elst); 362 NLM_EXTERN Int4 LIBCALL EntrezTLEvalCount(ValNodePtr elst); 363 NLM_EXTERN ByteStorePtr LIBCALL EntrezTLEvalX(ValNodePtr elst); 364 365 366 /**** Look Up a Uid from a SeqId using the Terms list ****/ 367 368 NLM_EXTERN Int4 LIBCALL EntrezFindSeqId(SeqIdPtr sip); 369 370 /**** Look Up the source SeqId given a GI ****************/ 371 372 NLM_EXTERN SeqIdPtr LIBCALL EntrezSeqIdForGI(Int4 gi); 373 374 /**** Look Up the Fasta entry given a GI or Accession ****/ 375 376 NLM_EXTERN FastaSeqPtr LIBCALL AccessionToFasta(CharPtr string); 377 378 /**** Get Sequence or MEDLINE data **********************/ 379 380 NLM_EXTERN Int2 LIBCALL EntrezSeqEntryListGet(SeqEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Int2 retcode, Boolean mark_missing); 381 NLM_EXTERN SeqEntryPtr EntrezSeqEntryGet(Int4 uid, Int2 retcode); 382 383 NLM_EXTERN Int2 LIBCALL EntrezMedlineEntryListGet(MedlineEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Boolean mark_missing); 384 NLM_EXTERN MedlineEntryPtr LIBCALL EntrezMedlineEntryGet(Int4 uid); 385 386 #ifdef _PMENTREZ_ 387 Int2 LIBCALL EntrezPubmedEntryListGet(PubmedEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Boolean mark_missing); 388 PubmedEntryPtr LIBCALL EntrezPubmedEntryGet(Int4 uid); 389 #endif 390 391 NLM_EXTERN BiostrucPtr LIBCALL EntrezBiostrucGet(DocUid uid, Int4 mdlLvl, Int4 maxModels); 392 NLM_EXTERN BiostrucAnnotSetPtr LIBCALL EntrezBiostrucAnnotSetGet(DocUid uid); 393 NLM_EXTERN LinkSetPtr LIBCALL EntrezBiostrucFeatIds(DocUid mmdbid, Int2 feature_type, Int4 feature_set_id); 394 NLM_EXTERN BiostrucAnnotSetPtr LIBCALL EntrezBiostrucAnnotSetGetByFid(DocUid mmdbid, Int4 feature_id, Int4 feature_set_id); 395 396 397 NLM_EXTERN EntrezHierarchyPtr LIBCALL EntrezHierarchyGet(CharPtr term,DocType type, DocField fld); 398 NLM_EXTERN EntrezHierarchyPtr LIBCALL EntrezHierarchyFree(EntrezHierarchyPtr ehp); 399 400 /***** Neighbor text on-the-fly ***********/ 401 NLM_EXTERN EntrezNeighborTextPtr LIBCALL EntrezNeighborTextFree(EntrezNeighborTextPtr ); 402 NLM_EXTERN EntrezNeighborTextPtr LIBCALL EntrezNeighborTextNew(void); 403 NLM_EXTERN LinkSetPtr LIBCALL EntrezDoNeighborText(EntrezNeighborTextPtr entp); 404 NLM_EXTERN Boolean LIBCALL EntrezCanNeighborText(void); 405 406 407 NLM_EXTERN Boolean LIBCALL EntrezExpandedMedlineFeatures(void); 408 NLM_EXTERN Int4 LIBCALL EntrezClusterAnalysis(DocUidPtr uids, Int4 numuids, DocField fld, Int4 minCluster, Int4 maxCluster, Int4 maxTerms, CharPtr *terms, Int4Ptr termTotals); 409 410 /***** on-the-fly BLAST *****/ 411 NLM_EXTERN LinkSetPtr LIBCALL EntrezBlastBioseq(BioseqPtr bsp, DocType db, CharPtr program, CharPtr database, CharPtr options, Boolean usemonitor); 412 NLM_EXTERN Boolean LIBCALL EntrezCanBlast(void); 413 414 /***************************************************************************** 415 * 416 * The Following two functions allow access by BioseqFetch using the 417 * SeqMgr. The application should call EntrezBioseqFetchEnable() at the start 418 * of the application and EntrezBioseqFetchDisable() at the end; This 419 * will make EntrezBioseqFetch() the "remote" access procedure for the 420 * SeqMgr. EntrezInit() will only be called on the first fetch unless "now" 421 * is true; 422 * 423 * If you add your own fetch function after calling EntrezBioseqFetchEnable, 424 * it will be called BEFORE EntrezBioseqFetchEnable. Add yours after this 425 * call, and yours will be call AFTER entrez. 426 * 427 *****************************************************************************/ 428 NLM_EXTERN Boolean LIBCALL EntrezBioseqFetchEnable(CharPtr progname, Boolean now); 429 NLM_EXTERN void LIBCALL EntrezBioseqFetchDisable(void); 430 431 432 #ifdef __cplusplus 433 } /* extern "C" */ 434 #endif 435 436 #undef NLM_EXTERN 437 #ifdef NLM_EXPORT 438 #define NLM_EXTERN NLM_EXPORT 439 #else 440 #define NLM_EXTERN 441 #endif 442 443 #endif 444 445 446