1 /* asn2ffg.h 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information (NCBI) 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government do not place any restriction on its use or reproduction. 13 * We would, however, appreciate having the NCBI and the author cited in 14 * any work or product based on this material 15 * 16 * Although all reasonable efforts have been taken to ensure the accuracy 17 * and reliability of the software and data, the NLM and the U.S. 18 * Government do not and cannot warrant the performance or results that 19 * may be obtained by using this software or data. The NLM and the U.S. 20 * Government disclaim all warranties, express or implied, including 21 * warranties of performance, merchantability or fitness for any particular 22 * purpose. 23 * 24 * =========================================================================== 25 * 26 * File Name: asn2ffg.h 27 * 28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov 29 * 30 * Version Creation Date: 7/15/95 31 * 32 * $Revision: 6.19 $ 33 * $Revision: 6.19 $ 34 * 35 * File Description: 36 * 37 * Modifications: 38 * -------------------------------------------------------------------------- 39 * Date Name Description of modification 40 * ------- ---------- ----------------------------------------------------- 41 * 42 * 43 * ========================================================================== 44 */ 45 46 /************************************* 47 * 48 * $Log: asn2ffg.h,v $ 49 * Revision 6.19 2006/07/13 17:06:38 bollin 50 * use Uint4 instead of Uint2 for itemID values 51 * removed unused variables 52 * resolved compiler warnings 53 * 54 * Revision 6.18 2001/12/21 20:21:16 cavanaug 55 * old_locus_fmt now controls generated of *old* LOCUS line format 56 * 57 * Revision 6.17 2001/12/05 18:14:05 cavanaug 58 * Changes for new LOCUS line format 59 * 60 * Revision 6.16 2000/06/05 17:51:41 tatiana 61 * increase size of feature arrays to Int4 62 * 63 * Revision 6.15 2000/04/03 23:33:36 tatiana 64 * added showSeqLoc for GenBank region view 65 * 66 * Revision 6.14 2000/03/20 23:38:39 aleksey 67 * Finally submitted the changes which have been made by serge bazhin 68 * and been kept in my local directory. 69 * 70 * These changes allow to establish user callback functions 71 * in 'Asn2ffJobPtr' structure which are called within 72 * 'SeqEntryToFlatAjp' function call. 73 * The new members are: 74 * user_data - pointer to a user context for passing data 75 * ajp_count_index - user defined function 76 * ajp_print_data - user defined function 77 * ajp_print_index - user defined function 78 * 79 * Revision 6.13 2000/02/09 19:34:38 kans 80 * added forgbrel flag to Asn2ffJobPtr, currently used to suppress PUBMED line, which was not formally announced in release notes 81 * 82 * Revision 6.12 1999/11/05 14:54:04 tatiana 83 * EMBL_PREFNUM increased to 5 84 * 85 * Revision 6.11 1999/10/27 20:57:07 tatiana 86 * bankit added to ajp 87 * 88 * Revision 6.10 1999/09/23 18:06:04 tatiana 89 * contig_view added to ajp 90 * 91 * Revision 6.9 1999/04/02 16:33:15 tatiana 92 * added LinkStr struct and ByteStorePtr to ajp 93 * 94 * Revision 6.8 1999/03/11 19:04:31 tatiana 95 * version added 96 * 97 * Revision 6.7 1998/11/10 15:12:59 bazhin 98 * Macro NUM_OF_ESTIMATES and static array of integers "line_estimate" 99 * moved out to "asn2ff1.c". 100 * 101 * Revision 6.6 1998/09/24 17:46:00 kans 102 * fixed GetDBXrefFromGene problem (TT) 103 * 104 * Revision 6.5 1998/07/14 17:59:06 kans 105 * added useSeqMgrIndexes to OrganizeFeatPtr and Asn2ffJobPtr 106 * 107 * Revision 6.4 1998/03/09 21:41:03 tatiana 108 * accession length increased to 60 109 * 110 * Revision 6.3 1998/02/12 15:48:26 tatiana 111 * EMBL_PREFNUM increased to 4 112 * 113 * Revision 6.2 1998/01/13 21:10:50 tatiana 114 * added Biosrclist to organizefeat struct 115 * 116 * Revision 6.0 1997/08/25 18:05:13 madden 117 * Revision changed to 6.0 118 * 119 * Revision 5.16 1997/07/23 18:35:51 tatiana 120 * SeqIdPtr id_print added to Asn2ffJob structure 121 * 122 * Revision 5.14 1997/04/25 19:26:03 tatiana 123 * #define EMBL_PREFNUM 1 added 124 * 125 * Revision 5.13 1997/03/13 15:42:47 tatiana 126 * *** empty log message *** 127 * 128 * Revision 5.12 1997/01/27 18:33:51 tatiana 129 * hup added to ajp 130 * 131 * Revision 5.11 1996/12/17 22:45:55 tatiana 132 * added Boolean feat_free to SortStruct 133 * 134 * Revision 5.8 1996/10/25 22:22:44 tatiana 135 * defline added to GBEntry 136 * 137 * Revision 5.7 1996/09/03 19:54:01 tatiana 138 * extra_loc added 139 * 140 * Revision 5.6 1996/08/05 13:54:56 tatiana 141 * orgname added to ajp struct 142 * 143 * Revision 5.4 1996/07/30 13:37:37 tatiana 144 * 'show_gene' added to asn2ffJob 145 * 146 * Revision 5.3 1996/07/23 22:32:50 tatiana 147 * added format to orf struct 148 * 149 * Revision 5.2 1996/07/02 18:06:20 tatiana 150 * *** empty log message *** 151 * 152 * Revision 5.2 1996/07/02 18:06:20 tatiana 153 * *** empty log message *** 154 * 155 * Revision 5.1 1996/06/11 15:24:59 tatiana 156 * add embl ni to GBEntry struct 157 * 158 * Revision 4.13 1996/05/16 20:56:46 tatiana 159 * source_info added to GBEntry structure 160 * 161 * Revision 4.12 1996/04/29 18:54:26 tatiana 162 * multiple comments 163 * 164 * Revision 4.11 1996/04/12 03:41:44 tatiana 165 * added Booleans to GBEntry 166 * 167 * Revision 4.10 1996/04/09 14:03:50 tatiana 168 * DescrStructPtr and comms added to GBEntry 169 * 170 * Revision 4.9 1996/03/20 00:00:20 tatiana 171 * add activity to GeneStruct 172 * 173 * Revision 4.8 1996/03/04 17:11:20 ostell 174 * added support for ignore_top features 175 * 176 * Revision 4.7 1996/02/28 04:53:06 ostell 177 * changes to support segmented master seeuquences 178 * 179 * Revision 4.6 1996/02/18 21:17:17 tatiana 180 * number of pubs, feats and seqblocks added to GBEntry structure 181 * 182 * Revision 4.5 1996/02/15 15:57:11 tatiana 183 * SortStruct changed for sorting within one entity 184 * 185 * Revision 4.4 1996/01/29 22:44:00 tatiana 186 * genome_view added to Asn2ffJobPtr 187 * 188 * Revision 4.2 1995/12/13 16:35:02 tatiana 189 * itemID etc. added to FFPrintArray structure 190 * 191 * Revision 1.1 1995/07/17 19:24:04 kans 192 * Initial revision 193 * 194 * 195 **************************************/ 196 197 #ifndef _ASN2FFG_ 198 #define _ASN2FFG_ 199 200 #include <asn.h> 201 #include <objall.h> 202 #include <objpubd.h> 203 204 #include <seqport.h> 205 #include <objsub.h> 206 #include <prtutil.h> 207 #include <gather.h> 208 209 #define LINKS 20 210 #define EMBL_AC "AFVXYZ" /* patent is "A" */ /* dbEST = "F" */ 211 #define EMBL_PREFNUM 5 /* embl two-letter prefix {XX} see asn2ff4.c is_embl()*/ 212 213 #define FF_REGULAR 0 214 #define FF_TOP_COMPLETE 1 215 #define FF_TOP_CONTIG 2 216 217 typedef struct _link_str { 218 CharPtr line; 219 struct _link_str PNTR next; 220 } LinkStr, PNTR LinkStrPtr; 221 222 typedef struct genestruct { 223 ValNodePtr gene; 224 ValNodePtr product; 225 ValNodePtr standard_name; 226 CharPtr PNTR map; /* only map[0] is used why we need PNTR? */ 227 ValNodePtr ECNum; 228 ValNodePtr activity; 229 Int2 map_size, map_index; /* map_size is always 1 */ 230 Boolean pseudo; 231 GeneRefPtr grp; 232 } GeneStruct, PNTR GeneStructPtr; 233 234 typedef struct notestruct { 235 CharPtr PNTR note; 236 Uint1 PNTR note_alloc; 237 CharPtr PNTR note_annot; 238 Int2 note_size, note_index; 239 } NoteStruct, PNTR NoteStructPtr; 240 241 typedef struct sortstruct { 242 BioseqPtr bsp; 243 BioseqPtr seg_bsp; 244 SeqFeatPtr sfp; /* would be NULL if gather tempload == TRUE */ 245 SeqLocPtr slp; /*for converted locations, free if not NULL!*/ 246 Uint2 entityID, 247 itemtype; 248 Uint4 itemID; 249 Int4 hash; 250 Boolean dup; 251 SeqLocPtr PNTR extra_loc; 252 Int2 extra_loc_cnt; 253 Boolean feat_free; 254 Boolean tempload; 255 GeneStructPtr gsp; /* information on Genes */ 256 NoteStructPtr nsp; /* information on Notes. */ 257 } SortStruct, PNTR SortStructPtr; 258 259 typedef struct descrstruct { 260 ValNodePtr vnp; 261 Uint2 entityID, 262 itemtype; 263 Uint4 itemID; 264 struct descrstruct PNTR next; 265 } DescrStruct, PNTR DescrStructPtr; 266 267 typedef struct comstruct { 268 CharPtr string; 269 Boolean gsdb_id; 270 Uint2 entityID, 271 itemtype; 272 Uint4 itemID; 273 struct comstruct PNTR next; 274 } ComStruct, PNTR ComStructPtr; 275 276 typedef struct organizeprot { 277 SortStructPtr list; 278 Int2 size; 279 } OrganizeProt, PNTR OrganizeProtPtr; 280 281 typedef struct organizefeat { 282 Boolean embl_feat; 283 BioseqPtr bsp; 284 BioseqPtr seg_bsp; 285 Int4 sfpListsize; 286 Int4 sortListsize; 287 SortStructPtr List; /* ptr's to "generic" features */ 288 Int2 sfpCommsize; /* Number of comment features */ 289 Int2 sortCommsize; /* Number of sorted comment features */ 290 SortStructPtr Commlist; 291 Int4 sfpGenesize; /* Number of gene features */ 292 Int4 sortGenesize; /* Number of sorted gene features */ 293 SortStructPtr Genelist; 294 Int4 sfpOrgsize; /* Number of Organism features. */ 295 Int4 sortOrgsize; /* Number of sorted Organism features. */ 296 SortStructPtr Orglist; 297 Int4 sfpSitesize; /* Number of ImpFeat's with key "Site-ref" */ 298 Int4 sortSitesize; /* Number of sorted ImpFeat's with key "Site-ref" */ 299 SortStructPtr Siteslist; 300 Int4 sfpSourcesize; /* Number of ImpFeat's with key "source" */ 301 Int4 sortSourcesize; /* Number of sorted ImpFeat's with key "source" */ 302 SortStructPtr Sourcelist; 303 Int4 sfpXrefsize; /* Number of Seq's that go out as xref's */ 304 SortStructPtr Xreflist; 305 Uint2 oldID; /* is used to sort within entity */ 306 NoteStructPtr source_notes; /* Note for source feature */ 307 Uint1 format; /* needed for ProtRef convertion */ 308 Boolean show_gene; /* needed temporarely for 'gene' feature */ 309 BioseqPtr lock_bsp; 310 Boolean non_strict; 311 Int2 biosrcsize; /* Number of sorted ImpFeat's with key "source" */ 312 SortStructPtr Biosrclist; 313 Boolean useSeqMgrIndexes; /* new style indexing to eliminate nested gathers */ 314 Boolean showSeqLoc; /* GenBank view for a region, skip truncated check */ 315 } OrganizeFeat, PNTR OrganizeFeatPtr; 316 317 /***************************************************************************** 318 * 319 * GBEntry 320 * structure with info for a single GenBank record 321 * 322 *****************************************************************************/ 323 typedef struct gbentry { 324 BioseqPtr bsp; /* the Bioseq for this record */ 325 Uint2 entityID, 326 itemtype; 327 Uint4 itemID; 328 Char date[12]; 329 CharPtr create_date; 330 CharPtr update_date; 331 CharPtr embl_rel; 332 Int2 embl_ver; 333 Int4 gi; 334 CharPtr ni; 335 CharPtr base_cnt_line; 336 Boolean xref_present; 337 Char div[4]; /* division */ 338 Char locus[25]; /* locus */ 339 Char accession[60]; /* primary accession */ 340 Int2 num_seg; /* segment number if segmented */ 341 Int4 length; /* length of entry */ 342 SeqPortPtr spp; /* seqport on entry */ 343 OrganizeFeatPtr feat; /* temporary struct with features */ 344 ValNodePtr Pub; 345 DescrStructPtr descr; /* keeps entityID, itemID, itemtype for descr */ 346 DescrStructPtr source_info; /* keeps org info if no source feature found */ 347 Int4 feat_num; /* number of printed features */ 348 Int2 comm_num; /* number of printed cooment blocks */ 349 ComStructPtr comm; /* CharPtr in com.data.ptrvalue are comments */ 350 Boolean map; 351 CharPtr defline; 352 struct gbentry PNTR next; 353 Char version[60]; /* accession.version */ 354 } GBEntry, PNTR GBEntryPtr; 355 356 /***************************************************************************** 357 * 358 * Asn2ffWE 359 * asn2ff working environment. Keeps top level information about current 360 * unit under construction. Serves as head of chain of structs, one for 361 * GenBank record finally produced. 362 * 363 *****************************************************************************/ 364 typedef struct asn2ffwe { 365 /* Working environment for data object */ 366 SeqEntryPtr current_sep; /* current SeqEntry ???? Tatiana*/ 367 BioseqPtr current_bsp; /* current Bioseq ???? Tatiana*/ 368 BioseqSetPtr current_bssp;/* current BioseqSet ???? Tatiana*/ 369 /* SeqSubmit Only */ 370 ValNodePtr cit_sub; /* if a SeqSubmit, the Cit-sub for it */ 371 /* Segmented Set Only */ 372 Char base_name[25]; /* base LOCUS name if segmented set */ 373 Int2 total_seg; /* number of segments in segmented set */ 374 BioseqPtr seg; /* segmented Bioseq in segmented set */ 375 BioseqSetPtr parts; /* parts set for segmented set */ 376 Boolean only_one; /* only one segment of set being shown (current bsp) */ 377 GBEntryPtr gbp; /* chain of data for each GB record */ 378 379 } Asn2ffWE, PNTR Asn2ffWEPtr; 380 381 /***************************************************************************** 382 * 383 * Asn2ffJob 384 * top level job control structure to be filled in by caller 385 * sets global options and I/O information 386 * sets overall scope and target for formatting 387 * points to function working environment (added by function) 388 * 389 *****************************************************************************/ 390 typedef struct asn2ff_job { 391 /* Set formatting options */ 392 Boolean show_gene, /* show the gene feature */ 393 show_seq, /* do not show the sequence */ 394 show_gi, /* do not show the GI id */ 395 error_msgs, /* do not show feature validator messages */ 396 non_strict, /* only strict gene/protein binding allowed */ 397 null_str, /* error msg if printing NULL string */ 398 no_hold, /* if TRUE, do not use ObjMgrSetHold around asn2ff_print */ 399 free_cache; /* if TRUE, ObjMgrFreeCache() after asn2ff_print */ 400 Uint1 format, /* from _FMT above */ 401 mode; /* from _MODE above */ 402 FILE * fp; /* if not NULL, output goes to file */ 403 StdPrintOptionsPtr Spop; /* for templates */ 404 405 /* Specify the data to be formatted */ 406 Uint2 entityID, /* could be a data object */ 407 itemtype; 408 Uint4 itemID; /* specified by ID or by pointer */ 409 Boolean only_one, /* show only one top-level bioseq */ 410 ignore_top; /* do not show features from top-level bioseq */ 411 SeqSubmitPtr ssp; /* alternative data object pointers */ 412 SeqEntryPtr sep; 413 SeqLocPtr slp; /* or could be a location on a sequence */ 414 Boolean gb_style; /* only complete features are shown e.g. join cds on the last segmented bioseq */ 415 Boolean genome_view; /* not printing the sequence and features*/ 416 Boolean map_view; /* not printing the sequence */ 417 Boolean hup; /* for submissions */ 418 /* taken directly from Biotable, used for printing and formatting */ 419 Int4 pap_index; 420 Uint1 pap_last; 421 Boolean pseudo; 422 SeqFeatPtr sfp_out; 423 Int2 number_of_cds; 424 Boolean help; 425 426 /** this section filled in by function **************/ 427 Asn2ffWEPtr asn2ffwep; 428 Boolean orgname; /* new algorithm for /organism in source feature*/ 429 SeqIdPtr id_print; 430 Boolean useSeqMgrIndexes; /* new style indexing to eliminate nested gathers */ 431 Boolean show_version; 432 ByteStorePtr byte_st; 433 Boolean contig_view; /* CONTIG line and features*/ 434 Boolean bankit; /* show Bankit comments*/ 435 Boolean forgbrel; 436 Pointer user_data; 437 Int4 (*ajp_print_data)(struct asn2ff_job *ajp, CharPtr str, 438 Pointer user_data); 439 Int4 (*ajp_print_index)(struct asn2ff_job *ajp, Pointer user_data); 440 Int4 (*ajp_count_index)(struct asn2ff_job *ajp, Int4 num, Pointer user_data); 441 Boolean old_locus_fmt; /* Set to TRUE in order to generate the old LOCUS-line format, 442 pre-dating 12/21/2001 */ 443 } Asn2ffJob, PNTR Asn2ffJobPtr; 444 445 446 447 /*----------- Estimates for the number of lines returned for 448 each of the following------------------------------------------*/ 449 450 #define NUM_SEQ_LINES 10 451 452 typedef void (* FFPapFct) PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp)); 453 454 typedef struct ffprintarray { 455 FFPapFct fct; 456 Asn2ffJobPtr ajp; 457 GBEntryPtr gbp; 458 Int4 index; 459 Uint1 last; 460 Uint1 printxx; 461 Int2 estimate; 462 DescrStructPtr descr; /* keeps entityID, itemID, itemtype for descr */ 463 } FFPrintArray, PNTR FFPrintArrayPtr; 464 /***************************************************************************** 465 * 466 * Main asn2ff entry points 467 * 468 *****************************************************************************/ 469 470 Boolean Asn2ff PROTO((Asn2ffJobPtr ajp)); 471 472 #endif 473