1 /*   asn2ffg.h
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2ffg.h
27 *
28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov
29 *
30 * Version Creation Date:   7/15/95
31 *
32 * $Revision: 6.19 $
33 * $Revision: 6.19 $
34 *
35 * File Description:
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * Date     Name        Description of modification
40 * -------  ----------  -----------------------------------------------------
41 *
42 *
43 * ==========================================================================
44 */
45 
46 /*************************************
47 *
48  * $Log: asn2ffg.h,v $
49  * Revision 6.19  2006/07/13 17:06:38  bollin
50  * use Uint4 instead of Uint2 for itemID values
51  * removed unused variables
52  * resolved compiler warnings
53  *
54  * Revision 6.18  2001/12/21 20:21:16  cavanaug
55  * old_locus_fmt now controls generation of the *old* LOCUS line format
56  *
57  * Revision 6.17  2001/12/05 18:14:05  cavanaug
58  * Changes for new LOCUS line format
59  *
60  * Revision 6.16  2000/06/05 17:51:41  tatiana
61  * increase size of feature arrays to Int4
62  *
63  * Revision 6.15  2000/04/03 23:33:36  tatiana
64  * added showSeqLoc for GenBank region view
65  *
66  * Revision 6.14  2000/03/20 23:38:39  aleksey
67  * Finally submitted the changes which have been made by serge bazhin
68  * and been kept in my local directory.
69  *
70  * These changes allow user callback functions to be established
71  * in the 'Asn2ffJobPtr' structure; they are called within the
72  * 'SeqEntryToFlatAjp' function call.
73  * The new members are:
74  * user_data       - pointer to a user context for passing data
75  * ajp_count_index - user defined function
76  * ajp_print_data  - user defined function
77  * ajp_print_index - user defined function
78  *
79  * Revision 6.13  2000/02/09 19:34:38  kans
80  * added forgbrel flag to Asn2ffJobPtr, currently used to suppress PUBMED line, which was not formally announced in release notes
81  *
82  * Revision 6.12  1999/11/05 14:54:04  tatiana
83  * EMBL_PREFNUM increased to 5
84  *
85  * Revision 6.11  1999/10/27 20:57:07  tatiana
86  * bankit added to ajp
87  *
88  * Revision 6.10  1999/09/23 18:06:04  tatiana
89  * contig_view added to ajp
90  *
91  * Revision 6.9  1999/04/02 16:33:15  tatiana
92  * added LinkStr struct and ByteStorePtr to ajp
93  *
94  * Revision 6.8  1999/03/11 19:04:31  tatiana
95  * version added
96  *
97  * Revision 6.7  1998/11/10 15:12:59  bazhin
98  * Macro NUM_OF_ESTIMATES and static array of integers "line_estimate"
99  * moved out to "asn2ff1.c".
100  *
101  * Revision 6.6  1998/09/24 17:46:00  kans
102  * fixed GetDBXrefFromGene problem (TT)
103  *
104  * Revision 6.5  1998/07/14 17:59:06  kans
105  * added useSeqMgrIndexes to OrganizeFeatPtr and Asn2ffJobPtr
106  *
107  * Revision 6.4  1998/03/09 21:41:03  tatiana
108  * accession length increased to 60
109  *
110  * Revision 6.3  1998/02/12 15:48:26  tatiana
111  * EMBL_PREFNUM increased to 4
112  *
113  * Revision 6.2  1998/01/13 21:10:50  tatiana
114  * added Biosrclist to organizefeat struct
115  *
116  * Revision 6.0  1997/08/25 18:05:13  madden
117  * Revision changed to 6.0
118  *
119  * Revision 5.16  1997/07/23 18:35:51  tatiana
120  * SeqIdPtr id_print added to Asn2ffJob structure
121  *
122  * Revision 5.14  1997/04/25  19:26:03  tatiana
123  * #define EMBL_PREFNUM 1 added
124  *
125  * Revision 5.13  1997/03/13  15:42:47  tatiana
126  * *** empty log message ***
127  *
128  * Revision 5.12  1997/01/27  18:33:51  tatiana
129  * hup added to ajp
130  *
131  * Revision 5.11  1996/12/17  22:45:55  tatiana
132  * added Boolean feat_free to SortStruct
133  *
134  * Revision 5.8  1996/10/25  22:22:44  tatiana
135  * defline added to GBEntry
136  *
137  * Revision 5.7  1996/09/03  19:54:01  tatiana
138  * extra_loc added
139  *
140  * Revision 5.6  1996/08/05  13:54:56  tatiana
141  * orgname added to ajp struct
142  *
143  * Revision 5.4  1996/07/30  13:37:37  tatiana
144  * 'show_gene' added to asn2ffJob
145  *
146  * Revision 5.3  1996/07/23  22:32:50  tatiana
147  * added format to orf struct
148  *
149  * Revision 5.2  1996/07/02  18:06:20  tatiana
150  * *** empty log message ***
151  *
152  * Revision 5.2  1996/07/02  18:06:20  tatiana
153  * *** empty log message ***
154  *
155  * Revision 5.1  1996/06/11  15:24:59  tatiana
156  * add embl ni to GBEntry struct
157  *
158  * Revision 4.13  1996/05/16  20:56:46  tatiana
159  * source_info added to GBEntry structure
160  *
161  * Revision 4.12  1996/04/29  18:54:26  tatiana
162  * multiple comments
163  *
164  * Revision 4.11  1996/04/12  03:41:44  tatiana
165  * added Booleans to GBEntry
166  *
167  * Revision 4.10  1996/04/09  14:03:50  tatiana
168  * DescrStructPtr and comms added to GBEntry
169  *
170  * Revision 4.9  1996/03/20  00:00:20  tatiana
171  * add activity to GeneStruct
172  *
173  * Revision 4.8  1996/03/04  17:11:20  ostell
174  * added support for ignore_top features
175  *
176  * Revision 4.7  1996/02/28  04:53:06  ostell
177  * changes to support segmented master sequences
178  *
179  * Revision 4.6  1996/02/18  21:17:17  tatiana
180  * number of pubs, feats and seqblocks added to GBEntry structure
181  *
182  * Revision 4.5  1996/02/15  15:57:11  tatiana
183  * SortStruct changed for sorting within one entity
184  *
185  * Revision 4.4  1996/01/29  22:44:00  tatiana
186  * genome_view added to Asn2ffJobPtr
187  *
188  * Revision 4.2  1995/12/13  16:35:02  tatiana
189  * itemID etc. added to FFPrintArray structure
190  *
191  * Revision 1.1  1995/07/17  19:24:04  kans
192  * Initial revision
193  *
194 *
195 **************************************/
196 
197 #ifndef _ASN2FFG_
198 #define _ASN2FFG_
199 
200 #include <asn.h>
201 #include <objall.h>
202 #include <objpubd.h>
203 
204 #include <seqport.h>
205 #include <objsub.h>
206 #include <prtutil.h>
207 #include <gather.h>
208 
/*
 * NOTE(review): the consumer of LINKS is not visible in this header;
 * presumably it bounds the LinkStr chain -- confirm.
 */
#define LINKS 20

/*
 * EMBL accession prefix letters.  Per the original note, "A" is used for
 * patents and "F" for dbEST.
 */
#define EMBL_AC       "AFVXYZ"

/* EMBL two-letter prefix {XX}; see is_embl() in asn2ff4.c. */
#define EMBL_PREFNUM 5

/*
 * FF_* selector values.
 * NOTE(review): the field or argument that takes these values is not
 * visible in this header.
 */
#define FF_REGULAR 0
#define FF_TOP_COMPLETE 1
#define FF_TOP_CONTIG 2
216 
217 typedef struct _link_str {
218     CharPtr line;
219     struct _link_str PNTR next;
220 } LinkStr, PNTR LinkStrPtr;
221 
222 typedef struct genestruct {
223 	ValNodePtr gene;
224 	ValNodePtr product;
225 	ValNodePtr standard_name;
226 	CharPtr PNTR map;         /* only map[0] is used why we need PNTR? */
227 	ValNodePtr ECNum;
228 	ValNodePtr activity;
229 	Int2 map_size, map_index;   /* map_size is always 1 */
230 	Boolean pseudo;
231 	GeneRefPtr grp;
232 } GeneStruct, PNTR GeneStructPtr;
233 
234 typedef struct notestruct {
235 	CharPtr PNTR note;
236 	Uint1 PNTR note_alloc;
237 	CharPtr PNTR note_annot;
238 	Int2 	note_size, note_index;
239 } NoteStruct, PNTR NoteStructPtr;
240 
241 typedef struct sortstruct {
242 	BioseqPtr 	bsp;
243 	BioseqPtr 	seg_bsp;
244 	SeqFeatPtr 	sfp;		/* would be NULL if gather tempload == TRUE */
245 	SeqLocPtr 	slp;		/*for converted locations, free if not NULL!*/
246 	Uint2		entityID,
247 				itemtype;
248     Uint4       itemID;
249 	Int4		hash;
250 	Boolean 	dup;
251 	SeqLocPtr PNTR extra_loc;
252 	Int2 		extra_loc_cnt;
253 	Boolean 	feat_free;
254 	Boolean 	tempload;
255 	GeneStructPtr gsp;	/* information on Genes */
256 	NoteStructPtr nsp;	/* information on Notes. */
257 } SortStruct, PNTR SortStructPtr;
258 
259 typedef struct descrstruct {
260 	ValNodePtr vnp;
261 	Uint2	entityID,
262 			itemtype;
263     Uint4   itemID;
264 	struct descrstruct PNTR next;
265 } DescrStruct, PNTR DescrStructPtr;
266 
267 typedef struct comstruct {
268 	CharPtr string;
269 	Boolean gsdb_id;
270 	Uint2	entityID,
271 			itemtype;
272     Uint4   itemID;
273 	struct comstruct PNTR next;
274 } ComStruct, PNTR ComStructPtr;
275 
276 typedef struct organizeprot {
277 	SortStructPtr list;
278 	Int2 size;
279 } OrganizeProt, PNTR OrganizeProtPtr;
280 
281 typedef struct organizefeat {
282 	Boolean embl_feat;
283 	BioseqPtr bsp;
284 	BioseqPtr seg_bsp;
285 	Int4 sfpListsize;
286 	Int4 sortListsize;
287 	SortStructPtr List;	/* ptr's to "generic" features */
288 	Int2 sfpCommsize;	/* Number of comment features */
289 	Int2 sortCommsize;	/* Number of sorted comment features */
290 	SortStructPtr Commlist;
291 	Int4 sfpGenesize;	/* Number of gene features */
292 	Int4 sortGenesize;	/* Number of sorted gene features */
293 	SortStructPtr Genelist;
294 	Int4 sfpOrgsize;	/* Number of Organism features. */
295 	Int4 sortOrgsize;	/* Number of sorted Organism features. */
296 	SortStructPtr Orglist;
297 	Int4 sfpSitesize;	/* Number of ImpFeat's with key "Site-ref" */
298 	Int4 sortSitesize;	/* Number of sorted ImpFeat's with key "Site-ref" */
299 	SortStructPtr Siteslist;
300 	Int4 sfpSourcesize;	/* Number of ImpFeat's with key "source" */
301 	Int4 sortSourcesize;	/* Number of sorted ImpFeat's with key "source" */
302 	SortStructPtr Sourcelist;
303 	Int4 sfpXrefsize;	/* Number of Seq's that go out as xref's */
304 	SortStructPtr Xreflist;
305 	Uint2	oldID;	   /* is used to sort within entity */
306 	NoteStructPtr source_notes;	/* Note for source feature */
307 	Uint1 format;				/* needed for ProtRef convertion */
308 	Boolean show_gene;				/* needed temporarely for 'gene' feature */
309 	BioseqPtr lock_bsp;
310 	Boolean non_strict;
311 	Int2 biosrcsize;	/* Number of sorted ImpFeat's with key "source" */
312 	SortStructPtr Biosrclist;
313 	Boolean useSeqMgrIndexes;  /* new style indexing to eliminate nested gathers */
314 	Boolean showSeqLoc;  /* GenBank view for a region, skip truncated check */
315 } OrganizeFeat, PNTR OrganizeFeatPtr;
316 
317 /*****************************************************************************
318 *
319 *   GBEntry
320 *     structure with info for a single GenBank record
321 *
322 *****************************************************************************/
323 typedef struct gbentry {
324 	BioseqPtr bsp;         /* the Bioseq for this record */
325 	Uint2	entityID,
326 			itemtype;
327     Uint4   itemID;
328 	Char date[12];
329 	CharPtr create_date;
330 	CharPtr update_date;
331 	CharPtr embl_rel;
332 	Int2 embl_ver;
333 	Int4 gi;
334 	CharPtr ni;
335 	CharPtr base_cnt_line;
336 	Boolean xref_present;
337 	Char div[4];			/* division */
338 	Char locus[25];        /* locus */
339 	Char accession[60];    /* primary accession */
340 	Int2 num_seg;          /* segment number if segmented */
341 	Int4 length;           /* length of entry */
342 	SeqPortPtr spp;        /* seqport on entry */
343 	OrganizeFeatPtr feat;  /* temporary struct with features */
344 	ValNodePtr Pub;
345 	DescrStructPtr descr;		/* keeps entityID, itemID, itemtype for descr */
346 	DescrStructPtr source_info;	/* keeps org info if no source feature found */
347 	Int4 feat_num;				/* number of printed features */
348 	Int2 comm_num;				/* number of printed cooment blocks */
349 	ComStructPtr comm;		/* CharPtr in com.data.ptrvalue are comments */
350 	Boolean map;
351 	CharPtr defline;
352 	struct gbentry PNTR next;
353 	Char version[60];    /* accession.version */
354 } GBEntry, PNTR GBEntryPtr;
355 
356 /*****************************************************************************
357 *
358 *   Asn2ffWE
359 *     asn2ff working environment. Keeps top level information about current
360 *       unit under construction. Serves as head of chain of structs, one for
361 *       GenBank record finally produced.
362 *
363 *****************************************************************************/
364 typedef struct asn2ffwe {
365 						 /* Working environment for data object */
366 	SeqEntryPtr current_sep;  /* current SeqEntry  ???? Tatiana*/
367 	BioseqPtr current_bsp;    /* current Bioseq ???? Tatiana*/
368 	BioseqSetPtr current_bssp;/* current BioseqSet ???? Tatiana*/
369 	                        /* SeqSubmit Only */
370 	ValNodePtr cit_sub;       /* if a SeqSubmit, the Cit-sub for it */
371 	                        /* Segmented Set Only */
372 	Char base_name[25];       /* base LOCUS name if segmented set */
373 	Int2 total_seg;             /* number of segments in segmented set */
374 	BioseqPtr seg;            /* segmented Bioseq in segmented set */
375 	BioseqSetPtr parts;       /* parts set for segmented set */
376 	Boolean only_one;   /* only one segment of set being shown (current bsp) */
377 	GBEntryPtr gbp;         /* chain of data for each GB record */
378 
379 } Asn2ffWE, PNTR Asn2ffWEPtr;
380 
381 /*****************************************************************************
382 *
383 *   Asn2ffJob
384 *     top level job control structure to be filled in by caller
385 *     sets global options and I/O information
386 *     sets overall scope and target for formatting
387 *     points to function working environment (added by function)
388 *
389 *****************************************************************************/
390 typedef struct asn2ff_job {
391 	                      /* Set formatting options */
392 	Boolean show_gene,     /* show the gene feature */
393 			show_seq,     /* do not show the sequence */
394 		    show_gi,      /* do not show the GI id */
395 			error_msgs,   /* do not show feature validator messages */
396 			non_strict,   /* only strict gene/protein binding allowed */
397 			null_str,    /* error msg if printing NULL string */
398 			no_hold,      /* if TRUE, do not use ObjMgrSetHold around asn2ff_print */
399 			free_cache;   /* if TRUE, ObjMgrFreeCache() after asn2ff_print */
400 	Uint1	format,         /* from _FMT above */
401 			mode;           /* from _MODE above */
402 	FILE *  fp;             /* if not NULL, output goes to file */
403 	StdPrintOptionsPtr Spop; /* for templates */
404 
405 						 /* Specify the data to be formatted */
406 	Uint2	entityID,	    /* could be a data object */
407 			itemtype;
408 	Uint4   itemID;			   /* specified by ID or by pointer */
409 	Boolean only_one, 			/* show only one top-level bioseq */
410 		ignore_top;            /* do not show features from top-level bioseq */
411 	SeqSubmitPtr ssp;          /* alternative data object pointers */
412 	SeqEntryPtr sep;
413 	SeqLocPtr slp;      	/* or could be a location on a sequence */
414 	Boolean gb_style;		/* only complete features are shown e.g. join cds on the last segmented bioseq */
415 	Boolean genome_view;	/* not printing the sequence and features*/
416 	Boolean map_view;	/* not printing the sequence */
417 	Boolean hup;		/* for submissions */
418 /* taken directly from Biotable, used for printing and formatting */
419 	Int4 pap_index;
420 	Uint1 pap_last;
421 	Boolean pseudo;
422 	SeqFeatPtr sfp_out;
423 	Int2 number_of_cds;
424 	Boolean help;
425 
426  			 /** this section filled in by function **************/
427    Asn2ffWEPtr asn2ffwep;
428 	Boolean orgname;    /* new algorithm for /organism in source feature*/
429 	SeqIdPtr id_print;
430 	Boolean useSeqMgrIndexes;  /* new style indexing to eliminate nested gathers */
431 	Boolean show_version;
432 	ByteStorePtr byte_st;
433 	Boolean contig_view;	/* CONTIG line and features*/
434 	Boolean bankit; /* show Bankit comments*/
435 	Boolean forgbrel;
436 	Pointer user_data;
437 	Int4 (*ajp_print_data)(struct asn2ff_job *ajp, CharPtr str,
438                                Pointer user_data);
439 	Int4 (*ajp_print_index)(struct asn2ff_job *ajp, Pointer user_data);
440 	Int4 (*ajp_count_index)(struct asn2ff_job *ajp, Int4 num, Pointer user_data);
441 	Boolean old_locus_fmt;	/* Set to TRUE in order to generate the old LOCUS-line format,
442 				   pre-dating 12/21/2001 */
443 } Asn2ffJob, PNTR Asn2ffJobPtr;
444 
445 
446 
/*----------- Estimates for the number of lines returned for
each of the following------------------------------------------*/
/*
 * Per the revision 6.7 log, NUM_OF_ESTIMATES and the "line_estimate" array
 * were moved out to asn2ff1.c, so NUM_SEQ_LINES is the only estimate still
 * defined in this header.
 */

#define NUM_SEQ_LINES 10
451 
452 typedef void (* FFPapFct) PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
453 
454 typedef struct ffprintarray {
455 	FFPapFct fct;
456 	Asn2ffJobPtr ajp;
457 	GBEntryPtr gbp;
458 	Int4 index;
459 	Uint1 last;
460 	Uint1 printxx;
461 	Int2 estimate;
462 	DescrStructPtr descr;		/* keeps entityID, itemID, itemtype for descr */
463 } FFPrintArray, PNTR FFPrintArrayPtr;
464 /*****************************************************************************
465 *
466 *   Main asn2ff entry points
467 *
468 *****************************************************************************/
469 
470 Boolean Asn2ff PROTO((Asn2ffJobPtr ajp));
471 
472 #endif
473