1 /* asn2ff1.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2ff1.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov
29 *
30 * Version Creation Date: 7/15/95
31 *
32 * $Revision: 6.119 $
33 * $Revision: 6.119 $
34 *
35 * File Description: files that go with "asn2ff"
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41
42 #include <seqmgr.h>
43 #include <gather.h>
44 #include <asn2ffg.h>
45 #include <asn2ffp.h>
46 #include <a2ferrdf.h>
47 #include <parsegb.h>
48 #include <gbfeat.h>
49 #include <ffprint.h>
50 #include <tofasta.h>
51 #include <subutil.h>
52 #include <explore.h>
53 #include <objfdef.h>
54 #include <sqnutils.h>
55 #include <lsqfetch.h>
56
57 #ifdef ENABLE_ENTREZ
58 #include <accentr.h>
59 #endif
60
61 #define NUM_OF_ESTIMATES 20
62
63 static void PrintSeqRegion (Asn2ffJobPtr ajp, GBEntryPtr gbp);
64
65 static Int2 line_estimate[NUM_OF_ESTIMATES] = {
66 1, /* 0; Locus, Segment, Base Count, Origin, Feature Header lines */
67 1, /* 1; Definition line(s) */
68 1, /* 2; Accession line(s) */
69 1, /* 3; Keyword line(s) */
70 2, /* 4; Source lines */
71 6, /* 5; Reference (pub) lines */
72 10, /* 6; GBComAndFH */
73 11, /* 7; GBComAndXref */
74 6, /* 8; Features */
75 NUM_SEQ_LINES, /* 9; Sequence lines */
76 2, /* 10; EMBL Date lines */
77 2, /* 11; EMBL Organism lines */
78 4, /* 12; DBSOURCE field */
79 };
80
81 /* The following corresponds to NUM_SEQ_LINES lines each with 60
82 residues/basepairs */
83 #define SEQ_BLK_SIZE (60*NUM_SEQ_LINES)
84 #define A2F_OTHER ( (Uint1)0)
85 #define A2F_SOURCE_FEATURE ( (Uint1)1)
86 #define A2F_FEATURE ( (Uint1)2)
87 #define A2F_REFERENCE ( (Uint1)3)
88 #define A2F_FEATURE_NEW ( (Uint1)4)
89 #define A2F_COMMENT ( (Uint1)5)
90 #define A2F_SEQUENCE ( (Uint1)6)
91
92 static Boolean Template_load = FALSE;
93
94 /* ---------------Function Prototypes ---------------*/
95 Int4 asn2pr_setup PROTO ((Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp));
96 Int4 asn2hp_setup PROTO ((Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp));
97 Int4 asn2gb_setup PROTO ((Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp));
98 Int4 asn2embl_setup PROTO ((Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp));
99 Int4 asn2gp_setup PROTO ((Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp));
100 Int4 asn2ep_setup PROTO ((Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp));
101 Int4 asn2gr_setup PROTO ((Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp));
102 void PrintTaxonomy PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
103 void LoadPap PROTO ((FFPrintArrayPtr pap, FFPapFct fct, Asn2ffJobPtr ajp, Int4 index, Uint1 last, Uint1 printxx, Int2 estimate, Uint1 element_type, GBEntryPtr gbp));
104
105 void CheckSeqPort PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, Int4 start));
106 void PrintGenome PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
107 void GetMolInfo PROTO ((Asn2ffJobPtr ajp, CharPtr buffer, GBEntryPtr gbp));
108 CharPtr GetPDBSourceLine PROTO ((PdbBlockPtr pdb));
109 void PrintDateLines PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
110 void PrintXrefLine PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
111 Boolean CheckXrefLine PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
112 void set_flags PROTO ((Asn2ffJobPtr ajp));
113 void PrintSeqBlk PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
114
115
116 #define TOTAL_ESTKW 11
117 #define TOTAL_STSKW 5
118 #define TOTAL_GSSKW 2
119
120 static CharPtr EST_kw_array[TOTAL_ESTKW] = {
121 "EST", "EST PROTO((expressed sequence tag)", "expressed sequence tag",
122 "EST (expressed sequence tag)", "EST(expressed sequence tag)",
123 "partial cDNA sequence", "transcribed sequence fragment", "TSR",
124 "putatively transcribed partial sequence", "UK putts"
125 };
126
127 static CharPtr GSS_kw_array[TOTAL_GSSKW] = {
128 "GSS", "trapped exon"
129 };
130 static CharPtr STS_kw_array[TOTAL_STSKW] = {
131 "STS", "STS(sequence tagged site)", "STS (sequence tagged site)",
132 "STS sequence", "sequence tagged site"
133 };
134
/*
 * Look for an exact (StringCmp) match of 'text' among the first 'totalstr'
 * entries of 'array_string'.
 *
 * Returns the index of the first matching entry, or -1 when 'text' is NULL
 * or no entry matches.
 */
static Int2 MatchArrayString(CharPtr array_string[], Int2 totalstr, CharPtr text)
{
	Int2 pos = 0;

	if (text == NULL)
		return (-1);

	while (pos < totalstr) {
		if (StringCmp(array_string[pos], text) == 0)
			return (pos);
		pos++;
	}

	return (-1);

}	/* MatchArrayString */
146
147 /***************************************************************************
148 * Using the chain that was spliced on, we can reconize the splice
149 * and break it.
150 ****************************************************************************/
FlatSpliceOff(SeqEntryPtr the_set,ValNodePtr desc)151 void FlatSpliceOff (SeqEntryPtr the_set, ValNodePtr desc)
152 {
153 BioseqSetPtr bss;
154 BioseqPtr bs;
155 ValNodePtr PNTR desc_head=NULL;
156 ValNodePtr PNTR desc_target=NULL;
157 ValNodePtr scan;
158
159 if (IS_Bioseq(the_set) ){
160 bs = (BioseqPtr) the_set -> data.ptrvalue;
161 desc_head = & (bs -> descr);
162 }else{
163 bss = (BioseqSetPtr) the_set -> data.ptrvalue;
164 desc_head = & (bss -> descr);
165 }
166 if (* desc_head){
167 desc_target = desc_head;
168 for (scan = * desc_head; scan; scan = scan -> next){
169 if (scan == desc){
170 * desc_target = NULL;
171 break;
172 }
173 desc_target = & (scan -> next);
174 }
175 }
176
177 }
178
/*
 * Pass the descriptor chain of 'the_set' (a Bioseq or a Bioseq-set) together
 * with 'desc' to tie_next() and store the returned chain back as the new
 * descriptor list.
 */
void FlatSpliceOn (SeqEntryPtr the_set, ValNodePtr desc)
{
	ValNodePtr PNTR head;

	if (IS_Bioseq(the_set)) {
		head = &(((BioseqPtr) the_set->data.ptrvalue)->descr);
	} else {
		head = &(((BioseqSetPtr) the_set->data.ptrvalue)->descr);
	}

	*head = tie_next(*head, desc);
}
192
193 /**************************************************************************
194 * Get the ValNodePtr associated with a certain reference.
195 **************************************************************************/
196
GetPapRefPtr(Asn2ffJobPtr ajp,GBEntryPtr gbp,Int4 ext_index,Int4 pap_index,FFPrintArrayPtr pap)197 static void GetPapRefPtr (Asn2ffJobPtr ajp, GBEntryPtr gbp, Int4 ext_index, Int4 pap_index, FFPrintArrayPtr pap)
198
199 {
200 PubStructPtr psp=NULL;
201 ValNodePtr vnp;
202 Int4 i;
203 DescrStructPtr dsp;
204
205 for (vnp=gbp->Pub, i=0; vnp && i < ext_index; vnp=vnp->next, i++);
206 if (vnp == NULL) {
207 return;
208 }
209 psp = (PubStructPtr)vnp->data.ptrvalue;
210 if (psp == NULL) {
211 return;
212 }
213 if ((dsp = pap[pap_index].descr) == NULL) {
214 dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
215 pap[pap_index].descr = dsp;
216 }
217 dsp->entityID = psp->entityID;
218 dsp->itemID = psp->itemID;
219 dsp->itemtype = psp->itemtype;
220
221 return;
222 }
223
224 /**************************************************************************
225 * Get the Comment structure associated with a certain comment block.
226 **************************************************************************/
227
GetPapCommPtr(Asn2ffJobPtr ajp,GBEntryPtr gbp,Int4 ext_index,Int4 pap_index,FFPrintArrayPtr pap)228 static void GetPapCommPtr (Asn2ffJobPtr ajp, GBEntryPtr gbp, Int4 ext_index, Int4 pap_index, FFPrintArrayPtr pap)
229
230 {
231 ComStructPtr s=NULL;
232 Int4 i;
233 DescrStructPtr dsp;
234
235 for (s=gbp->comm, i=0; s && i < ext_index; s=s->next, i++);
236 if (s == NULL) {
237 return;
238 }
239 if ((dsp = pap[pap_index].descr) == NULL) {
240 dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
241 pap[pap_index].descr = dsp;
242 }
243 dsp->entityID = s->entityID;
244 dsp->itemID = s->itemID;
245 dsp->itemtype = s->itemtype;
246
247 return;
248 }
249
250 /**************************************************************************
251 * Find the SeqFeatPtr that is associated with this entry in the
252 * FFPrintArrayPtr.
253 *************************************************************************/
254
GetPapSeqFeatPtr(GBEntryPtr gbp,Int4 ext_index,Int4 pap_index,FFPrintArrayPtr pap)255 static void GetPapSeqFeatPtr (GBEntryPtr gbp, Int4 ext_index, Int4 pap_index, FFPrintArrayPtr pap)
256
257 {
258 Int4 feat_index, index, listsize;
259 OrganizeFeatPtr ofp;
260 DescrStructPtr dsp;
261
262 if (gbp == NULL || gbp->feat == NULL) {
263 return;
264 }
265 ofp = gbp->feat;
266 listsize=ofp->sfpListsize;
267 index = (Int4) ext_index;
268
269 feat_index = index - listsize;
270 if (feat_index < 0) {
271 if ((dsp = pap[pap_index].descr) == NULL) {
272 dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
273 pap[pap_index].descr = dsp;
274 }
275 dsp->entityID = ofp->List[index].entityID;
276 dsp->itemID = ofp->List[index].itemID;
277 dsp->itemtype = ofp->List[index].itemtype;
278 }
279 return;
280 }
281
asn2ff_print(Asn2ffJobPtr ajp)282 NLM_EXTERN Boolean asn2ff_print (Asn2ffJobPtr ajp)
283 {
284 AsnIoPtr aip;
285 CharPtr string;
286 FFPrintArrayPtr pap = NULL;
287 Int4 index, pap_size;
288 Boolean result = FALSE, hold = TRUE;
289 DescrStructPtr dsp = NULL;
290
291 if ((ajp->sep == NULL && ajp->slp == NULL) ||
292 (ajp->fp == NULL && ajp->ajp_print_data == NULL))
293 return FALSE;
294 if (ajp->no_hold)
295 hold = FALSE;
296 if (hold)
297 ObjMgrSetHold(); /* hold any autoloaded records in memory */
298
299 pap_size = asn2ff_setup (ajp, &pap);
300 if(ajp->ajp_count_index != NULL)
301 (*ajp->ajp_count_index)(ajp, pap_size, ajp->user_data);
302
303 if (ajp->ssp && ajp->format == EMBL_FMT && ajp->fp != NULL) {
304 aip = AsnIoNew(ASNIO_TEXT_OUT, ajp->fp, NULL, NULL, NULL);
305 SubmitBlockAsnWrite(ajp->ssp->sub, aip, NULL);
306 AsnIoFlush(aip);
307 AsnIoReset(aip);
308 }
309 if (pap_size > 0) {
310 if(ajp->fp != NULL)
311 head_www(ajp->fp, ajp->sep);
312 asn2ff_set_output (NULL, "\n");
313 for (index = 0; index < pap_size; index++) {
314 string = FFPrint (pap, index, pap_size);
315 if (string != NULL && *string != '\0') {
316 if(ajp->fp != NULL)
317 ff_print_string (ajp->fp, string, "\n");
318 if(ajp->ajp_print_data != NULL)
319 (*ajp->ajp_print_data)(ajp, string, ajp->user_data);
320 string = (char *)MemFree (string);
321 } else if (ajp->null_str) {
322 ErrPostStr(SEV_WARNING, ERR_PRINT_NullString,
323 "CAUTION: NULL String returned\n");
324 }
325 if (pap[index].descr) {
326 pap[index].descr = (DescrStructPtr)MemFree(pap[index].descr);
327 }
328 }
329
330 if(ajp->fp != NULL)
331 tail_www(ajp->fp);
332 result = TRUE;
333 MemFree(pap);
334 }
335 if(ajp->ajp_print_index != NULL)
336 (*ajp->ajp_print_index)(ajp, ajp->user_data);
337 free_buff();
338 asn2ff_cleanup (ajp);
339 if (hold)
340 ObjMgrClearHold();
341 if (ajp->free_cache)
342 ObjMgrFreeCache(0);
343
344 return result;
345 }
346
Asn2ffJobCreate(SeqEntryPtr sep,SeqSubmitPtr ssp,SeqLocPtr slp,FILE * fp,Uint1 format,Uint1 mode,StdPrintOptionsPtr Spop)347 Asn2ffJobPtr Asn2ffJobCreate(SeqEntryPtr sep, SeqSubmitPtr ssp, SeqLocPtr slp, FILE *fp, Uint1 format, Uint1 mode, StdPrintOptionsPtr Spop)
348 {
349 Asn2ffJobPtr ajp;
350 Uint2 entityID = 0;
351 Uint4 itemID = 0;
352 BioseqPtr bsp;
353
354 ajp = (Asn2ffJobPtr) MemNew(sizeof(Asn2ffJob));
355 ajp->show_seq = TRUE;
356 ajp->show_gi = TRUE;
357 ajp->error_msgs = TRUE;
358 ajp->null_str = FALSE;
359 ajp->non_strict = TRUE;
360 ajp->format = format;
361 ajp->mode = mode;
362 ajp->show_gene = TRUE;
363 ajp->gb_style = TRUE;
364 ajp->fp = fp;
365 ajp->Spop = Spop;
366 if (ssp != NULL) {
367 if ((entityID = ObjMgrGetEntityIDForPointer(ssp)) == 0) {
368 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
369 MemFree(ajp);
370 return NULL;
371 }
372 ajp->ssp = ssp;
373 ajp->sep = (SeqEntryPtr) ssp->data;
374 } else if (slp != NULL) {
375 if ((bsp = BioseqFindFromSeqLoc(slp)) != NULL) {
376 entityID = ObjMgrGetEntityIDForPointer (bsp);
377 } else {
378 fprintf(stderr, "Couldn't get Bioseq from location %s", SeqLocPrint(slp));
379 MemFree(ajp);
380 return NULL;
381 }
382 ajp->slp = slp;
383 ajp->sep = NULL;
384 } else {
385 if ((entityID = ObjMgrGetEntityIDForChoice(sep)) == 0) {
386 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
387 MemFree(ajp);
388 return NULL;
389 }
390 ajp->sep = sep;
391 }
392 ajp->entityID = entityID;
393
394 return ajp;
395 }
396
397 /**********************************************************/
asn2ff_print_to_mem(Asn2ffJobPtr ajp,LinkStrPtr lsp)398 NLM_EXTERN LinkStrPtr asn2ff_print_to_mem(Asn2ffJobPtr ajp, LinkStrPtr lsp)
399 {
400 CharPtr string;
401 FFPrintArrayPtr pap = NULL;
402 Int4 index, pap_size;
403 Boolean hold = TRUE;
404 DescrStructPtr dsp;
405 DescrStructPtr dspnext;
406
407 if(ajp->sep == NULL && ajp->slp == NULL)
408 return(lsp);
409
410 if(ajp->no_hold)
411 hold = FALSE;
412 else
413 ObjMgrSetHold(); /* hold any autoloaded records in memory */
414
415 pap_size = asn2ff_setup(ajp, &pap);
416
417 if(pap_size > 0) {
418 for(index = 0; index < pap_size; index++) {
419 asn2ff_set_output(NULL, "\n");
420 string = FFPrint(pap, index, pap_size);
421 if(string != NULL && *string != '\0') {
422 string = ff_print_string_mem(string);
423 lsp->next = (LinkStrPtr) MemNew(sizeof(LinkStr));
424 lsp = lsp->next;
425 lsp->next = NULL;
426 lsp->line = string;
427 string = NULL;
428 } else if(ajp->null_str != FALSE) {
429 ErrPostStr(SEV_WARNING, ERR_PRINT_NullString,
430 "CAUTION: NULL String returned\n");
431 }
432 if(pap[index].descr != NULL)
433 {
434 for(dsp = pap[index].descr; dsp != NULL; dsp = dspnext)
435 {
436 dspnext = dsp->next;
437 MemFree(dsp);
438 }
439 pap[index].descr = NULL;
440 }
441 }
442 MemFree(pap);
443 }
444 free_buff();
445 asn2ff_cleanup(ajp);
446 if(hold != FALSE)
447 ObjMgrClearHold();
448 if(ajp->free_cache)
449 ObjMgrFreeCache(0);
450
451 return(lsp);
452 }
is_contig(BioseqPtr bsp)453 static Boolean is_contig(BioseqPtr bsp)
454 {
455 SeqLocPtr slp=NULL;
456 DeltaSeqPtr dsp;
457 SeqEntryPtr oldscope;
458
459 oldscope=SeqEntrySetScope(GetTopSeqEntryForEntityID(ObjMgrGetEntityIDForPointer(bsp)));
460
461 if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta) {
462 if (bsp->seq_ext_type == 1) {
463 slp = (SeqLocPtr) bsp->seq_ext;
464 } else if (bsp->seq_ext_type == 4) {
465 for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp; dsp=dsp->next) {
466 if (dsp->choice == 1) { /* SeqLoc */
467 slp = (SeqLocPtr)(dsp->data.ptrvalue);
468 break;
469 }
470 }
471 }
472 }
473 if (slp && BioseqFindCore(SeqLocId(slp)) == NULL) {
474 SeqEntrySetScope(oldscope);
475 return TRUE;
476 }
477 SeqEntrySetScope(oldscope);
478 return FALSE;
479 }
480
481 /***********************************************************************
482 *
483 * SeqEntryToFlatEx is a stand-alone function works as SeqEntryToFlat
484 * takes SeqIdPtr and various types of the output
485 *
486 * successful, TRUE is returned; otherwise FALSE is returned.
487 *
488 * display =0,1 - generates CONTIG view only
489 * display = 2 - generates CONTIG and all parts regardless of seqid
490 *
491 **************************************************************************/
SeqEntryToEntrez(SeqEntryPtr sep,FILE * fp,SeqIdPtr seqid,Uint1 format,Uint1 display,Int4 from,Int4 to,Uint1 strand)492 NLM_EXTERN Boolean SeqEntryToEntrez (SeqEntryPtr sep, FILE *fp, SeqIdPtr seqid, Uint1 format, Uint1 display, Int4 from, Int4 to, Uint1 strand)
493 {
494 Boolean rsult=FALSE;
495 Asn2ffJobPtr ajp=NULL;
496 BioseqPtr bsp;
497 BioseqSetPtr bssp;
498 SeqLocPtr slp = NULL;
499 SeqLoc sl;
500 SeqInt seqint;
501 StdPrintOptionsPtr Spop = NULL;
502 SeqLocPtr slp_region=NULL;
503
504 if (sep == NULL) {
505 return FALSE;
506 }
507
508 if (seqid == NULL) { /*should never happen */
509 if (IS_Bioseq(sep)) {
510 if ((bsp = (BioseqPtr) sep->data.ptrvalue) == NULL) {
511 return rsult;
512 }
513 if (is_contig(bsp)) {
514 return (SeqEntryToFlatEx(sep, fp, format,
515 RELEASE_MODE, seqid, FF_TOP_CONTIG));
516 }
517 return (SeqEntryToFlatEx(sep, fp, format,
518 RELEASE_MODE, seqid, FF_REGULAR));
519 } else {
520 if ((bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
521 return rsult;
522 }
523 if (bssp->_class == 7 ||
524 (bssp->_class >= 13 && bssp->_class <= 16)) {
525 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
526 rsult = SeqEntryToFlat (sep, fp, GENBANK_FMT, RELEASE_MODE);
527 }
528 return rsult;
529 } else {
530 if (format == GENPEPT_FMT) {
531 return (SeqEntryToFlatEx(sep, fp, GENPEPT_FMT,
532 RELEASE_MODE, seqid, FF_TOP_COMPLETE));
533 } else if (display == 1) {
534 return (SeqEntryToFlatEx(sep, fp, GENBANK_FMT,
535 RELEASE_MODE, seqid, FF_TOP_CONTIG));
536 } else {
537 return (SeqEntryToFlatEx(sep, fp, GENBANK_FMT,
538 RELEASE_MODE, seqid, FF_REGULAR));
539 }
540 }
541 }
542 }
543 if ((bsp = BioseqFind(seqid)) == NULL) {
544 return FALSE;
545 }
546 if (from > 0 && to > 0) {
547 seqint.from = from-1;
548 seqint.to = to-1;
549 seqint.id=seqid;
550 seqint.strand=strand;
551 seqint.if_from=NULL;
552 seqint.if_to=NULL;
553 sl.choice= SEQLOC_INT;
554 sl.data.ptrvalue=&seqint;
555 sl.next=NULL;
556 slp_region=&sl;
557 }
558 if(format == GENPEPT_FMT) {
559 if(Template_load == FALSE) {
560 PrintTemplateSetLoad("asn2ff.prt");
561 Template_load = TRUE;
562 }
563 Spop = StdPrintOptionsNew(NULL);
564 if(Spop != NULL) {
565 Spop->newline = "~";
566 Spop->indent = "";
567 } else {
568 ErrPostStr(SEV_FATAL, 0, 0, "StdPrintOptionsNew failed");
569 return FALSE;
570 }
571 }
572
573 ajp = Asn2ffJobCreate(sep, NULL, slp_region, fp, format, RELEASE_MODE, Spop);
574 if (ajp == NULL) {
575 if(Spop) StdPrintOptionsFree(Spop);
576 return FALSE;
577 }
578 ajp->show_version = TRUE;
579 ajp->show_gi = FALSE;
580 ajp->gb_style = FALSE;
581 ajp->id_print = seqid;
582 ajp->sep = sep;
583 if(ajp->entityID > 0 && SeqMgrFeaturesAreIndexed (ajp->entityID) == 0) {
584 SeqMgrIndexFeatures (ajp->entityID, NULL);
585 }
586
587 if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta) {
588 /* always print CONTIG view first */
589 if (slp_region == NULL) {
590 if (bsp->repr == Seq_repr_delta && !is_contig(bsp)) {
591 ajp->genome_view = FALSE;
592 /* delta with internal parts cannot use CONTIG view */
593 } else {
594 ajp->genome_view = TRUE;
595 }
596 }
597 ajp->only_one = TRUE;
598 ajp->gb_style = FALSE;
599 ajp->sep = sep;
600 if (is_contig(bsp) && slp_region == NULL) {
601 ajp->contig_view = TRUE;
602 ajp->show_seq = FALSE;
603 }
604 rsult = SeqEntryToFlatAjp(ajp, sep, fp, format, RELEASE_MODE);
605 if (display == 2) {
606 ajp->only_one = FALSE;
607 ajp->contig_view = FALSE;
608 ajp->genome_view = FALSE;
609 ajp->gb_style = TRUE;
610 ajp->id_print = NULL;
611 ajp->only_one = FALSE;
612 ajp->show_seq = TRUE;
613 rsult = SeqEntryToFlatAjp(ajp, sep, fp, format, RELEASE_MODE);
614 }
615 } else {
616 rsult = SeqEntryToFlatAjp(ajp, sep, fp, format, RELEASE_MODE);
617 }
618 if(ajp) MemFree(ajp);
619 if(Spop) StdPrintOptionsFree(Spop);
620 return rsult;
621 }
622
623 /**********************************************************/
SeqEntryToLinkStr(Asn2ffJobPtr ajp,SeqEntryPtr sep,LinkStrPtr lsp,Uint1 format,Uint1 mode)624 static LinkStrPtr SeqEntryToLinkStr(Asn2ffJobPtr ajp, SeqEntryPtr sep,
625 LinkStrPtr lsp, Uint1 format, Uint1 mode)
626 {
627 StdPrintOptionsPtr Spop = NULL;
628 BioseqSetPtr bssp;
629 Int2 tofree;
630
631 if(sep == NULL)
632 return(lsp);
633
634 if(format == GENPEPT_FMT)
635 {
636 if(Template_load == FALSE)
637 {
638 PrintTemplateSetLoad("asn2ff.prt");
639 Template_load = TRUE;
640 }
641 Spop = StdPrintOptionsNew(NULL);
642 if(Spop != NULL)
643 {
644 Spop->newline = "~";
645 Spop->indent = "";
646 }
647 else
648 {
649 ErrPostStr(SEV_FATAL, 0, 0, "StdPrintOptionsNew failed");
650 return(lsp);
651 }
652 }
653 if(ajp == NULL)
654 {
655 ajp = Asn2ffJobCreate(sep, NULL, NULL, NULL, format, mode, Spop);
656 tofree = 1;
657 }
658 else
659 tofree = 0;
660
661 if(IS_Bioseq_set(sep) != 0 && ajp->id_print == NULL)
662 {
663 bssp = (BioseqSetPtr) sep->data.ptrvalue;
664 if(bssp != NULL && (bssp->_class == 7 ||
665 (bssp->_class >= 13 && bssp->_class <= 16)))
666 {
667 for(sep = bssp->seq_set; sep != NULL; sep = sep->next)
668 {
669 lsp = SeqEntryToLinkStr(ajp, sep, lsp, format, mode);
670 }
671 return(lsp);
672 }
673 }
674 if(ajp == NULL)
675 return(lsp);
676
677 ajp->sep = sep;
678
679 lsp = asn2ff_print_to_mem(ajp, lsp);
680
681 StdPrintOptionsFree(ajp->Spop);
682 if(ajp != NULL && tofree != 0)
683 MemFree(ajp);
684
685 return(lsp);
686 }
687 /**********************************************************/
SeqEntryToStrArray(SeqEntryPtr sep,Uint1 format,Uint1 mode)688 NLM_EXTERN CharPtr PNTR SeqEntryToStrArray(SeqEntryPtr sep, Uint1 format,
689 Uint1 mode)
690 {
691 LinkStrPtr lsp;
692 LinkStrPtr tlsp;
693 CharPtr PNTR res;
694 CharPtr PNTR tres;
695 Int4 num;
696
697 lsp = (LinkStrPtr) MemNew(sizeof(LinkStr));
698 lsp->next = NULL;
699 lsp->line = NULL;
700 SeqEntryToLinkStr(NULL, sep, lsp, format, mode);
701 tlsp = lsp;
702 lsp = lsp->next;
703 MemFree(tlsp);
704
705 for(tlsp = lsp, num = 1; tlsp != NULL; tlsp = tlsp->next, num++)
706 continue;
707
708 if(num == 1)
709 return(NULL);
710
711 res = (CharPtr PNTR) MemNew(sizeof(CharPtr) * num);
712 for(tres = res; lsp != NULL; lsp = tlsp, tres++)
713 {
714 tlsp = lsp->next;
715 *tres = lsp->line;
716 MemFree(lsp);
717 }
718 *tres = NULL;
719 return(res);
720 }
721
722 /**********************************************************/
AjpToLinkStr(Asn2ffJobPtr ajp,SeqEntryPtr sep,LinkStrPtr lsp)723 static LinkStrPtr AjpToLinkStr(Asn2ffJobPtr ajp, SeqEntryPtr sep,
724 LinkStrPtr lsp)
725 {
726 BioseqSetPtr bssp;
727
728 if(sep == NULL)
729 return(lsp);
730
731 if(IS_Bioseq_set(sep) != 0 && ajp->id_print == NULL)
732 {
733 bssp = (BioseqSetPtr) sep->data.ptrvalue;
734 if(bssp != NULL && (bssp->_class == 7 ||
735 (bssp->_class >= 13 && bssp->_class <= 16)))
736 {
737 for(sep = bssp->seq_set; sep != NULL; sep = sep->next)
738 {
739 lsp = AjpToLinkStr(ajp, sep, lsp);
740 }
741 return(lsp);
742 }
743 }
744 if(ajp == NULL)
745 return(lsp);
746
747 ajp->sep = sep;
748
749 lsp = asn2ff_print_to_mem(ajp, lsp);
750
751 return(lsp);
752 }
753
754 /***********************************************************/
AjpToStrArray(Asn2ffJobPtr ajp)755 NLM_EXTERN CharPtr PNTR AjpToStrArray(Asn2ffJobPtr ajp)
756 {
757 StdPrintOptionsPtr Spop = NULL;
758 LinkStrPtr lsp;
759 LinkStrPtr tlsp;
760 CharPtr PNTR res;
761 CharPtr PNTR tres;
762 Int4 num;
763
764 if(ajp == NULL || ajp->sep == NULL)
765 return(NULL);
766
767 if(ajp->format == GENPEPT_FMT)
768 {
769 if(Template_load == FALSE)
770 {
771 PrintTemplateSetLoad("asn2ff.prt");
772 Template_load = TRUE;
773 }
774 Spop = StdPrintOptionsNew(NULL);
775 if(Spop == NULL)
776 {
777 ErrPostStr(SEV_FATAL, 0, 0, "StdPrintOptionsNew failed");
778 return(NULL);
779 }
780 Spop->newline = "~";
781 Spop->indent = "";
782 ajp->Spop = Spop;
783 }
784
785 lsp = (LinkStrPtr) MemNew(sizeof(LinkStr));
786 lsp->next = NULL;
787 lsp->line = NULL;
788 AjpToLinkStr(ajp, ajp->sep, lsp);
789 tlsp = lsp;
790 lsp = lsp->next;
791 MemFree(tlsp);
792
793 if(ajp->Spop != NULL)
794 StdPrintOptionsFree(ajp->Spop);
795
796 for(tlsp = lsp, num = 1; tlsp != NULL; tlsp = tlsp->next, num++)
797 continue;
798
799 if(num == 1)
800 return(NULL);
801
802 res = (CharPtr PNTR) MemNew(sizeof(CharPtr) * num);
803 for(tres = res; lsp != NULL; lsp = tlsp, tres++)
804 {
805 tlsp = lsp->next;
806 *tres = lsp->line;
807 MemFree(lsp);
808 }
809 *tres = NULL;
810 return(res);
811 }
812
813 /**********************************************************
814 * This function allocates memory for the linked list
815 ***********************************************************/
SeqEntryToLinkStrEx(Asn2ffJobPtr ajp,SeqEntryPtr sep,LinkStrPtr lsp,Uint1 format)816 static LinkStrPtr SeqEntryToLinkStrEx(Asn2ffJobPtr ajp, SeqEntryPtr sep, LinkStrPtr lsp, Uint1 format)
817 {
818 BioseqSetPtr bssp;
819
820 if(sep == NULL || ajp == NULL)
821 return(NULL);
822
823 if(IS_Bioseq_set(sep) != 0) {
824 bssp = (BioseqSetPtr) sep->data.ptrvalue;
825 if(bssp != NULL && (bssp->_class == 7 ||
826 (bssp->_class >= 13 && bssp->_class <= 16))) {
827 for(sep = bssp->seq_set; sep != NULL; sep = sep->next) {
828 lsp = SeqEntryToLinkStrEx(ajp, sep, lsp, format);
829 }
830 return(lsp);
831 }
832 }
833
834
835 lsp = asn2ff_print_to_mem(ajp, lsp);
836 return(lsp);
837 }
838
839 /**********************************************************/
SeqEntryToStrArrayEx(SeqEntryPtr sep,Uint1 format,Int4 gi,Boolean is_html)840 NLM_EXTERN LinkStrPtr SeqEntryToStrArrayEx(SeqEntryPtr sep, Uint1 format,
841 Int4 gi, Boolean is_html)
842 {
843 StdPrintOptionsPtr Spop = NULL;
844 LinkStrPtr lsp;
845 LinkStrPtr tlsp;
846 Asn2ffJobPtr ajp;
847 ValNodePtr v;
848 BioseqPtr bsp;
849 SeqIdPtr sip;
850 TextSeqIdPtr tsip;
851
852
853 if (is_html) {
854 init_www();
855 }
856 if(format == GENPEPT_FMT) {
857 if(Template_load == FALSE) {
858 PrintTemplateSetLoad("asn2ff.prt");
859 Template_load = TRUE;
860 }
861 Spop = StdPrintOptionsNew(NULL);
862 if(Spop != NULL) {
863 Spop->newline = "~";
864 Spop->indent = "";
865 } else {
866 ErrPostStr(SEV_FATAL, 0, 0, "StdPrintOptionsNew failed");
867 return NULL;
868 }
869 }
870 ajp = Asn2ffJobCreate(sep, NULL, NULL, NULL, format, RELEASE_MODE, Spop);
871 /* ajp->entityID = ObjMgrGetEntityIDForPointer (ajp->slp); */ /* entityID already set - slp is NULL */
872 if (SeqMgrFeaturesAreIndexed (ajp->entityID) == 0) {
873 SeqMgrIndexFeatures (ajp->entityID, NULL);
874 }
875 ajp->useSeqMgrIndexes = TRUE;
876 ajp->contig_view = FALSE;
877 if (gi > 0) {
878 ajp->show_version = TRUE;
879 v = ValNodeNew(NULL);
880 v->choice = SEQID_GI;
881 v->data.intvalue = gi;
882 ajp->id_print = v;
883 if (v != NULL) {
884 ajp->gb_style = FALSE;
885 if ((bsp = BioseqFind(v)) == NULL) {
886 ErrPostEx(SEV_FATAL, 0, 0, "BioseqFind failed for %ld", gi);
887 return NULL;
888 }
889 if (bsp->repr == Seq_repr_seg) {
890 ajp->gb_style = TRUE;
891 }
892 for (sip=bsp->id; sip; sip=sip->next) {
893 if (sip->choice == SEQID_OTHER) {
894 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
895 if (StringNCmp(tsip->accession, "NT_", 3) == 0) {
896 ajp->contig_view = TRUE;
897 ajp->genome_view = TRUE;
898 break;
899 }
900 }
901 }
902 if (bsp->repr == Seq_repr_seg && ajp->contig_view == FALSE) {
903 ajp->id_print = NULL;
904 }
905 if (bsp->repr != Seq_repr_seg) {
906 sep = SeqMgrGetSeqEntryForData((Pointer)bsp);
907 }
908 }
909 }
910 lsp = (LinkStrPtr) MemNew(sizeof(LinkStr));
911 lsp->next = NULL;
912 lsp->line = NULL;
913 SeqEntryToLinkStr(ajp, sep, lsp, format, RELEASE_MODE);
914 tlsp = lsp;
915 lsp = lsp->next;
916 MemFree(tlsp);
917
918 if(Spop != NULL) {
919 StdPrintOptionsFree(Spop);
920 }
921 return (lsp);
922 }
923
924 /**********************************************************/
SeqEntryToStrArrayQEx(SeqEntryPtr sep,Uint1 format,Int4 gi,Boolean is_html)925 NLM_EXTERN LinkStrPtr SeqEntryToStrArrayQEx(SeqEntryPtr sep, Uint1 format, Int4 gi, Boolean is_html)
926 {
927 StdPrintOptionsPtr Spop = NULL;
928 LinkStrPtr lsp;
929 LinkStrPtr tlsp;
930 Asn2ffJobPtr ajp;
931 ValNodePtr v;
932 BioseqPtr bsp;
933 SeqIdPtr sip;
934 SeqLocPtr slp;
935 TextSeqIdPtr tsip;
936
937
938 if (is_html) {
939 init_www();
940 }
941 if(format == GENPEPT_FMT) {
942 if(Template_load == FALSE) {
943 PrintTemplateSetLoad("asn2ff.prt");
944 Template_load = TRUE;
945 }
946 Spop = StdPrintOptionsNew(NULL);
947 if(Spop != NULL) {
948 Spop->newline = "~";
949 Spop->indent = "";
950 } else {
951 ErrPostStr(SEV_FATAL, 0, 0, "StdPrintOptionsNew failed");
952 return NULL;
953 }
954 }
955 ajp = Asn2ffJobCreate(sep, NULL, NULL, NULL, format, RELEASE_MODE, Spop);
956 /* ajp->entityID = ObjMgrGetEntityIDForPointer (ajp->slp); */ /* entityID already set - slp is NULL */
957 if (SeqMgrFeaturesAreIndexed (ajp->entityID) == 0) {
958 SeqMgrIndexFeatures (ajp->entityID, NULL);
959 }
960 ajp->useSeqMgrIndexes = TRUE;
961 ajp->contig_view = FALSE;
962 /*if (is_html && gi > 0) {*/
963 if (gi > 0) {
964 ajp->show_version = TRUE;
965 v = ValNodeNew(NULL);
966 v->choice = SEQID_GI;
967 v->data.intvalue = gi;
968 if ((bsp = BioseqFind(v)) != NULL) {
969 if (bsp->repr == Seq_repr_seg) {
970 if ((slp = bsp->seq_ext) != NULL) {
971 if (BioseqFindCore(SeqLocId(slp)) == NULL) {
972 ajp->genome_view = TRUE;
973 ajp->only_one = TRUE;
974 }
975 }
976 }
977 }
978 ajp->id_print = v;
979 if (v != NULL) {
980 ajp->gb_style = FALSE;
981 if ((bsp = BioseqFind(v)) == NULL) {
982 ErrPostEx(SEV_FATAL, 0, 0, "BioseqFind failed for %ld", gi);
983 return NULL;
984 }
985 if (bsp->repr == Seq_repr_seg) {
986 ajp->gb_style = TRUE;
987 }
988 for (sip=bsp->id; sip; sip=sip->next) {
989 if (sip->choice == SEQID_OTHER) {
990 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
991 if (StringNCmp(tsip->accession, "NT_", 3) == 0) {
992 ajp->contig_view = TRUE;
993 ajp->genome_view = TRUE;
994 ajp->show_seq = FALSE;
995 break;
996 }
997 }
998 }
999 if (bsp->repr == Seq_repr_seg && ajp->contig_view == FALSE) {
1000 ajp->id_print = NULL;
1001 }
1002 if (bsp->repr != Seq_repr_seg) {
1003 sep = SeqMgrGetSeqEntryForData((Pointer)bsp);
1004 }
1005 }
1006 }
1007 lsp = (LinkStrPtr) MemNew(sizeof(LinkStr));
1008 lsp->next = NULL;
1009 lsp->line = NULL;
1010 SeqEntryToLinkStr(ajp, sep, lsp, format, RELEASE_MODE);
1011 tlsp = lsp;
1012 lsp = lsp->next;
1013 MemFree(tlsp);
1014
1015 if(Spop != NULL) {
1016 StdPrintOptionsFree(Spop);
1017 }
1018 return (lsp);
1019 }
1020
1021 /**********************************************************/
asn2ff_print_bs(Asn2ffJobPtr ajp)1022 NLM_EXTERN void asn2ff_print_bs(Asn2ffJobPtr ajp)
1023 {
1024 FFPrintArrayPtr pap = NULL;
1025 Int4 index;
1026 Int4 pap_size;
1027 Boolean hold = TRUE;
1028
1029
1030 if(ajp->no_hold != FALSE)
1031 hold = FALSE;
1032 if(hold != FALSE)
1033 ObjMgrSetHold(); /* hold any autoloaded records in memory */
1034
1035 pap_size = asn2ff_setup(ajp, &pap);
1036 if(pap_size > 0)
1037 {
1038 for(index = 0; index < pap_size; index++)
1039 {
1040 asn2ff_set_output(NULL, "\n");
1041 FFBSPrint(pap, index, pap_size);
1042 if(pap[index].descr)
1043 {
1044 pap[index].descr = (DescrStructPtr)MemFree(pap[index].descr);
1045 }
1046 }
1047 MemFree(pap);
1048 }
1049
1050 free_buff();
1051 asn2ff_cleanup(ajp);
1052 if(hold != FALSE)
1053 ObjMgrClearHold();
1054 if(ajp->free_cache)
1055 ObjMgrFreeCache(0);
1056 }
1057
1058 /**********************************************************/
AjpToByteStore(Asn2ffJobPtr ajp)1059 NLM_EXTERN ByteStorePtr AjpToByteStore(Asn2ffJobPtr ajp)
1060 {
1061 StdPrintOptionsPtr Spop = NULL;
1062 ByteStorePtr bs;
1063 BioseqSetPtr bssp;
1064 SeqEntryPtr tsep;
1065
1066 if(ajp == NULL)
1067 return(NULL);
1068
1069 if(ajp->ssp != NULL)
1070 {
1071 ajp->sep = (SeqEntryPtr)ajp->ssp->data;
1072 ajp->ssp = NULL;
1073 }
1074
1075 if(ajp->sep == NULL && ajp->slp == NULL)
1076 {
1077 bs = ajp->byte_st;
1078 ajp->byte_st = NULL;
1079 return(bs);
1080 }
1081
1082 if(ajp->format == GENPEPT_FMT)
1083 {
1084 if(Template_load == FALSE)
1085 {
1086 PrintTemplateSetLoad("asn2ff.prt");
1087 Template_load = TRUE;
1088 }
1089 if(ajp->Spop == NULL)
1090 {
1091 Spop = StdPrintOptionsNew(NULL);
1092 if(Spop != NULL)
1093 {
1094 Spop->newline = "~";
1095 Spop->indent = "";
1096 ajp->Spop = Spop;
1097 }
1098 else
1099 {
1100 ErrPostStr(SEV_FATAL, 0, 0, "StdPrintOptionsNew failed");
1101 bs = ajp->byte_st;
1102 ajp->byte_st = NULL;
1103 return(bs);
1104 }
1105 }
1106 }
1107
1108 if(ajp->sep != NULL && ajp->sep->choice == 2)
1109 {
1110 bssp = (BioseqSetPtr) ajp->sep->data.ptrvalue;
1111 if(bssp != NULL && (bssp->_class == 7 ||
1112 (bssp->_class >= 13 && bssp->_class <= 16)))
1113 {
1114 for(tsep = bssp->seq_set; tsep != NULL; tsep = tsep->next)
1115 {
1116 ajp->sep = tsep;
1117 bs = AjpToByteStore(ajp);
1118 ajp->byte_st = bs;
1119 }
1120 }
1121 else
1122 asn2ff_print_bs(ajp);
1123 }
1124 else
1125 asn2ff_print_bs(ajp);
1126
1127 bs = ajp->byte_st;
1128 ajp->byte_st = NULL;
1129
1130 return(bs);
1131 }
1132
1133 /***********************************************************************
1134 *
1135 * SeqEntryToFlat is a stand-alone function that takes a SeqEntryPtr
1136 * and writes a flat file to a disk file. If the formatting is
1137 * successful, TRUE is returned; otherwise FALSE is returned.
1138 *
1139 Choices for the Uint1's format and mode are defined in asn2ff.h.
1140
1141 For format they are:
1142
1143 GENBANK_FMT standard GenBank flat file for nucleotides
1144 EMBL_FMT standard EMBL flat file for nucleotides
1145 GENPEPT_FMT standard GenBank flat file for proteins
1146 PSEUDOEMBL_FMT a flavor of the EMBL flat file used by the "Authorin" program
1147
1148 The modes are:
1149
RELEASE_MODE	this mode assures that all the requirements (e.g., identifiers,
		features, references, as described in the GenBank release notes
		and the feature table) are met.
1154 DUMP_MODE dump out the ASN.1 to a flat file
1155 SEQUIN_MODE mode used by sequin
1156 CHROMO_MODE mode used by Chromoscope
1157 DIRSUB_MODE mode used by NCBI indexers during the "dirsub" process.
1158 DIRSUB_DEBUG_MODE mode used by NCBI indexers during the "dirsub" process.
1159 REVISE_MODE mode used by the "revise" program at NCBI (for in-house
1160 editing of entries).
1161 *
1162 **************************************************************************/
1163
SeqEntryToFlat(SeqEntryPtr sep,FILE * fp,Uint1 format,Uint1 mode)1164 NLM_EXTERN Boolean SeqEntryToFlat (SeqEntryPtr sep, FILE *fp, Uint1 format, Uint1 mode)
1165
1166 {
1167 Boolean rsult = FALSE;
1168 Asn2ffJobPtr ajp;
1169 StdPrintOptionsPtr Spop = NULL;
1170 BioseqSetPtr bssp;
1171
1172 if (sep == NULL) {
1173 return FALSE;
1174 }
1175 if (format == GENPEPT_FMT) {
1176 if (AllObjLoad () && SubmitAsnLoad () && SeqCodeSetLoad ()) {
1177 ErrShow();
1178 }
1179 if (!Template_load) {
1180 PrintTemplateSetLoad ("asn2ff.prt");
1181 Template_load = TRUE;
1182 }
1183 Spop = StdPrintOptionsNew(NULL);
1184 if (Spop) {
1185 Spop->newline = "~";
1186 Spop->indent = "";
1187 } else {
1188 ErrPostStr(SEV_FATAL,0,0, "StdPrintOptionsNew failed");;
1189 return FALSE;
1190 }
1191 }
1192 if (IS_Bioseq_set (sep)) {
1193 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1194 if (bssp != NULL && (bssp->_class == 7 ||
1195 (bssp->_class >= 13 && bssp->_class <= 16))) {
1196 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
1197 rsult = SeqEntryToFlat (sep, fp, format, mode);
1198 }
1199 return rsult;
1200 }
1201 }
1202
1203 ajp = Asn2ffJobCreate(sep, NULL, NULL, fp, format, mode, Spop);
1204
1205 if (ajp == NULL) {
1206 return FALSE;
1207 }
1208 ajp->show_version = TRUE;
1209 rsult = asn2ff_print(ajp);
1210 StdPrintOptionsFree(ajp->Spop);
1211 MemFree(ajp);
1212
1213 return rsult;
1214 }
1215
SeqEntryToFlatAjp(Asn2ffJobPtr ajp,SeqEntryPtr sep,FILE * fp,Uint1 format,Uint1 mode)1216 NLM_EXTERN Boolean SeqEntryToFlatAjp (Asn2ffJobPtr ajp, SeqEntryPtr sep, FILE *fp, Uint1 format, Uint1 mode)
1217
1218 {
1219 Boolean rsult = FALSE;
1220 StdPrintOptionsPtr Spop = NULL;
1221 BioseqSetPtr bssp;
1222
1223 if (sep == NULL) {
1224 return FALSE;
1225 }
1226 if (format == GENPEPT_FMT) {
1227 if (AllObjLoad () && SubmitAsnLoad () && SeqCodeSetLoad ()) {
1228 ErrShow();
1229 }
1230 if (!Template_load) {
1231 PrintTemplateSetLoad ("asn2ff.prt");
1232 Template_load = TRUE;
1233 }
1234 Spop = StdPrintOptionsNew(NULL);
1235 if (Spop) {
1236 Spop->newline = "~";
1237 Spop->indent = "";
1238 } else {
1239 ErrPostStr(SEV_FATAL,0,0, "StdPrintOptionsNew failed");;
1240 return FALSE;
1241 }
1242 }
1243 if (IS_Bioseq_set (sep)) {
1244 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1245 if (bssp != NULL && (bssp->_class == 7 ||
1246 (bssp->_class >= 13 && bssp->_class <= 16))) {
1247 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
1248 rsult = SeqEntryToFlatAjp (ajp, sep, fp, format, mode);
1249 }
1250 if (format == GENPEPT_FMT && Spop) {
1251 Spop = StdPrintOptionsFree(Spop);
1252 }
1253 return rsult;
1254 }
1255 }
1256 if (sep == NULL && ajp != NULL) {
1257 StdPrintOptionsFree(ajp->Spop);
1258 return rsult;
1259 }
1260 if (ajp == NULL) {
1261 if ((ajp = Asn2ffJobCreate(sep, NULL, NULL, fp, format, mode, Spop))
1262 == NULL) {
1263 if (format == GENPEPT_FMT && Spop) {
1264 Spop = StdPrintOptionsFree(Spop);
1265 }
1266 return FALSE;
1267 }
1268 } else {
1269 if ((ajp->entityID=ObjMgrGetEntityIDForPointer(sep)) == 0) {
1270 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
1271 if (format == GENPEPT_FMT && Spop) {
1272 Spop = StdPrintOptionsFree(Spop);
1273 }
1274 return rsult;
1275 }
1276 ajp->sep = sep;
1277 }
1278 rsult = asn2ff_print(ajp);
1279 if (format == GENPEPT_FMT && Spop) {
1280 Spop = StdPrintOptionsFree(Spop);
1281 }
1282
1283 return rsult;
1284 }
1285
1286 /**************************************************************************
1287 * Prints out flat file in GenBank format WITHOUT Sequence
1288 **************************************************************************/
SeqEntryToGBFlatNoSeq(SeqEntryPtr sep,FILE * fp,Uint1 mode,Boolean show_gi)1289 NLM_EXTERN Boolean SeqEntryToGBFlatNoSeq(SeqEntryPtr sep, FILE *fp,
1290 Uint1 mode, Boolean show_gi)
1291 {
1292 Boolean rsult;
1293 Asn2ffJobPtr ajp;
1294 BioseqSetPtr bssp;
1295
1296 if (sep == NULL) {
1297 return FALSE;
1298 }
1299
1300 if (IS_Bioseq_set (sep)) {
1301 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1302 if (bssp != NULL && (bssp->_class == 7 ||
1303 (bssp->_class >= 13 && bssp->_class <= 16))) {
1304 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
1305 rsult = SeqEntryToGBFlatNoSeq(sep, fp, mode, show_gi);
1306 }
1307 return rsult;
1308 }
1309 }
1310
1311 ajp = Asn2ffJobCreate(sep, NULL, NULL, fp, GENBANK_FMT, mode, NULL);
1312
1313 if (ajp == NULL)
1314 return FALSE;
1315
1316 ajp->show_seq = FALSE; /* This is the point */
1317 ajp->show_gi = show_gi;
1318
1319 rsult = asn2ff_print(ajp);
1320 MemFree(ajp);
1321
1322 return rsult;
1323 }
1324
1325 /***********************************************************************
1326 *
*	SeqEntryToFlatEx is a stand-alone function that works like
*	SeqEntryToFlat, but additionally takes a SeqIdPtr and an output type.
*
*	If the formatting is successful, TRUE is returned; otherwise FALSE
*	is returned.
1331 *
1332 * Choices for the Uint1's type are defined in asn2ff.h.
1333 * FF_REGULAR 0
1334 * FF_TOP_COMPLETE 1
1335 * FF_TOP_CONTIG 2
1336 *
1337 **************************************************************************/
1338
SeqEntryToFlatEx(SeqEntryPtr sep,FILE * fp,Uint1 format,Uint1 mode,SeqIdPtr seqid,Uint1 type)1339 NLM_EXTERN Boolean SeqEntryToFlatEx (SeqEntryPtr sep, FILE *fp, Uint1 format, Uint1 mode, SeqIdPtr seqid, Uint1 type)
1340 {
1341 Boolean rsult=FALSE;
1342 Asn2ffJobPtr ajp;
1343 StdPrintOptionsPtr Spop = NULL;
1344 BioseqPtr bsp;
1345 BioseqSetPtr bssp;
1346
1347 rsult = FALSE;
1348 if (format == GENPEPT_FMT) {
1349 if (AllObjLoad () && SubmitAsnLoad () && SeqCodeSetLoad ()) {
1350 ErrShow();
1351 }
1352 if (!Template_load) {
1353 PrintTemplateSetLoad ("asn2ff.prt");
1354 Template_load = TRUE;
1355 }
1356 Spop = StdPrintOptionsNew(NULL);
1357 if (Spop) {
1358 Spop->newline = "~";
1359 Spop->indent = "";
1360 } else {
1361 Message (MSG_FATAL, "StdPrintOptionsNew failed");
1362 return rsult;
1363 }
1364 }
1365 ajp = Asn2ffJobCreate(sep, NULL, NULL, fp, format, mode, Spop);
1366 if (ajp == NULL) {
1367 return FALSE;
1368 }
1369 ajp->show_version = TRUE;
1370 if (mode == RELEASE_MODE) {
1371 ajp->show_gi = FALSE;
1372 }
1373 if (type == FF_REGULAR) {
1374 ajp->gb_style = TRUE;
1375 ajp->id_print = NULL;
1376 }
1377 if (type == FF_TOP_COMPLETE) {
1378 ajp->gb_style = FALSE;
1379 ajp->only_one = TRUE;
1380 ajp->ignore_top = TRUE;
1381 }
1382 if (type == FF_TOP_CONTIG) {
1383 ajp->gb_style = FALSE;
1384 ajp->only_one = TRUE;
1385 ajp->ignore_top = TRUE;
1386 ajp->genome_view = TRUE;
1387 ajp->show_seq = FALSE;
1388 }
1389 if (seqid != NULL) {
1390 ajp->gb_style = FALSE;
1391 ajp->id_print = seqid;
1392 bsp = BioseqFind(seqid);
1393 if (bsp->repr == Seq_repr_seg) {
1394 ajp->sep = sep;
1395 } else {
1396 ajp->sep = SeqMgrGetSeqEntryForData((Pointer)bsp);
1397 }
1398 if ((ajp->entityID = ObjMgrGetEntityIDForChoice(ajp->sep)) == 0) {
1399 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
1400 }
1401 rsult = asn2ff_print(ajp);
1402
1403 StdPrintOptionsFree(ajp->Spop);
1404 MemFree(ajp);
1405
1406 return rsult;
1407 }
1408 if (IS_Bioseq_set (sep)) {
1409 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1410 if (bssp != NULL && (bssp->_class == 7 ||
1411 (bssp->_class >= 13 && bssp->_class <= 16))) {
1412 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
1413 rsult = SeqEntryToFlatEx (sep, fp, format, mode, seqid, type);
1414 }
1415 return rsult;
1416 }
1417 }
1418 ajp->sep = sep;
1419 if ((ajp->entityID = ObjMgrGetEntityIDForChoice(ajp->sep)) == 0) {
1420 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
1421 }
1422 rsult = asn2ff_print(ajp);
1423
1424 StdPrintOptionsFree(ajp->Spop);
1425 MemFree(ajp);
1426
1427 return rsult;
1428 }
1429
1430 /**************************************************************************
1431 * Prints out short flat file report in GenBank format
1432 **************************************************************************/
SeqEntryToPartRpt(SeqEntryPtr sep,FILE * fp)1433 NLM_EXTERN Boolean SeqEntryToPartRpt (SeqEntryPtr sep, FILE *fp)
1434 {
1435 Boolean rsult;
1436 Asn2ffJobPtr ajp;
1437
1438 ajp = Asn2ffJobCreate(sep, NULL, NULL, fp, GENBANK_FMT, PARTIAL_MODE, NULL);
1439 if (ajp == NULL) {
1440 return FALSE;
1441 }
1442 rsult = asn2ff_print(ajp);
1443 MemFree(ajp);
1444
1445 return rsult;
1446 }
1447
SeqSubmitToFlat(SeqSubmitPtr ssp,FILE * fp,Uint1 mode,Boolean show_gi,Uint1 format,Boolean show_gene)1448 NLM_EXTERN Boolean SeqSubmitToFlat (SeqSubmitPtr ssp, FILE *fp, Uint1 mode, Boolean show_gi, Uint1 format, Boolean show_gene)
1449 {
1450 Boolean rsult = FALSE;
1451 Asn2ffJobPtr ajp;
1452
1453 if (ssp == NULL) {
1454 return rsult;
1455 }
1456 if (ssp->datatype != 1) {
1457 return rsult;
1458 }
1459 ajp = Asn2ffJobCreate(NULL, ssp, NULL, fp, format, mode, NULL);
1460 if (ajp == NULL) {
1461 return FALSE;
1462 }
1463 rsult = asn2ff_print(ajp);
1464 MemFree(ajp);
1465 return rsult;
1466 }
1467
SeqGenomeToFlat(SeqEntryPtr sep,FILE * fp,Uint1 format,Uint1 mode)1468 NLM_EXTERN Boolean SeqGenomeToFlat (SeqEntryPtr sep, FILE *fp, Uint1 format, Uint1 mode)
1469
1470 {
1471 Boolean rsult;
1472 Asn2ffJobPtr ajp;
1473 StdPrintOptionsPtr Spop = NULL;
1474
1475 rsult = FALSE;
1476 ajp = Asn2ffJobCreate(sep, NULL, NULL, fp, format, mode, Spop);
1477 if (ajp == NULL) {
1478 return FALSE;
1479 }
1480 ajp->only_one = TRUE;
1481 ajp->ignore_top = TRUE;
1482 ajp->genome_view = TRUE;
1483
1484 rsult = asn2ff_print(ajp);
1485 MemFree(ajp);
1486
1487 return rsult;
1488 }
1489
SeqGenomeToFlatEx(SeqEntryPtr sep,FILE * fp,Uint1 format,Uint1 mode,Boolean map_view)1490 NLM_EXTERN Boolean SeqGenomeToFlatEx (SeqEntryPtr sep, FILE *fp, Uint1 format, Uint1 mode, Boolean map_view)
1491
1492 {
1493 Boolean rsult;
1494 Asn2ffJobPtr ajp;
1495 StdPrintOptionsPtr Spop = NULL;
1496
1497 rsult = FALSE;
1498 ajp = Asn2ffJobCreate(sep, NULL, NULL, fp, format, mode, Spop);
1499 if (ajp == NULL) {
1500 return FALSE;
1501 }
1502 ajp->only_one = TRUE;
1503 ajp->ignore_top = TRUE;
1504 ajp->genome_view = TRUE;
1505 ajp->map_view = map_view;
1506 ajp->forgbrel = FALSE;
1507
1508 rsult = asn2ff_print(ajp);
1509 MemFree(ajp);
1510 return rsult;
1511 }
1512
SeqLocToFlat(SeqLocPtr slp,FILE * fp,Uint1 format,Uint1 mode)1513 NLM_EXTERN Boolean SeqLocToFlat (SeqLocPtr slp, FILE *fp, Uint1 format, Uint1 mode)
1514 {
1515 Boolean rsult;
1516 Asn2ffJobPtr ajp;
1517 StdPrintOptionsPtr Spop = NULL;
1518 Uint4 itemID=0;
1519
1520 rsult = FALSE;
1521 if (format == GENPEPT_FMT) {
1522 if (AllObjLoad () && SubmitAsnLoad () && SeqCodeSetLoad () &&
1523 PrintTemplateSetLoad ("asn2ff.prt")) {
1524 ErrShow();
1525 }
1526 Spop = StdPrintOptionsNew(NULL);
1527 if (Spop) {
1528 Spop->newline = "~";
1529 Spop->indent = "";
1530 } else {
1531 Message (MSG_FATAL, "StdPrintOptionsNew failed");
1532 return rsult;
1533 }
1534 }
1535
1536 ajp = Asn2ffJobCreate(NULL, NULL, slp, fp, format, mode, Spop);
1537 if (ajp == NULL) {
1538 return FALSE;
1539 }
1540 ajp->show_version = TRUE;
1541 ajp->only_one = TRUE;
1542 ajp->ignore_top = FALSE;
1543 ajp->id_print = SeqLocId(slp);
1544
1545 rsult = asn2ff_print(ajp);
1546 return rsult;
1547 }
1548
/*
 * ChangeObsoleteImpFeats
 *
 * Feature callback (passed to VisitFeaturesInSep in this file): an
 * import feature whose key is "allele" or "mutation" has its key
 * replaced by "variation" and its indexed subtype set to
 * FEATDEF_variation.  Every other feature is left untouched.
 * `userdata` is not used.
 */
static void ChangeObsoleteImpFeats (SeqFeatPtr sfp, Pointer userdata)

{
    ImpFeatPtr imp;
    Boolean    obsolete;

    if (sfp == NULL) {
        return;
    }
    if (sfp->data.choice != SEQFEAT_IMP) {
        return;
    }

    imp = (ImpFeatPtr) sfp->data.value.ptrvalue;
    if (imp == NULL) {
        return;
    }

    obsolete = (Boolean) (StringCmp (imp->key, "allele") == 0 ||
                          StringCmp (imp->key, "mutation") == 0);
    if (!obsolete) {
        return;
    }

    MemFree (imp->key);
    imp->key = StringSave ("variation");
    sfp->idx.subtype = FEATDEF_variation;
}
1564
1565
1566 /***************************************************************************
1567 *
1568 * Setup the FFPrintArrayPtr to be used by "FFPrint", the number
1569 * returned is the number of entries in the array.
1570 ***************************************************************************/
1571
/*
 * asn2ff_setup
 *
 * Prepares a job for printing and builds its print array through the
 * format-specific setup routine.
 *
 *   ajp   job to prepare; many of its fields are rewritten here
 *         (show_gi, mode, gb_style, sfp_out, hup, asn2ffwep, entityID,
 *         useSeqMgrIndexes, pseudo, format).
 *   papp  receives the print array from the format-specific routine.
 *
 * Returns the value of the format-specific setup routine; 0 on the two
 * early exits after gathering; -1 (the initial pap_size) when
 * ajp->format matches none of the formats tested at the end.
 */
NLM_EXTERN Int4 asn2ff_setup (Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)

{
    Int4 pap_size = -1;
    Asn2ffWEPtr awp;
    GatherScope gs;
    Uint1 focus;
    BioseqPtr bsp;
    SeqEntryPtr sep = NULL;

    Message (MSG_POSTERR, "The asn2ff flatfile generator is obsolete and unsupported. Please switch to using asn2gb/SeqEntryToGnbk in the future.");

    /* find a top entry to normalize: the job's own sep, the data of a
       Seq-submit of datatype 1, or the top entry of the entity */
    if (ajp->sep != NULL) {
        sep = ajp->sep;
    } else if (ajp->ssp != NULL && ajp->ssp->datatype == 1) {
        sep = (SeqEntryPtr) ajp->ssp->data;
    } else if (ajp->entityID > 0 && ajp->slp == NULL) {
        sep = GetTopSeqEntryForEntityID (ajp->entityID);
    }

    /* rewrite obsolete import feature keys in place before formatting */
    if (sep != NULL) {
        VisitFeaturesInSep (sep, NULL, ChangeObsoleteImpFeats);
    }

    ajp->show_gi = FALSE; /* displayed two obsolete line types - should always be FALSE */

    /* gather scope: ignore every object type except Bioseq and Bioseq-set */
    MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
    MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
    gs.ignore[OBJ_BIOSEQ] = FALSE;
    gs.ignore[OBJ_BIOSEQSET] = FALSE;

    if (ajp->format == EMBLPEPT_FMT) /* Turn off Validators for EMBLPEPT */
        ajp->mode = DUMP_MODE;

    if (ajp->format == EMBLPEPT_FMT || ajp->format == GENPEPT_FMT)
        ajp->gb_style = FALSE;
    /* must follow the mode / gb_style adjustments above: set_flags reads both */
    set_flags(ajp);


    flat2asn_install_accession_user_string("SET-UP");
    flat2asn_install_locus_user_string("SET-UP");

    ajp->sfp_out = MakeSyntheticSeqFeat();
    /* NOTE(review): the MemNew result is used without a NULL check */
    awp = (Asn2ffWEPtr) MemNew(sizeof(Asn2ffWE));
    awp->seg = NULL;
    awp->parts = NULL;
    ajp->hup = FALSE;
    if (ajp->ssp && ajp->ssp->sub) {
        ajp->hup = ajp->ssp->sub->hup;
    }
    ajp->asn2ffwep = awp;
    /* resolve the entity ID from whichever input the job carries */
    if (ajp->entityID == 0) {
        if (ajp->sep != NULL) {
            ajp->entityID = ObjMgrGetEntityIDForChoice (ajp->sep);
        } else if (ajp->ssp != NULL) {
            ajp->entityID = ObjMgrGetEntityIDForPointer (ajp->ssp);
        } else if (ajp->slp != NULL) {
            /* NOTE(review): bsp is assigned but never read afterwards;
               the call may be kept for a side effect -- confirm */
            bsp = BioseqFindFromSeqLoc(ajp->slp);
            ajp->entityID = ObjMgrGetEntityIDForPointer (ajp->slp);
            SeqMgrIndexFeatures (ajp->entityID, NULL);
            ajp->useSeqMgrIndexes = TRUE;
        }
    }

    if (ajp->entityID != 0) {
        /* a location job always runs on the feature indexes */
        if (ajp->slp != NULL) {
            SeqMgrIndexFeatures (ajp->entityID, NULL);
            ajp->useSeqMgrIndexes = TRUE;
        }
        focus = (Uint1)FocusSeqEntry(ajp->sep, &gs);
        /* the ignore table is re-established after FocusSeqEntry */
        MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
        gs.ignore[OBJ_BIOSEQ] = FALSE;
        gs.ignore[OBJ_BIOSEQSET] = FALSE;
        GatherEntity(ajp->entityID, (Pointer) ajp, SeqToAwp, &gs);
        /* gs.target is freed only when FocusSeqEntry reported FOCUS_INITIALIZED */
        if (focus == FOCUS_INITIALIZED) {
            SeqLocFree(gs.target);
        }
        awp = ajp->asn2ffwep;
        /* NOTE(review): awp->gbp is read before the `if (awp)` test below */
        if (!ajp->only_one && awp->gbp == NULL) {
            if (awp) {
                ajp->asn2ffwep = (Asn2ffWEPtr)MemFree(awp);
            }
            return 0;
        }
        /* NOTE(review): unlike the exit above, this one leaves awp
           attached to the job */
        if (ajp->forgbrel && awp->seg == NULL && ajp->genome_view == TRUE) {
            return 0;
        }
        /* neither segments nor parts: reset the segment counters */
        if (awp->seg == NULL && awp->parts == NULL) {
            awp->total_seg = 0;
            if (awp->gbp) {
                awp->gbp->num_seg = 0;
            }
            if (SeqMgrFeaturesAreIndexed (ajp->entityID)) {
                ajp->useSeqMgrIndexes = TRUE; /* initial use of new indexes */
            }
        }
        /* exactly one gathered entry: reset the segment counters as well */
        if (awp->gbp && awp->gbp->next == NULL) {
            awp->total_seg = 0;
            awp->gbp->num_seg = 0;
            if (SeqMgrFeaturesAreIndexed (ajp->entityID)) {
                ajp->useSeqMgrIndexes = TRUE; /* initial use of new indexes */
            }
        }
/*
        if (ajp->slp != NULL) {
            if (awp->gbp) {
                if (ajp->slp->choice == SEQLOC_MIX || ajp->slp->choice == SEQLOC_PACKED_INT) {
                    awp->gbp->next = NULL;
                    awp->gbp->num_seg = 0;
                    awp->total_seg = 0;
                }
            }
        }
*/
        ajp->asn2ffwep = awp;
        if (ajp->mode != DIRSUB_MODE)
            GetGIs(ajp);
    }

    init_buff();
    ajp->pseudo = FALSE;
    if (ajp->format == SELECT_FMT) { /* quick fix 07.17.95 change later */
        ajp->format = GENBANK_FMT;
        ajp->pseudo = TRUE;
    }
    if (ajp->format == PSEUDOEMBL_FMT) {
        ajp->pseudo = TRUE;
    }
    /* help output uses its own array layout and bypasses the format dispatch */
    if (ajp->help) {
        pap_size = asn2hp_setup(ajp, papp);
        return pap_size;
    }
    /* NOTE(review): SELECT_FMT was rewritten to GENBANK_FMT above, so the
       second half of this test looks redundant -- confirm */
    if (ajp->format == GENBANK_FMT || ajp->format == SELECT_FMT) {
        if (ajp->mode == PARTIAL_MODE) {
            pap_size = asn2pr_setup(ajp, papp);
        } else {
            pap_size = asn2gb_setup(ajp, papp);
        }
    } else if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT) {
        pap_size = asn2embl_setup(ajp, papp);
    } else if (ajp->format == EMBLPEPT_FMT) {
        pap_size = asn2ep_setup(ajp, papp);
    }else if (ajp->format == GENPEPT_FMT) {
        pap_size = asn2gp_setup(ajp, papp);
    }else if (ajp->format == GRAPHIK_FMT) {
        pap_size = asn2gr_setup(ajp, papp);
    }
    return pap_size;
}    /* asn2ff_setup */
1721
1722 /****************************************************************************
1723 *void set_flags (Asn2ffJobPtr ajp)
1724 *
1725 * set_flags to determine which tasks to perform.
1726 *****************************************************************************/
/*
 * set_flags
 *
 * Loads the global asn2ff_flags[] array for the job.
 *
 * Flags 11 and 12 are always copied from the job (error_msgs and
 * gb_style).  Flags 0..10 are loaded from a per-mode preset; a mode
 * that matches none of the presets leaves them untouched.
 *
 * The defines are:
 *   ASN2FF_LOCAL_ID            asn2ff_flags[0]
 *     If FALSE then entries with "local" ids are NOT formatted.
 *   ASN2FF_LOOK_FOR_SEQ        asn2ff_flags[1]
 *     If TRUE BioseqFind is run in an attempt to "find" entries that
 *     have been loaded into memory and are referenced by an entry.
 *   ASN2FF_VALIDATE_FEATURES   asn2ff_flags[2]
 *     If TRUE then validation is run on features.  If they are invalid
 *     they are dropped.
 *   ASN2FF_IGNORE_PATENT_PUBS  asn2ff_flags[3]
 *     This flag only applies to patent pubs.  If FlatIgnoreThisPatentPub
 *     is true and this flag is TRUE, that pub is dropped.  ALL OTHER
 *     PUBS are validated all the time.
 *   ASN2FF_DROP_SHORT_AA       asn2ff_flags[4]
 *     Drop amino acid sequences that are too short.  Only applies to
 *     GenPept (i.e., protein) format.
 *   ASN2FF_AVOID_LOCUS_COLL    asn2ff_flags[5]
 *     If TRUE check for LOCUS collisions with Karl's algorithm,
 *     otherwise use the LOCUS in the id field.
 *   ASN2FF_DATE_ERROR_MSG      asn2ff_flags[6]
 *     If TRUE report a missing date.  Should be FALSE for indexing
 *     work when no date for a record has been set.
 *   ASN2FF_IUPACAA_ONLY        asn2ff_flags[7]
 *     Use only iupacaa characters if TRUE.  Only iupacaa is the flat
 *     file standard.
 *   ASN2FF_TRANSL_TABLE        asn2ff_flags[8]
 *     If TRUE print the transl_table qualifiers.  Set to FALSE until
 *     the database correctly reflects transl_tables.
 *   ASN2FF_REPORT_LOCUS_COLL   asn2ff_flags[9]
 *     If TRUE, report locus collisions via ErrPostEx.
 *   ASN2FF_SHOW_ALL_PUBS       asn2ff_flags[10]
 *     If TRUE don't drop CitGen references or replace CitGen->cit with
 *     "Unpublished".
 *   ASN2FF_SHOW_ERROR_MSG      asn2ff_flags[11]
 *   ASN2FF_SHOW_GB_STYLE       asn2ff_flags[12]
 *     Show only features completed on this bioseq or location (gb_style).
 */
void set_flags (Asn2ffJobPtr ajp)

{
    /* one preset per mode, indexed by flag number 0..10 */
    static const Boolean release_set[11] = {
        FALSE, FALSE, TRUE,  TRUE,  TRUE,  TRUE,  TRUE,  TRUE,  TRUE, FALSE, FALSE
    };
    static const Boolean dirsub_set[11] = {
        FALSE, FALSE, TRUE,  TRUE,  TRUE,  TRUE,  FALSE, FALSE, TRUE, FALSE, FALSE
    };
    static const Boolean dirsub_debug_set[11] = {
        FALSE, FALSE, FALSE, TRUE,  TRUE,  TRUE,  FALSE, FALSE, TRUE, FALSE, FALSE
    };
    static const Boolean revise_set[11] = {
        TRUE,  FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,  FALSE, TRUE, TRUE,  FALSE
    };
    static const Boolean dump_set[11] = {
        TRUE,  FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,  FALSE, TRUE, FALSE, TRUE
    };
    static const Boolean sequin_set[11] = {
        TRUE,  FALSE, FALSE, TRUE,  TRUE,  TRUE,  FALSE, FALSE, TRUE, FALSE, FALSE
    };
    static const Boolean chromo_set[11] = {
        TRUE,  TRUE,  FALSE, TRUE,  FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE
    };
    const Boolean *preset = NULL;
    Boolean        is_release = FALSE;
    Boolean        hide_gi = FALSE;
    Int2           slot;

    asn2ff_flags[11] = ajp->error_msgs;
    asn2ff_flags[12] = ajp->gb_style;

    if (ajp->mode == RELEASE_MODE) {
        preset = release_set;
        is_release = TRUE;
    } else if (ajp->mode == DIRSUB_MODE) {
        preset = dirsub_set;
        hide_gi = TRUE;
    } else if (ajp->mode == DIRSUB_DEBUG_MODE) {
        preset = dirsub_debug_set;
        hide_gi = TRUE;
    } else if (ajp->mode == REVISE_MODE) {
        preset = revise_set;
    } else if (ajp->mode == DUMP_MODE) {
        preset = dump_set;
    } else if (ajp->mode == SEQUIN_MODE) {
        preset = sequin_set;
    } else if (ajp->mode == CHROMO_MODE) {
        preset = chromo_set;
    }

    if (preset == NULL) {
        return;
    }

    for (slot = 0; slot < 11; slot++) {
        asn2ff_flags[slot] = preset[slot];
    }

    /* release mode formats local ids only when the application property
       "InternalNcbiSequin" is set */
    if (is_release && GetAppProperty ("InternalNcbiSequin") != NULL) {
        asn2ff_flags[0] = TRUE;
    }
    if (hide_gi) {
        ajp->show_gi = FALSE;
    }
}
1876
check_whole(SeqFeatPtr f,Int4 len)1877 static Boolean check_whole(SeqFeatPtr f, Int4 len)
1878 {
1879 Boolean whole = FALSE;
1880 SeqLocPtr slp;
1881 SeqIntPtr sip;
1882
1883 slp = f->location;
1884 if (slp->choice == SEQLOC_WHOLE) {
1885 whole = TRUE;
1886 } else if (slp->choice == SEQLOC_INT) {
1887 sip = (SeqIntPtr)slp->data.ptrvalue;
1888 if (sip->from == 0 && sip->to == len-1) {
1889 whole = TRUE;
1890 }
1891 }
1892 return whole;
1893 }
get_pubs(GatherContextPtr gcp)1894 Boolean get_pubs (GatherContextPtr gcp)
1895 {
1896 ValNodePtr tmp, vnp, v;
1897 PubdescPtr pdp;
1898 ValNodePtr PNTR vnpp;
1899 BioseqPtr bsp;
1900 SeqLocPtr slp;
1901 SeqFeatPtr sfp;
1902 ImpFeatPtr ifp;
1903 SubmitBlockPtr sbp;
1904 CitSubPtr the_cit;
1905
1906 vnpp = (ValNodePtr *)gcp->userdata;
1907 vnp = *vnpp;
1908 switch (gcp->thistype)
1909 {
1910 case OBJ_SEQDESC:
1911 tmp = (ValNodePtr) (gcp->thisitem);
1912 if (gcp->parenttype == OBJ_BIOSEQ) {
1913 bsp = (BioseqPtr) (gcp->parentitem);
1914 } else {
1915 bsp = NULL;
1916 }
1917 if (tmp->choice == Seq_descr_pub) {
1918 vnp = StorePub(bsp, vnp, tmp, NULL, 1, gcp->entityID,
1919 gcp->itemID, gcp->thistype);
1920 }
1921 break;
1922 case OBJ_SEQFEAT:
1923 sfp = (SeqFeatPtr) (gcp->thisitem);
1924 if (sfp->data.choice == SEQFEAT_PUB) {
1925 slp = sfp->location;
1926 bsp = BioseqFindCore(SeqLocId(slp));
1927 if (bsp && check_whole(sfp, bsp->length)) {
1928 tmp = ValNodeNew(NULL);
1929 tmp->choice = Seq_descr_pub;
1930 tmp->data.ptrvalue = (PubdescPtr) sfp->data.value.ptrvalue;
1931 vnp = StorePub(bsp, vnp, tmp, NULL, 1, gcp->entityID,
1932 gcp->itemID, gcp->thistype);
1933 ValNodeFree(tmp);
1934 } else {
1935 vnp = StorePub(NULL, vnp, NULL, sfp, 2, gcp->entityID,
1936 gcp->itemID, gcp->thistype);
1937 }
1938 }
1939 if (sfp->data.choice == SEQFEAT_IMP) {
1940 ifp = (ImpFeatPtr)sfp->data.value.ptrvalue;
1941 if (StringCmp(ifp->key, "Site-ref") == 0) {
1942 if (sfp->cit != NULL) {
1943 vnp = StorePub(NULL, vnp, NULL, sfp, 3, gcp->entityID,
1944 gcp->itemID, gcp->thistype);
1945 }
1946 }
1947 }
1948 break;
1949 case OBJ_SUBMIT_BLOCK:
1950 sbp = (SubmitBlockPtr) (gcp->thisitem);
1951 the_cit = (CitSubPtr)AsnIoMemCopy(sbp->cit, (AsnReadFunc) CitSubAsnRead,
1952 (AsnWriteFunc) CitSubAsnWrite);
1953 v = ValNodeNew(NULL);
1954 v->choice = PUB_Sub;
1955 v->data.ptrvalue = the_cit;
1956 pdp = PubdescNew();
1957 pdp->pub = v;
1958 tmp = ValNodeNew(NULL);
1959 tmp->choice = Seq_descr_pub;
1960 tmp->data.ptrvalue = pdp;
1961 vnp = StorePub(NULL, vnp, tmp, NULL, 1, gcp->entityID,
1962 gcp->itemID, gcp->thistype);
1963 PubdescFree(pdp);
1964 break;
1965 /* case OBJ_SEQSUB_CIT:
1966 csp = (CitSubPtr) (gcp->thisitem);
1967 the_cit = AsnIoMemCopy(csp, (AsnReadFunc) CitSubAsnRead,
1968 (AsnWriteFunc) CitSubAsnWrite);
1969 v = ValNodeNew(NULL);
1970 v->choice = PUB_Sub;
1971 v->data.ptrvalue = the_cit;
1972 pdp = PubdescNew();
1973 pdp->pub = v;
1974 tmp = ValNodeNew(NULL);
1975 tmp->choice = Seq_descr_pub;
1976 tmp->data.ptrvalue = pdp;
1977 vnp = StorePub(NULL, vnp, tmp, NULL, 1, gcp->entityID,
1978 gcp->itemID, gcp->thistype);
1979 MemFree(csp);
1980 break;
1981 */
1982 case OBJ_SEQFEAT_CIT:
1983 /***** not used now ********/
1984 tmp = (ValNodePtr) (gcp->thisitem); /* PubSet->data.ptrvalue */
1985 break;
1986 default:
1987 break;
1988 }
1989 *vnpp = vnp;
1990 return TRUE;
1991 }
1992
GetPubNum(GBEntryPtr gbp)1993 static Int2 GetPubNum(GBEntryPtr gbp)
1994 {
1995 ValNodePtr v;
1996 Int4 i;
1997
1998 for (v = gbp->Pub, i=0; v != NULL; v= v->next, i++);
1999
2000 return (Int2)i;
2001 }
CheckSourceFeat(Asn2ffJobPtr ajp,GBEntryPtr gbp)2002 static void CheckSourceFeat(Asn2ffJobPtr ajp, GBEntryPtr gbp)
2003 {
2004 OrgRefPtr orp;
2005 BioSourcePtr biosp;
2006 ValNodePtr vnp;
2007 DescrStructPtr ds;
2008
2009 if (gbp && gbp->feat) {
2010 if (gbp->feat->sfpSourcesize != 0)
2011 return;
2012 }
2013 ds = gbp->source_info;
2014 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_source)) != NULL) {
2015 biosp = (BioSourcePtr)vnp->data.ptrvalue;
2016 orp = (OrgRefPtr) biosp->org;
2017 if (orp) {
2018 if (ds == NULL) {
2019 ds = (DescrStructPtr) MemNew(sizeof(DescrStruct));
2020 gbp->source_info = ds;
2021 }
2022 ds->vnp = vnp;
2023 ds->entityID = gbp->descr->entityID;
2024 ds->itemID = gbp->descr->itemID;
2025 ds->itemtype = gbp->descr->itemtype;
2026 gbp->descr = (DescrStructPtr)MemFree(gbp->descr);
2027 return;
2028 }
2029 }
2030 if (gbp && gbp->descr) {
2031 MemFree(gbp->descr);
2032 }
2033 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_org)) != NULL) {
2034 orp = (OrgRefPtr) vnp->data.ptrvalue;
2035 if (orp) {
2036 if (ds == NULL) {
2037 ds = (DescrStructPtr) MemNew(sizeof(DescrStruct));
2038 gbp->source_info = ds;
2039 }
2040 ds->vnp = vnp;
2041 ds->entityID = gbp->descr->entityID;
2042 ds->itemID = gbp->descr->itemID;
2043 ds->itemtype = gbp->descr->itemtype;
2044 gbp->descr = (DescrStructPtr)MemFree(gbp->descr);
2045 return;
2046 }
2047 }
2048 if (gbp && gbp->descr) {
2049 gbp->descr = (DescrStructPtr)MemFree(gbp->descr);
2050 }
2051 return;
2052 }
2053
asn2hp_setup(Asn2ffJobPtr ajp,FFPrintArrayPtr PNTR papp)2054 Int4 asn2hp_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
2055 {
2056 FFPrintArrayPtr pap;
2057 Int4 index, total, pub_num;
2058 GBEntryPtr gbp;
2059
2060 GetLocusPartsAwp(ajp);
2061 total=2;
2062 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2063 gbp->descr = NULL;
2064 if (GB_GetSeqDescrComms(ajp, gbp) > 0) {
2065 total += gbp->comm_num;
2066 }
2067 pub_num = (Int2)GetPubsAwp(ajp, gbp);
2068 total += pub_num;
2069 GetGBDate(ajp, gbp);
2070 }
2071 *papp = (FFPrintArrayPtr) MemNew((size_t) total*sizeof(FFPrintArray));
2072 pap = *papp;
2073 /* pap_total = total; -- NO EFFECT */
2074 LoadPap(NULL, NULL, ajp, 0, (Uint1)0, (Uint1)0, 0, A2F_OTHER, NULL);
2075 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next)
2076 {
2077 GetDefinitionLine(ajp, gbp);
2078 LoadPap(pap, PrintDefinitionLine, ajp, 0, (Uint1)0, (Uint1)0,
2079 line_estimate[1], A2F_OTHER, gbp);
2080 LoadPap(pap, PrintGBOrganismLine, ajp, 0, (Uint1)0, (Uint1)0,
2081 line_estimate[4], A2F_OTHER, gbp);
2082 pub_num = GetPubNum(gbp);
2083 for (index=0; index < pub_num; index++) {
2084 LoadPap(pap,
2085 PrintPubsByNumber, ajp, index, (Uint1)0, (Uint1)0,
2086 line_estimate[5], A2F_REFERENCE, gbp);
2087 }
2088 for (index=0; index < gbp->comm_num; index++) {
2089 if (index == 0) {
2090 LoadPap(pap,
2091 PrintFirstComment, ajp, index, (Uint1)0, (Uint1)0,
2092 line_estimate[5], A2F_COMMENT, gbp);
2093 } else {
2094 LoadPap(pap,
2095 PrintCommentByNumber, ajp, index, (Uint1)0, (Uint1)0,
2096 line_estimate[5], A2F_COMMENT, gbp);
2097 }
2098 }
2099 }
2100
2101 return total;
2102 }
2103
/*
 * PrintLastLine
 *
 * Print-array callback that only calls PrintTerminator(); neither
 * argument is read.
 */
static void PrintLastLine (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    (void) ajp;
    (void) gbp;

    PrintTerminator ();
}
2108
/*
 * GetFeatDefinitionLine
 *
 * Builds gbp->defline for a feature-centred report.  CreateDefLine()
 * supplies the Bioseq's definition line (using the MolInfo tech when a
 * molinfo descriptor is found); double quotes in it are replaced by
 * single quotes.  When the feature identified by ajp->entityID /
 * ajp->itemID is found and has a type label, the result is
 * "<label> from: <definition>", otherwise just the definition.
 *
 * gbp->descr is replaced by a new DescrStruct carrying the item
 * identifiers reported by CreateDefLine().  gbp->defline receives a
 * newly allocated string.
 */
static void GetFeatDefinitionLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    CharPtr         string, string_start, fstr, label = NULL;
    ValNodePtr      vnp;
    MolInfoPtr      mfp;
    CharPtr         buf;
    Int2            buflen = 1001;
    ItemInfoPtr     iip;
    DescrStructPtr  dsp = NULL;
    Uint1           tech = 0;
    SeqFeatPtr      sfp = NULL;

    buf = (CharPtr) MemNew(buflen + 1);
    gbp->descr = NULL;
/*****************  deflines for htg sequences *****************/
    vnp = GatherDescrByChoice(ajp, gbp, Seq_descr_molinfo);
    if (vnp != NULL) {
        mfp = (MolInfoPtr) vnp->data.ptrvalue;
        if (mfp) {
            tech = mfp->tech;
        }
    }
    /* the descriptor bookkeeping left by the lookup is not needed */
    if (gbp->descr) {
        gbp->descr = (DescrStructPtr) MemFree(gbp->descr);
    }

    iip = (ItemInfoPtr) MemNew(sizeof(ItemInfo));
    CreateDefLine(iip, gbp->bsp, buf, buflen, tech, NULL, NULL);
    if (iip != NULL) {
        dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
        dsp->entityID = iip->entityID;
        dsp->itemID = iip->itemID;
        dsp->itemtype = iip->itemtype;
    }
    MemFree(iip);
    gbp->descr = dsp;

    /* the result is a separate string from buf: both are freed below */
    string_start = string = CheckEndPunctuation(buf, '.');

    while (*string != '\0')
    {
        if (*string == '\"')
            *string = '\'';
        string++;
    }
    GatherItem(ajp->entityID, ajp->itemID, OBJ_SEQFEAT,
               &sfp, find_item);
    if (sfp)
        label = (CharPtr) FeatDefTypeLabel(sfp);
    if (label) {
        /* sizeof(" from: ") counts the 7 separator characters AND the
           terminating NUL; the old "+ 7" left no room for the NUL */
        fstr = (CharPtr) MemNew(StringLen(label) + StringLen(string_start)
                                + sizeof(" from: "));
        sprintf(fstr, "%s from: %s", label, string_start);
    } else {
        fstr = StringSave(string_start);
    }

    /* fstr is already a private heap copy; hand it over directly */
    gbp->defline = fstr;
    MemFree(string_start);
    MemFree(buf);
}
2170
find_user_object(BioseqPtr bsp)2171 static Boolean find_user_object(BioseqPtr bsp)
2172 {
2173 UserObjectPtr uop;
2174 ObjectIdPtr oip;
2175 ValNodePtr desc;
2176
2177 for (desc=bsp->descr; desc; desc=desc->next) {
2178 if (desc->choice == Seq_descr_user) {
2179 uop = desc->data.ptrvalue;
2180 if ((oip = uop->type) == NULL) return FALSE;
2181 if (StringCmp(oip->str, "RefGeneTracking") != 0) return FALSE;
2182
2183 return TRUE;
2184 }
2185 }
2186 return FALSE;
2187 }
2188
/*
 * PrintStatusLine
 *
 * Looks for a "RefGeneTracking" user object among the descriptors of
 * gbp->bsp and, when its "Status" field holds "Provisional", prints a
 * PROVISIONAL paragraph through the ff_* print routines.
 *
 * Changes from the previous version:
 *   - the two debugging printf() calls, which wrote every field label and
 *     the status value to stdout, are gone;
 *   - a user descriptor of another type no longer ends the search;
 *   - NULL Bioseq, user object, type and field label are skipped instead of
 *     being dereferenced.
 *
 * Parameters:
 *   ajp - not used by this function.
 *   gbp - entry whose Bioseq is inspected; nothing happens when it is NULL.
 */
static void PrintStatusLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    BioseqPtr      bsp;
    ValNodePtr     desc;
    UserObjectPtr  uop;
    ObjectIdPtr    oip;
    UserFieldPtr   ufp;
    CharPtr        status;

    if (gbp == NULL || (bsp = gbp->bsp) == NULL) {
        return;
    }

    for (desc = bsp->descr; desc != NULL; desc = desc->next) {
        if (desc->choice != Seq_descr_user) {
            continue;
        }
        uop = (UserObjectPtr) desc->data.ptrvalue;
        if (uop == NULL || (oip = uop->type) == NULL) {
            continue;
        }
        if (StringCmp(oip->str, "RefGeneTracking") != 0) {
            continue;
        }

        /* Only the first "Status" field of the object is considered. */
        for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
            oip = ufp->label;
            if (oip == NULL || StringCmp(oip->str, "Status") != 0) {
                continue;
            }
            status = (CharPtr) ufp->data.ptrvalue;
            if (StringCmp(status, "Provisional") == 0) {
                ff_StartPrint(0, 0, ASN2FF_GB_MAX, NULL);
                ff_AddString("PROVISIONAL");
                TabToColumn(13);
                /* NOTE(review): the sentence below reads as if some words
                   were lost ("... that has record may be ...").  It is kept
                   unchanged; confirm the intended wording. */
                ff_AddString("This is a provisional reference sequence record that has record may be somewhat different from this one.");
                ff_EndPrint();
            }
            break;
        }
    }
}
2226
asn2gb_setup(Asn2ffJobPtr ajp,FFPrintArrayPtr PNTR papp)2227 Int4 asn2gb_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
2228 {
2229 FFPrintArrayPtr pap;
2230 Int4 index, total, pub_num, seqblks_num;
2231 GBEntryPtr gbp;
2232 SeqIdPtr sip;
2233 TextSeqIdPtr tsip;
2234 BioseqPtr bsp;
2235
2236 GetLocusPartsAwp(ajp);
2237 if ((gbp=ajp->asn2ffwep->gbp) != NULL) {
2238 if ((bsp = (BioseqPtr) gbp->bsp) != NULL) {
2239 for (sip=bsp->id; sip; sip=sip->next) {
2240 if (sip->choice == SEQID_OTHER) {
2241 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2242 if (StringNCmp(tsip->accession, "NT_", 3) == 0) {
2243 ajp->contig_view = TRUE;
2244 break;
2245 }
2246 }
2247 }
2248 }
2249 }
2250 if (!ajp->genome_view) {
2251 GetSeqFeat(ajp);
2252 }
2253 if (ajp->contig_view) {
2254 ajp->ignore_top = 0;
2255 GetSeqFeat(ajp);
2256 }
2257 total=0;
2258 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2259 if ((bsp=gbp->bsp) == NULL) {
2260 continue;
2261 }
2262 if (gbp->bsp && ajp->id_print) {
2263 sip = SeqIdFindBest(gbp->bsp->id, SEQID_GI);
2264 if (SeqIdComp(sip, ajp->id_print) != SIC_YES) {
2265 continue;
2266 }
2267 }
2268 CheckSourceFeat(ajp, gbp);
2269 if (gbp->map == TRUE && ajp->show_seq == FALSE) {
2270 total += 7;
2271 } else if (ajp->genome_view || ajp->contig_view) {
2272 total += 6;
2273 } else {
2274 total += 8;
2275 }
2276 if (ajp->show_version) {
2277 total++;
2278 }
2279 if (ajp->show_gi && (gbp->gi != -1 || gbp->ni != NULL)) {
2280 total++;
2281 }
2282 if (ajp->asn2ffwep->total_seg > 0) {
2283 total++;
2284 }
2285 gbp->descr = NULL;
2286 if (GB_GetSeqDescrComms(ajp, gbp) > 0) {
2287 total += gbp->comm_num;
2288 }
2289 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
2290 total++;
2291 }
2292 if (ajp->genome_view || ajp->map_view || ajp->contig_view) {
2293 total += 2; /* FEATURES and 'source' feature*/
2294 total ++; /* last line '//' */
2295 if (gbp->map) {
2296 gbp->feat_num = GetMapFeats(ajp, gbp);
2297 total += gbp->feat_num;
2298 } else {
2299 total ++;
2300 }
2301 if (ajp->contig_view && gbp->feat) {
2302 gbp->feat_num = gbp->feat->sfpListsize;
2303 total += gbp->feat_num;
2304 }
2305 if (ajp->contig_view && ajp->show_seq == TRUE) {
2306 total += 2; /* BASE COUNT and ORIGIN*/
2307 seqblks_num = (Int4) GetNumOfSeqBlks(ajp, gbp);
2308 total += seqblks_num;
2309 }
2310 } else {
2311 if (gbp->feat) {
2312 total += 2; /* FEATURES and 'source' feature*/
2313 gbp->feat_num = gbp->feat->sfpListsize;
2314 total += gbp->feat_num;
2315 }
2316 if (ajp->show_seq == TRUE) {
2317 seqblks_num = (Int4) GetNumOfSeqBlks(ajp, gbp);
2318 total += seqblks_num;
2319 }
2320 }
2321 pub_num = (Int2)GetPubsAwp(ajp, gbp);
2322 total += pub_num;
2323 GetGBDate(ajp, gbp);
2324 }
2325 *papp = (FFPrintArrayPtr) MemNew((size_t) total*sizeof(FFPrintArray));
2326 pap = *papp;
2327 /* pap_total = total; -- NO EFFECT */
2328 LoadPap(NULL, NULL, ajp, 0, (Uint1)0, (Uint1)0, 0, A2F_OTHER, NULL);
2329 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next)
2330 {
2331 if (gbp->bsp && ajp->id_print) {
2332 sip = SeqIdFindBest(gbp->bsp->id, SEQID_GI);
2333 if (SeqIdComp(sip, ajp->id_print) != SIC_YES) {
2334 continue;
2335 }
2336 }
2337 LoadPap(pap, PrintLocusLine, ajp, 0, (Uint1)0, (Uint1)0,
2338 line_estimate[0], A2F_OTHER, gbp);
2339 if (gbp->descr) {
2340 gbp->descr = (DescrStructPtr)MemFree(gbp->descr);
2341 gbp->descr = NULL;
2342 }
2343 flat2asn_delete_locus_user_string();
2344 flat2asn_install_locus_user_string(gbp->locus);
2345 if (ajp->slp || ajp->itemID) {
2346 GetFeatDefinitionLine(ajp, gbp);
2347 } else {
2348 GetDefinitionLine(ajp, gbp);
2349 }
2350 LoadPap(pap, PrintDefinitionLine, ajp, 0, (Uint1)0, (Uint1)0,
2351 line_estimate[1], A2F_OTHER, gbp);
2352 if (gbp->descr) {
2353 gbp->descr = (DescrStructPtr)MemFree(gbp->descr);
2354 gbp->descr = NULL;
2355 }
2356 LoadPap(pap, PrintAccessLine, ajp, 0, (Uint1)0, (Uint1)0,
2357 line_estimate[2], A2F_OTHER, gbp);
2358 flat2asn_delete_accession_user_string();
2359 flat2asn_install_accession_user_string(gbp->accession);
2360 if (ajp->show_gi) {
2361 if (gbp->gi != -1) {
2362 LoadPap(pap, PrintNCBI_GI, ajp, 0, (Uint1)0, (Uint1)0,
2363 line_estimate[2], A2F_OTHER, gbp);
2364 } else if (gbp->ni != NULL) {
2365 LoadPap(pap, PrintNID, ajp, 0, (Uint1)0, (Uint1)0,
2366 line_estimate[2], A2F_OTHER, gbp);
2367 }
2368 }
2369 if (ajp->show_version) {
2370 LoadPap(pap, PrintVersionLine, ajp, 0, (Uint1)0, (Uint1)0,
2371 line_estimate[2], A2F_OTHER, gbp);
2372 }
2373 LoadPap(pap, PrintKeywordLine, ajp, 0, (Uint1)0, (Uint1)0,
2374 line_estimate[3], A2F_OTHER, gbp);
2375 if (ajp->asn2ffwep->total_seg > 0)
2376 LoadPap(pap, PrintSegmentLine, ajp, 0, (Uint1)0, (Uint1)0,
2377 line_estimate[0], A2F_OTHER, gbp);
2378 LoadPap(pap, PrintGBSourceLine, ajp, 0, (Uint1)0, (Uint1)0,
2379 line_estimate[4], A2F_SOURCE_FEATURE, gbp);
2380 LoadPap(pap, PrintGBOrganismLine, ajp, 0, (Uint1)0, (Uint1)0,
2381 line_estimate[4], A2F_SOURCE_FEATURE, gbp);
2382 pub_num = GetPubNum(gbp);
2383 for (index=0; index < pub_num; index++) {
2384 LoadPap(pap,
2385 PrintPubsByNumber, ajp, index, (Uint1)0, (Uint1)0,
2386 line_estimate[5], A2F_REFERENCE, gbp);
2387 }
2388 for (index=0; index < gbp->comm_num; index++) {
2389 if (index == 0) {
2390 LoadPap(pap,
2391 PrintFirstComment, ajp, index, (Uint1)0, (Uint1)0,
2392 line_estimate[5], A2F_COMMENT, gbp);
2393 } else {
2394 LoadPap(pap,
2395 PrintCommentByNumber, ajp, index, (Uint1)0, (Uint1)0,
2396 line_estimate[5], A2F_COMMENT, gbp);
2397 }
2398 }
2399 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
2400 LoadPap(pap, GBDescrComFeat, ajp, 0, (Uint1)0, (Uint1)0,
2401 line_estimate[2], A2F_OTHER, gbp);
2402 }
2403 if (ajp->genome_view && gbp->map == FALSE || ajp->contig_view) {
2404 LoadPap(pap, PrintFeatHeader, ajp, 0, (Uint1)0, (Uint1)0,
2405 line_estimate[6], A2F_OTHER, gbp);
2406 LoadPap(pap, PrintSourceFeat, ajp, 0, (Uint1)0, (Uint1)0,
2407 line_estimate[8], A2F_SOURCE_FEATURE, gbp);
2408 for (index=0; index < gbp->feat_num; index++) {
2409 LoadPap(pap, PrintNAFeatByNumber, ajp, index,
2410 (Uint1)0, (Uint1)0, line_estimate[8], A2F_FEATURE, gbp);
2411 }
2412 if (ajp->contig_view && ajp->show_seq == TRUE) {
2413 LoadPap(pap, PrintBaseCount, ajp, 0, (Uint1)0, (Uint1)0,
2414 line_estimate[0], A2F_OTHER, gbp);
2415 LoadPap(pap, PrintOriginLine, ajp, 0, (Uint1)0, (Uint1)0,
2416 line_estimate[0], A2F_OTHER, gbp);
2417 if (ajp->slp) {
2418 LoadPap(pap, PrintSeqRegion, ajp, index,
2419 (Uint1)1, (Uint1)0, line_estimate[9],
2420 A2F_SEQUENCE, gbp);
2421 } else {
2422 seqblks_num = GetNumOfSeqBlks(ajp, gbp);
2423 for (index=0; index < seqblks_num; index++) {
2424 if (seqblks_num == index+1) {
2425 LoadPap(pap, PrintSeqBlk, ajp, index,
2426 (Uint1)1, (Uint1)0, line_estimate[9],
2427 A2F_SEQUENCE, gbp);
2428 } else {
2429 LoadPap(pap, PrintSeqBlk, ajp, index,
2430 (Uint1)0, (Uint1)0, line_estimate[9],
2431 A2F_SEQUENCE, gbp);
2432 }
2433 }
2434 }
2435 } else {
2436 LoadPap(pap, PrintGenome, ajp, 0, (Uint1)0, (Uint1)0,
2437 line_estimate[0], A2F_OTHER, gbp);
2438 LoadPap(pap, PrintLastLine, ajp, 0, (Uint1)0, (Uint1)0,
2439 line_estimate[0], A2F_OTHER, gbp);
2440 }
2441 } else {
2442
2443 if (gbp->feat) {
2444 LoadPap(pap, PrintFeatHeader, ajp, 0, (Uint1)0, (Uint1)0,
2445 line_estimate[6], A2F_OTHER, gbp);
2446 LoadPap(pap, PrintSourceFeat, ajp, 0, (Uint1)0, (Uint1)0,
2447 line_estimate[8], A2F_SOURCE_FEATURE, gbp);
2448 for (index=0; index < gbp->feat_num; index++) {
2449 LoadPap(pap, PrintNAFeatByNumber, ajp, index,
2450 (Uint1)0, (Uint1)0, line_estimate[8], A2F_FEATURE, gbp);
2451 }
2452 }
2453 if (gbp->map == FALSE && ajp->show_seq == TRUE) {
2454 LoadPap(pap, PrintBaseCount, ajp, 0, (Uint1)0, (Uint1)0,
2455 line_estimate[0], A2F_OTHER, gbp);
2456 LoadPap(pap, PrintOriginLine, ajp, 0, (Uint1)0, (Uint1)0,
2457 line_estimate[0], A2F_OTHER, gbp);
2458 if (ajp->slp) {
2459 LoadPap(pap, PrintSeqRegion, ajp, index,
2460 (Uint1)1, (Uint1)0, line_estimate[9],
2461 A2F_SEQUENCE, gbp);
2462 } else {
2463 seqblks_num = GetNumOfSeqBlks(ajp, gbp);
2464 for (index=0; index < seqblks_num; index++) {
2465 if (seqblks_num == index+1) {
2466 LoadPap(pap, PrintSeqBlk, ajp, index,
2467 (Uint1)1, (Uint1)0, line_estimate[9],
2468 A2F_SEQUENCE, gbp);
2469 } else {
2470 LoadPap(pap, PrintSeqBlk, ajp, index,
2471 (Uint1)0, (Uint1)0, line_estimate[9],
2472 A2F_SEQUENCE, gbp);
2473 }
2474 }
2475 }
2476 } else {
2477 LoadPap(pap, PrintLastLine, ajp, 0, (Uint1)0, (Uint1)0,
2478 line_estimate[0], A2F_OTHER, gbp);
2479 }
2480 }
2481 }
2482
2483 return total;
2484 }
asn2gr_setup(Asn2ffJobPtr ajp,FFPrintArrayPtr PNTR papp)2485 Int4 asn2gr_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
2486 {
2487 FFPrintArrayPtr pap;
2488 Int4 index, total, pub_num;
2489 GBEntryPtr gbp;
2490
2491 GetLocusPartsAwp(ajp);
2492 total=0;
2493 gbp=ajp->asn2ffwep->gbp;
2494 CheckSourceFeat(ajp, gbp);
2495 if (gbp->map == TRUE && ajp->show_seq == FALSE) {
2496 total += 7;
2497 } else if (ajp->genome_view) {
2498 total += 6;
2499 } else {
2500 total += 8;
2501 }
2502 gbp->descr = NULL;
2503 if (GB_GetSeqDescrComms(ajp, gbp) > 0) {
2504 total += gbp->comm_num;
2505 }
2506 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
2507 total++;
2508 }
2509 pub_num = (Int2)GetPubsAwp(ajp, gbp);
2510 total += pub_num;
2511 GetGBDate(ajp, gbp);
2512
2513 *papp = (FFPrintArrayPtr) MemNew((size_t) total*sizeof(FFPrintArray));
2514 pap = *papp;
2515 LoadPap(NULL, NULL, ajp, 0, (Uint1)0, (Uint1)0, 0, A2F_OTHER, NULL);
2516 LoadPap(pap, PrintTaxonomy, ajp, 0, (Uint1)0, (Uint1)0,
2517 line_estimate[0], A2F_OTHER, gbp);
2518 pub_num = GetPubNum(gbp);
2519 for (index=0; index < pub_num; index++) {
2520 LoadPap(pap,
2521 PrintPubsByNumber, ajp, index, (Uint1)0, (Uint1)0,
2522 line_estimate[5], A2F_REFERENCE, gbp);
2523 }
2524 for (index=0; index < gbp->comm_num; index++) {
2525 if (index == 0) {
2526 LoadPap(pap,
2527 PrintFirstComment, ajp, index, (Uint1)0, (Uint1)0,
2528 line_estimate[5], A2F_COMMENT, gbp);
2529 } else {
2530 LoadPap(pap,
2531 PrintCommentByNumber, ajp, index, (Uint1)0, (Uint1)0,
2532 line_estimate[5], A2F_COMMENT, gbp);
2533 }
2534 }
2535 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
2536 LoadPap(pap, GBDescrComFeat, ajp, 0, (Uint1)0, (Uint1)0,
2537 line_estimate[2], A2F_OTHER, gbp);
2538 }
2539 if (ajp->genome_view && gbp->map == FALSE) {
2540 LoadPap(pap, PrintGenome, ajp, 0, (Uint1)0, (Uint1)0,
2541 line_estimate[0], A2F_OTHER, gbp);
2542 }
2543
2544 return total;
2545 }
2546
/*______________________________________________________________________
**
**	The code below is not currently used.
**	It has been disabled with "#if 0" rather than removed.
**		-- Dmitri Lukyanov
*/
2553 #if 0
2554
2555 static Int2 GetCDSNumber (OrganizeFeatPtr feat)
2556
2557 {
2558 SortStructPtr p;
2559 SeqFeatPtr sfp;
2560 Int2 i,j;
2561
2562 for (p = feat->List, j=0, i=0; i < feat->sfpListsize; i++, p++) {
2563 sfp = p->sfp;
2564 if (sfp->data.choice != SEQFEAT_CDREGION) {
2565 continue;
2566 }
2567 j++;
2568 }
2569 return j;
2570 }
2571
2572 static Int2 GetPubsRptNum (ValNodePtr pubs)
2573 {
2574 PubStructPtr psp;
2575 ValNodePtr vnp;
2576 Int2 i;
2577
2578 for (vnp=pubs, i=0; vnp; vnp=vnp->next) {
2579 psp = vnp->data.ptrvalue;
2580 if (psp->choice == PUB_Sub) {
2581 continue;
2582 }
2583 i++;
2584 }
2585 return i;
2586 }
2587
2588 #endif
2589 /*______________________________________________________________________
2590 */
2591
asn2pr_setup(Asn2ffJobPtr ajp,FFPrintArrayPtr PNTR papp)2592 Int4 asn2pr_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
2593 {
2594 FFPrintArrayPtr pap;
2595 Int4 index, total, feat_num, pub_num;
2596 GBEntryPtr gbp;
2597
2598 GetLocusPartsAwp(ajp);
2599 GetSeqFeat(ajp);
2600 total=0;
2601 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2602 total += 2;
2603 if (gbp->feat) {
2604 total++;
2605 feat_num = gbp->feat->sfpListsize;
2606 total += feat_num;
2607 }
2608 pub_num = GetPubsAwp(ajp, gbp);
2609 total += pub_num;
2610 }
2611 *papp = (FFPrintArrayPtr) MemNew((size_t) total*sizeof(FFPrintArray));
2612 pap = *papp;
2613 /* pap_total = total; -- NO EFFECT */
2614 LoadPap(NULL, NULL, ajp, 0, (Uint1)0, (Uint1)0, 0, A2F_OTHER, NULL);
2615 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2616 LoadPap(pap, PrintAccessLine, ajp, 0, (Uint1)0, (Uint1)0,
2617 line_estimate[2], A2F_OTHER, gbp);
2618 pub_num = (Int2)GetPubsAwp(ajp, gbp);
2619 for (index=0; index < pub_num; index++) {
2620 LoadPap(pap,
2621 PrintPubsByNumber, ajp, index, (Uint1)0, (Uint1)0,
2622 line_estimate[5], A2F_REFERENCE, gbp);
2623 }
2624 if (gbp->feat) {
2625 LoadPap(pap, PrintFeatHeader, ajp, 0, (Uint1)0, (Uint1)0,
2626 line_estimate[6], A2F_OTHER, gbp);
2627 for (index=0; index < gbp->feat->sfpListsize; index++) {
2628 LoadPap(pap, PrintNAFeatByNumber, ajp, index,
2629 (Uint1)0, (Uint1)0, line_estimate[8], A2F_FEATURE, gbp);
2630 }
2631 }
2632 LoadPap(pap, PrintLastLine, ajp, 0, (Uint1)0, (Uint1)0,
2633 line_estimate[0], A2F_OTHER, gbp);
2634 }
2635
2636 return total;
2637 }
2638
asn2embl_setup(Asn2ffJobPtr ajp,FFPrintArrayPtr PNTR papp)2639 Int4 asn2embl_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
2640 {
2641 FFPrintArrayPtr pap;
2642 Int4 index, max, total, pub_num, seqblks_num;
2643 GBEntryPtr gbp;
2644
2645 GetLocusPartsAwp(ajp);
2646 GetSeqFeat(ajp);
2647
2648 total=0;
2649 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2650 CheckSourceFeat(ajp, gbp);
2651 total += 7;
2652 gbp->xref_present = FALSE;
2653 if (CheckXrefLine(ajp, gbp) == TRUE) {
2654 total ++;
2655 gbp->xref_present = TRUE;
2656 }
2657 if (gbp->gi != -1 || gbp->ni != NULL) {
2658 total++;
2659 }
2660 gbp->descr = NULL;
2661 if (GB_GetSeqDescrComms(ajp, gbp) > 0) {
2662 total += gbp->comm_num;
2663 }
2664 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
2665 total++;
2666 }
2667 if (gbp->feat) {
2668 total += 2; /* FEATURES and 'source' feature*/
2669 total += gbp->feat->sfpListsize;
2670 }
2671 seqblks_num = GetNumOfSeqBlks(ajp, gbp);
2672 total += seqblks_num;
2673 pub_num = GetPubsAwp(ajp, gbp);
2674 total += pub_num;
2675
2676 GetEMBLDate(ajp, gbp);
2677 GetEntryVersion(gbp);
2678 }
2679 if (ajp->ssp && ajp->hup)
2680 total --;
2681 *papp = (FFPrintArrayPtr) MemNew((size_t) total*sizeof(FFPrintArray));
2682 pap = *papp;
2683
2684 LoadPap(NULL, NULL, ajp, 0, (Uint1)0, (Uint1)0, 0, A2F_OTHER, NULL);
2685 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2686 LoadPap(pap,
2687 PrintLocusLine, ajp, 0,(Uint1)0,(Uint1)1,line_estimate[0],
2688 A2F_OTHER, gbp);
2689 flat2asn_delete_locus_user_string();
2690 flat2asn_install_locus_user_string(gbp->locus);
2691 LoadPap(pap,
2692 PrintAccessLine,ajp,0,(Uint1)0,(Uint1)1,line_estimate[2],
2693 A2F_OTHER, gbp);
2694 flat2asn_delete_accession_user_string();
2695 flat2asn_install_accession_user_string(gbp->accession);
2696 if (gbp->gi != -1) {
2697 LoadPap(pap, PrintNCBI_GI, ajp, 0, (Uint1)0, (Uint1)0,
2698 line_estimate[2], A2F_OTHER, gbp);
2699 } else if (gbp->ni != NULL) {
2700 LoadPap(pap, PrintNID, ajp, 0, (Uint1)0, (Uint1)0,
2701 line_estimate[2], A2F_OTHER, gbp);
2702 }
2703 if (ajp->ssp == NULL || ajp->hup == FALSE) {
2704 LoadPap(pap,
2705 PrintDateLines, ajp,0,(Uint1)0,(Uint1)1,line_estimate[10],
2706 A2F_OTHER, gbp);
2707 }
2708 GetDefinitionLine(ajp, gbp);
2709 LoadPap(pap,
2710 PrintDefinitionLine,ajp,0,(Uint1)0,(Uint1)1,line_estimate[1],
2711 A2F_OTHER, gbp);
2712 LoadPap(pap, PrintKeywordLine,ajp,0,(Uint1)0,(Uint1)1,line_estimate[3],
2713 A2F_OTHER, gbp);
2714 LoadPap(pap,
2715 PrintOrganismLine,ajp,0,(Uint1)0,(Uint1)0,line_estimate[11],
2716 A2F_OTHER, gbp);
2717 pub_num = GetPubNum(gbp);
2718 for (index=0; index < pub_num; index++) {
2719 LoadPap(pap,
2720 PrintPubsByNumber, ajp, index, (Uint1)0, (Uint1)0,
2721 line_estimate[5], A2F_REFERENCE, gbp);
2722 }
2723 for (index=0; index < gbp->comm_num; index++) {
2724 if (index == 0) {
2725 LoadPap(pap,
2726 PrintFirstComment, ajp, index, (Uint1)0, (Uint1)0,
2727 line_estimate[5], A2F_COMMENT, gbp);
2728 } else {
2729 LoadPap(pap,
2730 PrintCommentByNumber, ajp, index, (Uint1)0, (Uint1)0,
2731 line_estimate[5], A2F_COMMENT, gbp);
2732 }
2733 }
2734 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
2735 LoadPap(pap, GBDescrComFeat, ajp, 0, (Uint1)0, (Uint1)0,
2736 line_estimate[2], A2F_OTHER, gbp);
2737 }
2738 if (gbp->xref_present == TRUE) {
2739 LoadPap(pap,
2740 PrintXrefLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[0],
2741 A2F_OTHER, gbp);
2742 }
2743 if (gbp->feat) {
2744 LoadPap(pap, PrintFeatHeader, ajp, 0, (Uint1)0, (Uint1)0,
2745 line_estimate[6], A2F_OTHER, gbp);
2746 }
2747 if (gbp->feat) {
2748 max = gbp->feat->sfpListsize;
2749 }
2750 LoadPap(pap,
2751 PrintSourceFeat, ajp, 0, (Uint1)0, (Uint1)1, line_estimate[8],
2752 A2F_SOURCE_FEATURE, gbp);
2753 for (index=0; index< max; index++) {
2754 if (max == index+1) {
2755 LoadPap(pap,
2756 PrintNAFeatByNumber,ajp,index,(Uint1)0,
2757 (Uint1)1,line_estimate[8], A2F_FEATURE, gbp);
2758 } else {
2759 LoadPap(pap,
2760 PrintNAFeatByNumber, ajp, index,(Uint1)0,
2761 (Uint1)0,line_estimate[8], A2F_FEATURE, gbp);
2762 }
2763 }
2764 LoadPap(pap,
2765 PrintBaseCount, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[0],
2766 A2F_OTHER, gbp);
2767 seqblks_num = GetNumOfSeqBlks(ajp, gbp);
2768 for (index=0; index < seqblks_num; index++) {
2769 if (seqblks_num == index+1) {
2770 LoadPap(pap,
2771 PrintSeqBlk, ajp, index, (Uint1)1, (Uint1)0, line_estimate[9],
2772 A2F_SEQUENCE, gbp);
2773 } else {
2774 LoadPap(pap,
2775 PrintSeqBlk, ajp, index, (Uint1)0, (Uint1)0, line_estimate[9],
2776 A2F_SEQUENCE, gbp);
2777 }
2778 }
2779 }
2780
2781 return total;
2782 }
2783
2784 /*************************************************************************
2785 *asn2gp_setup
2786 *
2787 * This code calls the routines to output a GenPept Flat File
2788 *
2789 **************************************************************************/
asn2gp_setup(Asn2ffJobPtr ajp,FFPrintArrayPtr PNTR papp)2790 Int4 asn2gp_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
2791 {
2792
2793 BioseqPtr bsp;
2794 FFPrintArrayPtr pap;
2795 Int4 feat_num;
2796 Int2 pub_num;
2797 Int4 index, total;
2798 Int4 seqblks_num;
2799 GBEntryPtr gbp;
2800 SeqIdPtr sip;
2801
2802 GetLocusPartsAwp(ajp);
2803 GetSeqFeat(ajp);
2804
2805 total=0;
2806 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2807 if (gbp->bsp && ajp->id_print) {
2808 sip = SeqIdFindBest(gbp->bsp->id, SEQID_GI);
2809 if (SeqIdComp(sip, ajp->id_print) != SIC_YES) {
2810 continue;
2811 }
2812 }
2813 CheckSourceFeat(ajp, gbp);
2814 bsp = gbp->bsp;
2815 if (ASN2FF_DROP_SHORT_AA == TRUE &&
2816 ajp->asn2ffwep->total_seg == 0 && bsp->length < GENPEPT_MIN) {
2817 flat2asn_delete_accession_user_string();
2818 flat2asn_delete_locus_user_string();
2819 flat2asn_install_accession_user_string(gbp->accession);
2820 flat2asn_install_locus_user_string(gbp->locus);
2821 if (ajp->error_msgs == TRUE)
2822 ErrPostStr(SEV_INFO, ERR_ENTRY_Partial_peptide,
2823 "Entry dropped due to length.");
2824 continue;
2825 }
2826 total += 8;
2827 if (ajp->show_version) {
2828 total++;
2829 }
2830 if (gbp->gi != -1 || gbp->ni != NULL) {
2831 total++;
2832 }
2833 if (ajp->asn2ffwep->total_seg > 0) {
2834 total++;
2835 }
2836 gbp->descr = NULL;
2837 if (GP_GetSeqDescrComms(ajp, gbp) > 0) {
2838 total += gbp->comm_num;
2839 }
2840 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
2841 total++;
2842 }
2843 if (gbp->feat) {
2844 total += 2; /* FEATURES and 'source' feature*/
2845 feat_num = gbp->feat->sfpListsize;
2846 total += feat_num;
2847 }
2848 seqblks_num = GetNumOfSeqBlks(ajp, gbp);
2849 total += seqblks_num;
2850 pub_num = (Int2)GetPubsAwp(ajp, gbp);
2851 total += pub_num;
2852
2853 GetGPDate(ajp, gbp);
2854 }
2855
2856 *papp = (FFPrintArrayPtr) MemNew((size_t) total*sizeof(FFPrintArray));
2857 pap = *papp;
2858
2859 LoadPap(NULL, NULL, ajp, 0, (Uint1)0, (Uint1)0, 0, A2F_OTHER, NULL);
2860 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2861 bsp = gbp->bsp;
2862 if (bsp && ajp->id_print) {
2863 sip = SeqIdFindBest(bsp->id, SEQID_GI);
2864 if (SeqIdComp(sip, ajp->id_print) != SIC_YES) {
2865 continue;
2866 }
2867 }
2868 if (ASN2FF_DROP_SHORT_AA == TRUE &&
2869 ajp->asn2ffwep->total_seg == 0 && bsp->length < GENPEPT_MIN) {
2870 continue;
2871 }
2872 LoadPap(pap,
2873 PrintLocusLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[0],
2874 A2F_OTHER, gbp);
2875 flat2asn_delete_locus_user_string();
2876 flat2asn_install_locus_user_string(gbp->locus);
2877 GetDefinitionLine(ajp, gbp);
2878 LoadPap(pap,
2879 PrintDefinitionLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[1],
2880 A2F_OTHER, gbp);
2881 MemFree(gbp->descr);
2882 gbp->descr = NULL;
2883 LoadPap(pap,
2884 PrintAccessLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[2],
2885 A2F_OTHER, gbp);
2886 flat2asn_delete_accession_user_string();
2887 flat2asn_install_accession_user_string(gbp->accession);
2888 if (gbp->gi != -1) {
2889 LoadPap(pap, PrintNCBI_GI, ajp, 0, (Uint1)0, (Uint1)0,
2890 line_estimate[2], A2F_OTHER, gbp);
2891 } else if (gbp->ni != NULL) {
2892 LoadPap(pap, PrintNID, ajp, 0, (Uint1)0, (Uint1)0,
2893 line_estimate[2], A2F_OTHER, gbp);
2894 }
2895 if (ajp->show_version) {
2896 LoadPap(pap, PrintVersionLine, ajp, 0, (Uint1)0, (Uint1)0,
2897 line_estimate[2], A2F_OTHER, gbp);
2898 }
2899 LoadPap(pap,
2900 PrintDBSourceLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[12],
2901 A2F_OTHER, gbp);
2902 LoadPap(pap,
2903 PrintKeywordLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[3],
2904 A2F_OTHER, gbp);
2905 if (ajp->asn2ffwep->total_seg > 0)
2906 LoadPap(pap,
2907 PrintSegmentLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[0],
2908 A2F_OTHER, gbp);
2909 LoadPap(pap, PrintGBSourceLine, ajp, 0, (Uint1)0, (Uint1)0,
2910 line_estimate[4], A2F_OTHER, gbp);
2911 LoadPap(pap, PrintGBOrganismLine, ajp, 0, (Uint1)0, (Uint1)0,
2912 line_estimate[4], A2F_OTHER, gbp);
2913 pub_num = GetPubNum(gbp);
2914 for (index=0; index < pub_num; index++) {
2915 LoadPap(pap,
2916 PrintPubsByNumber, ajp, index, (Uint1)0, (Uint1)0,
2917 line_estimate[5], A2F_REFERENCE, gbp);
2918 }
2919 for (index=0; index < gbp->comm_num; index++) {
2920 if (index == 0) {
2921 LoadPap(pap,
2922 PrintFirstComment, ajp, index, (Uint1)0, (Uint1)0,
2923 line_estimate[5], A2F_COMMENT, gbp);
2924 } else {
2925 LoadPap(pap,
2926 PrintCommentByNumber, ajp, index, (Uint1)0, (Uint1)0,
2927 line_estimate[5], A2F_COMMENT, gbp);
2928 }
2929 }
2930 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
2931 LoadPap(pap, GBDescrComFeat, ajp, 0, (Uint1)0, (Uint1)0,
2932 line_estimate[2], A2F_OTHER, gbp);
2933 }
2934 if (gbp->feat) {
2935 LoadPap(pap, PrintFeatHeader, ajp, 0, (Uint1)0, (Uint1)0,
2936 line_estimate[6], A2F_OTHER, gbp);
2937 LoadPap(pap, PrintSourceFeat, ajp, 0, (Uint1)0, (Uint1)0,
2938 line_estimate[8], A2F_SOURCE_FEATURE, gbp);
2939 if (gbp->feat) {
2940 feat_num = gbp->feat->sfpListsize;
2941 }
2942 for (index=0; index < feat_num; index++) {
2943 LoadPap(pap, PrintAAFeatByNumber, ajp, index,
2944 (Uint1)0, (Uint1)0, line_estimate[8], A2F_FEATURE, gbp);
2945 }
2946 }
2947 LoadPap(pap,
2948 PrintOriginLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[0],
2949 A2F_OTHER, gbp);
2950 seqblks_num = GetNumOfSeqBlks(ajp, gbp);
2951 for (index=0; index < seqblks_num; index++) {
2952 if (seqblks_num == index+1) {
2953 LoadPap(pap,
2954 PrintSeqBlk, ajp, index, (Uint1)1, (Uint1)0,
2955 line_estimate[9], A2F_SEQUENCE, gbp);
2956 } else {
2957 LoadPap(pap,
2958 PrintSeqBlk, ajp, index, (Uint1)0, (Uint1)0,
2959 line_estimate[9], A2F_SEQUENCE, gbp);
2960 }
2961 }
2962 }
2963
2964 return total;
2965 }
2966
2967 /*************************************************************************
2968 *asn2ep_setup
2969 *
2970 * This code calls the routines to output an "EMBLPept" Flat File
2971 *
2972 **************************************************************************/
2973
asn2ep_setup(Asn2ffJobPtr ajp,FFPrintArrayPtr PNTR papp)2974 Int4 asn2ep_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
2975 {
2976
2977 BioseqPtr bsp;
2978 FFPrintArrayPtr pap;
2979 Int4 index, total;
2980 Int4 feat_num;
2981 Int2 pub_num;
2982 Int4 seqblks_num;
2983 GBEntryPtr gbp;
2984
2985
2986 ajp->format = GENPEPT_FMT;
2987 GetLocusPartsAwp(ajp);
2988 GetSeqFeat(ajp);
2989 ajp->format = EMBLPEPT_FMT;
2990
2991 total=0;
2992 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2993 CheckSourceFeat(ajp, gbp);
2994 bsp = gbp->bsp;
2995 if (ASN2FF_DROP_SHORT_AA == TRUE && ajp->asn2ffwep->total_seg == 0 &&
2996 bsp->length < GENPEPT_MIN) {
2997 flat2asn_delete_accession_user_string();
2998 flat2asn_delete_locus_user_string();
2999 flat2asn_install_accession_user_string(gbp->accession);
3000 flat2asn_install_locus_user_string(gbp->locus);
3001 if (ajp->error_msgs == TRUE)
3002 ErrPostStr(SEV_INFO, ERR_ENTRY_Partial_peptide,
3003 "Entry dropped due to length.");
3004 continue;
3005 }
3006 total += 8;
3007 if (ajp->asn2ffwep->total_seg > 0) {
3008 total++;
3009 }
3010 if (GP_GetSeqDescrComms(ajp, gbp) > 0) {
3011 total += gbp->comm_num;
3012 }
3013 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
3014 total++;
3015 }
3016 if (gbp->feat) {
3017 total += 2; /* FEATURES and 'source' feature*/
3018 feat_num = gbp->feat->sfpListsize;
3019 total += feat_num;
3020 }
3021 seqblks_num = GetNumOfSeqBlks(ajp, gbp);
3022 total += seqblks_num;
3023 pub_num = (Int2)GetPubsAwp(ajp, gbp);
3024 total += pub_num;
3025
3026 GetEMBLDate(ajp, gbp);
3027 GetEntryVersion(gbp);
3028 }
3029
3030 *papp = (FFPrintArrayPtr) MemNew((size_t) total*sizeof(FFPrintArray));
3031 pap = *papp;
3032
3033 LoadPap(NULL, NULL, ajp, 0, (Uint1)0, (Uint1)0, 0, A2F_OTHER, NULL);
3034 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
3035 bsp = gbp->bsp;
3036 if (ASN2FF_DROP_SHORT_AA == TRUE && ajp->asn2ffwep->total_seg == 0 &&
3037 bsp->length < GENPEPT_MIN) {
3038 continue;
3039 }
3040 LoadPap(pap,
3041 PrintEPLocusLine, ajp, 0, (Uint1)0, (Uint1)1, line_estimate[0],
3042 A2F_OTHER, gbp);
3043 flat2asn_delete_locus_user_string();
3044 flat2asn_install_locus_user_string(gbp->locus);
3045 LoadPap(pap,
3046 PrintAccessLine, ajp, 0, (Uint1)0, (Uint1)1, line_estimate[2],
3047 A2F_OTHER, gbp);
3048 flat2asn_delete_accession_user_string();
3049 flat2asn_install_accession_user_string(gbp->accession);
3050 LoadPap(pap,
3051 PrintDateLines, ajp, 0, (Uint1)0, (Uint1)1, line_estimate[10],
3052 A2F_OTHER, gbp);
3053 GetDefinitionLine(ajp, gbp);
3054 LoadPap(pap,
3055 PrintDefinitionLine, ajp, 0, (Uint1)0, (Uint1)1, line_estimate[1],
3056 A2F_OTHER, gbp);
3057 LoadPap(pap,
3058 PrintKeywordLine, ajp, 0, (Uint1)0, (Uint1)1, line_estimate[3],
3059 A2F_OTHER, gbp);
3060 LoadPap(pap,
3061 PrintOrganismLine, ajp, 0, (Uint1)0, (Uint1)0, line_estimate[11],
3062 A2F_OTHER, gbp);
3063 pub_num = GetPubNum(gbp);
3064 for (index=0; index < pub_num; index++) {
3065 LoadPap(pap, PrintPubsByNumber, ajp, index, (Uint1)0, (Uint1)0,
3066 line_estimate[5], A2F_REFERENCE, gbp);
3067 }
3068 for (index=0; index < gbp->comm_num; index++) {
3069 if (index == 0) {
3070 LoadPap(pap,
3071 PrintFirstComment, ajp, index, (Uint1)0, (Uint1)0,
3072 line_estimate[5], A2F_COMMENT, gbp);
3073 } else {
3074 LoadPap(pap,
3075 PrintCommentByNumber, ajp, index, (Uint1)0, (Uint1)0,
3076 line_estimate[5], A2F_COMMENT, gbp);
3077 }
3078 }
3079 if (gbp->feat && gbp->feat->sfpCommsize > 0) {
3080 LoadPap(pap, GBDescrComFeat, ajp, 0, (Uint1)0, (Uint1)0,
3081 line_estimate[2], A2F_OTHER, gbp);
3082 }
3083 if (gbp->feat) {
3084 LoadPap(pap, PrintFeatHeader, ajp, 0, (Uint1)0, (Uint1)0,
3085 line_estimate[6], A2F_OTHER, gbp);
3086 LoadPap(pap, PrintSourceFeat, ajp, 0, (Uint1)0, (Uint1)0,
3087 line_estimate[8], A2F_SOURCE_FEATURE, gbp);
3088 for (index=0; index < gbp->feat->sfpListsize; index++) {
3089 LoadPap(pap, PrintAAFeatByNumber, ajp, index,
3090 (Uint1)0, (Uint1)0, line_estimate[8], A2F_FEATURE, gbp);
3091 }
3092 }
3093 seqblks_num = GetNumOfSeqBlks(ajp, gbp);
3094 for (index=0; index < seqblks_num; index++) {
3095 if (seqblks_num == index+1) {
3096 LoadPap(pap,
3097 PrintSeqBlk, ajp, index, (Uint1)1, (Uint1)0,
3098 line_estimate[9], A2F_SEQUENCE, gbp);
3099 } else {
3100 LoadPap(pap,
3101 PrintSeqBlk, ajp, index, (Uint1)0, (Uint1)0,
3102 line_estimate[9], A2F_SEQUENCE, gbp);
3103 }
3104 }
3105 }
3106 return total;
3107 }
3108
/*
 * FreeSortStructLoc
 *
 * Releases the location and feature data referenced from an array of
 * SortStruct elements:
 *   - every SeqLoc in the extra_loc array of the FIRST element, followed by
 *     that array itself (only when its extra_loc_cnt is positive);
 *   - the feature (sfp) of each of the "size" elements whose feat_free flag
 *     is TRUE.
 * The SortStruct array itself is not freed here.
 *
 * NOTE(review): the extra_loc data of elements other than the first is not
 * touched -- confirm that this is intended.
 */
static void FreeSortStructLoc(Int4 size, SortStructPtr p)
{
    Int4  extra_cnt = p->extra_loc_cnt;
    Int4  i;

    if (extra_cnt > 0) {
        i = 0;
        while (i < extra_cnt) {
            SeqLocFree(p->extra_loc[i]);
            i++;
        }
        MemFree(p->extra_loc);
    }

    i = 0;
    while (i < size) {
        if (p[i].feat_free == TRUE) {
            SeqFeatFree(p[i].sfp);
        }
        i++;
    }
}
3128
3129 /**********************************************************/
/*
 * FreeSortStructSet
 *
 * Frees an array of "size" SortStruct elements: the gene and note
 * structures attached to each element, then the location/feature data via
 * FreeSortStructLoc(), and finally the array itself.
 *
 * Nothing is done when size is not positive or ssp is NULL.  The previous
 * version tested each element pointer against NULL inside the loop; that
 * test could only ever fire for the first element of a NULL array and did
 * not stop FreeSortStructLoc() from dereferencing it, so the guard is now
 * applied once, up front.
 */
static void FreeSortStructSet(Int4 size, SortStructPtr ssp)
{
    Int4  i;

    if (size <= 0 || ssp == NULL) {
        return;
    }

    for (i = 0; i < size; i++) {
        if (ssp[i].gsp != NULL) {
            GeneStructFree(ssp[i].gsp);
        }
        if (ssp[i].nsp != NULL) {
            NoteStructFree(ssp[i].nsp);
        }
    }
    FreeSortStructLoc(size, ssp);
    MemFree(ssp);
}
3150
3151 /**********************************************************/
asn2ff_cleanup(Asn2ffJobPtr ajp)3152 NLM_EXTERN void asn2ff_cleanup(Asn2ffJobPtr ajp)
3153 {
3154 GBEntryPtr gbp;
3155 GBEntryPtr next;
3156 ValNodePtr v;
3157 ValNodePtr vnext;
3158 ComStructPtr s;
3159 ComStructPtr snext;
3160 OrganizeFeatPtr ofp;
3161
3162 #if 0 /***have no idea why this is needed (EY) ***/
3163
3164 if (get_www()) {
3165 return;
3166 }
3167 #endif
3168
3169 if(ajp->asn2ffwep != NULL)
3170 {
3171 for(gbp = ajp->asn2ffwep->gbp; gbp != NULL; gbp = next)
3172 {
3173 next = gbp->next;
3174 if(gbp->spp != NULL)
3175 SeqPortFree(gbp->spp);
3176 if(gbp->base_cnt_line != NULL)
3177 MemFree(gbp->base_cnt_line);
3178 if(gbp->feat != NULL)
3179 {
3180 ofp = gbp->feat;
3181 NoteStructFree(ofp->source_notes);
3182 FreeSortStructSet(ofp->sfpListsize, ofp->List);
3183 FreeSortStructSet(ofp->sfpCommsize, ofp->Commlist);
3184 FreeSortStructSet(ofp->sfpGenesize, ofp->Genelist);
3185 FreeSortStructSet(ofp->sfpOrgsize, ofp->Orglist);
3186 FreeSortStructSet(ofp->sfpSitesize, ofp->Siteslist);
3187 FreeSortStructSet(ofp->sfpSourcesize, ofp->Sourcelist);
3188 FreeSortStructSet(ofp->sfpXrefsize, ofp->Xreflist);
3189 FreeSortStructSet(ofp->biosrcsize, ofp->Biosrclist);
3190 MemFree(ofp);
3191 }
3192 for(v = gbp->Pub; v != NULL; v = vnext)
3193 {
3194 vnext = v->next;
3195 FreePubStruct(v->data.ptrvalue);
3196 MemFree(v);
3197 }
3198 for(s = gbp->comm; s != NULL; s = snext)
3199 {
3200 snext = s->next;
3201 MemFree(s->string);
3202 MemFree(s);
3203 }
3204 if(gbp->source_info != NULL)
3205 {
3206 MemFree(gbp->source_info);
3207 }
3208 if(gbp->defline != NULL)
3209 {
3210 MemFree(gbp->defline);
3211 }
3212 MemFree(gbp);
3213 }
3214 MemFree(ajp->asn2ffwep);
3215 }
3216 SeqFeatFree(ajp->sfp_out);
3217
3218 /* Delete these strings so they don't interfere with others
3219 */
3220 flat2asn_delete_locus_user_string();
3221 flat2asn_delete_accession_user_string();
3222 }
3223
3224 /*****************************************************************************
*	void LoadPap(FFPrintArrayPtr pap, FFPapFct fct, Asn2ffJobPtr ajp,
*		Int4 index, Uint1 last, Uint1 printxx, Int2 estimate,
*		Uint1 element_type, GBEntryPtr gbp)
3227 *
3228 * This function places the parameters in the correct spaces in the
3229 * FFPrintArrayPtr.
3230 *
3231 ****************************************************************************/
void LoadPap(FFPrintArrayPtr pap, FFPapFct fct, Asn2ffJobPtr ajp, Int4 index, Uint1 last, Uint1 printxx, Int2 estimate, Uint1 element_type, GBEntryPtr gbp)
{
    /* Slot that the next call will fill.  It persists across calls and is
       rewound to 0 by calling LoadPap with pap == NULL. */
    static Int4 pap_index;
    DescrStructPtr dsp;

    if (! pap) {
        pap_index=0;
        return;
    }

    pap[pap_index].fct = fct;
    pap[pap_index].ajp = ajp;
    pap[pap_index].gbp = gbp;
    pap[pap_index].index = index;
    pap[pap_index].last = last;
    pap[pap_index].printxx = printxx;
    pap[pap_index].estimate = estimate;
    pap[pap_index].descr = NULL;

    /* Attach the entityID/itemID/itemtype triple of the object this
       element is printed from.  Every MemNew() result is checked before
       it is written to; on allocation failure descr is simply left NULL
       (the original dereferenced the NULL pointer). */
    if (element_type == A2F_SOURCE_FEATURE) {
        dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
        pap[pap_index].descr = dsp;
        if (dsp != NULL) {
            /* prefer the first source feature, else the source descriptor */
            if (gbp->feat && gbp->feat->Sourcelist != NULL) {
                dsp->entityID = gbp->feat->Sourcelist[0].entityID;
                dsp->itemID = gbp->feat->Sourcelist[0].itemID;
                dsp->itemtype = gbp->feat->Sourcelist[0].itemtype;
            } else if (gbp->source_info != NULL) {
                dsp->entityID = gbp->source_info->entityID;
                dsp->itemID = gbp->source_info->itemID;
                dsp->itemtype = gbp->source_info->itemtype;
            }
        }
    } else if (element_type == A2F_FEATURE && gbp->feat) {
        GetPapSeqFeatPtr (gbp, index, pap_index, pap);
    } else if (element_type == A2F_REFERENCE) {
        GetPapRefPtr (ajp, gbp, index, pap_index, pap);
    } else if (element_type == A2F_FEATURE_NEW && gbp->feat) {
        dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
        pap[pap_index].descr = dsp;
        if (dsp != NULL) {
            dsp->entityID = gbp->feat->List[index].entityID;
            dsp->itemID = gbp->feat->List[index].itemID;
            dsp->itemtype = gbp->feat->List[index].itemtype;
        }
    } else if (element_type == A2F_COMMENT) {
        GetPapCommPtr (ajp, gbp, index, pap_index, pap);
    } else if (element_type == A2F_SEQUENCE) {
        dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
        pap[pap_index].descr = dsp;
        if (dsp != NULL) {
            dsp->entityID = gbp->entityID;
            dsp->itemID = gbp->itemID;
            dsp->itemtype = gbp->itemtype;
        }
    } else {
        /* any other element: copy the ids from the entry's current descr */
        if (gbp->descr != NULL) {
            dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
            pap[pap_index].descr = dsp;
            if (dsp != NULL) {
                dsp->entityID = gbp->descr->entityID;
                dsp->itemID = gbp->descr->itemID;
                dsp->itemtype = gbp->descr->itemtype;
            }
        }
    }
    pap_index++;

    return;
}
3292
3293 /****************************************************************************
3294 * This function checks a SeqPortPtr, maintained on the Biotable Ptr,
*	and compares its BioseqPtr to that of the BioseqPtr associated
3296 * with segment count of the btp. At present, used only for nucleic
3297 * acids (4/14/94).
3298 ****************************************************************************/
3299
void CheckSeqPort (Asn2ffJobPtr ajp, GBEntryPtr gbp, Int4 start)
{
    BioseqPtr bsp=gbp->bsp;
    SeqPortPtr spp=gbp->spp;
    Int4 start1;

    if (spp != NULL) {
        /* The existing port can be reused only when no sub-location is
           requested and it was opened on this very Bioseq. */
        if (ajp->slp == NULL && bsp == spp->bsp) {
            if (spp->curpos != start)
                SeqPortSeek(spp, start, SEEK_SET);
            gbp->spp = spp;
            return;
        }
        SeqPortFree(spp);
    }

    /* Open a fresh port: on the requested location if there is one,
       otherwise on the whole Bioseq. */
    if (ajp->slp) {
        spp = SeqPortNewByLoc(ajp->slp, Seq_code_iupacna);
    } else {
        spp = SeqPortNew(bsp, 0, -1, 0, Seq_code_iupacna);
    }

    /* The port may not have been created.  The original checked this only
       when gbp->spp had been NULL and dereferenced the new port
       unconditionally after replacing an old one. */
    if (spp != NULL) {
        start1 = start - spp->start;
        if (start1 != spp->curpos)
            SeqPortSeek(spp, start1, SEEK_SET);
    }
    gbp->spp = spp;

    return;
}
3345
3346
3347 /***************************************************************************
3348 *
3349 * "GetMolInfo" gets information about the molecule for the locus
3350 * line. The formatted information is in "buffer".
3351 *
3352 ***************************************************************************/
3353
void GetMolInfo (Asn2ffJobPtr ajp, CharPtr buffer, GBEntryPtr gbp)
{
    static CharPtr strand [4]= { " ", "ss-", "ds-","ms-"};

/* WARNING : The mol[] table was originally designed to convert
MolInfo->biomol values <= 8 in the ASN.1 spec to molecule type strings.
The addition of snoRNA with biomol value 12 breaks this design.
Also, the new LOCUS line format requires larger molecule type
strings, since the space for that field has been increased from 4 to 6.
Hence new_locus_mol, utilized if ajp->old_locus_fmt is *NOT* set. */

    static CharPtr mol [10] = {" ", "DNA ", "RNA ", "mRNA", "rRNA", "tRNA", "uRNA", "scRNA", " AA ","oRNA"};

    static CharPtr new_locus_mol [10] = {" ", "DNA ", "RNA ", "mRNA ", "rRNA ", "tRNA ", "uRNA ", "scRNA ", " AA ", "snoRNA"};

    static CharPtr embl_mol [8] = {"xxx", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA", "AA "};

    BioseqPtr bsp;
    Int2 istrand, imol;
    ValNodePtr vnp = NULL;
    MolInfoPtr mfp;
    Int4 length;

    /* Callers print "buffer" unconditionally, but several paths below
       write nothing into it (no Bioseq, unhandled format, authentic EMBL
       with imol >= 8).  Start from an empty string so they never print
       uninitialised memory. */
    if (buffer != NULL)
        buffer[0] = '\0';

    if ((bsp = gbp->bsp) == NULL)
        return;
    istrand = bsp->strand;
    if (istrand > 3)
        istrand = 0;

    imol = bsp->mol;
    if (imol > 3)
        imol = 0;

    if (ajp->slp) {
        length = SeqLocLen(ajp->slp);
    } else {
        length = bsp->length;
    }
/*keep both old and new style, get new first*/
    if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_molinfo)) != NULL) {
        mfp = (MolInfoPtr)vnp->data.ptrvalue;
        if (mfp != NULL) {
            if (mfp->biomol <= 8) {
                imol = (Int2) (mfp->biomol);
            } else if (mfp->biomol == 12) {
                imol = 9;   /* snoRNA occupies the last table slot */
            }
        }
    } else {
        for (vnp = bsp->descr; vnp; vnp = vnp->next) {
            if (vnp->choice == Seq_descr_mol_type) {
                if (vnp->data.intvalue <= 8) {
                    imol = (Int2) (vnp->data.intvalue);
                }
                break;
            }
        }
    }
    if (imol < 2) { /* check Seq-inst.mol if mol-type is not-set or genomic */
        imol = bsp->mol;
        if (imol == 3) {
            imol = 8;
        } else if (imol >= 4) {
            /* 4 maps to the blank entry as before.  Any larger Seq-inst.mol
               value used to be kept as-is and then indexed past the end of
               the 10-entry tables; treat it as blank too. */
            imol = 0;
        }
    }

/* if ds-DNA don't show ds */
    if (imol == 1 && istrand == 2) {
        istrand = 0;
    }
/* ss-any RNA don't show ss */
    if (imol > 2 && istrand == 1) {
        istrand = 0;
    }
    /* NOTE: this modifies the Bioseq itself, not just the printed text */
    if (ajp->slp != NULL) {
        bsp->topology = 1;
    }

    if (ajp->format == GENBANK_FMT || ajp->format == SELECT_FMT) {
        if (bsp->topology == 2) {
            if (ajp->old_locus_fmt == TRUE) {
                sprintf(buffer, "%7ld bp %s%-4s circular",
                    (long) length, strand[istrand], mol[imol]);
            } else {
                sprintf(buffer, "%12ld bp %s%-6s circular",
                    (long) length, strand[istrand], new_locus_mol[imol]);
            }
        } else {
            if (ajp->old_locus_fmt == TRUE) {
                sprintf(buffer, "%7ld bp %s%-4s ",
                    (long) length, strand[istrand], mol[imol]);
            } else {
                sprintf(buffer, "%12ld bp %s%-6s linear ",
                    (long) length, strand[istrand], new_locus_mol[imol]);
            }
        }
    } else if (ajp->format == GENPEPT_FMT) {
        if (ajp->old_locus_fmt == TRUE) {
            sprintf(buffer, "%7ld aa", (long) length);
        } else {
            sprintf(buffer, "%12ld aa linear", (long) length);
        }
    } else if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT ||
        ajp->format == EMBLPEPT_FMT) {
        if (ajp->pseudo == FALSE) { /* do authentic EMBL */
            /* embl_mol has only 8 entries; for imol >= 8 the buffer stays
               empty.
               NOTE(review): the "AA " entry sits at index 7 while proteins
               get imol == 8 above -- confirm the table is not one short. */
            if (imol < 8) {
                if (bsp->topology == 2)
                    sprintf(buffer, "circular %s", embl_mol[imol]);
                else
                    sprintf(buffer, "%s", embl_mol[imol]);
            }
        } else { /* Use GenBank molecule names */
            if (bsp->topology == 2)
                sprintf(buffer, "circular %s", mol[imol]);
            else
                sprintf(buffer, "%s", mol[imol]);
        }
    }
    return;
}
3472
3473 /*************************************************************************
3474 * Checks if there is a Xref in EMBL format.
3475 * Used ONLY to make EMBL output.
3476 *This could probably be done more efficiently???????????????????
3477 **************************************************************************/
3478
CheckXrefLine(Asn2ffJobPtr ajp,GBEntryPtr gbp)3479 Boolean CheckXrefLine (Asn2ffJobPtr ajp, GBEntryPtr gbp)
3480
3481 {
3482 Boolean ret_val=FALSE;
3483 Char buffer[20];
3484 CharPtr name;
3485 EMBLBlockPtr eb=NULL;
3486 EMBLXrefPtr xref=NULL;
3487 ValNodePtr descr=NULL, ds_vnp, tvnp;
3488 DescrStructPtr dsp;
3489
3490 tvnp = GatherDescrListByChoice(ajp, gbp, Seq_descr_embl);
3491 for (descr= tvnp;
3492 descr; descr=descr->next) {
3493 dsp = (DescrStructPtr)descr->data.ptrvalue;
3494 ds_vnp = dsp->vnp;
3495 eb = (EMBLBlockPtr) ds_vnp->data.ptrvalue;
3496 for (xref=eb->xref; xref; xref=xref->next) {
3497 name=NULL;
3498 if (xref->_class) {
3499 if (xref->_class == 5)
3500 StringCpy(buffer, "SWISS-PROT");
3501 else if (xref->_class == 8)
3502 StringCpy(buffer, "EPD");
3503 else if (xref->_class == 10)
3504 StringCpy(buffer, "TFD");
3505 else if (xref->_class == 11)
3506 StringCpy(buffer, "FLYBASE");
3507 name = &(buffer[0]);
3508 } else if (xref->name) {
3509 name = xref->name;
3510 }
3511 if (name && xref->id)
3512 ret_val = TRUE;
3513 else
3514 ret_val = FALSE;
3515 }
3516 MemFree(ds_vnp);
3517 }
3518 ValNodeFreeData(tvnp);
3519 return ret_val;
3520 }
3521
/*
 * PrintLocusLine: print the EMBL "ID" line (EMBL, pseudo-EMBL and
 * EMBL-peptide formats) or the "LOCUS" line (all other formats) for gbp.
 * The molecule description comes from GetMolInfo(); the column positions
 * of division and date depend on ajp->old_locus_fmt.
 * Does nothing when gbp is NULL; gbp->descr is reset to NULL.
 */
void PrintLocusLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    BioseqPtr bsp;
    /* GetMolInfo() sprintf()s into this buffer without knowing its size.
       The longest format it uses ("%12ld bp %s%-6s circular") needs
       12 + 4 + 3 + 6 + 9 characters plus the terminator, which did not fit
       into the former 34 bytes. */
    Char buffer[64];

    if (gbp == NULL)
        return;
    gbp->descr = NULL;
    bsp=gbp->bsp;
    /* GetMolInfo() leaves the buffer untouched on some paths */
    buffer[0] = '\0';
    if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT ||
        ajp->format == EMBLPEPT_FMT)
    {
        ff_StartPrint(5, 0, ASN2FF_EMBL_MAX, "ID");
        ff_AddString(gbp->locus);
        if (ajp->hup == TRUE) {
            ff_AddString(" confidential; ");
        } else {
            ff_AddString(" standard; ");
        }
        GetMolInfo(ajp, buffer, gbp);
        ff_AddString( buffer);
        ff_AddString("; ");
        /* submissions in EMBL format with a blank division print "UNA" */
        if (ajp->ssp && ajp->format == EMBL_FMT && *(gbp->div) == ' ') {
            ff_AddString("UNA");
        } else {
            ff_AddString(gbp->div);
        }
        ff_AddString("; ");
        if (ajp->slp) {
            ff_AddInteger("%ld", (long) SeqLocLen(ajp->slp));
        } else {
            ff_AddInteger("%ld", (long) bsp->length);
        }
        ff_AddString(" BP.");
        ff_EndPrint();
    } else {
        ff_StartPrint(0, 0, ASN2FF_GB_MAX, NULL);
        ff_AddString("LOCUS");
        TabToColumn(13);
        ff_AddString( gbp->locus);
        GetMolInfo(ajp, buffer, gbp);
        ff_AddString( buffer);

        if (ajp->old_locus_fmt)
            TabToColumn(53);
        else
            TabToColumn(65);
        ff_AddString(gbp->div);

        if (ajp->old_locus_fmt)
            TabToColumn(63);
        else
            TabToColumn(69);
        ff_AddString(gbp->date);

        ff_EndPrint();
    }
}
3580
/*
 * PrintEPLocusLine: print an "ID" line that ends in the sequence length
 * followed by " RS.".  The molecule text comes from GetMolInfo().
 * Does nothing when gbp is NULL; gbp->descr is reset to NULL.
 */
void PrintEPLocusLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)

{
    BioseqPtr bsp;
    /* sized generously: GetMolInfo() sprintf()s into it without a bound */
    Char buffer[64];

    /* gbp used to be dereferenced in the bsp initialiser before this test */
    if (gbp == NULL)
        return;
    bsp = gbp->bsp;
    gbp->descr = NULL;
    /* GetMolInfo() leaves the buffer untouched on some paths */
    buffer[0] = '\0';
    ff_StartPrint(5, 0, ASN2FF_EMBL_MAX, "ID");
    ff_AddString(gbp->locus);
    ff_AddString(" standard; ");
    GetMolInfo(ajp, buffer, gbp);
    ff_AddString(buffer);
    ff_AddString("; ");
    ff_AddString(gbp->div);
    ff_AddString("; ");
    ff_AddInteger("%ld", (long) bsp->length);
    ff_AddString(" RS.");
    ff_EndPrint();
}
3602
3603
/*
 * PrintAccessLine: print the accession line -- tagged "AC" for the EMBL
 * family of formats, "ACCESSION" otherwise.  Extra accessions are
 * appended only when no sub-location (ajp->slp) is being printed.
 */
void PrintAccessLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    Boolean embl_style;

    if (gbp == NULL)
        return;
    gbp->descr = NULL;

    embl_style = (Boolean) (ajp->format == EMBL_FMT ||
                            ajp->format == PSEUDOEMBL_FMT ||
                            ajp->format == EMBLPEPT_FMT);
    if (embl_style) {
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "AC");
    } else {
        ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
        ff_AddString( "ACCESSION");
        TabToColumn(13);
    }

    /* a held-until-published submission prints only a ';' here */
    if (ajp->ssp && ajp->hup) {
        ff_AddChar(';');
    } else {
        if (ajp->slp) {
            www_accession(gbp->accession);
        } else {
            ff_AddString(gbp->accession);
        }
    }

    if (! ajp->slp) {
        AddExtraAccessions(ajp, gbp);
    }
    ff_EndPrint();
}
3635
/*
 * PrintVersionLine: print gbp->version, followed by " GI:<gi>" unless
 * gbp->gi is -1.
 * NOTE(review): the EMBL-family branch tags this line "AC", the same tag
 * PrintAccessLine() uses -- confirm whether a distinct tag was intended.
 */
void PrintVersionLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    Boolean embl_style;

    if (gbp == NULL)
        return;
    gbp->descr = NULL;

    embl_style = (Boolean) (ajp->format == EMBL_FMT ||
                            ajp->format == PSEUDOEMBL_FMT ||
                            ajp->format == EMBLPEPT_FMT);
    if (! embl_style) {
        ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
        ff_AddString( "VERSION");
        TabToColumn(13);
    } else {
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "AC");
    }

    ff_AddString(gbp->version);
    if (gbp->gi != -1) {
        ff_AddString( " GI:");
        ff_AddInteger("%ld", (long) gbp->gi);
    }
    ff_EndPrint();
}
3661
/*
 * PrintNCBI_GI: print the gi as "g<number>" on an "NI" line (EMBL family,
 * followed by an XX line) or on an "NID" / "PID" line (GenBank / GenPept).
 */
void PrintNCBI_GI(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    Boolean embl_style;

    if (gbp == NULL)
        return;
    gbp->descr = NULL;

    embl_style = (Boolean) (ajp->format == EMBL_FMT ||
                            ajp->format == PSEUDOEMBL_FMT ||
                            ajp->format == EMBLPEPT_FMT);

    if (embl_style) {
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "NI");
    } else {
        ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
        /* formats other than GenBank/GenPept get no keyword at all */
        if (ajp->format == GENBANK_FMT) {
            ff_AddString( "NID");
        } else if (ajp->format == GENPEPT_FMT) {
            ff_AddString( "PID");
        }
        TabToColumn(13);
    }

    ff_AddChar('g');
    ff_AddInteger("%ld", (long) gbp->gi);
    ff_EndPrint();

    if (embl_style) {
        PrintXX();
    }
}
/*
 * PrintNID: print gbp->ni on an "NI" line (EMBL family, followed by an
 * XX line) or on an "NID" / "PID" line (GenBank / GenPept).
 */
void PrintNID(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    Boolean embl_style;

    if (gbp == NULL)
        return;
    gbp->descr = NULL;

    embl_style = (Boolean) (ajp->format == EMBL_FMT ||
                            ajp->format == PSEUDOEMBL_FMT ||
                            ajp->format == EMBLPEPT_FMT);

    if (! embl_style) {
        ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
        /* formats other than GenBank/GenPept get no keyword at all */
        if (ajp->format == GENBANK_FMT) {
            ff_AddString( "NID");
        } else if (ajp->format == GENPEPT_FMT) {
            ff_AddString( "PID");
        }
        TabToColumn(13);
    } else {
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "NI");
    }
    ff_AddString(gbp->ni);
    ff_EndPrint();

    if (embl_style) {
        PrintXX();
    }
}
/*
 * PrintDateLines: print the "DT" line(s).
 *
 *   update_date present : create_date on its own line first (only when
 *                         ajp->pseudo is FALSE and create_date exists),
 *                         then update_date with the release note.
 *   update_date absent  : create_date with the release note, printed
 *                         twice when ajp->pseudo is FALSE, once otherwise.
 *
 * The release note " (Rel. <embl_rel>, Last updated, Version <embl_ver>)"
 * is added only when gbp->embl_rel is set.
 */
void PrintDateLines (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    Int2 pass;
    Int2 passes;

    if (gbp == NULL)
        return;
    gbp->descr = NULL;
    ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "DT");

    if (! gbp->update_date) {
        /* only create date exists; pseudo-EMBL prints a single date line */
        passes = (Int2) ((ajp->pseudo == FALSE) ? 2 : 1);
        for (pass = 0; pass < passes; pass++) {
            if (pass > 0) {
                NewContLine();
            }
            ff_AddString(gbp->create_date);
            if (gbp->embl_rel) {
                ff_AddString(" (Rel. ");
                ff_AddString(gbp->embl_rel);
                ff_AddString(", Last updated, Version ");
                ff_AddInteger("%ld", (long) gbp->embl_ver);
                ff_AddChar(')');
            }
        }
    } else {
        /* both dates may exist; pseudo-EMBL skips the create-date line */
        if (ajp->pseudo == FALSE && gbp->create_date) {
            ff_AddString(gbp->create_date);
            NewContLine();
        }
        ff_AddString(gbp->update_date);
        if (gbp->embl_rel) {
            ff_AddString(" (Rel. ");
            ff_AddString(gbp->embl_rel);
            ff_AddString(", Last updated, Version ");
            ff_AddInteger("%ld", (long) gbp->embl_ver);
            ff_AddChar(')');
        }
    }
    ff_EndPrint();
} /* PrintDateLines */
3785
/*
 * PrintSegmentLine: print "SEGMENT  <n> of <total>" for an entry that is
 * one of several segments; prints nothing when total_seg is 1 or less.
 */
void PrintSegmentLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    if (gbp == NULL)
        return;
    gbp->descr = NULL;

    if (ajp->asn2ffwep->total_seg <= 1)
        return;

    ff_StartPrint(0, 0, ASN2FF_GB_MAX, NULL);
    ff_AddString("SEGMENT");
    TabToColumn(13);
    ff_AddInteger("%ld", (long) gbp->num_seg);
    ff_AddString(" of ");
    ff_AddInteger("%ld", (long) ajp->asn2ffwep->total_seg);
    ff_EndPrint();
}
3803
/*
 * AddKeyword: append a copy of "add" to the keyword list "key" unless an
 * identical string (per StringCmp) is already in the list.
 * Returns the head of the list, which is the new node when "key" was
 * empty.  If no node can be allocated the list is returned unchanged.
 */
static ValNodePtr AddKeyword(ValNodePtr key, CharPtr add)
{
    ValNodePtr vnp;

    for (vnp = key; vnp; vnp = vnp->next) {
        if (StringCmp((CharPtr)vnp->data.ptrvalue, add) == 0) {
            return key;     /* already present */
        }
    }
    vnp = ValNodeNew(NULL);
    if (vnp == NULL) {
        /* allocation failed: keep what we have instead of writing
           through a NULL pointer */
        return key;
    }
    vnp->data.ptrvalue = StringSave(add);
    key = tie_next(key, vnp);

    return key;
}
3819
/*
 * CheckSpecialKeyword: decide whether keyword "kwd" may be kept for an
 * entry of the given kind.  Returns FALSE when kwd appears in the keyword
 * array of one of the two OTHER kinds (EST / STS / GSS), TRUE otherwise.
 * When several flags are set, is_est takes precedence over is_sts, which
 * takes precedence over is_gss.  With no flag set every keyword is kept.
 */
static Boolean CheckSpecialKeyword(Boolean is_est, Boolean is_sts, Boolean is_gss, CharPtr kwd)
{
    Boolean clash = FALSE;

    if (is_est) {
        clash = (Boolean) (MatchArrayString(STS_kw_array, TOTAL_STSKW, kwd) != -1 ||
                           MatchArrayString(GSS_kw_array, TOTAL_GSSKW, kwd) != -1);
    } else if (is_sts) {
        clash = (Boolean) (MatchArrayString(EST_kw_array, TOTAL_ESTKW, kwd) != -1 ||
                           MatchArrayString(GSS_kw_array, TOTAL_GSSKW, kwd) != -1);
    } else if (is_gss) {
        clash = (Boolean) (MatchArrayString(STS_kw_array, TOTAL_STSKW, kwd) != -1 ||
                           MatchArrayString(EST_kw_array, TOTAL_ESTKW, kwd) != -1);
    }

    return (Boolean) (clash ? FALSE : TRUE);
}
3854
GetKeywordLine(Asn2ffJobPtr ajp,GBEntryPtr gbp)3855 ValNodePtr GetKeywordLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
3856
3857 {
3858 ValNodePtr block, keyword=NULL, v, vnp;
3859 GBBlockPtr gbblk;
3860 EMBLBlockPtr ebp;
3861 PirBlockPtr pbp;
3862 PrfBlockPtr prfp;
3863 SPBlockPtr spbp;
3864 MolInfoPtr mfp;
3865 Boolean is_est=FALSE, is_sts=FALSE, is_gss=FALSE;
3866 DescrStructPtr dsp;
3867 DescrStructPtr dspnext;
3868
3869 if ((block = GatherDescrByChoice(ajp, gbp, Seq_descr_molinfo)) != NULL)
3870 {
3871 mfp = (MolInfoPtr) block->data.ptrvalue;
3872 switch (mfp->tech) {
3873 case MI_TECH_htc:
3874 keyword = ValNodeNew(NULL);
3875 keyword->data.ptrvalue = StringSave("HTC");
3876 break;
3877 case MI_TECH_htgs_0:
3878 keyword = ValNodeNew(NULL);
3879 keyword->data.ptrvalue = StringSave("HTG");
3880 v = ValNodeNew(keyword);
3881 v->data.ptrvalue = StringSave("HTGS_PHASE0");
3882 break;
3883 case MI_TECH_htgs_1:
3884 keyword = ValNodeNew(NULL);
3885 keyword->data.ptrvalue = StringSave("HTG");
3886 v = ValNodeNew(keyword);
3887 v->data.ptrvalue = StringSave("HTGS_PHASE1");
3888 break;
3889 case MI_TECH_htgs_2:
3890 keyword = ValNodeNew(NULL);
3891 keyword->data.ptrvalue = StringSave("HTG");
3892 v = ValNodeNew(keyword);
3893 v->data.ptrvalue = StringSave("HTGS_PHASE2");
3894 break;
3895 case MI_TECH_htgs_3:
3896 keyword = ValNodeNew(NULL);
3897 keyword->data.ptrvalue = StringSave("HTG");
3898 break;
3899 case MI_TECH_est:
3900 is_est = TRUE;
3901 keyword = ValNodeNew(NULL);
3902 keyword->data.ptrvalue = StringSave("EST");
3903 break;
3904 case MI_TECH_sts:
3905 is_sts = TRUE;
3906 keyword = ValNodeNew(NULL);
3907 keyword->data.ptrvalue = StringSave("STS");
3908 break;
3909 case MI_TECH_survey:
3910 is_gss = TRUE;
3911 keyword = ValNodeNew(NULL);
3912 keyword->data.ptrvalue = StringSave("GSS");
3913 break;
3914 case MI_TECH_fli_cdna:
3915 keyword = ValNodeNew(NULL);
3916 keyword->data.ptrvalue = StringSave("FLI_CDNA");
3917 break;
3918 default:
3919 break;
3920 }
3921 }
3922
3923 if ((block = GatherDescrByChoice(ajp, gbp, Seq_descr_genbank)) != NULL)
3924 {
3925 gbblk = (GBBlockPtr) block->data.ptrvalue;
3926 if (gbblk->keywords != NULL) {
3927 for (vnp = gbblk->keywords; vnp; vnp = vnp->next) {
3928 if (CheckSpecialKeyword(is_est, is_sts, is_gss, (CharPtr)vnp->data.ptrvalue) == TRUE) {
3929 keyword = AddKeyword(keyword, (CharPtr)vnp->data.ptrvalue);
3930 }
3931 }
3932 return keyword;
3933 } else {
3934 if (gbp->descr) {
3935 for(dsp = gbp->descr; dsp != NULL; dsp = dspnext)
3936 {
3937 dspnext = dsp->next;
3938 MemFree(dsp);
3939 }
3940 gbp->descr = NULL;
3941 }
3942 }
3943 }
3944 if ((block = GatherDescrByChoice(ajp, gbp, Seq_descr_embl)) != NULL)
3945 {
3946 ebp = (EMBLBlockPtr) block->data.ptrvalue;
3947 if (ebp->keywords != NULL) {
3948 for (vnp = ebp->keywords; vnp; vnp = vnp->next) {
3949 if (CheckSpecialKeyword(is_est, is_sts, is_gss, (CharPtr)vnp->data.ptrvalue) == TRUE) {
3950 keyword = AddKeyword(keyword, (CharPtr)vnp->data.ptrvalue);
3951 }
3952 }
3953 return keyword;
3954 } else {
3955 if (gbp->descr) {
3956 for(dsp = gbp->descr; dsp != NULL; dsp = dspnext)
3957 {
3958 dspnext = dsp->next;
3959 MemFree(dsp);
3960 }
3961 gbp->descr = NULL;
3962 }
3963 }
3964 }
3965 if ((block = GatherDescrByChoice(ajp, gbp, Seq_descr_pir)) != NULL)
3966 {
3967 pbp = (PirBlockPtr) block->data.ptrvalue;
3968 if (pbp->keywords != NULL) {
3969 for (vnp = pbp->keywords; vnp; vnp = vnp->next) {
3970 keyword = AddKeyword(keyword, (CharPtr)vnp->data.ptrvalue);
3971 }
3972 return keyword;
3973 } else {
3974 if (gbp->descr) {
3975 for(dsp = gbp->descr; dsp != NULL; dsp = dspnext)
3976 {
3977 dspnext = dsp->next;
3978 MemFree(dsp);
3979 }
3980 gbp->descr = NULL;
3981 }
3982 }
3983 }
3984 if ((block = GatherDescrByChoice(ajp, gbp, Seq_descr_prf)) != NULL)
3985 {
3986 prfp = (PrfBlockPtr) block->data.ptrvalue;
3987 if (prfp->keywords != NULL) {
3988 for (vnp = prfp->keywords; vnp; vnp = vnp->next) {
3989 keyword = AddKeyword(keyword, (CharPtr)vnp->data.ptrvalue);
3990 }
3991 return keyword;
3992 } else {
3993 if (gbp->descr) {
3994 for(dsp = gbp->descr; dsp != NULL; dsp = dspnext)
3995 {
3996 dspnext = dsp->next;
3997 MemFree(dsp);
3998 }
3999 gbp->descr = NULL;
4000 }
4001 }
4002 }
4003 if ((block = GatherDescrByChoice(ajp, gbp, Seq_descr_sp)) != NULL)
4004 {
4005 spbp = (SPBlockPtr) block->data.ptrvalue;
4006 if (spbp->keywords != NULL) {
4007 for (vnp = spbp->keywords; vnp; vnp = vnp->next) {
4008 keyword = AddKeyword(keyword, (CharPtr)vnp->data.ptrvalue);
4009 }
4010 return keyword;
4011 } else {
4012 if (gbp->descr) {
4013 for(dsp = gbp->descr; dsp != NULL; dsp = dspnext)
4014 {
4015 dspnext = dsp->next;
4016 MemFree(dsp);
4017 }
4018 gbp->descr = NULL;
4019 }
4020 }
4021 }
4022 return keyword;
4023
4024 } /* GetKeywordLine */
4025
4026
/*
 * PrintKeywordLine: print the keyword line -- tagged "KW" for the EMBL
 * family of formats, "KEYWORDS" otherwise.  Keywords come from
 * GetKeywordLine(), are separated by "; " and the line always ends with
 * a period, even when there is no keyword at all.
 *
 * The former "line_return" / "first" bookkeeping was removed: line_return
 * was never set, so the NewContLine() call it guarded could not run.
 * A NULL gbp is now ignored, as most of the sibling print functions do.
 */
void PrintKeywordLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    Int2 tab_length=12;
    ValNodePtr keyword, vnp;

    if (gbp == NULL)
        return;
    gbp->descr = NULL;
    keyword = GetKeywordLine(ajp, gbp);

    if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT ||
        ajp->format == EMBLPEPT_FMT) {
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "KW");
    } else {
        ff_StartPrint(0, tab_length, ASN2FF_GB_MAX, NULL);
        ff_AddString("KEYWORDS");
        TabToColumn((Int2)(tab_length+1));
    }
    if (keyword != NULL) {
        for (vnp=keyword; vnp != NULL; vnp=vnp->next) {
            ff_AddString((CharPtr)vnp->data.ptrvalue);
            if (vnp->next != NULL) {
                ff_AddChar(';');
                ff_AddChar(' ');
            }
        }
        ValNodeFreeData(keyword);
    } else if (gbp->descr) {
        /* no keyword was produced: drop the descriptor reference */
        MemFree(gbp->descr);
        gbp->descr = NULL;
    }
    ff_AddChar('.');
    ff_EndPrint();


} /* PrintKeywordLine */
4072
/*
 * PrintDefinitionLine: print gbp->defline -- tagged "DE" for the EMBL
 * family of formats, "DEFINITION" otherwise.
 */
void PrintDefinitionLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    Boolean embl_style;

    embl_style = (Boolean) (ajp->format == EMBL_FMT ||
                            ajp->format == PSEUDOEMBL_FMT ||
                            ajp->format == EMBLPEPT_FMT);
    if (! embl_style) {
        ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
        ff_AddString("DEFINITION");
        TabToColumn(13);
    } else {
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "DE");
    }
    ff_AddString(gbp->defline);
    ff_EndPrint();
}
4087
/*
 * GetDefinitionLine: build the definition line for gbp->bsp with
 * CreateDefLine() and store a saved copy in gbp->defline.
 *
 * The MolInfo technique, when present, is passed to CreateDefLine().
 * The text is run through CheckEndPunctuation(.., '.') and every double
 * quote is replaced by a single quote.  gbp->descr is set to a new
 * DescrStruct carrying the entityID/itemID/itemtype that CreateDefLine()
 * reported (or NULL if it cannot be allocated).
 *
 * Does nothing when gbp is NULL or the work buffer cannot be allocated;
 * gbp->defline is left untouched if CheckEndPunctuation() yields NULL.
 */
void GetDefinitionLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    CharPtr string, string_start;
    ValNodePtr vnp = NULL;
    MolInfoPtr mfp;
    CharPtr buf;
    Int2 buflen = 1001;
    ItemInfoPtr iip;
    DescrStructPtr dsp = NULL;
    Uint1 tech = 0;

    /* gbp used to be dereferenced before it was tested for NULL */
    if (gbp == NULL)
        return;
    gbp->descr = NULL;

    buf = (CharPtr)MemNew(buflen+1);
    if (buf == NULL)
        return;

/* deflines for htg sequences */
    vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_molinfo);
    if (vnp != NULL) {
        mfp = (MolInfoPtr)vnp->data.ptrvalue;
        if (mfp) {
            tech = mfp->tech;
        }
    }
    /* the descriptor reference left by the lookup above is not needed */
    if (gbp->descr) {
        gbp->descr = (DescrStructPtr)MemFree(gbp->descr);
        gbp->descr = NULL;
    }

    iip = (ItemInfoPtr)MemNew(sizeof(ItemInfo));
    CreateDefLine(iip, gbp->bsp, buf, buflen, tech, NULL, NULL);
    if (iip != NULL) {
        dsp = (DescrStructPtr)MemNew(sizeof(DescrStruct));
        if (dsp != NULL) {
            dsp->entityID = iip->entityID;
            dsp->itemID = iip->itemID;
            dsp->itemtype = iip->itemtype;
        }
    }
    MemFree(iip);
    gbp->descr = dsp;

    string_start = CheckEndPunctuation(buf, '.');
    if (string_start != NULL) {
        for (string = string_start; *string != '\0'; string++) {
            if (*string == '\"')
                *string = '\'';
        }
        gbp->defline = StringSave(string_start);
        MemFree(string_start);
    }
    MemFree(buf);
}
4138
/*
 * PrintOriginLine: print the "ORIGIN" line.  When the GenBank block has a
 * non-empty origin string, at most its first 66 characters are printed
 * (after CheckEndPunctuation(.., '.')); a longer origin additionally
 * raises ERR_ENTRY_OriginTooLong as a warning.
 *
 * The length is now kept in a size_t: StringLen()'s result used to be
 * narrowed into an Int2, which misreports very long strings.
 * A NULL gbp is ignored.
 */
void PrintOriginLine(Asn2ffJobPtr ajp, GBEntryPtr gbp)

{
    Char buffer[68];
    CharPtr origin=NULL;
    GBBlockPtr gb;
    size_t length=0;
    ValNodePtr vnp=NULL;

    if (gbp == NULL)
        return;
    gbp->descr = NULL;
    ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
    ff_AddString("ORIGIN");
    TabToColumn(13);
    if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_genbank)) != NULL)
    {
        gb = (GBBlockPtr) vnp->data.ptrvalue;
        if (gb)
        {
            if (gb->origin && (length=StringLen(gb->origin)) > 0)
            {
                /* copy at most 66 characters and terminate explicitly */
                StringNCpy(buffer, gb->origin, 66);
                if (length < 66)
                    buffer[length] = '\0';
                else
                    buffer[66] = '\0';
                origin = CheckEndPunctuation(buffer, '.');
                ff_AddString(origin);
            }
            if (length > 66)
                ErrPostStr(SEV_WARNING, ERR_ENTRY_OriginTooLong, "");
        }
    }
    if (origin != NULL)
        MemFree(origin);
    ff_EndPrint();

}
/*
 * print_source: print the "SOURCE" line.
 *
 *   source != NULL : print it after CheckEndPunctuation(.., '.').
 *                    The function takes ownership and FREES "source".
 *   else orp       : print the common name (or the taxname when there is
 *                    no common name) followed by every modifier string,
 *                    and add a final period unless the last piece printed
 *                    already ends in one.
 *   neither        : print "Unknown." and, when ajp->error_msgs is TRUE,
 *                    post ERR_ENTRY_No_source_line.
 *
 * The "ends in a period" tests now look at the last character only when
 * the string is non-empty; they used to read one byte before the start of
 * an empty (or, for modifiers, NULL) string.
 */
static void print_source(Asn2ffJobPtr ajp, CharPtr source, OrgRefPtr orp)
{
    CharPtr newsource, s;
    Boolean has_point = FALSE;
    ValNodePtr v;
    size_t len;

    ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
    ff_AddString("SOURCE");
    TabToColumn(13);
    if (source) {
        newsource = CheckEndPunctuation(source, '.');
        ff_AddString(newsource);
        MemFree(source);
        MemFree(newsource);
    } else if (orp) {
        source = orp->common?orp->common:orp->taxname;
        ff_AddString(source);
        if (orp->mod == NULL && source != NULL) {
            len = StringLen(source);
            if (len > 0 && source[len - 1] == '.')
                has_point = TRUE;
        }
        for (v = orp->mod; v; v = v->next) {
            /* only the last modifier decides whether a period is needed */
            has_point = FALSE;
            s = (CharPtr) (v->data.ptrvalue);
            if (s != NULL) {
                len = StringLen(s);
                if (len > 0 && s[len - 1] == '.')
                    has_point = TRUE;
            }
            ff_AddString(" ");
            ff_AddString(s);

        }
        if (!has_point)
            ff_AddChar('.');
    } else {
        ff_AddString("Unknown.");
        if (ajp->error_msgs == TRUE)
            ErrPostStr(SEV_WARNING, ERR_ENTRY_No_source_line, "");
    }
    ff_EndPrint();
}
4215
4216 /***************************************************************************
4217 *PrintGBSourceLine
4218 *
4219 * "PrintGBSourceLine" to print the source ONLY line for
4220 * genbank and genpept FlatFiles. (modified from PrintSourceLine)
4221 *
4222 ****************************************************************************/
void PrintGBSourceLine (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
    CharPtr gb_source = NULL;
    GBBlockPtr gbblk;
    OrgRefPtr org = NULL;
    BioSourcePtr biosrc;
    ValNodePtr descr;
    SeqFeatPtr feat;
    SortStructPtr entry;

    if (gbp == NULL) {
        return;
    }
    gbp->descr = NULL;

    /* 1. a source string taken from the GenBank block wins outright */
    descr = GatherDescrByChoice(ajp, gbp, Seq_descr_genbank);
    if (descr != NULL) {
        gbblk = (GBBlockPtr) descr->data.ptrvalue;
        if (gbblk) {
            gb_source = GetGBSourceLine(gbblk);
        }
    }
    if (gb_source) {
        print_source(ajp, gb_source, NULL);
        return;
    }

    if (gbp->descr) {
        gbp->descr = (DescrStructPtr)MemFree(gbp->descr);
    }

    /* 2. otherwise find an OrgRef: BioSource descriptor, then Org-ref
          descriptor, then the first entry of the organism feature list */
    descr = GatherDescrByChoice(ajp, gbp, Seq_descr_source);
    if (descr != NULL) {
        biosrc = (BioSourcePtr) descr->data.ptrvalue;
        if (biosrc != NULL) {
            org = biosrc->org;
        }
    } else {
        descr = GatherDescrByChoice(ajp, gbp, Seq_descr_org);
        if (descr != NULL) {
            org = (OrgRefPtr) descr->data.ptrvalue;
        } else if (gbp->feat && gbp->feat->sfpOrgsize != 0) {
            entry = gbp->feat->Orglist;
            feat = entry->sfp;
            if (feat == NULL) {
                /* feature not cached in the entry: fetch it by its ids */
                GatherItemWithLock(entry->entityID,
                    entry->itemID, entry->itemtype, &feat, find_item);
            }
            if (feat != NULL) {
                org = (OrgRefPtr) feat->data.value.ptrvalue;
            }
        }
    }

    /* org may still be NULL here; print_source() then prints "Unknown." */
    print_source(ajp, NULL, org);
    return;
}
4270
/*
 * Print the ORGANISM field followed by the taxonomic lineage paragraph.
 *
 * ajp, gbp  job and entry; used here only to obtain the organelle prefix.
 * orp       organism reference; may be NULL ("Unknown." is printed).  If it
 *           has a common name but no taxname, taxname is filled in from
 *           TaxNameFromCommon (this modifies the caller's OrgRef).
 * lineage   lineage text; when NULL the one in orp->orgname is used, and
 *           "Unclassified." is printed if neither exists.
 */
static void print_organism(Asn2ffJobPtr ajp, GBEntryPtr gbp, OrgRefPtr orp, CharPtr lineage)
{
	ValNodePtr xref;
	DbtagPtr dbp;
	Int4 id = -1;
	CharPtr organelle, taxonomy = NULL;

	if (orp) {
		if (orp->common && !orp->taxname)
			orp->taxname = TaxNameFromCommon(orp->common);
		if (lineage == NULL && orp->orgname) {
			lineage = orp->orgname->lineage;
		}
	}
	organelle = FlatOrganelle(ajp, gbp);
	ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
	ff_AddString("ORGANISM");
	TabToColumn(13);
	if (orp && orp->taxname) {
		if (organelle) {
			ff_AddString(organelle);
		}
		/*
		 * Search every db cross-reference for the "taxon" entry rather
		 * than assuming it is the first one, and skip entries whose
		 * Dbtag or tag is missing instead of dereferencing NULL.
		 */
		for (xref = orp->db; xref != NULL; xref = xref->next) {
			dbp = (DbtagPtr) xref->data.ptrvalue;
			if (dbp != NULL && dbp->tag != NULL &&
			    StringCmp(dbp->db, "taxon") == 0) {
				id = dbp->tag->id;
				break;
			}
		}
		www_organism(orp->taxname, id);
	} else {
		ff_AddString("Unknown.");
	}
	MemFree(organelle);
	ff_EndPrint();

	ff_StartPrint(12, 12, ASN2FF_GB_MAX, NULL);
	if (lineage) {
		/* CheckEndPunctuation returns a copy that this function frees. */
		taxonomy = CheckEndPunctuation(lineage, '.');
		ff_AddString(taxonomy);
		MemFree(taxonomy);
	} else {
		ff_AddString("Unclassified.");
	}
	ff_EndPrint();
}
/*
 * Print organism, genetic code and lineage with "<BR>" separators
 * (used by PrintTaxonomy).
 *
 * ajp, gbp  job and entry; used here only to obtain the organelle prefix.
 * orp       organism reference; may be NULL ("Unknown." is printed).  If it
 *           has a common name but no taxname, taxname is filled in from
 *           TaxNameFromCommon (this modifies the caller's OrgRef).
 * lineage   lineage text; when NULL the one in orp->orgname is used.
 * is_mito   TRUE selects the mitochondrial genetic code (mgcode).
 */
static void print_taxinfo(Asn2ffJobPtr ajp, GBEntryPtr gbp, OrgRefPtr orp, CharPtr lineage, Boolean is_mito)
{
	ValNodePtr xref;
	DbtagPtr dbp;
	Int4 id = -1, gcode = 1;
	CharPtr organelle, taxonomy = NULL;
	/* Large enough for any int rendered with "%d" (was 3 bytes, which
	 * overflowed for codes >= 100).  Kept static as in the original. */
	static Char tmp[16];

	if (orp) {
		if (orp->common && !orp->taxname)
			orp->taxname = TaxNameFromCommon(orp->common);
		if (lineage == NULL && orp->orgname) {
			lineage = orp->orgname->lineage;
		}
	}
	organelle = FlatOrganelle(ajp, gbp);
	ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
	if (orp && orp->taxname) {
		if (organelle) {
			ff_AddString(organelle);
		}
		/* Look through all db cross-references for the "taxon" entry. */
		for (xref = orp->db; xref != NULL; xref = xref->next) {
			dbp = (DbtagPtr) xref->data.ptrvalue;
			if (dbp != NULL && dbp->tag != NULL &&
			    StringCmp(dbp->db, "taxon") == 0) {
				id = dbp->tag->id;
				break;
			}
		}
		ff_AddString("<BR>Organism: ");
		www_taxid(orp->taxname, id);
	} else {
		ff_AddString("Unknown.");
	}
	ff_EndPrint();

	if (orp && orp->orgname) {
		ff_StartPrint(12, 12, ASN2FF_GB_MAX, NULL);
		ff_AddString("<BR>Genetic Code: ");
		/* organelle is still valid here; it is released only after this
		 * comparison (the original freed it first). */
		if (StringICmp(organelle, "Mitochondrion") == 0 || is_mito == TRUE) {
			gcode = orp->orgname->mgcode;
		} else {
			gcode = orp->orgname->gcode;
		}
		sprintf(tmp, "%d", (int) gcode);
		www_gcode(tmp);
		ff_EndPrint();
	}
	MemFree(organelle);

	ff_StartPrint(12, 12, ASN2FF_GB_MAX, NULL);
	if (lineage) {
		/* CheckEndPunctuation returns a copy that this function frees. */
		taxonomy = CheckEndPunctuation(lineage, '.');
		ff_AddString("<BR>Lineage: ");
		ff_AddString(taxonomy);
		MemFree(taxonomy);
	} else {
		ff_AddString("Unclassified.");
	}
	ff_AddString("<BR>");
	ff_EndPrint();
}
4372
4373 /***************************************************************************
4374 *PrintGBOrganismLine
4375 *
4376 * "PrintGBOrganismLine" to print the ONLY organism field for
4377 * genbank and genpept FlatFiles. (modified from PrintSourceLine)
4378 *
4379 ****************************************************************************/
/*
 * Print the ORGANISM field (and lineage) for GenBank/GenPept output.
 *
 * The OrgRef is searched for in this order: BioSource descriptor, Org
 * descriptor, first entry of the OrgRef feature list.  The lineage comes
 * from the BioSource's OrgName, falling back to the GenBank block's
 * taxonomy string.  gbp->descr is left describing the item the organism
 * was taken from.
 *
 * A previously disabled variant that preferred the BioSource flagged
 * is_focus was removed from this body; it referenced locals that are no
 * longer declared.
 */
void PrintGBOrganismLine (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	CharPtr lineage = NULL;
	GBBlockPtr gb = NULL;
	OrgRefPtr orp = NULL;
	BioSourcePtr biosp;
	ValNodePtr vnp = NULL;
	SeqFeatPtr sfp;
	SortStructPtr p;
	DescrStructPtr dsp;

	if (gbp == NULL) {
		return;
	}
	gbp->descr = NULL;

	/* BioSource descr */
	if ((vnp = GatherDescrByChoice(ajp, gbp, Seq_descr_source)) != NULL) {
		biosp = (BioSourcePtr) vnp->data.ptrvalue;
		/* Guard against an empty descriptor, as PrintGBSourceLine does. */
		if (biosp != NULL) {
			orp = (OrgRefPtr) biosp->org;
		}
		if (orp && orp->orgname) {
			lineage = orp->orgname->lineage;
		}
	}

	/* try to find lineage in GenBank block */
	if (lineage == NULL) {
		/* Park the BioSource descriptor record while the GenBank block
		 * is gathered, then put it back. */
		dsp = gbp->descr;
		gbp->descr = NULL;
		if ((vnp = GatherDescrByChoice(ajp, gbp, Seq_descr_genbank)) != NULL) {
			gb = (GBBlockPtr) vnp->data.ptrvalue;
			if (gb)
				lineage = gb->taxonomy;
		}
		gbp->descr = (DescrStructPtr) MemFree(gbp->descr);
		gbp->descr = dsp;	/* keep Seq_descr_source dsp for sequin */
	}
	if (orp) {
		print_organism(ajp, gbp, orp, lineage);
		return;
	}

	/* Organism descr */
	if ((vnp = GatherDescrByChoice(ajp, gbp, Seq_descr_org)) != NULL) {
		orp = (OrgRefPtr) vnp->data.ptrvalue;
		print_organism(ajp, gbp, orp, lineage);
		return;
	}

	/* OrgRef feature */
	gbp->descr = (DescrStructPtr) MemFree(gbp->descr);
	if (gbp->feat && gbp->feat->sfpOrgsize != 0) {
		p = gbp->feat->Orglist;
		if ((sfp = p->sfp) == NULL) {
			GatherItemWithLock(p->entityID,
				p->itemID, p->itemtype, &sfp, find_item);
		}
		if (sfp != NULL) {
			orp = (OrgRefPtr) sfp->data.value.ptrvalue;
			/* Record which feature supplied the organism. */
			dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
			gbp->descr = dsp;
			dsp->entityID = p->entityID;
			dsp->itemID = p->itemID;
			dsp->itemtype = p->itemtype;
		}
	}
	print_organism(ajp, gbp, orp, lineage);
	return;
}
4489
4490 /***************************************************************************
4491 *PrintTaxonomy
4492 *
4493 * "PrintTaxonomy" to print taxonomy info in graphical view
4494 *
4495 ****************************************************************************/
/*
 * Print taxonomy information (organism, genetic code, lineage) for the
 * graphical view via print_taxinfo.
 *
 * The OrgRef is searched for in this order: BioSource descriptor, Org
 * descriptor, first entry of the OrgRef feature list.  The lineage comes
 * from the BioSource's OrgName, falling back to the GenBank block's
 * taxonomy string.
 */
void PrintTaxonomy (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	CharPtr lineage = NULL;
	GBBlockPtr gb = NULL;
	OrgRefPtr orp = NULL;
	BioSourcePtr biosp;
	ValNodePtr vnp = NULL;
	SeqFeatPtr sfp;
	SortStructPtr p;
	DescrStructPtr dsp;
	Boolean is_mito = FALSE;

	if (gbp == NULL) {
		return;
	}
	gbp->descr = NULL;

	/* BioSource descr */
	if ((vnp = GatherDescrByChoice(ajp, gbp, Seq_descr_source)) != NULL) {
		biosp = (BioSourcePtr) vnp->data.ptrvalue;
		/* Guard against an empty descriptor, as PrintGBSourceLine does. */
		if (biosp != NULL) {
			/* NOTE(review): 4 and 5 are presumably kinetoplast and
			 * mitochondrion in the BioSource genome enumeration --
			 * confirm against the ASN.1 spec. */
			if (biosp->genome == 5 || biosp->genome == 4)
				is_mito = TRUE;
			orp = (OrgRefPtr) biosp->org;
		}
		if (orp && orp->orgname) {
			lineage = orp->orgname->lineage;
		}
	}

	/* try to find lineage in GenBank block */
	if (lineage == NULL) {
		/* Park the BioSource descriptor record while the GenBank block
		 * is gathered, then put it back. */
		dsp = gbp->descr;
		gbp->descr = NULL;
		if ((vnp = GatherDescrByChoice(ajp, gbp, Seq_descr_genbank)) != NULL) {
			gb = (GBBlockPtr) vnp->data.ptrvalue;
			if (gb)
				lineage = gb->taxonomy;
		}
		gbp->descr = (DescrStructPtr) MemFree(gbp->descr);
		gbp->descr = dsp;	/* keep Seq_descr_source dsp for sequin */
	}
	if (orp) {
		print_taxinfo(ajp, gbp, orp, lineage, is_mito);
		return;
	}

	/* Organism descr */
	if ((vnp = GatherDescrByChoice(ajp, gbp, Seq_descr_org)) != NULL) {
		orp = (OrgRefPtr) vnp->data.ptrvalue;
		print_taxinfo(ajp, gbp, orp, lineage, is_mito);
		return;
	}

	/* OrgRef feature */
	gbp->descr = (DescrStructPtr) MemFree(gbp->descr);
	if (gbp->feat && gbp->feat->sfpOrgsize != 0) {
		p = gbp->feat->Orglist;
		if ((sfp = p->sfp) == NULL) {
			GatherItemWithLock(p->entityID,
				p->itemID, p->itemtype, &sfp, find_item);
		}
		if (sfp != NULL) {
			orp = (OrgRefPtr) sfp->data.value.ptrvalue;
			/* Record which feature supplied the organism. */
			dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
			gbp->descr = dsp;
			dsp->entityID = p->entityID;
			dsp->itemID = p->itemID;
			dsp->itemtype = p->itemtype;
		}
	}
	print_taxinfo(ajp, gbp, orp, lineage, is_mito);
	return;
}
4565
4566 /***************************************************************************
4567 *PrintOrganismLine
4568 *
4569 * "PrintOrganismLine" to print the source and organism entries for
4570 * EMBL FlatFiles.
4571 *
4572 ****************************************************************************/
4573
/*
 * Print the EMBL organism lines: OS (species), OC (classification), an
 * optional second OS for a second organism, and OG (organelle).
 *
 * Up to two OrgRefs are collected, from BioSource descriptors, else Org
 * descriptors, else the OrgRef feature list.  The lineage comes from the
 * first OrgRef's OrgName, falling back to the GenBank block's taxonomy.
 * If the first OrgRef has a common name but no taxname, taxname is filled
 * in from TaxNameFromCommon (this modifies the OrgRef).
 */
void PrintOrganismLine (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	ValNodePtr vnp = NULL;
	OrgRefPtr orp = NULL, orp1 = NULL;
	CharPtr organelle, taxonomy = NULL, lineage = NULL;
	BioSourcePtr biosp = NULL;
	GBBlockPtr gb = NULL;
	DescrStructPtr dsp;
	ValNodePtr tvnp;
	SeqFeatPtr sfp;
	SortStructPtr p;

	if (gbp == NULL) {
		return;
	}

	/* new first */
	gbp->descr = NULL;
	if ((tvnp = GatherDescrListByChoice(ajp, gbp, Seq_descr_source)) != NULL) {
		dsp = (DescrStructPtr) tvnp->data.ptrvalue;
		vnp = dsp->vnp;
		biosp = (BioSourcePtr) vnp->data.ptrvalue;
		if (biosp != NULL) {
			orp = (OrgRefPtr) biosp->org;
		}
		if (tvnp->next != NULL) {
			dsp = (DescrStructPtr) tvnp->next->data.ptrvalue;
			vnp = dsp->vnp;
			biosp = (BioSourcePtr) vnp->data.ptrvalue;
			if (biosp != NULL) {
				orp1 = (OrgRefPtr) biosp->org;
			}
		}
		ValNodeFreeData(tvnp);
	}
	if (orp && orp->orgname) {
		lineage = orp->orgname->lineage;
	}
	if (orp == NULL) {
		if ((tvnp = GatherDescrListByChoice(ajp, gbp, Seq_descr_org)) != NULL) {
			dsp = (DescrStructPtr) tvnp->data.ptrvalue;
			vnp = dsp->vnp;
			orp = (OrgRefPtr) vnp->data.ptrvalue;
			if (tvnp->next != NULL) {
				dsp = (DescrStructPtr) tvnp->next->data.ptrvalue;
				vnp = dsp->vnp;
				orp1 = (OrgRefPtr) (vnp->data.ptrvalue);
			}
			ValNodeFreeData(tvnp);
		} else if (gbp->feat && gbp->feat->sfpOrgsize != 0) {
			p = gbp->feat->Orglist;	/* gbp->feat->Orglist[0] */
			if ((sfp = p->sfp) == NULL) {
				GatherItemWithLock(p->entityID,
					p->itemID, p->itemtype, &sfp, find_item);
			}
			if (sfp != NULL) {
				orp = (OrgRefPtr) sfp->data.value.ptrvalue;
			}
			/* Only touch Orglist[1] when the list really has a second
			 * entry; the original read it unconditionally. */
			if (gbp->feat->sfpOrgsize > 1) {
				p++;	/* gbp->feat->Orglist[1] */
				if ((sfp = p->sfp) == NULL) {
					GatherItemWithLock(p->entityID,
						p->itemID, p->itemtype, &sfp, find_item);
				}
				if (sfp != NULL) {
					orp1 = (OrgRefPtr) sfp->data.value.ptrvalue;
				}
			}
		}
	}

	if (orp) {
		if (orp->common && !orp->taxname)
			orp->taxname = TaxNameFromCommon(orp->common);
	}

	ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "OS");
	if (orp && orp->taxname) {
		ff_AddString(orp->taxname);
		if (orp->common) {
			ff_AddString(" (");
			ff_AddString(orp->common);
			ff_AddChar(')');
		}
	} else {
		ff_AddString("Unclassified.");
	}
	ff_EndPrint();

	ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "OC");
	if (lineage == NULL) {
		if ((vnp = GatherDescrByChoice(ajp, gbp, Seq_descr_genbank)) != NULL) {
			gb = (GBBlockPtr) vnp->data.ptrvalue;
			/* The descriptor may carry no block; check before use. */
			if (gb != NULL) {
				lineage = gb->taxonomy;
			}
		}
	}
	if (lineage) {
		/* CheckEndPunctuation returns a copy that this function frees. */
		taxonomy = CheckEndPunctuation(lineage, '.');
		ff_AddString(taxonomy);
		MemFree(taxonomy);
	} else {
		ff_AddString("Unclassified.");
	}
	ff_EndPrint();

	if (orp1 && orp1->taxname) {	/* second organism */
		PrintXX();
		ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "OS");
		ff_AddString(orp1->taxname);
		if (orp1->common) {
			ff_AddString(" (");
			ff_AddString(orp1->common);
			ff_AddChar(')');
		}
		ff_EndPrint();
	}

	/* What about plasmids on the OG line? */
	/* Get this info from a qual of the SourceFeat that has qual "plasmid"? */
	organelle = FlatOrganelle(ajp, gbp);
	if (organelle) {
		PrintXX();
		ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "OG");
		ff_AddString(organelle);
		ff_EndPrint();
		MemFree(organelle);
	}

}	/* PrintOrganismLine */
4701
4702
4703
4704 /****************************************************************************
4705 *GetPDBSourceLine
4706 *
4707 * Gets the source from the PDBBlock.
4708 *
4709 ****************************************************************************/
4710
GetPDBSourceLine(PdbBlockPtr pdb)4711 CharPtr GetPDBSourceLine (PdbBlockPtr pdb)
4712
4713 {
4714 CharPtr source = NULL;
4715 ValNodePtr vnp;
4716
4717 if(pdb && pdb->source)
4718 {
4719 vnp = pdb->source;
4720 source = StringSave((CharPtr)vnp->data.ptrvalue);
4721 }
4722
4723 return source;
4724 }
4725
4726 /***********************************************************************
4727 * This function prints out a block of the sequence (at most
4728 * of size SEQ_BLK_SIZE).
4729 * After the last sequence block, the terminator is printed also.
4730 ***********************************************************************/
4731
/*
 * Print one block of sequence (block number ajp->pap_index, at most
 * SEQ_BLK_SIZE residues) and, after the last block, the terminator.
 *
 * A fresh DescrStruct naming the entry itself is stored in gbp->descr.
 * NOTE(review): unlike PrintSeqBlk, this variant does not free that
 * record and computes the last block's stop as start+length (no "- 1");
 * confirm both differences are intended.
 */
void PrintSeqBlkEx (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	Int4 blk = ajp->pap_index;
	Uint1 where = ajp->pap_last;
	Int4 offset = 0;	/* start of the requested location, if any */
	Int4 from, to;
	DescrStructPtr rec;

	rec = (DescrStructPtr) MemNew(sizeof(DescrStruct));
	gbp->descr = rec;
	rec->entityID = gbp->entityID;
	rec->itemID = gbp->itemID;
	rec->itemtype = gbp->itemtype;

	if (ajp->slp != NULL)
		offset = SeqLocStart(ajp->slp);

	/* Block 0 starts at the location start; later blocks are shifted. */
	from = blk*SEQ_BLK_SIZE + offset;

	if (where != LAST) {
		to = offset + (blk+1)*SEQ_BLK_SIZE - 1;
	} else if (ajp->slp != NULL) {
		to = offset + SeqLocLen(ajp->slp);
	} else {
		to = -1;	/* lets the printer decide where the sequence ends */
	}

	if (ajp->format == EMBLPEPT_FMT)
		PrintEPSequence(ajp, gbp, from, to);
	else
		PrintSequence(ajp, gbp, from, to);

	if (where == LAST)
		PrintTerminator();
}
4778
/*
 * Print one block of sequence (block number ajp->pap_index, at most
 * SEQ_BLK_SIZE residues) and, after the last block, the terminator.
 *
 * A temporary DescrStruct naming the entry itself is attached to
 * gbp->descr while printing.  It is released before returning and
 * gbp->descr is reset to NULL so no dangling pointer is left behind.
 */
void PrintSeqBlk (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	Int4 start, stop, index = ajp->pap_index;
	Uint1 last = ajp->pap_last;
	DescrStructPtr dsp;

	dsp = (DescrStructPtr) MemNew(sizeof(DescrStruct));
	gbp->descr = dsp;
	dsp->entityID = gbp->entityID;
	dsp->itemID = gbp->itemID;
	dsp->itemtype = gbp->itemtype;

	if (index == 0) {
		if (ajp->slp != NULL) {
			start = SeqLocStart(ajp->slp);
		} else {
			start = 0;
		}
	} else {
		start = index*SEQ_BLK_SIZE;
		if (ajp->slp) {
			start = index*SEQ_BLK_SIZE + SeqLocStart(ajp->slp);
		}
	}
	if (last != LAST) {
		stop = (index+1)*SEQ_BLK_SIZE - 1;
		if (ajp->slp) {
			stop = (index+1)*SEQ_BLK_SIZE - 1 + SeqLocStart(ajp->slp);
		}
	} else {
		if (ajp->slp != NULL) {
			stop = SeqLocStart(ajp->slp) + SeqLocLen(ajp->slp) - 1;
		} else {
			stop = -1;	/* lets the printer decide where the sequence ends */
		}
	}
	if (ajp->format == EMBLPEPT_FMT) {
		PrintEPSequence(ajp, gbp, start, stop);
	} else {
		PrintSequence(ajp, gbp, start, stop);
	}
	if (last == LAST)
		PrintTerminator();

	/*
	 * Free the record only if gbp->descr still refers to it, and clear
	 * the field: the original left gbp->descr pointing at freed memory.
	 */
	if (gbp->descr == dsp) {
		gbp->descr = NULL;
	}
	MemFree(dsp);
}
4825
/*
 * Print the publication at position ajp->pap_index of gbp->Pub using the
 * printer that matches the output format.  Nothing is printed when the
 * list is shorter than that, or when a submission citation (PUB_Sub) is
 * met in PARTIAL_MODE for a non-EMBL format.
 */
void PrintPubsByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	Int4 wanted = ajp->pap_index;
	Int4 pos = 0;
	ValNodePtr node = gbp->Pub;
	PubStructPtr pub;

	/* Walk to the requested list position. */
	while (node != NULL && pos < wanted) {
		node = node->next;
		pos++;
	}
	if (node == NULL)
		return;

	pub = (PubStructPtr) node->data.ptrvalue;

	if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT ||
	    ajp->format == EMBLPEPT_FMT) {
		EMBL_PrintPubs(ajp, gbp, pub);
		return;
	}

	if (ajp->mode == PARTIAL_MODE && pub->choice == PUB_Sub)
		return;

	if (ajp->format == GRAPHIK_FMT)
		GR_PrintPubs(ajp, gbp, pub);
	else
		GB_PrintPubs(ajp, gbp, pub);
}
/*
 * Print the feature-table header: an "FH" line preceded by an XX line for
 * the EMBL family of formats, a "FEATURES" line otherwise.  gbp->descr is
 * reset to NULL first.
 */
void PrintFeatHeader (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	Boolean embl_family;

	gbp->descr = NULL;

	embl_family = (Boolean) (ajp->format == EMBL_FMT ||
	                         ajp->format == PSEUDOEMBL_FMT ||
	                         ajp->format == EMBLPEPT_FMT);

	if (!embl_family) {
		ff_StartPrint(0, 0, ASN2FF_GB_MAX, NULL);
		ff_AddString("FEATURES");
		TabToColumn(22);
		ff_AddString("Location/Qualifiers");
		ff_EndPrint();
		return;
	}

	PrintXX();
	ff_StartPrint( 5, 0, ASN2FF_EMBL_MAX, "FH");
	ff_AddString("Key");
	TabToColumn(22);
	ff_AddString("Location/Qualifiers");
	NewContLine();
	ff_EndPrint();
}
4870
4871
4872 /**************************************************************************
4873 *void PrintTerminator ()
4874 *
4875 * Prints the double slash (//) at the end of an entry.
4876 **************************************************************************/
4877
PrintTerminator(void)4878 void PrintTerminator (void)
4879
4880 {
4881 ff_StartPrint(0, 0, 0, NULL);
4882 ff_AddChar( '/');
4883 ff_AddChar('/');
4884 ff_EndPrint();
4885 }
4886
4887 /*************************************************************************
4888 * Prints out the cross-refs from the EMBL block, in the descriptor.
4889 * Used ONLY to make EMBL output.
4890 **************************************************************************/
4891
PrintXrefLine(Asn2ffJobPtr ajp,GBEntryPtr gbp)4892 void PrintXrefLine (Asn2ffJobPtr ajp, GBEntryPtr gbp)
4893
4894 {
4895 Boolean done_once=FALSE;
4896 Char buffer[20], buffer1[20], buffer2[20];
4897 CharPtr name, string;
4898 EMBLBlockPtr eb=NULL;
4899 EMBLXrefPtr xref=NULL;
4900 ObjectIdPtr oip;
4901 ValNodePtr descr=NULL, id;
4902
4903 gbp->descr = NULL;
4904 if ((descr=GatherDescrByChoice(ajp, gbp, Seq_descr_embl)) != NULL)
4905 {
4906 eb = (EMBLBlockPtr) descr->data.ptrvalue;
4907 for (xref=eb->xref; xref; xref=xref->next)
4908 {
4909 name=NULL;
4910 if (xref->_class) {
4911 if (xref->_class == 5)
4912 StringCpy(buffer, "SWISS-PROT");
4913 else if (xref->_class == 8)
4914 StringCpy(buffer, "EPD");
4915 else if (xref->_class == 10)
4916 StringCpy(buffer, "TFD");
4917 else if (xref->_class == 11)
4918 StringCpy(buffer, "FLYBASE");
4919 name = &(buffer[0]);
4920 }
4921 else if (xref->name)
4922 name = xref->name;
4923 if (name && xref->id)
4924 {
4925 id=xref->id;
4926
4927 oip = (ObjectIdPtr)id->data.ptrvalue;
4928 if (oip->str)
4929 StringCpy(buffer1, oip->str);
4930 else if (oip->id)
4931 sprintf(buffer1, "%ld", (long) (oip->id));
4932 id = id->next;
4933 if (id)
4934 {
4935 oip = (ObjectIdPtr)id->data.ptrvalue;
4936 if (oip->str)
4937 StringCpy(buffer2, oip->str);
4938 else if (oip->id)
4939 sprintf(buffer2, "%ld", (long) (oip->id));
4940 }
4941 if (done_once == FALSE) {
4942 PrintXX();
4943 ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "DR");
4944 done_once=TRUE;
4945 } else {
4946 NewContLine();
4947 }
4948 ff_AddString(name);
4949 ff_AddString("; ");
4950 ff_AddString(buffer1);
4951 ff_AddString("; ");
4952 string = CheckEndPunctuation(buffer2, '.');
4953 ff_AddString(string);
4954 string = (CharPtr)MemFree(string);
4955 }
4956 }
4957 }
4958 if (done_once)
4959 {
4960 ff_EndPrint();
4961 /* PrintXX();*/
4962 }
4963 }
4964 /****************************************************************************
4965 *
4966 * "PrintBaseCount" counts and prints the number of a, c, g, t, and
4967 * other in a sequence.
4968 *
4969 ****************************************************************************/
4970
/*
 * Print the base composition line: "SQ   Sequence ..." for the EMBL
 * family of formats, "BASE COUNT ..." otherwise.
 *
 * The counts of A, C, G, T and everything else are computed on the first
 * call by reading the sequence (or ajp->slp when set) as IUPAC
 * nucleotides; the formatted text is cached in gbp->base_cnt_line and
 * reused on later calls.  gbp->descr is pointed at a new all-zero
 * DescrStruct.
 */
void PrintBaseCount (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	BioseqPtr bsp = gbp->bsp;
	DescrStructPtr rec;
	CharPtr line;
	SeqPortPtr port = NULL;
	Int4 tally[5];		/* a, c, g, t, other */
	Int4 n_total = 0;
	Int2 slot;
	Uint1 res;
	Boolean embl_family;

	rec = (DescrStructPtr) MemNew(sizeof(DescrStruct));
	gbp->descr = rec;
	rec->entityID = 0;
	rec->itemID = 0;
	rec->itemtype = 0;

	embl_family = (Boolean) (ajp->format == EMBL_FMT ||
	                         ajp->format == PSEUDOEMBL_FMT ||
	                         ajp->format == EMBLPEPT_FMT);

	line = gbp->base_cnt_line;
	if (line == NULL) {
		/* Not computed yet: count residues and build the text. */
		for (slot = 0; slot < 5; slot++)
			tally[slot] = 0;

		if (ajp->slp)
			port = SeqPortNewByLoc(ajp->slp, Seq_code_iupacna);
		else
			port = SeqPortNew(gbp->bsp, 0, -1, 0, Seq_code_iupacna);

		if (bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_virtual)
			SeqPortSet_do_virtual(port, TRUE);

		for (;;) {
			res = SeqPortGetResidue(port);
			if (res == SEQPORT_EOF)
				break;
			/* Skip control values, but keep invalid residues: they
			 * are counted under "other". */
			if (!IS_residue(res) && res != INVALID_RESIDUE)
				continue;

			n_total++;
			if (res == 'A')
				slot = 0;
			else if (res == 'C')
				slot = 1;
			else if (res == 'G')
				slot = 2;
			else if (res == 'T')
				slot = 3;
			else
				slot = 4;
			tally[slot]++;
		}

		line = (CharPtr) MemNew(80*sizeof(Char));
		if (embl_family) {
			sprintf(line,
				"%ld BP; %ld A; %ld C; %ld G; %ld T; %ld other;",
				(long) n_total, (long) tally[0], (long) tally[1],
				(long) tally[2], (long) tally[3], (long) tally[4]);
		} else if (tally[4] == 0) {
			/* GenBank style, nothing but a/c/g/t */
			sprintf(line,
				"%7ld a%7ld c%7ld g%7ld t",
				(long) tally[0], (long) tally[1],
				(long) tally[2], (long) tally[3]);
		} else {
			/* GenBank style with an "others" column */
			sprintf(line,
				"%7ld a%7ld c%7ld g%7ld t%7ld others",
				(long) tally[0], (long) tally[1],
				(long) tally[2], (long) tally[3], (long) tally[4]);
		}
		gbp->base_cnt_line = line;

		if (port)
			SeqPortFree(port);
	}

	if (embl_family) {
		ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "SQ");
		ff_AddString("Sequence ");
		ff_AddString(line);
	} else {
		ff_StartPrint(0, 0, ASN2FF_GB_MAX, NULL);
		ff_AddString("BASE COUNT");
		TabToColumn(13);
		ff_AddString( line);
	}

	ff_EndPrint();
}	/* PrintBaseCount */
5077 /*****************************************************************************
5078 *
5079 * "PrintSequence" to get the biological sequence (in iupacna or
5080 * iupacaa format) and put it in a buffer suitable for Genbank
5081 * or EMBL format.
5082 *
5083 * The variables "start" and "stop" allow one to read from a point
5084 * not at the beginning of the sequence to a point not at the end
5085 * of the sequence.
5086 *
5087 * Rewrite to store in a buffer and print out more at once????????
5088 *****************************************************************************/
5089
/*
 * Print residues start..stop (0-based, inclusive) of gbp->bsp as one
 * paragraph in the layout of ajp->format:
 *
 *   GenBank / Select : coordinate, then 6 groups of 10 lower-case residues
 *   GenPept          : same layout, read through a private SeqPort
 *   EMBL family      : 6 groups of 10 residues, coordinate at column 73
 *
 * stop == -1 means "to the end of the SeqPort" (GenBank and EMBL branches).
 * Printed coordinates are relative to the start of ajp->slp when that
 * location is set.  Invalid residues are shown as 'x'.
 *
 * Changes from the previous version: the line buffer starts out as an
 * empty string, so the EMBL early exit for a missing SeqPort no longer
 * prints uninitialised memory; num_buffer is large enough for any "%9ld"
 * result; a duplicated skip branch in the GenBank loop was collapsed.
 */
void PrintSequence (Asn2ffJobPtr ajp, GBEntryPtr gbp, Int4 start, Int4 stop)
{
	BioseqPtr bsp = gbp->bsp;
	Char buffer[MAX_BTP_BUF], num_buffer[24];
	CharPtr ptr = &(buffer[0]), num_ptr;
	Int4 index, inner_index, inner_stop, total;
	Int4 loc_start;
	SeqPortPtr spp;
	Uint1 residue;

	buffer[0] = '\0';

	/* -1 is taken to mean "no usable location": number from 0. */
	if ((loc_start = SeqLocStart(ajp->slp)) == -1) {
		loc_start = 0;
	}
	total = start;

	if (ajp->format == GENBANK_FMT || ajp->format == SELECT_FMT) {
		ff_StartPrint(0, 0, ASN2FF_GB_MAX, NULL);
		sprintf(ptr, "%9ld ", (long) (total+1 - loc_start));
		ptr += StringLen(ptr);
		CheckSeqPort(ajp, gbp, start);
		spp = gbp->spp;
		if (spp == NULL) {
			ff_AddString(ptr);
			ff_EndPrint();
			return;
		}
		if (bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_virtual) {
			SeqPortSet_do_virtual(spp, TRUE);
		}
		if (stop == -1) {
			stop = spp->stop;
		}
		for (index = start; index <= stop; index += 10) {
			if (stop < (index+10)) {
				inner_stop = stop;
			} else {
				inner_stop = index+9;
			}
			for (inner_index = index; inner_index <= inner_stop; inner_index++) {
				if ((residue = SeqPortGetResidue(spp)) == SEQPORT_EOF) {
					break;
				}
				if (!IS_residue(residue) && residue != INVALID_RESIDUE) {
					/* Not a residue: retry this position. */
					inner_index--;
					continue;
				}
				if (residue == INVALID_RESIDUE) {
					residue = (Uint1) 'X';
				}
				*ptr = TO_LOWER(residue);
				ptr++;
			}
			total = inner_stop+1;
			/* Put in a space every ten, unless it's the end of a row. */
			if (ROUNDUP(total-start, 60) == (total-start)) {
				if (total != (start+1) && total != (stop+1)) {
					/* Row complete: flush it and start the next one
					 * with its coordinate. */
					*ptr = '\0';
					ptr = &buffer[0];
					ff_AddString(ptr);
					NewContLine();
					sprintf(ptr, "%9ld ", (long) (total+1 - loc_start));
					ptr += StringLen(ptr);
				}
			} else if (ROUNDUP(total-start, 10) == total-start) {
				*ptr = ' ';
				ptr++;
			}
		}
		*ptr = '\0';
		ptr = &buffer[0];
		ff_AddString(ptr);
	} else if (ajp->format == GENPEPT_FMT) {
		total++;

		ff_StartPrint(0, 0, ASN2FF_GB_MAX, NULL);
		sprintf(ptr, "%9ld ", (long) (total - loc_start));
		ptr += StringLen(ptr);
		if (ASN2FF_IUPACAA_ONLY == TRUE)
			spp = SeqPortNew(bsp, start, stop, 0, Seq_code_iupacaa);
		else
			spp = SeqPortNew(bsp, start, stop, 0, Seq_code_ncbieaa);
		if (spp == NULL) {
			ff_AddString(ptr);
			ff_EndPrint();
			return;
		}
		if (bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_virtual) {
			SeqPortSet_do_virtual(spp, TRUE);
		}
		while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
			if (!IS_residue(residue) && residue != INVALID_RESIDUE)
				continue;
			if (residue == INVALID_RESIDUE)
				residue = (Uint1) 'X';

			*ptr = TO_LOWER(residue);
			ptr++;
			if (ROUNDUP(total - start, 10) == total - start) {
				if (ROUNDUP(total - start, 60) == total - start) {
					if (total != (start+1) && total != (stop+1)) {
						/* Row complete: flush it and start the next
						 * one with its coordinate. */
						*ptr = '\0';
						ptr = &buffer[0];
						ff_AddString(ptr);
						NewContLine();
						num_ptr = &(num_buffer[0]);
						sprintf(num_ptr, "%9ld", (long) (total+1 - loc_start));
						while ((*ptr = *num_ptr) != '\0') {
							ptr++;
							num_ptr++;
						}
						*ptr = ' ';
						ptr++;
					}
				} else {
					*ptr = ' ';
					ptr++;
				}
			}
			total++;
		}
		*ptr = '\0';
		ptr = &buffer[0];
		ff_AddString(ptr);
		SeqPortFree(spp);
	} else if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT ||
	           ajp->format == EMBLPEPT_FMT) {
		/* numbers at far right, let line go to MAX_BTP_BUF */

		ff_StartPrint(5, 5, 0, NULL);
		CheckSeqPort(ajp, gbp, start);
		spp = gbp->spp;
		if (spp == NULL) {
			/* buffer is still the empty string here. */
			ff_AddString(ptr);
			ff_EndPrint();
			return;
		}
		if (stop == -1)
			stop = spp->stop;
		for (index = start; index <= stop; index += 10) {
			if (stop < (index+10))
				inner_stop = stop;
			else
				inner_stop = index+9;
			for (inner_index = index; inner_index <= inner_stop; inner_index++) {
				residue = SeqPortGetResidue(spp);
				if (!IS_residue(residue) && residue != INVALID_RESIDUE)
					continue;
				if (residue == INVALID_RESIDUE)
					residue = (Uint1) 'X';

				*ptr = TO_LOWER(residue);
				ptr++;
			}
			total = inner_index;
			if (ROUNDUP(total - start, 10) == total - start) {
				if (ROUNDUP(total - start, 60) == total - start) {
					/* Row complete: flush it with its coordinate. */
					*ptr = '\0';
					ptr = &buffer[0];
					ff_AddString(ptr);
					TabToColumn(73);
					ff_AddInteger("%8ld", (long) (total - loc_start));
					if (ROUNDUP(total, SEQ_BLK_SIZE) != total)
						NewContLine();
				} else {
					*ptr = ' ';
					ptr++;
				}
			}
		}
		total = stop+1;
		if (ROUNDUP(total - start, 60) != total - start) {
			/* Flush the final, partial row. */
			*ptr = '\0';
			ptr = &buffer[0];
			ff_AddString(ptr);
			TabToColumn(73);
			ff_AddInteger("%8ld", (long) (total - loc_start));
		}
	}

	ff_EndPrint();

}	/* PrintSequence */
5302
5303 /*****************************************************************************
5304 *
5305 * "PrintEPSequence" to get the biological sequence (in iupacna or
5306 * iupacaa format) and put it in a buffer suitable for Genbank
5307 * or EMBL format.
5308 *
5309 * The variables "start" and "stop" allow one to read from a point
5310 * not at the beginning of the sequence to a point not at the end
5311 * of the sequence.
5312 *
5313 * Rewrite to store in a buffer and print out more at once????????
5314 *****************************************************************************/
5315
/*
 * Print residues start..stop (0-based, inclusive) of gbp->bsp in the EMBL
 * layout: 6 groups of 10 lower-case residues per row with the running
 * coordinate at column 73.  The sequence is read as iupacaa when
 * ASN2FF_IUPACAA_ONLY is TRUE, as ncbieaa otherwise.  stop == -1 means
 * "to the end of the SeqPort".  Invalid residues are shown as 'x'.
 *
 * If no SeqPort can be opened an empty paragraph is emitted; the previous
 * version dereferenced the NULL port.
 */
void PrintEPSequence (Asn2ffJobPtr ajp, GBEntryPtr gbp, Int4 start, Int4 stop)
{
	BioseqPtr bsp = gbp->bsp;
	Char buffer[MAX_BTP_BUF];
	CharPtr ptr = &(buffer[0]);
	Int4 index, inner_index, inner_stop, total;
	SeqPortPtr spp;
	Uint1 residue;

	/* numbers at far right, let line go to MAX_BTP_BUF */

	ff_StartPrint(5, 5, 0, NULL);
	if (ASN2FF_IUPACAA_ONLY == TRUE)
		spp = SeqPortNew(bsp, start, stop, 0, Seq_code_iupacaa);
	else
		spp = SeqPortNew(bsp, start, stop, 0, Seq_code_ncbieaa);
	if (spp == NULL) {
		/* Same early exit PrintSequence takes for a missing port. */
		ff_EndPrint();
		return;
	}
	if (stop == -1)
		stop = spp->stop;

	for (index = start; index <= stop; index += 10) {
		if (stop < (index+10))
			inner_stop = stop;
		else
			inner_stop = index+9;
		for (inner_index = index; inner_index <= inner_stop; inner_index++) {
			residue = SeqPortGetResidue(spp);
			if (!IS_residue(residue) && residue != INVALID_RESIDUE)
				continue;
			if (residue == INVALID_RESIDUE)
				residue = (Uint1) 'X';

			*ptr = TO_LOWER(residue);
			ptr++;
		}
		total = inner_index;
		if (ROUNDUP(total, 10) == total) {
			if (ROUNDUP(total, 60) == total) {
				/* Row complete: flush it with its coordinate. */
				*ptr = '\0';
				ptr = &buffer[0];
				ff_AddString(ptr);
				TabToColumn(73);
				ff_AddInteger("%8ld", (long) total);
				if (ROUNDUP(total, SEQ_BLK_SIZE) != total)
					NewContLine();
			} else {
				*ptr = ' ';
				ptr++;
			}
		}
	}
	total = stop+1;
	if (ROUNDUP(total, 60) != total) {
		/* Flush the final, partial row. */
		*ptr = '\0';
		ptr = &buffer[0];
		ff_AddString(ptr);
		TabToColumn(73);
		ff_AddInteger("%8ld", (long) total);
	}

	ff_EndPrint();

	SeqPortFree(spp);

}	/* PrintEPSequence */
5387
/*
 * Forward the request unchanged to GatherItem.  Despite the name, no
 * locking is performed in this function.
 */
void GatherItemWithLock(Uint2 entityID, Uint4 itemID, Uint2 itemtype,
                        Pointer userdata, GatherItemProc userfunc)
{
	GatherItem(entityID, itemID, itemtype, userdata, userfunc);
}
5394
find_item(GatherContextPtr gcp)5395 Boolean find_item (GatherContextPtr gcp)
5396 {
5397 SeqFeatPtr sfp;
5398 SeqFeatPtr PNTR sfpp;
5399
5400
5401 sfpp = (SeqFeatPtr PNTR)gcp->userdata;
5402 switch (gcp->thistype) {
5403 case OBJ_SEQFEAT:
5404 sfp = (SeqFeatPtr) (gcp->thisitem);
5405 *sfpp = sfp;
5406 break;
5407 default:
5408 break;
5409 }
5410 return TRUE;
5411 }
5412
/*
 * Print the residues covered by ajp->slp in GenBank layout (coordinate,
 * then 6 groups of 10 lower-case residues per row, numbered from 1),
 * followed by the record terminator.  Does nothing when ajp or ajp->slp
 * is NULL.  Invalid residues are shown as 'x'.
 */
static void PrintSeqRegion (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
	Char row[MAX_BTP_BUF];
	CharPtr out = &(row[0]);
	Int4 printed = 0;	/* residues emitted so far */
	SeqPortPtr port;
	BioseqPtr bsp;
	Uint1 res;

	if (ajp == NULL || ajp->slp == NULL) {
		return;
	}
	bsp = gbp->bsp;

	ff_StartPrint(0, 0, ASN2FF_GB_MAX, NULL);
	port = SeqPortNewByLoc(ajp->slp, Seq_code_iupacna);
	if (bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_virtual) {
		SeqPortSet_do_virtual(port, TRUE);
	}

	for (;;) {
		res = SeqPortGetResidue(port);
		if (res == SEQPORT_EOF) {
			break;
		}
		if (!IS_residue(res) && res != INVALID_RESIDUE) {
			continue;	/* control value, not sequence */
		}
		if (res == INVALID_RESIDUE) {
			res = (Uint1) 'X';
		}

		if (printed % 60 == 0) {
			/* Start of a row: flush the previous one, if any, then
			 * write the coordinate of the residue about to be added. */
			if (printed > 0) {
				*out = '\0';
				out = &row[0];
				ff_AddString(out);
				NewContLine();
			}
			sprintf(out, "%9ld ", (long) (printed+1));
			out += StringLen(out);
		} else if (printed % 10 == 0) {
			*out = ' ';
			out++;
		}

		*out = TO_LOWER(res);
		out++;
		printed++;
	}

	*out = '\0';
	out = &row[0];
	ff_AddString(out);
	SeqPortFree(port);
	ff_EndPrint();
	PrintTerminator();
}
5473