1 /*   asn2gnb3.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2gnb3.c
27 *
28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 *          Mati Shomrat
30 *
31 * Version Creation Date:   10/21/98
32 *
33 * $Revision: 1.238 $
34 *
35 * File Description:  New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41 
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gbfeat.h>
55 #include <gbftdef.h>
56 #include <edutil.h>
57 #include <alignmgr2.h>
58 #include <asn2gnbi.h>
59 
60 #ifdef WIN_MAC
61 #if __profile__
62 #include <Profiler.h>
63 #endif
64 #endif
65 
/* Fixed web addresses used when building hyperlinks.  Within the visible part
   of this file only link_seqn and link_seqp are referenced (as the href
   prefix in AddHistCommentString). */

/* RefSeq section of the NCBI site */
static CharPtr ref_link = "https://www.ncbi.nlm.nih.gov/RefSeq/";

/* eukaryotic genome annotation process page (per the URL path) */
static CharPtr doc_link = "https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/";

/* CGI address ending in '?', so a query string is expected to be appended */
static CharPtr ev_link = "https://www.ncbi.nlm.nih.gov/sutils/evv.cgi?";

/* NOTE(review): presumably the ENCODE project page, going by the name - confirm */
static CharPtr link_encode = "https://www.genome.gov/10005107";

/* href prefixes for nucleotide (link_seqn) and protein (link_seqp) records */
static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";
76 
77 
78 /* ********************************************************************** */
79 
/*
 * AddHistCommentString
 *
 * Appends one history sentence to ffstring:
 *
 *   <prefix> [or before] <date> <suffix> <id>[,<id> ...].
 *
 * "or before" is inserted when the id chain holds more than one GI.
 * Each GI in ids is printed either as " gi:<number>" or, when use_accn
 * is set and the GI can be turned into a non-empty accession.version
 * string, as " <accession.version>".  When GetWWW (ajp) is true the
 * printed id is wrapped in an anchor whose href is link_seqn (is_na)
 * or link_seqp (otherwise) followed by the GI number.
 *
 * Nothing is written if dp, ids, prefix, suffix or ffstring is NULL.
 * If the chain holds no GI, or the last GI in it is 0, the sentence is
 * finished with " gi:?" and no trailing period.
 */
static void AddHistCommentString (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  CharPtr prefix,
  CharPtr suffix,
  DatePtr dp,
  SeqIdPtr ids,
  Boolean is_na,
  Boolean use_accn
)

{
  Char      accver [42];
  SeqIdPtr  cur;
  CharPtr   date_str;
  BIG_ID    gi;
  Boolean   have_accn;
  BIG_ID    last_gi = 0;
  Boolean   need_comma = FALSE;
  Int2      num_gis = 0;
  SeqIdPtr  resolved;
  Char      text [256];

  if (ffstring == NULL || prefix == NULL || suffix == NULL) return;
  if (dp == NULL || ids == NULL) return;

  date_str = asn2gb_PrintDate (dp);
  if (date_str == NULL) {
    date_str = StringSave ("?");
  }

  /* first pass: count the GIs and remember the last one seen */
  for (cur = ids; cur != NULL; cur = cur->next) {
    if (cur->choice != SEQID_GI) continue;
    last_gi = (BIG_ID) cur->data.intvalue;
    num_gis++;
  }

  if (num_gis > 1) {
    sprintf (text, "%s or before %s %s", prefix, date_str, suffix);
  } else {
    sprintf (text, "%s %s %s", prefix, date_str, suffix);
  }
  FFAddOneString (ffstring, text, FALSE, FALSE, TILDE_EXPAND);

  MemFree (date_str);

  if (last_gi == 0) {
    FFAddOneString (ffstring, " gi:?", FALSE, FALSE, TILDE_EXPAND);
    return;
  }

  /* second pass: print every GI, comma separated */
  for (cur = ids; cur != NULL; cur = cur->next) {
    if (cur->choice != SEQID_GI) continue;

    gi = (BIG_ID) cur->data.intvalue;

    if (need_comma) {
      FFAddOneString (ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
    }
    need_comma = TRUE;

    /* try to replace the GI by its accession.version text */
    have_accn = FALSE;
    if (use_accn) {
      resolved = GetSeqIdForGI (gi);
      if (resolved != NULL) {
        SeqIdWrite (resolved, accver, PRINTID_TEXTID_ACC_VER, sizeof (accver) - 1);
        have_accn = StringDoesHaveText (accver);
        SeqIdFree (resolved);
      }
    }

    if ( GetWWW(ajp) ) {
      /* the lead-in stays outside the anchor; the href always uses the GI */
      FFAddOneString (ffstring, have_accn ? " " : " gi:", FALSE, FALSE, TILDE_IGNORE);
      FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
      FF_Add_NCBI_Base_URL (ffstring, is_na ? link_seqn : link_seqp);
      sprintf (text, "%ld", (long) gi);
      FFAddTextToString (ffstring, NULL, text, "\">", FALSE, FALSE, TILDE_IGNORE);
      FFAddOneString (ffstring, have_accn ? accver : text, FALSE, FALSE, TILDE_EXPAND);
      FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
    } else {
      if (have_accn) {
        sprintf (text, " %s", accver);
      } else {
        sprintf (text, " gi:%ld", (long) gi);
      }
      FFAddOneString (ffstring, text, FALSE, FALSE, TILDE_EXPAND);
    }
  }

  FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_EXPAND);
}
185 
/*
 * AddUnorderedCommentString
 *
 * Appends the "partial genome representation" note to ffstring.
 * For a delta sequence the gaps are counted first: a segment counts
 * as a gap when it is a location (choice 1) of type SEQLOC_NULL, or a
 * literal (choice 2) that has no sequence data or whose data type is
 * Seq_code_gap.  If at least one gap was found, the note also states
 * the number of contigs (gaps + 1) and that their order is arbitrary.
 *
 * Does nothing when bsp is NULL.
 */
static void AddUnorderedCommentString (
  StringItemPtr ffstring,
  BioseqPtr bsp
)

{
  Char         contig_line [256];
  Int4         gap_count = 0;
  SeqLitPtr    lit;
  SeqLocPtr    loc;
  CharPtr      merged;
  ValNodePtr   parts = NULL;
  DeltaSeqPtr  seg;

  if (bsp == NULL) return;

  if (bsp->repr == Seq_repr_delta) {
    for (seg = (DeltaSeqPtr) bsp->seq_ext; seg != NULL; seg = seg->next) {
      if (seg->choice == 1) {
        loc = (SeqLocPtr) (seg->data.ptrvalue);
        if (loc != NULL && loc->choice == SEQLOC_NULL) {
          gap_count++;
        }
      } else if (seg->choice == 2) {
        lit = (SeqLitPtr) (seg->data.ptrvalue);
        if (lit != NULL &&
            (lit->seq_data == NULL || lit->seq_data_type == Seq_code_gap)) {
          gap_count++;
        }
      }
    }
  }

  /* the pieces are collected in a list and merged into one string below */
  ValNodeCopyStr (&parts, 0, "* NOTE: This is a partial genome representation.");
  if (gap_count > 0) {
    sprintf (contig_line, " It currently~* consists of %ld contigs. The true order of the pieces~", (long) (gap_count + 1));
    ValNodeCopyStr (&parts, 0, contig_line);
    ValNodeCopyStr (&parts, 0, "* is not known and their order in this sequence record is~");
    ValNodeCopyStr (&parts, 0, "* arbitrary. Gaps between the contigs are represented as~");
    ValNodeCopyStr (&parts, 0, "* runs of N, but the exact sizes of the gaps are unknown.");
  }
  ValNodeCopyStr (&parts, 0, "~");

  merged = MergeFFValNodeStrs (parts);
  FFAddOneString (ffstring, merged, TRUE, TRUE, TILDE_EXPAND);

  MemFree (merged);
  ValNodeFreeData (parts);
}
242 
/*
 * AddHTGSCommentString
 *
 * Appends a technique-dependent note to ffstring.
 *
 *   MI_TECH_htgs_0          "individual sequencing reads" note
 *   MI_TECH_htgs_1          "working draft", contig order unknown
 *   MI_TECH_htgs_2          "working draft", contig order believed correct
 *   any other tech >= 2     "Method: <name>." when StringForSeqTech
 *                           returns a name, otherwise nothing is added
 *
 * For delta sequences CountGapsInDeltaSeq is handed two counters and a
 * scratch buffer of 80 bytes per delta segment; the contig count printed
 * is num_g + 1 (shown only when num_s > 0), and whatever was left in the
 * scratch buffer is appended after the HTGS notes.
 *
 * Returns without output when bsp or mip is NULL, when mip->tech < 2,
 * or when the scratch buffer cannot be allocated.
 *
 * NOTE(review): the htgs_2 text reads "estimates that have~* provided";
 * it is emitted output and is reproduced unchanged here.
 */
static void AddHTGSCommentString (
  StringItemPtr ffstring,
  BioseqPtr bsp,
  MolInfoPtr mip
)

{
  static CharPtr  reads_lines [] = {
    "* sequencing reads that have not been assembled into~",
    "* contigs. Runs of N are used to separate the reads~",
    "* and the order in which they appear is completely~",
    "* arbitrary. Low-pass sequence sampling is useful for~",
    "* identifying clones that may be gene-rich and allows~",
    "* overlap relationships among clones to be deduced.~",
    "* However, it should not be assumed that this clone~",
    "* will be sequenced to completion. In the event that~",
    "* the record is updated, the accession number will~",
    "* be preserved.",
    NULL
  };
  static CharPtr  unordered_lines [] = {
    "* is not known and their order in this sequence record is~",
    "* arbitrary. Gaps between the contigs are represented as~",
    "* runs of N, but the exact sizes of the gaps are unknown.",
    NULL
  };
  static CharPtr  ordered_lines [] = {
    "* are represented as runs of N. The order of the pieces~",
    "* is believed to be correct as given, however the sizes~",
    "* of the gaps between them are based on estimates that have~",
    "* provided by the submitter.",
    NULL
  };

  Char         count_line [256];
  CharPtr      gap_text = NULL;
  Int4         gap_text_len = 0;
  Int2         i;
  Boolean      is_htgs = FALSE;
  CharPtr      merged;
  Int4         num_g = 0;
  Int4         num_s = 0;
  ValNodePtr   parts = NULL;
  DeltaSeqPtr  seg;
  CharPtr      tech_name;

  if (bsp == NULL || mip == NULL) return;
  if (mip->tech < 2) return;

  if (bsp->repr == Seq_repr_delta) {
    /* 80 bytes of scratch space per delta segment */
    for (seg = (DeltaSeqPtr) bsp->seq_ext; seg != NULL; seg = seg->next) {
      gap_text_len += 80;
    }
    if (gap_text_len > 0) {
      gap_text = MemNew ((size_t) (gap_text_len + 1));
      if (gap_text == NULL) return;
      CountGapsInDeltaSeq (bsp, &num_s, &num_g, NULL, NULL, gap_text, gap_text_len);
    }
  }

  if (mip->tech == MI_TECH_htgs_0) {

    if (num_s > 0) {
      sprintf (count_line, "* NOTE: This record contains %ld individual~", (long) (num_g + 1));
      ValNodeCopyStr (&parts, 0, count_line);
      for (i = 0; reads_lines [i] != NULL; i++) {
        ValNodeCopyStr (&parts, 0, reads_lines [i]);
      }
    }
    is_htgs = TRUE;

  } else if (mip->tech == MI_TECH_htgs_1) {

    ValNodeCopyStr (&parts, 0, "* NOTE: This is a \"working draft\" sequence.");
    if (num_s > 0) {
      sprintf (count_line, " It currently~* consists of %ld contigs. The true order of the pieces~", (long) (num_g + 1));
      ValNodeCopyStr (&parts, 0, count_line);
      for (i = 0; unordered_lines [i] != NULL; i++) {
        ValNodeCopyStr (&parts, 0, unordered_lines [i]);
      }
    }
    ValNodeCopyStr (&parts, 0, "~* This record will be updated with the finished sequence~");
    ValNodeCopyStr (&parts, 0, "* as soon as it is available and the accession number will~");
    ValNodeCopyStr (&parts, 0, "* be preserved.");
    is_htgs = TRUE;

  } else if (mip->tech == MI_TECH_htgs_2) {

    ValNodeCopyStr (&parts, 0, "* NOTE: This is a \"working draft\" sequence.");
    if (num_s > 0) {
      sprintf (count_line, " It currently~* consists of %ld contigs. Gaps between the contigs~", (long) (num_g + 1));
      ValNodeCopyStr (&parts, 0, count_line);
      for (i = 0; ordered_lines [i] != NULL; i++) {
        ValNodeCopyStr (&parts, 0, ordered_lines [i]);
      }
    }
    ValNodeCopyStr (&parts, 0, "~* This sequence will be replaced~");
    ValNodeCopyStr (&parts, 0, "* by the finished sequence as soon as it is available and~");
    ValNodeCopyStr (&parts, 0, "* the accession number will be preserved.");
    is_htgs = TRUE;

  } else {

    tech_name = StringForSeqTech (mip->tech);
    if (tech_name != NULL) {
      sprintf (count_line, "Method: %s.", tech_name);
      ValNodeCopyStr (&parts, 0, count_line);
    }
  }

  /* every HTGS note ends with a line break followed by the gap text */
  if (is_htgs) {
    ValNodeCopyStr (&parts, 0, "~");
    ValNodeCopyStr (&parts, 0, gap_text);
  }

  MemFree (gap_text);

  merged = MergeFFValNodeStrs (parts);
  FFAddOneString (ffstring, merged, TRUE, TRUE, TILDE_EXPAND);

  MemFree (merged);
  ValNodeFreeData (parts);
}
339 
/*
 * AddWGSMasterCommentString
 *
 * Appends the comment for a whole genome shotgun (WGS) master record
 * to ffstring:
 *
 *   The <taxname> whole genome shotgun (WGS) project has the project
 *   accession <wgsaccn>.  This version of the project (<ver>) has the
 *   accession number <wgsname>[, and consists of sequence(s) <range>].
 *
 * taxname  taken from the org-ref of the source descriptor found on bsp;
 *          "?" is printed when it is missing or blank.
 * ver      two characters of wgsname: offset 4 for names of length
 *          12-14, offset 7 for length 15-16, empty for any other length.
 * range    WGS_accession_first / WGS_accession_last string fields of a
 *          "WGSProjects" user object; a missing end is replaced by the
 *          other one, and the range is dropped if either end is blank.
 *
 * The sentences are formatted into a heap buffer sized from the actual
 * strings, because taxname and the accession range come from record data
 * of arbitrary length.  A NULL wgsaccn or wgsname is printed as an empty
 * string.  Nothing is written if the buffer cannot be allocated.
 */
static void AddWGSMasterCommentString (
  StringItemPtr ffstring,
  BioseqPtr bsp,
  CharPtr wgsaccn,
  CharPtr wgsname
)

{
  size_t             acclen;
  BioSourcePtr       biop;
  CharPtr            buf;
  size_t             buflen;
  SeqMgrDescContext  dcontext;
  CharPtr            first = NULL;
  CharPtr            last = NULL;
  ObjectIdPtr        oip;
  OrgRefPtr          orp;
  SeqDescrPtr        sdp;
  CharPtr            taxname = NULL;
  UserFieldPtr       ufp;
  UserObjectPtr      uop;
  Char               ver [3];
  size_t             veroff = 0;

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
  if (sdp != NULL) {
    biop = (BioSourcePtr) sdp->data.ptrvalue;
    if (biop != NULL) {
      orp = biop->org;
      if (orp != NULL) {
        taxname = orp->taxname;
      }
    }
  }

  /* scan all user descriptors; a later WGSProjects object overrides an earlier one */
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
  while (sdp != NULL) {
    uop = (UserObjectPtr) sdp->data.ptrvalue;
    if (uop != NULL) {
      oip = uop->type;
      if (oip != NULL && StringICmp (oip->str, "WGSProjects") == 0) {
        for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
          oip = ufp->label;
          if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
          if (StringICmp (oip->str, "WGS_accession_first") == 0) {
            first = (CharPtr) ufp->data.ptrvalue;
          } else if (StringICmp (oip->str, "WGS_accession_last") == 0) {
            last = (CharPtr) ufp->data.ptrvalue;
          }
        }
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
  }

  if (StringHasNoText (taxname)) {
    taxname = "?";
  }
  /* passing NULL to %s is undefined, so print missing accessions as empty */
  if (wgsaccn == NULL) {
    wgsaccn = "";
  }
  if (wgsname == NULL) {
    wgsname = "";
  }

  /* two-character version field, located by the length of the name */
  ver [0] = '\0';
  acclen = StringLen (wgsname);
  if (acclen >= 12 && acclen <= 14) {
    veroff = 4;
  } else if (acclen == 15 || acclen == 16) {
    veroff = 7;
  }
  if (veroff > 0) {
    ver [0] = wgsname [veroff];
    ver [1] = wgsname [veroff + 1];
    ver [2] = '\0';
  }

  /* one buffer large enough for any sentence below: every variable part
     plus 128 bytes, which exceeds the fixed wording of the longest format */
  buflen = StringLen (taxname) + StringLen (wgsaccn) + StringLen (wgsname) +
           StringLen (first) + StringLen (last) + 128;
  buf = (CharPtr) MemNew (buflen);
  if (buf == NULL) return;

  sprintf (buf, "The %s whole genome shotgun (WGS) project has the project accession %s.", taxname, wgsaccn);
  FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);

  sprintf (buf, "  This version of the project (%s) has the accession number %s", ver, wgsname);
  FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_EXPAND);

  /* a range with only one end given collapses to that single accession */
  if (first == NULL) {
    first = last;
  } else if (last == NULL) {
    last = first;
  }

  if (StringDoesHaveText (first) && StringDoesHaveText (last)) {
    if (StringCmp (first, last) != 0) {
      sprintf (buf, ", and consists of sequences %s-%s.", first, last);
    } else {
      sprintf (buf, ", and consists of sequence %s.", first);
    }
    FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
  } else {
    FFAddOneString(ffstring, ".", TRUE, FALSE, TILDE_EXPAND);
  }

  MemFree (buf);
}
444 
/*
 * AddTSAMasterCommentString
 *
 * Appends the comment for a transcriptome shotgun assembly (TSA) master
 * record to ffstring:
 *
 *   The <taxname> transcriptome shotgun assembly (TSA) project has the
 *   project accession <tsaaccn>.  This version of the project (<ver>)
 *   has the accession number <tsaname>[, and consists of sequence(s)
 *   <range>].
 *
 * taxname  taken from the org-ref of the source descriptor found on bsp;
 *          "?" is printed when it is missing or blank.
 * ver      two characters of tsaname: offset 4 for names of length
 *          12-14, offset 7 for length 15-16, empty for any other length.
 *          Length 16 is accepted here so that this function agrees with
 *          the WGS and TLS master comment functions in this file.
 * range    TSA_accession_first / TSA_accession_last string fields of a
 *          "TSA-mRNA-List" or "TSA-RNA-List" user object; a missing end
 *          is replaced by the other one, and the range is dropped if
 *          either end is blank.
 *
 * The sentences are formatted into a heap buffer sized from the actual
 * strings, because taxname and the accession range come from record data
 * of arbitrary length.  A NULL tsaaccn or tsaname is printed as an empty
 * string.  Nothing is written if the buffer cannot be allocated.
 */
static void AddTSAMasterCommentString (
  StringItemPtr ffstring,
  BioseqPtr bsp,
  CharPtr tsaaccn,
  CharPtr tsaname
)

{
  size_t             acclen;
  BioSourcePtr       biop;
  CharPtr            buf;
  size_t             buflen;
  SeqMgrDescContext  dcontext;
  CharPtr            first = NULL;
  CharPtr            last = NULL;
  ObjectIdPtr        oip;
  OrgRefPtr          orp;
  SeqDescrPtr        sdp;
  CharPtr            taxname = NULL;
  UserFieldPtr       ufp;
  UserObjectPtr      uop;
  Char               ver [3];
  size_t             veroff = 0;

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
  if (sdp != NULL) {
    biop = (BioSourcePtr) sdp->data.ptrvalue;
    if (biop != NULL) {
      orp = biop->org;
      if (orp != NULL) {
        taxname = orp->taxname;
      }
    }
  }

  /* scan all user descriptors; a later matching object overrides an earlier one */
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
  while (sdp != NULL) {
    uop = (UserObjectPtr) sdp->data.ptrvalue;
    if (uop != NULL) {
      oip = uop->type;
      if (oip != NULL) {
        if (StringICmp (oip->str, "TSA-mRNA-List") == 0 || StringICmp (oip->str, "TSA-RNA-List") == 0) {
          for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
            oip = ufp->label;
            if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
            if (StringICmp (oip->str, "TSA_accession_first") == 0) {
              first = (CharPtr) ufp->data.ptrvalue;
            } else if (StringICmp (oip->str, "TSA_accession_last") == 0) {
              last = (CharPtr) ufp->data.ptrvalue;
            }
          }
        }
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
  }

  if (StringHasNoText (taxname)) {
    taxname = "?";
  }
  /* passing NULL to %s is undefined, so print missing accessions as empty */
  if (tsaaccn == NULL) {
    tsaaccn = "";
  }
  if (tsaname == NULL) {
    tsaname = "";
  }

  /* two-character version field, located by the length of the name */
  ver [0] = '\0';
  acclen = StringLen (tsaname);
  if (acclen >= 12 && acclen <= 14) {
    veroff = 4;
  } else if (acclen == 15 || acclen == 16) {
    veroff = 7;
  }
  if (veroff > 0) {
    ver [0] = tsaname [veroff];
    ver [1] = tsaname [veroff + 1];
    ver [2] = '\0';
  }

  /* one buffer large enough for any sentence below: every variable part
     plus 128 bytes, which exceeds the fixed wording of the longest format */
  buflen = StringLen (taxname) + StringLen (tsaaccn) + StringLen (tsaname) +
           StringLen (first) + StringLen (last) + 128;
  buf = (CharPtr) MemNew (buflen);
  if (buf == NULL) return;

  sprintf (buf, "The %s transcriptome shotgun assembly (TSA) project has the project accession %s.", taxname, tsaaccn);
  FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);

  sprintf (buf, "  This version of the project (%s) has the accession number %s", ver, tsaname);
  FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_EXPAND);

  /* a range with only one end given collapses to that single accession */
  if (first == NULL) {
    first = last;
  } else if (last == NULL) {
    last = first;
  }

  if (StringDoesHaveText (first) && StringDoesHaveText (last)) {
    if (StringCmp (first, last) != 0) {
      sprintf (buf, ", and consists of sequences %s-%s.", first, last);
    } else {
      sprintf (buf, ", and consists of sequence %s.", first);
    }
    FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
  } else {
    FFAddOneString(ffstring, ".", TRUE, FALSE, TILDE_EXPAND);
  }

  MemFree (buf);
}
548 
/*
 * AddTLSMasterCommentString
 *
 * Appends the comment for a targeted locus study (TLS) master record
 * to ffstring:
 *
 *   The <taxname> targeted locus study (TLS) project has the project
 *   accession <tlsaccn>.  This version of the project (<ver>) has the
 *   accession number <tlsname>[, and consists of sequence(s) <range>].
 *
 * taxname  taken from the org-ref of the source descriptor found on bsp;
 *          "?" is printed when it is missing or blank.
 * ver      two characters of tlsname: offset 4 for names of length
 *          12-14, offset 7 for length 15-16, empty for any other length.
 * range    TLS_accession_first / TLS_accession_last string fields of a
 *          "TLSProjects" user object; a missing end is replaced by the
 *          other one, and the range is dropped if either end is blank.
 *
 * The sentences are formatted into a heap buffer sized from the actual
 * strings, because taxname and the accession range come from record data
 * of arbitrary length.  A NULL tlsaccn or tlsname is printed as an empty
 * string.  Nothing is written if the buffer cannot be allocated.
 */
static void AddTLSMasterCommentString (
  StringItemPtr ffstring,
  BioseqPtr bsp,
  CharPtr tlsaccn,
  CharPtr tlsname
)

{
  size_t             acclen;
  BioSourcePtr       biop;
  CharPtr            buf;
  size_t             buflen;
  SeqMgrDescContext  dcontext;
  CharPtr            first = NULL;
  CharPtr            last = NULL;
  ObjectIdPtr        oip;
  OrgRefPtr          orp;
  SeqDescrPtr        sdp;
  CharPtr            taxname = NULL;
  UserFieldPtr       ufp;
  UserObjectPtr      uop;
  Char               ver [3];
  size_t             veroff = 0;

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
  if (sdp != NULL) {
    biop = (BioSourcePtr) sdp->data.ptrvalue;
    if (biop != NULL) {
      orp = biop->org;
      if (orp != NULL) {
        taxname = orp->taxname;
      }
    }
  }

  /* scan all user descriptors; a later TLSProjects object overrides an earlier one */
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
  while (sdp != NULL) {
    uop = (UserObjectPtr) sdp->data.ptrvalue;
    if (uop != NULL) {
      oip = uop->type;
      if (oip != NULL && StringICmp (oip->str, "TLSProjects") == 0) {
        for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
          oip = ufp->label;
          if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
          if (StringICmp (oip->str, "TLS_accession_first") == 0) {
            first = (CharPtr) ufp->data.ptrvalue;
          } else if (StringICmp (oip->str, "TLS_accession_last") == 0) {
            last = (CharPtr) ufp->data.ptrvalue;
          }
        }
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
  }

  if (StringHasNoText (taxname)) {
    taxname = "?";
  }
  /* passing NULL to %s is undefined, so print missing accessions as empty */
  if (tlsaccn == NULL) {
    tlsaccn = "";
  }
  if (tlsname == NULL) {
    tlsname = "";
  }

  /* two-character version field, located by the length of the name */
  ver [0] = '\0';
  acclen = StringLen (tlsname);
  if (acclen >= 12 && acclen <= 14) {
    veroff = 4;
  } else if (acclen == 15 || acclen == 16) {
    veroff = 7;
  }
  if (veroff > 0) {
    ver [0] = tlsname [veroff];
    ver [1] = tlsname [veroff + 1];
    ver [2] = '\0';
  }

  /* one buffer large enough for any sentence below: every variable part
     plus 128 bytes, which exceeds the fixed wording of the longest format */
  buflen = StringLen (taxname) + StringLen (tlsaccn) + StringLen (tlsname) +
           StringLen (first) + StringLen (last) + 128;
  buf = (CharPtr) MemNew (buflen);
  if (buf == NULL) return;

  sprintf (buf, "The %s targeted locus study (TLS) project has the project accession %s.", taxname, tlsaccn);
  FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);

  sprintf (buf, "  This version of the project (%s) has the accession number %s", ver, tlsname);
  FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_EXPAND);

  /* a range with only one end given collapses to that single accession */
  if (first == NULL) {
    first = last;
  } else if (last == NULL) {
    last = first;
  }

  if (StringDoesHaveText (first) && StringDoesHaveText (last)) {
    if (StringCmp (first, last) != 0) {
      sprintf (buf, ", and consists of sequences %s-%s.", first, last);
    } else {
      sprintf (buf, ", and consists of sequence %s.", first);
    }
    FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
  } else {
    FFAddOneString(ffstring, ".", TRUE, FALSE, TILDE_EXPAND);
  }

  MemFree (buf);
}
653 
GetMolInfoCommentString(BioseqPtr bsp,MolInfoPtr mip)654 static CharPtr GetMolInfoCommentString (
655   BioseqPtr bsp,
656   MolInfoPtr mip
657 )
658 
659 {
660   Boolean  is_aa;
661   CharPtr  str = NULL;
662 
663   if (bsp == NULL || mip == NULL) return NULL;
664 
665   is_aa = ISA_aa (bsp->mol);
666   switch (mip->completeness) {
667     case 1 :
668       str = "COMPLETENESS: full length";
669       break;
670     case 2 :
671       str = "COMPLETENESS: not full length";
672       break;
673     case 3 :
674       if (is_aa) {
675         str = "COMPLETENESS: incomplete on the amino end";
676       } else {
677         str = "COMPLETENESS: incomplete on the 5' end";
678       }
679       break;
680     case 4 :
681       if (is_aa) {
682         str = "COMPLETENESS: incomplete on the carboxy end";
683       } else {
684         str = "COMPLETENESS: incomplete on the 3' end";
685       }
686       break;
687     case 5 :
688       str = "COMPLETENESS: incomplete on both ends";
689       break;
690     case 6 :
691       if (is_aa) {
692         str = "COMPLETENESS: complete on the amino end";
693       } else {
694         str = "COMPLETENESS: complete on the 5' end";
695       }
696       break;
697     case 7 :
698       if (is_aa) {
699         str = "COMPLETENESS: complete on the carboxy end";
700       } else {
701         str = "COMPLETENESS: complete on the 3' end";
702       }
703       break;
704     default :
705       str = "COMPLETENESS: unknown";
706       break;
707   }
708 
709   return str;
710 }
711 
GetStrForBankit(UserObjectPtr uop,Boolean dumpMode,Boolean showedLocalId)712 static CharPtr GetStrForBankit (
713   UserObjectPtr uop,
714   Boolean dumpMode,
715   Boolean showedLocalId
716 )
717 
718 {
719   CharPtr       bic = NULL, smc = NULL, uvc = NULL, pfx = NULL, ptr;
720   ObjectIdPtr   oip;
721   UserFieldPtr  ufp;
722 
723   if (uop == NULL) return NULL;
724   if ((oip = uop->type) == NULL) return NULL;
725   if (StringCmp (oip->str, "Submission") != 0) return NULL;
726 
727   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
728     oip = ufp->label;
729     if (StringCmp(oip->str, "UniVecComment") == 0) {
730       uvc = ufp->data.ptrvalue;
731     } else if (StringCmp(oip->str, "AdditionalComment") == 0) {
732       bic = ufp->data.ptrvalue;
733     } else if (StringCmp(oip->str, "SmartComment") == 0 && dumpMode) {
734       smc = ufp->data.ptrvalue;
735     }
736   }
737 
738   if (showedLocalId) {
739     if (StringNICmp (bic, "LocalID:", 8) == 0) {
740       bic = NULL;
741     }
742     if (StringNICmp (smc, "LocalID:", 8) == 0) {
743       smc = NULL;
744     }
745   }
746 
747   if (uvc == NULL && bic == NULL && smc == NULL) return NULL;
748 
749   ptr = (CharPtr) MemNew (StringLen (uvc) + StringLen (bic) + StringLen (smc) + 45);
750   if (uvc != NULL) {
751     StringCat (ptr, pfx);
752     StringCat (ptr, "Vector Explanation: ");
753     StringCat (ptr, uvc);
754     pfx = "~";
755   }
756   if (bic != NULL) {
757     StringCat (ptr, pfx);
758     StringCat (ptr, "Bankit Comment: ");
759     StringCat (ptr, bic);
760     pfx = "~";
761   }
762   if (smc != NULL) {
763     StringCat (ptr, pfx);
764     StringCat (ptr, "Bankit Comment: ");
765     StringCat (ptr, smc);
766     pfx = "~";
767   }
768 
769   return ptr;
770 }
771 
/* Text fragments for RefSeq-related comments.  Every fragment begins with a
   space, so it can follow earlier text directly.  Some are full sentences;
   the others (reftxt0, reftxtg, reftxti, reftxt5, reftxt9, reftxt22,
   reftxt23, reftxt41, reftxt51) have no closing punctuation.
   NOTE(review): the users of these strings are not in the visible part of
   the file; presumably the open-ended fragments get more text appended. */
static CharPtr reftxt0 = " The reference sequence was derived from ";
static CharPtr reftxtg = " The reference sequence was generated based on analysis of ";
static CharPtr reftxti = " The reference sequence is identical to ";
static CharPtr reftxt1 = " This record is predicted by genome sequence analysis and is not yet supported by experimental evidence.";
static CharPtr reftxt2 = " This record has not yet been subject to final NCBI review.";
static CharPtr reftxt3 = " This record has not been reviewed and the function is unknown.";
static CharPtr reftxt4 = " This record has undergone validation or preliminary review.";
static CharPtr reftxt5 = " This record has been curated by ";
static CharPtr reftxt6 = " This record is predicted by automated computational analysis.";
static CharPtr reftxt7 = " This record is provided to represent a collection of whole genome shotgun sequences.";
static CharPtr reftxt9 = " This record is derived from an annotated genomic sequence (";
static CharPtr reftxt21 = " NCBI contigs are derived from assembled genomic sequence data.";
static CharPtr reftxt22 = " Features on this sequence have been produced for build ";
static CharPtr reftxt23 = " of the NCBI's genome annotation";
static CharPtr reftxt41 = " This record is based on preliminary annotation provided by ";
static CharPtr reftxt51 = " This record represents a single, non-redundant, protein sequence which may be annotated on many different RefSeq genomes from the same, or different, species";
788 
GetStatusForRefTrack(UserObjectPtr uop)789 static CharPtr GetStatusForRefTrack (
790   UserObjectPtr uop
791 )
792 
793 {
794   CharPtr       st;
795   ObjectIdPtr   oip;
796   UserFieldPtr  ufp, urf = NULL;
797 
798   if (uop == NULL) return NULL;
799   if ((oip = uop->type) == NULL) return NULL;
800   if (StringCmp (oip->str, "RefGeneTracking") != 0) return NULL;
801   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
802     oip = ufp->label;
803     if (StringCmp(oip->str, "Assembly") == 0) {
804       urf = ufp;
805     }
806   }
807   /* if (urf == NULL || urf->choice != 11) return NULL; */
808   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
809     oip = ufp->label;
810     if (StringCmp (oip->str, "Status") == 0) {
811       st = (CharPtr) ufp->data.ptrvalue;
812       if (StringICmp (st, "Inferred") == 0) {
813         return "INFERRED ";
814       } else if (StringICmp (st, "Provisional") == 0) {
815         return "PROVISIONAL ";
816       } else if (StringICmp (st, "Predicted") == 0) {
817         return "PREDICTED ";
818       } else if (StringICmp (st, "Validated") == 0) {
819         return "VALIDATED ";
820       } else if (StringICmp (st, "Reviewed") == 0) {
821         return "REVIEWED ";
822       } else if (StringICmp (st, "Model") == 0) {
823         return "MODEL ";
824       } else if (StringICmp (st, "WGS") == 0) {
825         return "WGS ";
826       } else if (StringICmp (st, "Pipeline") == 0) {
827         return "Pipeline ";
828       }
829     }
830   }
831   return NULL;
832 }
833 
834 
URLHasSuspiciousHtml(IntAsn2gbJobPtr ajp,CharPtr searchString)835 static Boolean URLHasSuspiciousHtml (
836   IntAsn2gbJobPtr ajp,
837   CharPtr searchString
838 )
839 
840 {
841   Char        ch;
842   CharPtr     ptr;
843   Int4        state;
844   ValNodePtr  matches;
845 
846   if (StringHasNoText (searchString)) return FALSE;
847 
848   state = 0;
849   ptr = searchString;
850   ch = *ptr;
851 
852   while (ch != '\0') {
853     matches = NULL;
854     ch = TO_LOWER (ch);
855     state = TextFsaNext (ajp->bad_html_fsa, state, ch, &matches);
856     if (matches != NULL) {
857       return TRUE;
858     }
859     ptr++;
860     ch = *ptr;
861   }
862 
863   return FALSE;
864 }
865 
GetGiFromAccnDotVer(CharPtr source,BIG_ID_PNTR gip)866 static Boolean GetGiFromAccnDotVer (CharPtr source, BIG_ID_PNTR gip)
867 
868 {
869   BIG_ID    gi = 0;
870   SeqIdPtr  sip;
871 
872   if (StringHasNoText (source) || gip == NULL) return FALSE;
873   *gip = 0;
874 
875   sip = SeqIdFromAccessionDotVersion (source);
876   if (sip == NULL) return FALSE;
877   gi = GetGIForSeqId (sip);
878   SeqIdFree (sip);
879   if (gi == 0) return FALSE;
880 
881   *gip = gi;
882   return TRUE;
883 }
884 
/*
  AddStrForRefTrack

  Appends the REFSEQ comment text for a "RefGeneTracking" user object to
  ffstring.  Does nothing if uop or ffstring is NULL or if the user object
  has a different type.

  ajp                  job settings; GetWWW (ajp) selects whether <a href>
                       hyperlinks are emitted around names and accessions
  ffstring             output accumulator
  uop                  the RefGeneTracking user object
  is_na                chooses link_seqn (TRUE) or link_seqp (FALSE) as the
                       base URL for accession links
  genomeBuildNumber,
  genomeVersionNumber  used only for the "Pipeline" status; a blank version
                       is printed as "1"

  Fields read from the user object:
    Status           mapped to the review code 1..8 (Inferred, Provisional,
                     Predicted, Validated, Reviewed, Model, WGS, Pipeline)
    Generated        boolean, selects reftxtg as the assembly lead-in
    Collaborator     curator name
    CollaboratorURL  link target for the curator name
    GenomicSource    source accession
    Assembly /
    IdenticalTo      list of entries (accession, from, to, name, gi)
*/
static void AddStrForRefTrack (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  UserObjectPtr uop,
  Boolean is_na,
  CharPtr genomeBuildNumber,
  CharPtr genomeVersionNumber
)

{
  CharPtr       accn, curator = NULL, name, source = NULL, st, url = NULL;
  Char          buf [64];
  ObjectIdPtr   oip;
  UserFieldPtr  ufp, tmp, u, urf = NULL;
  Int4          from, to;
  BIG_ID        gi;
  Int2          i = 0;
  Int2          review = 0;
  Boolean       generated = FALSE, identical = FALSE;

  if ( uop == NULL || ffstring == NULL ) return;
  if ((oip = uop->type) == NULL) return;
  if (StringCmp (oip->str, "RefGeneTracking") != 0) return;

  /* first pass: collect the scalar fields and remember the assembly list */
  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
    oip = ufp->label;
    /* a field without a label cannot be identified, so skip it */
    if (oip == NULL) continue;
    if (StringCmp(oip->str, "Assembly") == 0) {
      urf = ufp;
    } else if (StringCmp(oip->str, "IdenticalTo") == 0) {
      urf = ufp;
      identical = TRUE;
    }
    if (StringCmp (oip->str, "Status") == 0) {
      st = (CharPtr) ufp->data.ptrvalue;
      if (StringICmp (st, "Inferred") == 0) {
        review = 1;
      } else if (StringICmp (st, "Provisional") == 0) {
        review = 2;
      } else if (StringICmp (st, "Predicted") == 0) {
        review = 3;
      } else if (StringICmp (st, "Validated") == 0) {
        review = 4;
      } else if (StringICmp (st, "Reviewed") == 0) {
        review = 5;
      } else if (StringICmp (st, "Model") == 0) {
        review = 6;
      } else if (StringICmp (st, "WGS") == 0) {
        review = 7;
      } else if (StringICmp (st, "Pipeline") == 0) {
        review = 8;
      }
    } else if (StringCmp (oip->str, "Generated") == 0) {
      generated = ufp->data.boolvalue;
    } else if (StringCmp (oip->str, "Collaborator") == 0) {
      st = (CharPtr) ufp->data.ptrvalue;
      if (! StringHasNoText (st)) {
        curator = st;
      }
    } else if (StringCmp (oip->str, "CollaboratorURL") == 0) {
      st = (CharPtr) ufp->data.ptrvalue;
      if (! StringHasNoText (st)) {
        url = st;
      }
    } else if (StringCmp (oip->str, "GenomicSource") == 0) {
      st = (CharPtr) ufp->data.ptrvalue;
      if (! StringHasNoText (st)) {
        source = st;
      }
    }
  }

  /* count the "accession" / "name" sub-fields; i > 0 later means there is
     an assembly list worth printing (and that urf is non-NULL) */
  if (urf != NULL && urf->choice == 11) {
    for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) {
      if (tmp->choice != 11) continue;
      for (u = tmp->data.ptrvalue; u != NULL; u = u->next) {
        oip = u->label;
        if (oip == NULL) continue;
        if (StringCmp (oip->str, "accession") == 0 ||
            StringCmp (oip->str, "name") == 0) {
          i++;
        }
      }
    }
  }

  /* heading: "REFSEQ" or, for the Pipeline status, "REFSEQ INFORMATION" */
  if ( GetWWW(ajp) ) {
    FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
    FF_Add_NCBI_Base_URL (ffstring, ref_link);
    FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
    if (review == 8) {
      FFAddOneString (ffstring, " INFORMATION", FALSE, FALSE, TILDE_IGNORE);
    }
    FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
  } else {
    FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
    if (review == 8) {
      FFAddOneString (ffstring, " INFORMATION", FALSE, FALSE, TILDE_IGNORE);
    }
  }
  FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);

  /* status-specific sentence; Provisional and Reviewed defer to the curator
     sentence below, and Pipeline (8) adds nothing at this point */
  if (review == 1) {
    FFAddOneString (ffstring, reftxt1, FALSE, FALSE, TILDE_IGNORE);
  } else if (review == 2) {
    if (curator == NULL) {
      FFAddOneString (ffstring, reftxt2, FALSE, FALSE, TILDE_IGNORE);
    }
  } else if (review == 3) {
    FFAddOneString (ffstring, reftxt3, FALSE, FALSE, TILDE_IGNORE);
  } else if (review == 4) {
    FFAddOneString (ffstring, reftxt4, FALSE, FALSE, TILDE_IGNORE);
  } else if (review == 5) {
    if (curator == NULL) {
      curator = "NCBI staff";
    }
  } else if (review == 6) {
    FFAddOneString (ffstring, reftxt6, FALSE, FALSE, TILDE_IGNORE);
  } else if (review == 7) {
    FFAddOneString (ffstring, reftxt7, FALSE, FALSE, TILDE_IGNORE);
  }

  /* curator sentence, hyperlinked only when the URL passes the HTML check
     and looks like a web address */
  if (curator != NULL) {
    if (review == 2) {
      FFAddOneString (ffstring, reftxt41, FALSE, FALSE, TILDE_IGNORE);
    } else {
      FFAddOneString (ffstring, reftxt5, FALSE, FALSE, TILDE_IGNORE);
    }
    if (GetWWW (ajp) && url != NULL && (! URLHasSuspiciousHtml (ajp, url))) {
      if (StringNCmp (url, "http://", 7) == 0 || StringNCmp (url, "https://", 8) == 0) {
        FFAddTextToString(ffstring, "<a href=\"", url, "\">", FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
      } else if (StringNCmp (url, "www.", 4) == 0) {
        /* bare "www." address: the added scheme must sit inside the quoted
           href value, i.e. <a href="http://www...."> */
        FFAddTextToString(ffstring, "<a href=\"http://", url, "\">", FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
      } else {
        FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE);
      }
    } else {
      FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE);
    }
    FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
  }

  /* genomic source accession, linked by gi when it can be resolved */
  if (source != NULL) {
    FFAddOneString (ffstring, reftxt9, FALSE, FALSE, TILDE_IGNORE);
    gi = 0;
    if (GetWWW (ajp) && ValidateAccnDotVer (source) == 0 && GetGiFromAccnDotVer (source, &gi)) {
      FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
      if (is_na) {
        FF_Add_NCBI_Base_URL (ffstring, link_seqn);
      } else {
        FF_Add_NCBI_Base_URL (ffstring, link_seqp);
      }
      if (gi > 0) {
        sprintf (buf, "%ld", (long) gi);
        FFAddTextToString(ffstring, /* "val=" */ NULL, buf, "\">", FALSE, FALSE, TILDE_IGNORE);
      } else {
        FFAddTextToString(ffstring, /* "val=" */ NULL, source, "\">", FALSE, FALSE, TILDE_IGNORE);
      }
      FFAddOneString (ffstring, source, FALSE, FALSE, TILDE_IGNORE);
      FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
    } else {
      FFAddOneString (ffstring, source, FALSE, FALSE, TILDE_IGNORE);
    }
    FFAddOneString (ffstring, ").", FALSE, FALSE, TILDE_IGNORE);
  }

  if (i > 0) {
    /* Pipeline records mention the genome build/version and point at the
       documentation page */
    if (review == 8 && (genomeBuildNumber != NULL || genomeVersionNumber != NULL)) {
      FFAddOneString (ffstring, reftxt22, FALSE, FALSE, TILDE_EXPAND);
      FFAddOneString (ffstring, genomeBuildNumber, FALSE, FALSE, TILDE_EXPAND);
      if (StringHasNoText (genomeVersionNumber)) {
        genomeVersionNumber = "1";
      }
      FFAddOneString (ffstring, " version ", FALSE, FALSE, TILDE_EXPAND);
      FFAddOneString (ffstring, genomeVersionNumber, FALSE, FALSE, TILDE_EXPAND);
      FFAddOneString (ffstring, reftxt23, FALSE, FALSE, TILDE_EXPAND);

      FFAddOneString (ffstring, " [see ", FALSE, FALSE, TILDE_EXPAND);

      if ( GetWWW(ajp) ) {
        FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
        FF_Add_NCBI_Base_URL (ffstring, doc_link);
        FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
      }
      FFAddOneString (ffstring, "documentation", FALSE, FALSE, TILDE_IGNORE);
      if ( GetWWW(ajp) ) {
        FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
      }

      FFAddOneString (ffstring, "].", FALSE, FALSE, TILDE_EXPAND);
    }
    if (generated) {
      FFAddOneString (ffstring, reftxtg, FALSE, FALSE, TILDE_IGNORE);
    } else if (identical) {
      FFAddOneString (ffstring, reftxti, FALSE, FALSE, TILDE_IGNORE);
    } else {
      FFAddOneString (ffstring, reftxt0, FALSE, FALSE, TILDE_IGNORE);
    }

    /* second pass over the assembly list: print each accession or name */
    for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) {
      /* same filter as the counting loop above: only entries of choice 11
         have their data walked as a list of sub-fields */
      if (tmp->choice != 11) continue;
      accn = NULL;
      from = 0;
      to = 0;
      name = NULL;
      gi = 0;
      for (u = tmp->data.ptrvalue; u != NULL; u = u->next) {
        oip = u->label;
        if (oip != NULL && oip->str != NULL) {
          if (StringICmp (oip->str, "accession") == 0 && u->choice == 1) {
            accn = (CharPtr) u->data.ptrvalue;
          } else if (StringICmp (oip->str, "from") == 0 && u->choice == 2) {
            from = u->data.intvalue;
          } else if (StringICmp (oip->str, "to") == 0 && u->choice == 2) {
            to = u->data.intvalue;
          } else if (StringICmp (oip->str, "name") == 0 && u->choice == 1) {
            name = (CharPtr) u->data.ptrvalue;
          } else if (StringICmp (oip->str, "gi") == 0 && u->choice == 2) {
            gi = (BIG_ID) u->data.intvalue;
          }
        }
      }
      if (StringDoesHaveText (accn)) {
        if (GetWWW (ajp) && ValidateAccnDotVer (accn) == 0 && GetGiFromAccnDotVer (accn, &gi)) {
          FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
          if (is_na) {
            FF_Add_NCBI_Base_URL (ffstring, link_seqn);
          } else {
            FF_Add_NCBI_Base_URL (ffstring, link_seqp);
          }
          if (gi > 0) {
            sprintf (buf, "%ld", (long) gi);
            FFAddTextToString(ffstring, /* "val=" */ NULL, buf, "\">", FALSE, FALSE, TILDE_IGNORE);
          } else {
            FFAddTextToString(ffstring, /* "val=" */ NULL, accn, "\">", FALSE, FALSE, TILDE_IGNORE);
          }
          FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
          FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
        } else if (GetWWW (ajp) && ValidateAccn (accn) == 0) {
          FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
          if (is_na) {
            FF_Add_NCBI_Base_URL (ffstring, link_seqn);
          } else {
            FF_Add_NCBI_Base_URL (ffstring, link_seqp);
          }
          FFAddTextToString(ffstring, /* "val=" */ NULL, accn, "\">", FALSE, FALSE, TILDE_IGNORE);
          FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
          FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
        } else {
          FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
        }
        if (from > 0 && to > 0) {
          sprintf (buf, " (range: %ld-%ld)", (long) from, (long) to);
          FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
        }
      } else if (StringDoesHaveText (name)) {
        FFAddOneString (ffstring, name, FALSE, FALSE, TILDE_IGNORE);
      } else continue;
      /* separator: ", " while at least two entries follow, " and " before
         the last one */
      if (tmp->next != NULL) {
        ufp = tmp->next;
        if (ufp->next != NULL) {
          FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
        } else {
          FFAddOneString (ffstring, " and ", FALSE, FALSE, TILDE_IGNORE);
        }
      }
    }
    FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_EXPAND);
  }
}
1153 
/*
  AddStrForRefSeqGenome

  Appends the text for a "RefSeqGenome" user object to ffstring: a
  "RefSeq Category: " line followed by one line for each detail code
  (CALC, CCA, CLI, COM, FGS, MOD, PHY, PRT, QfO, TYS, UPR) that is present
  in the "Details" field.  Detail lines always appear in that fixed order,
  whatever order the fields have in the object; if a code occurs more than
  once the last value is used.  Does nothing if uop or ffstring is NULL or
  the user object has a different type.  ajp is not used.
*/
static void AddStrForRefSeqGenome (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  UserObjectPtr uop
)

{
  /* detail codes, in output order */
  static CharPtr  detailLabels [] = {
    "CALC", "CCA", "CLI", "COM", "FGS", "MOD",
    "PHY", "PRT", "QfO", "TYS", "UPR"
  };
  /* line prefixes, padded so the colons line up under "RefSeq Category:" */
  static CharPtr  detailPrefixes [] = {
    "\n           CALC: ",
    "\n            CCA: ",
    "\n            CLI: ",
    "\n            COM: ",
    "\n            FGS: ",
    "\n            MOD: ",
    "\n            PHY: ",
    "\n            PRT: ",
    "\n            QfO: ",
    "\n            TYS: ",
    "\n            UPR: "
  };
  CharPtr       category = NULL;
  CharPtr       detailValues [sizeof (detailLabels) / sizeof (detailLabels [0])];
  Int2          idx;
  Int2          numDetails = (Int2) (sizeof (detailLabels) / sizeof (detailLabels [0]));
  ObjectIdPtr   oip;
  UserFieldPtr  ufp, tmp, urf = NULL;

  if ( uop == NULL || ffstring == NULL ) return;
  if ((oip = uop->type) == NULL) return;
  if (StringCmp (oip->str, "RefSeqGenome") != 0) return;

  for (idx = 0; idx < numDetails; idx++) {
    detailValues [idx] = NULL;
  }

  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
    oip = ufp->label;
    if (oip == NULL) continue;
    if (StringCmp (oip->str, "RefSeq Category") == 0) {
      category = (CharPtr) ufp->data.ptrvalue;
    } else if (StringCmp (oip->str, "Details") == 0) {
      urf = ufp;
    }
  }

  if (urf != NULL && urf->choice == 11) {
    for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) {
      oip = tmp->label;
      /* an unlabeled detail cannot be matched to a code, so skip it */
      if (oip == NULL) continue;
      for (idx = 0; idx < numDetails; idx++) {
        if (StringCmp (oip->str, detailLabels [idx]) == 0) {
          detailValues [idx] = (CharPtr) tmp->data.ptrvalue;
          break;
        }
      }
    }
  }

  FFAddOneString (ffstring, "RefSeq Category: ", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, category, FALSE, FALSE, TILDE_IGNORE);

  for (idx = 0; idx < numDetails; idx++) {
    if (detailValues [idx] == NULL) continue;
    FFAddOneString (ffstring, detailPrefixes [idx], FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, detailValues [idx], FALSE, FALSE, TILDE_IGNORE);
  }
}
1255 
GetGenomeBuildNumber(UserObjectPtr uop)1256 static CharPtr GetGenomeBuildNumber (
1257   UserObjectPtr uop
1258 )
1259 
1260 {
1261   ObjectIdPtr   oip;
1262   CharPtr       str;
1263   UserFieldPtr  ufp;
1264 
1265   if (uop == NULL) return NULL;
1266   if ((oip = uop->type) == NULL) return NULL;
1267   if (StringCmp (oip->str, "GenomeBuild") != 0) return NULL;
1268   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
1269     oip = ufp->label;
1270     if (StringCmp(oip->str, "NcbiAnnotation") == 0) {
1271       if (ufp->choice == 1) { /* string */
1272         str = ufp->data.ptrvalue;
1273         if (! StringHasNoText (str)) return str;
1274       }
1275     } else if (StringCmp (oip->str, "Annotation") == 0) {
1276       if (ufp->choice == 1) { /* string */
1277         str = ufp->data.ptrvalue;
1278         if (! StringHasNoText (str)) {
1279           if (StringNICmp (str, "NCBI build ", 11) == 0) {
1280             if (! StringHasNoText (str + 11)) {
1281               return (str + 11);
1282             }
1283           }
1284         }
1285       }
1286     }
1287   }
1288   return NULL;
1289 }
1290 
GetGenomeVersionNumber(UserObjectPtr uop)1291 static CharPtr GetGenomeVersionNumber (
1292   UserObjectPtr uop
1293 )
1294 
1295 {
1296   ObjectIdPtr   oip;
1297   CharPtr       str;
1298   UserFieldPtr  ufp;
1299 
1300   if (uop == NULL) return NULL;
1301   if ((oip = uop->type) == NULL) return NULL;
1302   if (StringCmp (oip->str, "GenomeBuild") != 0) return NULL;
1303   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
1304     oip = ufp->label;
1305     if (StringCmp(oip->str, "NcbiVersion") == 0) {
1306       if (ufp->choice == 1) { /* string */
1307         str = ufp->data.ptrvalue;
1308         if (! StringHasNoText (str)) return str;
1309       }
1310     }
1311   }
1312   return NULL;
1313 }
1314 
1315 
/*
  Fixed text fragments describing a record predicted by automated
  computational analysis.
  NOTE(review): the code that joins these fragments (presumably with the
  contig name, prediction method and transcript evidence) is outside this
  part of the file - confirm against the caller.
*/
static CharPtr reftxt11 = "This record is predicted by automated computational analysis. This record is derived from a genomic sequence";
static CharPtr reftxt12 = "annotated using gene prediction method:";
static CharPtr reftxt13 = "and transcript sequence";
1319 
/*
  FindModelEvidenceUop

  Callback that receives a user object and, if its type string is
  "ModelEvidence", stores it through userdata (treated as a
  UserObjectPtr PNTR).  Each later match overwrites the stored pointer.
  NULL arguments and untyped user objects are ignored.
*/
static void FindModelEvidenceUop (
  UserObjectPtr uop,
  Pointer userdata
)

{
  UserObjectPtr PNTR  foundp;
  ObjectIdPtr         typeId;

  if (uop != NULL && userdata != NULL) {
    typeId = uop->type;
    if (typeId != NULL && StringCmp (typeId->str, "ModelEvidence") == 0) {
      foundp = (UserObjectPtr PNTR) userdata;
      *foundp = uop;
    }
  }
}
1337 
DoGetAnnotationComment(BioseqPtr bsp,CharPtr PNTR namep,UserFieldPtr PNTR assmp,BIG_ID_PNTR gip,Int4Ptr leftp,Int4Ptr rightp,CharPtr PNTR methodp,BoolPtr mrnaEv,BoolPtr estEv)1338 static Boolean DoGetAnnotationComment (
1339    BioseqPtr bsp,
1340    CharPtr PNTR namep,
1341    UserFieldPtr PNTR assmp,
1342    BIG_ID_PNTR gip,
1343    Int4Ptr leftp,
1344    Int4Ptr rightp,
1345    CharPtr PNTR methodp,
1346    BoolPtr mrnaEv,
1347    BoolPtr estEv
1348 )
1349 
1350 {
1351   UserFieldPtr       assm = NULL;
1352   Int2               ce = 0, cm = 0;
1353   SeqMgrDescContext  dcontext;
1354   BIG_ID             gi = 0;
1355   Int4               left = 0, right = 0;
1356   Int4Ptr            ints;
1357   CharPtr            method = NULL;
1358   UserObjectPtr      moduop;
1359   CharPtr            name = NULL;
1360   ObjectIdPtr        oip;
1361   SeqDescrPtr        sdp;
1362   SeqIdPtr           sip;
1363   TextSeqIdPtr       tsip;
1364   UserFieldPtr       u;
1365   UserFieldPtr       ufp;
1366   UserObjectPtr      uop;
1367 
1368   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
1369   while (sdp != NULL) {
1370     uop = (UserObjectPtr) sdp->data.ptrvalue;
1371     if (uop != NULL) {
1372       moduop = NULL;
1373       VisitUserObjectsInUop (uop, (Pointer) &moduop, FindModelEvidenceUop);
1374       if (moduop != NULL) {
1375         oip = moduop->type;
1376         if (oip != NULL && StringCmp(oip->str, "ModelEvidence") == 0) {
1377           for (ufp = moduop->data; ufp != NULL; ufp = ufp->next) {
1378             oip = ufp->label;
1379             if (oip == NULL) continue;
1380             if (StringCmp (oip->str, "Contig Name") == 0) {
1381               name = (CharPtr) ufp->data.ptrvalue;
1382             } else if (StringCmp (oip->str, "Assembly") == 0) {
1383               assm = ufp;
1384             } else if (StringCmp (oip->str, "Contig Gi") == 0) {
1385               gi = (BIG_ID) ufp->data.intvalue;
1386             } else if (StringCmp (oip->str, "Contig Span") == 0 && ufp->choice == 8 && ufp->num >= 2) {
1387               ints = (Int4Ptr) ufp->data.ptrvalue;
1388               if (ints != NULL) {
1389                 left = ints [0] + 1;
1390                 right = ints [1] + 1;
1391               }
1392             } else if (StringCmp (oip->str, "Method") == 0) {
1393               method = (CharPtr) ufp->data.ptrvalue;
1394             } else if (StringCmp (oip->str, "mRNA") == 0) {
1395               *mrnaEv = TRUE;
1396             } else if (StringCmp (oip->str, "EST") == 0) {
1397               *estEv = TRUE;
1398             } else if (StringCmp (oip->str, "Counts") == 0) {
1399               for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
1400                 if (u->data.ptrvalue == NULL) continue;
1401                 if (u->choice != 2) continue;
1402                 oip = u->label;
1403                 if (oip == NULL) continue;
1404                 if (StringCmp (oip->str, "mRNA") == 0) {
1405                   cm = (Int2) u->data.intvalue;
1406                   if (cm > 0) {
1407                     *mrnaEv = TRUE;
1408                   }
1409                 } else if (StringCmp (oip->str, "EST") == 0) {
1410                   ce = (Int2) u->data.intvalue;
1411                   if (ce > 0) {
1412                     *estEv = TRUE;
1413                   }
1414                 }
1415               }
1416             }
1417           }
1418           if (StringHasNoText (name) && bsp != NULL) {
1419             for (sip = bsp->id; sip != NULL; sip = sip->next) {
1420               if (sip->choice == SEQID_OTHER) {
1421                 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1422                 if (tsip != NULL) {
1423                   name = tsip->accession;
1424                 }
1425               }
1426             }
1427           }
1428         }
1429       }
1430     }
1431     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
1432   }
1433 
1434   if (StringHasNoText (name)) return FALSE;
1435   *namep = name;
1436   *assmp = assm;
1437   *gip = gi;
1438   *leftp = left;
1439   *rightp = right;
1440   if (! StringHasNoText (method)) {
1441     *methodp = method;
1442   }
1443   return TRUE;
1444 }
1445 
GetAnnotationComment(BioseqPtr bsp,CharPtr PNTR namep,UserFieldPtr PNTR assmp,BIG_ID_PNTR gip,Int4Ptr leftp,Int4Ptr rightp,CharPtr PNTR methodp,BoolPtr mrnaEv,BoolPtr estEv)1446 static Boolean GetAnnotationComment (
1447    BioseqPtr bsp,
1448    CharPtr PNTR namep,
1449    UserFieldPtr PNTR assmp,
1450    BIG_ID_PNTR gip,
1451    Int4Ptr leftp,
1452    Int4Ptr rightp,
1453    CharPtr PNTR methodp,
1454    BoolPtr mrnaEv,
1455    BoolPtr estEv
1456 )
1457 
1458 {
1459   SeqFeatPtr  cds;
1460 
1461   if (DoGetAnnotationComment (bsp, namep, assmp, gip, leftp, rightp, methodp, mrnaEv, estEv)) return TRUE;
1462   if (ISA_aa (bsp->mol)) {
1463     cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
1464     if (cds != NULL) {
1465       bsp = BioseqFindFromSeqLoc (cds->location);
1466       if (bsp != NULL) {
1467         return DoGetAnnotationComment (bsp, namep, assmp, gip, leftp, rightp, methodp, mrnaEv, estEv);
1468       }
1469     }
1470   }
1471   return FALSE;
1472 }
1473 
/*
  FindGeneFeat

  Feature callback: when sfp is a gene feature (SEQFEAT_GENE) it is stored
  through userdata, which is treated as a SeqFeatPtr PNTR.  Every gene
  visited overwrites the previous one; other feature types are ignored.
*/
static void FindGeneFeat (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  SeqFeatPtr PNTR  genepp;

  if (sfp->data.choice == SEQFEAT_GENE) {
    genepp = (SeqFeatPtr PNTR) userdata;
    *genepp = sfp;
  }
}
1486 
/*
  FindLocusId

  Scans a list of Dbtag cross-references for entries whose database name
  is "LocusID" or "InterimID" (case-insensitive) and writes the tag into
  locusIDp: the tag string if there is one, otherwise a positive numeric
  id formatted in decimal.  If several entries match, the last one written
  remains in the buffer.  The buffer is left untouched when nothing
  matches.

  NOTE(review): the string form is copied without a length limit; the size
  of the caller's buffer is not visible here - confirm it is large enough.
*/
static void FindLocusId (
  ValNodePtr dbxref,
  CharPtr locusIDp
)

{
  ObjectIdPtr  ident;
  ValNodePtr   node;
  DbtagPtr     tag;

  node = dbxref;
  while (node != NULL) {
    tag = (DbtagPtr) node->data.ptrvalue;
    if (tag != NULL &&
        (StringICmp (tag->db, "LocusID") == 0 || StringICmp (tag->db, "InterimID") == 0)) {
      ident = tag->tag;
      if (ident != NULL) {
        if (ident->str != NULL) {
          StringCpy (locusIDp, ident->str);
        } else if (ident->id > 0) {
          sprintf (locusIDp, "%ld", (long) ident->id);
        }
      }
    }
    node = node->next;
  }
}
1510 
GetGeneAndLocus(BioseqPtr bsp,CharPtr PNTR genep,CharPtr locusIDp,CharPtr taxIDp)1511 static Boolean GetGeneAndLocus (
1512   BioseqPtr bsp,
1513   CharPtr PNTR genep,
1514   CharPtr locusIDp,
1515   CharPtr taxIDp
1516 )
1517 
1518 {
1519   BioSourcePtr       biop;
1520   DbtagPtr           dbt;
1521   SeqMgrDescContext  dcontext;
1522   SeqFeatPtr         gene = NULL;
1523   GeneRefPtr         grp;
1524   ObjectIdPtr        oip;
1525   OrgRefPtr          orp;
1526   SeqDescrPtr        sdp;
1527   SeqEntryPtr        sep;
1528   CharPtr            str;
1529   ValNodePtr         syn;
1530   ValNodePtr         vnp;
1531 
1532   sep = GetTopSeqEntryForEntityID (bsp->idx.entityID);
1533   if (sep == NULL) return FALSE;
1534   VisitFeaturesInSep (sep, (Pointer) &gene, FindGeneFeat);
1535   if (gene == NULL) return FALSE;
1536 
1537   grp = (GeneRefPtr) gene->data.value.ptrvalue;
1538   if (grp == NULL) return FALSE;
1539   if (! StringHasNoText (grp->locus)) {
1540     *genep = grp->locus;
1541   } else {
1542     syn = grp->syn;
1543     if (syn != NULL) {
1544       str = (CharPtr) syn->data.ptrvalue;
1545       if (! StringHasNoText (str)) {
1546         *genep = str;
1547       }
1548     }
1549   }
1550   FindLocusId (gene->dbxref, locusIDp);
1551   FindLocusId (grp->db, locusIDp);
1552 
1553   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
1554   if (sdp != NULL) {
1555     biop = (BioSourcePtr) sdp->data.ptrvalue;
1556     if (biop != NULL) {
1557       orp = biop->org;
1558       if (orp != NULL) {
1559         for (vnp = orp->db; vnp != NULL; vnp = vnp->next) {
1560           dbt = (DbtagPtr) vnp->data.ptrvalue;
1561           if (dbt == NULL) continue;
1562           if (StringCmp (dbt->db, "taxon") == 0) {
1563             oip = dbt->tag;
1564             if (oip == NULL) continue;
1565             if (oip->str != NULL) {
1566               StringCpy (taxIDp, oip->str);
1567             } else if (oip->id > 0) {
1568               sprintf (taxIDp, "%ld", (long) oip->id);
1569             }
1570           }
1571         }
1572       }
1573     }
1574   }
1575 
1576   if (genep == NULL || StringHasNoText (locusIDp)) return FALSE;
1577 
1578   return TRUE;
1579 }
1580 
/*
  Fixed sentences explaining what runs of n's in a record mean: the first
  for gaps between contigs whose length matches the run, the second for
  gaps between contigs or uncallable bases.
  NOTE(review): the code that chooses between them is outside this part of
  the file; the second name suggests it is used for WGS records - confirm.
*/
static CharPtr nsAreGapsString = "The strings of n's in this record represent gaps between contigs, and the length of each string corresponds to the length of the gap.";
static CharPtr nsWGSGapsString = "The strings of n's in this record represent gaps between contigs or uncallable bases.";
1583 
/*
  IsTpa

  Decides, from the Seq-ids on bsp, its MolInfo technique and the
  caller-supplied has_tpa_assembly flag, whether the record is treated as
  TPA-like.  Rules, in order of precedence:
    1. MolInfo tech is MI_TECH_tsa                         -> TRUE
    2. a GenBank / EMBL / DDBJ id is present               -> FALSE
    3. a TPG / TPE / TPD id or a SEQID_OTHER id is present -> TRUE
    4. without has_tpa_assembly                            -> FALSE
    5. a general id from "BankIt" or "TMSMART"             -> TRUE
    6. a gi id is present                                  -> FALSE
    7. a local id is present                               -> TRUE
    8. otherwise                                           -> FALSE

  Side outputs (only ever set to TRUE, never cleared; each may be NULL):
    *isRefSeqP  a SEQID_OTHER id was seen
    *isTsaP     the MolInfo technique is MI_TECH_tsa
  Returns FALSE with no side outputs when bsp is NULL or has no ids.
*/
static Boolean IsTpa (
  BioseqPtr bsp,
  Boolean has_tpa_assembly,
  BoolPtr isRefSeqP,
  BoolPtr isTsaP
)

{
  SeqMgrDescContext  dcontext;
  SeqDescrPtr        desc;
  DbtagPtr           gnl;
  SeqIdPtr           id;
  MolInfoPtr         molinfo;
  Boolean            sawBankit = FALSE;
  Boolean            sawGenbank = FALSE;
  Boolean            sawGi = FALSE;
  Boolean            sawLocal = FALSE;
  Boolean            sawRefseq = FALSE;
  Boolean            sawSmart = FALSE;
  Boolean            sawTpa = FALSE;

  if (bsp == NULL || bsp->id == NULL) return FALSE;

  /* classify every Seq-id on the Bioseq */
  for (id = bsp->id; id != NULL; id = id->next) {
    if (id->choice == SEQID_LOCAL) {
      sawLocal = TRUE;
    } else if (id->choice == SEQID_GENBANK ||
               id->choice == SEQID_EMBL ||
               id->choice == SEQID_DDBJ) {
      sawGenbank = TRUE;
    } else if (id->choice == SEQID_OTHER) {
      sawRefseq = TRUE;
      if (isRefSeqP != NULL) {
        *isRefSeqP = TRUE;
      }
    } else if (id->choice == SEQID_GI) {
      sawGi = TRUE;
    } else if (id->choice == SEQID_TPG ||
               id->choice == SEQID_TPE ||
               id->choice == SEQID_TPD) {
      sawTpa = TRUE;
    } else if (id->choice == SEQID_GENERAL) {
      gnl = (DbtagPtr) id->data.ptrvalue;
      if (gnl != NULL) {
        if (StringICmp (gnl->db, "BankIt") == 0) {
          sawBankit = TRUE;
        }
        if (StringICmp (gnl->db, "TMSMART") == 0) {
          sawSmart = TRUE;
        }
      }
    }
  }

  /* a TSA technique outranks every id-based rule */
  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
  if (desc != NULL && desc->choice == Seq_descr_molinfo) {
    molinfo = (MolInfoPtr) desc->data.ptrvalue;
    if (molinfo != NULL && molinfo->tech == MI_TECH_tsa) {
      if (isTsaP != NULL) {
        *isTsaP = TRUE;
      }
      return TRUE;
    }
  }

  if (sawGenbank) return FALSE;
  if (sawTpa || sawRefseq) return TRUE;

  /* every remaining positive answer requires a TPA assembly */
  if (! has_tpa_assembly) return FALSE;
  if (sawBankit || sawSmart) return TRUE;
  if (sawGi) return FALSE;

  return sawLocal;
}
1671 
GetPrimaryStrForDelta(BioseqPtr bsp)1672 static CharPtr GetPrimaryStrForDelta (
1673   BioseqPtr bsp
1674 )
1675 
1676 {
1677   Boolean      accn;
1678   Char         buf [128], tmp [128];
1679   Int4         curr_start = 0, len, start0, start1;
1680   DbtagPtr     dbt;
1681   DeltaSeqPtr  deltasp;
1682   BIG_ID       gi;
1683   ValNodePtr   head = NULL;
1684   SeqIdPtr     id, sip;
1685   SeqIntPtr    intp;
1686   SeqLitPtr    litp;
1687   SeqLocPtr    slp;
1688   CharPtr      str;
1689   Uint1        strand;
1690 
1691   if (bsp == NULL || bsp->repr != Seq_repr_delta || bsp->seq_ext_type != 4) return NULL;
1692 
1693   for (deltasp = (DeltaSeqPtr) bsp->seq_ext; deltasp != NULL; deltasp = deltasp->next) {
1694     if (deltasp->choice == 1) {
1695       slp = (SeqLocPtr) deltasp->data.ptrvalue;
1696       if (slp != NULL && slp->choice == SEQLOC_INT) {
1697         intp = (SeqIntPtr) slp->data.ptrvalue;
1698         start0 = curr_start;
1699         start1 = intp->from;
1700         len = intp->to - intp->from + 1;
1701         curr_start += len;
1702         strand = intp->strand;
1703         sip = intp->id;
1704         if (sip == NULL) continue;
1705         id = NULL;
1706         accn = FALSE;
1707         if (sip->choice == SEQID_GI) {
1708           gi = (BIG_ID) sip->data.intvalue;
1709           if (GetAccnVerFromServer (gi, buf)) {
1710             accn = TRUE;
1711           } else {
1712             id = GetSeqIdForGI (gi);
1713           }
1714           if (id == NULL) {
1715             sprintf (buf, "%ld", (long) gi);
1716             accn = TRUE;
1717           }
1718         } else {
1719           id = SeqIdDup (sip);
1720         }
1721         if (id != NULL || accn) {
1722           if (head == NULL) {
1723             ValNodeCopyStr (&head, 0, "CONTIG_SPAN         PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP");
1724           }
1725           if (id != NULL) {
1726             SeqIdWrite (id, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
1727             if (id->choice == SEQID_GENERAL) {
1728               dbt = (DbtagPtr) id->data.ptrvalue;
1729               if (dbt != NULL && StringICmp (dbt->db, "ti") == 0) {
1730                 StringCpy (buf, "TI");
1731                 SeqIdWrite (id, buf + 2, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 3);
1732               }
1733             }
1734           }
1735           sprintf (tmp, "~%ld-%ld                                        ",
1736                    (long) (start0 + 1), (long) (start0 + len));
1737           tmp [21] = '\0';
1738           StringCat (buf, "                                        ");
1739           buf [18] = '\0';
1740           StringCat (tmp, buf);
1741           sprintf (buf, " %ld-%ld                                        ",
1742                    (long) (start1 + 1), (long) (start1 + len));
1743           buf [21] = '\0';
1744           StringCat (tmp, buf);
1745           if (strand == Seq_strand_minus) {
1746             StringCat (tmp, "c");
1747           }
1748           ValNodeCopyStr (&head, 0, tmp);
1749         }
1750         SeqIdFree (id);
1751       }
1752     } else if (deltasp->choice == 2) {
1753       litp = (SeqLitPtr) deltasp->data.ptrvalue;
1754       if (litp != NULL) {
1755         curr_start += litp->length;
1756       }
1757     }
1758   }
1759 
1760   if (head == NULL) return NULL;
1761 
1762   str = MergeFFValNodeStrs (head);
1763   ValNodeFreeData (head);
1764 
1765   return str;
1766 }
1767 
GetStrForTpaOrRefSeqHist(BioseqPtr bsp,Boolean isRefSeq,Boolean isTsa,Boolean forcePrimaryBlock)1768 static CharPtr GetStrForTpaOrRefSeqHist (
1769   BioseqPtr bsp,
1770   Boolean isRefSeq,
1771   Boolean isTsa,
1772   Boolean forcePrimaryBlock
1773 )
1774 
1775 {
1776   Boolean      accn;
1777   Char         bfr [100];
1778   Char         buf [100];
1779   DbtagPtr     dbt;
1780   BIG_ID       gi;
1781   ValNodePtr   head = NULL;
1782   SeqHistPtr   hist;
1783   SeqIdPtr     id;
1784   Int2         j;
1785   int          k;
1786   Int2         max;
1787   Boolean      minus1;
1788   Boolean      minus2;
1789   Int4         oldstop = -1;
1790   Uint1        residue;
1791   SeqAlignPtr  salp;
1792   SeqAlignPtr  salptmp;
1793   StreamCache  sc;
1794   SeqIdPtr     sip;
1795   Int4         start;
1796   Int4         stop;
1797   CharPtr      str;
1798   Char         tmp [120];
1799 
1800   if (bsp == NULL) return NULL;
1801   hist = bsp->hist;
1802   if (hist != NULL && hist->assembly != NULL) {
1803     salp = SeqAlignListDup (hist->assembly);
1804     AlnMgr2IndexLite (salp);
1805     AlnMgr2SortAlnSetByNthRowPos (salp, 1);
1806     salptmp = (SeqAlignPtr) (salp->segs);
1807     while (salptmp != NULL) {
1808       AlnMgr2GetNthSeqRangeInSA (salptmp, 1, &start, &stop);
1809       sip = AlnMgr2GetNthSeqIdPtr (salptmp, 2);
1810       if (sip != NULL) {
1811         id = NULL;
1812         accn = FALSE;
1813         buf [0] = '\0';
1814         if (sip->choice == SEQID_GI) {
1815           gi = (BIG_ID) sip->data.intvalue;
1816           if (GetAccnVerFromServer (gi, buf)) {
1817             accn = TRUE;
1818           } else {
1819             id = GetSeqIdForGI (gi);
1820           }
1821           if (id == NULL && forcePrimaryBlock) {
1822             id = SeqIdDup (sip);
1823           }
1824         } else {
1825           id = SeqIdDup (sip);
1826         }
1827         if (id != NULL || accn) {
1828           if (head == NULL) {
1829             if (isRefSeq) {
1830               ValNodeCopyStr (&head, 0, "REFSEQ_SPAN         PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP");
1831             } else if (isTsa) {
1832               ValNodeCopyStr (&head, 0, "TSA_SPAN            PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP");
1833             } else {
1834               ValNodeCopyStr (&head, 0, "TPA_SPAN            PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP");
1835             }
1836           }
1837           if (isRefSeq && oldstop > -1 && oldstop < start) {
1838             sprintf (tmp, "~%ld-%ld                                        ",
1839                      (long) (oldstop + 1), (long) (start));
1840             tmp [21] = '\0';
1841             StringCpy (bfr, "                                        ");
1842             k = 0;
1843             if (StreamCacheSetup (bsp, NULL, 0, &sc)) {
1844               if (start - oldstop < 15) {
1845                 StreamCacheSetPosition (&sc, oldstop);
1846                 bfr [k] = '"';
1847                 k++;
1848                 max = start - oldstop;
1849                 for (j = 0; j < max; j++) {
1850                   residue = StreamCacheGetResidue (&sc);
1851                   bfr [k] = (Char) residue;
1852                   k++;
1853                 }
1854                 bfr [k] = '"';
1855                 k++;
1856               } else {
1857                 StreamCacheSetPosition (&sc, oldstop);
1858                 bfr [k] = '"';
1859                 k++;
1860                 for (j = 0; j < 4; j++) {
1861                   residue = StreamCacheGetResidue (&sc);
1862                   bfr [k] = (Char) residue;
1863                   k++;
1864                 }
1865                 bfr [k] = '.';
1866                 k++;
1867                 bfr [k] = '.';
1868                 k++;
1869                 bfr [k] = '.';
1870                 k++;
1871                 StreamCacheSetPosition (&sc, start - 4);
1872                 for (j = 0; j < 4; j++) {
1873                   residue = StreamCacheGetResidue (&sc);
1874                   bfr [k] = (Char) residue;
1875                   k++;
1876                 }
1877                 bfr [k] = '"';
1878                 k++;
1879               }
1880             } else {
1881               /*
1882               StringCpy (bfr, "inserted base(s)");
1883               */
1884             }
1885             bfr [k] = '\0';
1886             StringCat (bfr, "                                        ");
1887             bfr [18] = '\0';
1888             StringCat (tmp, bfr);
1889             sprintf (bfr, " %ld-%ld                                        ",
1890                      (long) 1, (long) (start - oldstop));
1891             bfr [21] = '\0';
1892             StringCat (tmp, bfr);
1893             ValNodeCopyStr (&head, 0, tmp);
1894           }
1895           oldstop = stop + 1;
1896           if (id != NULL) {
1897             SeqIdWrite (id, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
1898             if (id->choice == SEQID_GENERAL) {
1899               dbt = (DbtagPtr) id->data.ptrvalue;
1900               if (dbt != NULL && StringICmp (dbt->db, "ti") == 0) {
1901                 StringCpy (buf, "TI");
1902                 SeqIdWrite (id, buf + 2, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 3);
1903               }
1904             }
1905           }
1906           sprintf (tmp, "~%ld-%ld                                        ",
1907                    (long) (start + 1), (long) (stop + 1));
1908           /*
1909           i = 39 - StringLen (buf);
1910           if (i > 0) {
1911             tmp [i] = '\0';
1912           } else {
1913             tmp [21] = '\0';
1914           }
1915           */
1916           tmp [21] = '\0';
1917           StringCat (buf, "                                        ");
1918           buf [18] = '\0';
1919           StringCat (tmp, buf);
1920           AlnMgr2GetNthSeqRangeInSA (salptmp, 2, &start, &stop);
1921           sprintf (buf, " %ld-%ld                                        ",
1922                    (long) (start + 1), (long) (stop + 1));
1923           buf [21] = '\0';
1924           StringCat (tmp, buf);
1925           minus1 = (Boolean) (AlnMgr2GetNthStrand (salptmp, 1) == Seq_strand_minus);
1926           minus2 = (Boolean) (AlnMgr2GetNthStrand (salptmp, 2) == Seq_strand_minus);
1927           if (minus1 || minus2) {
1928             if (! (minus1 && minus2)) {
1929               StringCat (tmp, "c");
1930             }
1931           }
1932           ValNodeCopyStr (&head, 0, tmp);
1933         }
1934         SeqIdFree (id);
1935       }
1936       SeqIdFree (sip);
1937       salptmp = salptmp->next;
1938     }
1939     SeqAlignFree (salp);
1940   }
1941 
1942   if (head == NULL) return NULL;
1943 
1944   str = MergeFFValNodeStrs (head);
1945   ValNodeFreeData (head);
1946 
1947   return str;
1948 }
1949 
1950 static CharPtr tpaString = "THIRD PARTY ANNOTATION DATABASE: This TPA record uses data from DDBJ/EMBL/GenBank ";
1951 
GetStrForTPA(UserObjectPtr uop,BioseqPtr bsp)1952 static CharPtr GetStrForTPA (
1953   UserObjectPtr uop,
1954   BioseqPtr bsp
1955 )
1956 
1957 {
1958   Char          ch;
1959   UserFieldPtr  curr;
1960   SeqHistPtr    hist;
1961   Int2          i;
1962   Char          id [41];
1963   Boolean       isRefSeq = FALSE;
1964   Boolean       isTsa = FALSE;
1965   Int2          j;
1966   size_t        len;
1967   ObjectIdPtr   oip;
1968   CharPtr       ptr;
1969   CharPtr       str;
1970   CharPtr       tmp;
1971   UserFieldPtr  ufp;
1972 
1973   if (uop == NULL) return NULL;
1974   if ((oip = uop->type) == NULL) return NULL;
1975   if (StringCmp (oip->str, "TpaAssembly") != 0) return NULL;
1976   if (bsp == NULL) return NULL;
1977   hist = bsp->hist;
1978   if (hist != NULL && hist->assembly != NULL) return NULL;
1979   if (! IsTpa (bsp, TRUE, &isRefSeq, &isTsa)) return NULL;
1980   if (isRefSeq) return NULL;
1981 
1982   len = StringLen (tpaString) + StringLen ("entries ") + StringLen ("and ") + 5;
1983   i = 0;
1984   for (curr = uop->data; curr != NULL; curr = curr->next) {
1985     if (curr->choice != 11) continue;
1986     for (ufp = curr->data.ptrvalue; ufp != NULL; ufp = ufp->next) {
1987       if (ufp->choice != 1) continue;
1988       oip = ufp->label;
1989       if (oip == NULL || StringICmp (oip->str, "accession") != 0) continue;
1990       str = (CharPtr) ufp->data.ptrvalue;
1991       if (StringHasNoText (str)) continue;
1992       len += StringLen (str) + 2;
1993       i++;
1994     }
1995   }
1996   if (i == 0) return NULL;
1997 
1998   ptr = (CharPtr) MemNew (len);
1999   if (ptr == NULL) return NULL;
2000   StringCpy (ptr, tpaString);
2001   if (i > 1) {
2002     StringCat (ptr, "entries ");
2003   } else {
2004     StringCat (ptr, "entry ");
2005   }
2006 
2007   j = 0;
2008   for (curr = uop->data; curr != NULL; curr = curr->next) {
2009     if (curr->choice != 11) continue;
2010     for (ufp = curr->data.ptrvalue; ufp != NULL; ufp = ufp->next) {
2011       if (ufp->choice != 1) continue;
2012       oip = ufp->label;
2013       if (oip == NULL || StringICmp (oip->str, "accession") != 0) continue;
2014       str = (CharPtr) ufp->data.ptrvalue;
2015       if (StringHasNoText (str)) continue;
2016       StringNCpy_0 (id, str, sizeof (id));
2017       tmp = id;
2018       ch = *tmp;
2019       while (ch != '\0') {
2020         if (IS_LOWER (ch)) {
2021           *tmp = TO_UPPER (ch);
2022         }
2023         tmp++;
2024         ch = *tmp;
2025       }
2026       if (j == i - 1 && i > 1) {
2027         StringCat (ptr, " and ");
2028       } else if (j > 0) {
2029         StringCat (ptr, ", ");
2030       }
2031       StringCat (ptr, id);
2032       j++;
2033     }
2034   }
2035 
2036   return ptr;
2037 }
2038 
GetStrForGenome(UserObjectPtr uop,BioseqPtr bsp)2039 static CharPtr GetStrForGenome (
2040   UserObjectPtr uop,
2041   BioseqPtr bsp
2042 )
2043 
2044 {
2045   ObjectIdPtr  oip;
2046 
2047   if (uop == NULL) return NULL;
2048   if ((oip = uop->type) == NULL) return NULL;
2049   if (StringCmp (oip->str, "GenomeInfo") != 0) return NULL;
2050 
2051   /* !!! need to implement !!! */
2052 
2053   return NULL;
2054 }
2055 
/*
 * AddAltPrimaryBlock
 *
 * Adds a PRIMARY block whose text comes from GetPrimaryStrForDelta for the
 * current Bioseq.  Does nothing when the work structure is incomplete, when
 * no format string buffer is available, or when there is no text to show.
 * When ajp->gbseq is set, a copy of the raw text is also stored in the
 * section's gbseq record.
 */
static void AddAltPrimaryBlock (
  Asn2gbWorkPtr awp
)

{
  IntAsn2gbJobPtr  ajp;
  Asn2gbSectPtr    asp;
  BaseBlockPtr     block;
  BioseqPtr        bsp;
  StringItemPtr    ffstring;
  GBSeqPtr         gbseq;
  CharPtr          text;

  if (awp == NULL) return;
  ajp = awp->ajp;
  bsp = awp->bsp;
  asp = awp->asp;
  if (ajp == NULL || bsp == NULL || asp == NULL) return;

  ffstring = FFGetString (ajp);
  if (ffstring == NULL) return;

  text = GetPrimaryStrForDelta (bsp);
  if (text == NULL) {
    FFRecycleString (ajp, ffstring);
    return;
  }

  block = (BaseBlockPtr) Asn2gbAddBlock (awp, PRIMARY_BLOCK, sizeof (BaseBlock));
  if (block != NULL) {

    FFStartPrint (ffstring, awp->format, 0, 12, "PRIMARY", 12, 5, 5, "PR", TRUE);
    FFAddOneString (ffstring, text, FALSE, FALSE, TILDE_EXPAND);
    block->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "PR");

    /* optionally populate gbseq for XML-ized GenBank format */

    if (ajp->gbseq) {
      gbseq = &asp->gbseq;
      gbseq->primary = StringSave (text);
    }

    if (awp->afp != NULL) {
      DoImmediateFormat (awp->afp, block);
    }
  }

  MemFree (text);
  FFRecycleString (ajp, ffstring);
}
2113 
GeStrForTSA(UserObjectPtr uop)2114 static CharPtr GeStrForTSA (
2115   UserObjectPtr uop
2116 )
2117 
2118 {
2119   Int4          asf, ast, prf, prt;
2120   Char          buf [128], tmp [128];
2121   UserFieldPtr  curr;
2122   Boolean       has_asf, has_ast, has_prf, has_prt;
2123   ValNodePtr    head = NULL;
2124   ObjectIdPtr   oip;
2125   CharPtr       pid;
2126   CharPtr       str;
2127   UserFieldPtr  ufp;
2128 
2129   if (uop == NULL) return NULL;
2130   if ((oip = uop->type) == NULL) return NULL;
2131   if (StringCmp (oip->str, "TSA") != 0) return NULL;
2132 
2133   for (curr = uop->data; curr != NULL; curr = curr->next) {
2134     if (curr->choice != 11) continue;
2135     asf = 0;
2136     ast = 0;
2137     prf = 0;
2138     prt = 0;
2139     pid = NULL;
2140     has_asf = FALSE;
2141     has_ast = FALSE;
2142     has_prf = FALSE;
2143     has_prt = FALSE;
2144     for (ufp = curr->data.ptrvalue; ufp != NULL; ufp = ufp->next) {
2145       oip = ufp->label;
2146       if (oip == NULL) continue;
2147       if (StringICmp (oip->str, "assembly from") == 0 && ufp->choice == 2) {
2148         asf = (Int4) ufp->data.intvalue;
2149         has_asf = TRUE;
2150       } else if (StringICmp (oip->str, "assembly to") == 0 && ufp->choice == 2) {
2151         ast = (Int4) ufp->data.intvalue;
2152         has_ast = TRUE;
2153       } else if (StringICmp (oip->str, "primary from") == 0 && ufp->choice == 2) {
2154         prf = (Int4) ufp->data.intvalue;
2155         has_prf = TRUE;
2156       } else if (StringICmp (oip->str, "primary to") == 0 && ufp->choice == 2) {
2157         prt = (Int4) ufp->data.intvalue;
2158         has_prt = TRUE;
2159       } else if (StringICmp (oip->str, "primary ID") == 0 && ufp->choice == 1) {
2160         pid = (CharPtr) ufp->data.ptrvalue;
2161       }
2162     }
2163     if (has_asf && has_ast && has_prf && has_prt && pid != NULL) {
2164       if (head == NULL) {
2165         ValNodeCopyStr (&head, 0, "TSA_SPAN            PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP");
2166       }
2167       StringCpy (buf, pid);
2168       if (StringNCmp (pid, "gnl|ti|", 7) == 0) {
2169         StringCpy (buf, "TI");
2170         StringCat (buf, pid + 7);
2171       }
2172       sprintf (tmp, "~%ld-%ld                                        ",
2173                (long) (asf + 1), (long) (ast + 1));
2174       tmp [21] = '\0';
2175       StringCat (buf, "                                        ");
2176       buf [18] = '\0';
2177       StringCat (tmp, buf);
2178       sprintf (buf, " %ld-%ld                                        ",
2179                (long) (prf + 1), (long) (prt + 1));
2180       buf [21] = '\0';
2181       StringCat (tmp, buf);
2182       if (prf > prt) {
2183         StringCat (tmp, "c");
2184       }
2185       ValNodeCopyStr (&head, 0, tmp);
2186     }
2187   }
2188 
2189   if (head == NULL) return NULL;
2190 
2191   str = MergeFFValNodeStrs (head);
2192   ValNodeFreeData (head);
2193 
2194   return str;
2195 }
2196 
/*
 * AddTsaBlock
 *
 * Adds a PRIMARY block whose text comes from GeStrForTSA applied to uop.
 * Does nothing when the work structure is incomplete, when no format
 * string buffer is available, or when the user object yields no text.
 * When ajp->gbseq is set, a copy of the raw text is also stored in the
 * section's gbseq record.
 */
static void AddTsaBlock (
  Asn2gbWorkPtr awp,
  UserObjectPtr uop
)

{
  IntAsn2gbJobPtr  ajp;
  Asn2gbSectPtr    asp;
  BaseBlockPtr     block;
  StringItemPtr    ffstring;
  GBSeqPtr         gbseq;
  CharPtr          text;

  if (awp == NULL) return;
  ajp = awp->ajp;
  asp = awp->asp;
  if (ajp == NULL || awp->bsp == NULL || asp == NULL) return;

  ffstring = FFGetString (ajp);
  if (ffstring == NULL) return;

  text = GeStrForTSA (uop);
  if (text == NULL) {
    FFRecycleString (ajp, ffstring);
    return;
  }

  block = (BaseBlockPtr) Asn2gbAddBlock (awp, PRIMARY_BLOCK, sizeof (BaseBlock));
  if (block != NULL) {

    FFStartPrint (ffstring, awp->format, 0, 12, "PRIMARY", 12, 5, 5, "PR", TRUE);
    FFAddOneString (ffstring, text, FALSE, FALSE, TILDE_EXPAND);
    block->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "PR");

    /* optionally populate gbseq for XML-ized GenBank format */

    if (ajp->gbseq) {
      gbseq = &asp->gbseq;
      gbseq->primary = StringSave (text);
    }

    if (awp->afp != NULL) {
      DoImmediateFormat (awp->afp, block);
    }
  }

  MemFree (text);
  FFRecycleString (ajp, ffstring);
}
2255 
AddPrimaryBlock(Asn2gbWorkPtr awp)2256 NLM_EXTERN void AddPrimaryBlock (
2257   Asn2gbWorkPtr awp
2258 )
2259 
2260 {
2261   IntAsn2gbJobPtr    ajp;
2262   Asn2gbSectPtr      asp;
2263   BaseBlockPtr       bbp = NULL;
2264   BioseqPtr          bsp;
2265   SeqMgrDescContext  dcontext;
2266   GBSeqPtr           gbseq;
2267   Boolean            has_tpa_assembly = FALSE;
2268   Boolean            has_tsa = FALSE;
2269   SeqHistPtr         hist;
2270   Boolean            isRefSeq = FALSE;
2271   Boolean            isTsa = FALSE;
2272   ObjectIdPtr        oip;
2273   SeqDescrPtr        sdp;
2274   CharPtr            str;
2275   UserObjectPtr      uop;
2276   StringItemPtr      ffstring;
2277 
2278   if (awp == NULL) return;
2279   ajp = awp->ajp;
2280   if (ajp == NULL) return;
2281   bsp = awp->bsp;
2282   if (bsp == NULL) return;
2283   asp = awp->asp;
2284   if (asp == NULL) return;
2285 
2286   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
2287   while (sdp != NULL) {
2288     uop = (UserObjectPtr) sdp->data.ptrvalue;
2289     if (uop != NULL) {
2290       oip = uop->type;
2291       if (oip != NULL) {
2292         if (StringCmp (oip->str, "TpaAssembly") == 0) {
2293           has_tpa_assembly = TRUE;
2294         } else if (StringCmp (oip->str, "TSA") == 0) {
2295           has_tsa = TRUE;
2296         }
2297       }
2298     }
2299     if (has_tpa_assembly || has_tsa) {
2300       sdp = NULL;
2301     } else {
2302       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
2303     }
2304   }
2305 
2306   if (has_tsa) {
2307     AddTsaBlock (awp, uop);
2308     return;
2309   }
2310 
2311   hist = bsp->hist;
2312   if ((! IsTpa (bsp, has_tpa_assembly, &isRefSeq, &isTsa)) ||
2313       hist == NULL || hist->assembly == NULL) {
2314     if (awp->forcePrimaryBlock) {
2315       AddAltPrimaryBlock (awp);
2316     }
2317     return;
2318   }
2319 
2320   ffstring = FFGetString(ajp);
2321   if ( ffstring == NULL ) return;
2322 
2323   str = GetStrForTpaOrRefSeqHist (bsp, isRefSeq, isTsa, awp->forcePrimaryBlock);
2324   if (str != NULL) {
2325 
2326     bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, PRIMARY_BLOCK, sizeof (BaseBlock));
2327     if (bbp != NULL) {
2328 
2329       if (has_tpa_assembly) {
2330         bbp->entityID = dcontext.entityID;
2331         bbp->itemID = dcontext.itemID;
2332         bbp->itemtype = OBJ_SEQDESC;
2333       }
2334 
2335       FFStartPrint (ffstring, awp->format, 0, 12, "PRIMARY", 12, 5, 5, "PR", TRUE);
2336 
2337       FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
2338 
2339       bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "PR");
2340 
2341       /* optionally populate gbseq for XML-ized GenBank format */
2342 
2343       if (ajp->gbseq) {
2344         gbseq = &asp->gbseq;
2345       } else {
2346         gbseq = NULL;
2347       }
2348 
2349       if (gbseq != NULL) {
2350         gbseq->primary = StringSave (str);
2351       }
2352 
2353       if (awp->afp != NULL) {
2354         DoImmediateFormat (awp->afp, (BaseBlockPtr) bbp);
2355       }
2356     }
2357     MemFree (str);
2358   }
2359 
2360   FFRecycleString(ajp, ffstring);
2361 }
2362 
2363 static CharPtr reftxt32 = "It is defined by coordinates on the sequence of chromosome";
2364 static CharPtr reftxt33 = "from the";
2365 static CharPtr reftxt34 = "assembly of the human genome (NCBI build";
2366 static CharPtr reftxt35 = ").";
2367 
GetEncodeString(UserObjectPtr uop,BioseqPtr bsp)2368 static CharPtr GetEncodeString (
2369   UserObjectPtr uop,
2370   BioseqPtr bsp
2371 )
2372 
2373 {
2374   CharPtr            assembly_date = NULL;
2375   BioSourcePtr       biop;
2376   CharPtr            chromosome = NULL;
2377   SeqMgrDescContext  dcontext;
2378   size_t             len;
2379   CharPtr            ncbi_annotation = NULL;
2380   ObjectIdPtr        oip;
2381   SeqDescrPtr        sdp;
2382   SubSourcePtr       ssp;
2383   CharPtr            str;
2384   UserFieldPtr       ufp;
2385 
2386   if (uop == NULL || bsp == NULL) return NULL;
2387 
2388   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2389     oip = ufp->label;
2390     if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
2391     if (StringICmp (oip->str, "AssemblyDate") == 0) {
2392       assembly_date = (CharPtr) ufp->data.ptrvalue;
2393     } else if (StringICmp (oip->str, "NcbiAnnotation") == 0) {
2394       ncbi_annotation = (CharPtr) ufp->data.ptrvalue;
2395     }
2396   }
2397 
2398   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
2399   if (sdp != NULL) {
2400     biop = (BioSourcePtr) sdp->data.ptrvalue;
2401     if (biop != NULL) {
2402       for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
2403         if (ssp->subtype == SUBSRC_chromosome) {
2404           chromosome = ssp->name;
2405         }
2406       }
2407     }
2408   }
2409 
2410   if (chromosome == NULL || assembly_date == NULL || ncbi_annotation == NULL) return NULL;
2411 
2412   if (StringHasNoText (chromosome)) {
2413     chromosome = "?";
2414   }
2415   if (StringHasNoText (assembly_date)) {
2416     assembly_date = "?";
2417   }
2418   if (StringHasNoText (ncbi_annotation)) {
2419     ncbi_annotation = "?";
2420   }
2421 
2422   len = StringLen (reftxt32) + StringLen (reftxt33) +
2423         StringLen (reftxt34) + StringLen (reftxt35) +
2424         StringLen (chromosome) +
2425         StringLen (assembly_date) +
2426         StringLen (ncbi_annotation);
2427 
2428   str = (CharPtr) MemNew (sizeof (Char) * (len + 10));
2429   if (str == NULL) return NULL;
2430 
2431   sprintf (str, "%s %s %s %s %s %s%s", reftxt32, chromosome, reftxt33,
2432            assembly_date, reftxt34, ncbi_annotation, reftxt35);
2433 
2434   return str;
2435 }
2436 
2437 
2438 typedef struct unverifiedtypeinfodata {
2439   CharPtr match_name;
2440   CharPtr comment_text;
2441 } UnverifiedTypeInfoData, PNTR UnverifiedTypeInfoPtr;
2442 
2443 
2444 static UnverifiedTypeInfoData s_UnverifiedTypeInfo[] = {
2445   { "Organism", "source organism" },
2446   { "Features", "sequence and/or annotation" },
2447   { "Misassembled", "sequence assembly" }
2448 };
2449 
2450 
GetUnverifiedMatchName(Int4 unverified_type)2451 NLM_EXTERN CharPtr GetUnverifiedMatchName (Int4 unverified_type)
2452 {
2453   if (unverified_type < 0 || unverified_type > eUnverifiedType_Max) {
2454     return NULL;
2455   } else {
2456     return s_UnverifiedTypeInfo[unverified_type].match_name;
2457   }
2458 }
2459 
2460 
/*
 * GetUnverifiedFlags
 *
 * Fills unverified_flags (eUnverifiedType_Max entries) from an unverified
 * user object.  All flags are cleared first.  Each string field labelled
 * "Type" whose value matches a table name (case-insensitively) sets the
 * corresponding flag.  When uop is present but nothing matched, the
 * Features flag is set as the historical default.  With a NULL uop the
 * flags are left cleared; with a NULL flag array nothing happens.
 */
static void GetUnverifiedFlags (UserObjectPtr uop, BoolPtr unverified_flags)
{
  UserFieldPtr  field;
  Int4          kind;
  ObjectIdPtr   label;
  Boolean       matched = FALSE;
  CharPtr       value;

  if (unverified_flags == NULL) return;

  kind = 0;
  while (kind < eUnverifiedType_Max) {
    unverified_flags[kind] = FALSE;
    kind++;
  }

  if (uop == NULL) return;

  for (field = uop->data; field != NULL; field = field->next) {
    label = field->label;
    if (label == NULL) continue;
    if (StringCmp (label->str, "Type") != 0) continue;
    if (field->choice != 1) continue;

    value = (CharPtr) field->data.ptrvalue;

    /* find the first table entry with this name, if any */
    kind = 0;
    while (kind < eUnverifiedType_Max &&
           StringICmp (value, s_UnverifiedTypeInfo[kind].match_name) != 0) {
      kind++;
    }
    if (kind < eUnverifiedType_Max) {
      unverified_flags[kind] = TRUE;
      matched = TRUE;
    }
  }

  if (! matched) {
    /* default in the past was to use feature if not source */
    unverified_flags[eUnverifiedType_Features] = TRUE;
  }
}
2497 
2498 
CommentTextFromUnverifiedFlags(BoolPtr unverified_flags)2499 static CharPtr CommentTextFromUnverifiedFlags(BoolPtr unverified_flags)
2500 {
2501   Int4 i, len, num_items = 0, item;
2502   CharPtr comment_start = "GenBank staff is unable to verify ";
2503   CharPtr comment_end = " provided by the submitter.";
2504   CharPtr and = "and ";
2505   CharPtr comma = ", ";
2506   CharPtr comment = NULL;
2507 
2508   if (unverified_flags == NULL) {
2509     return NULL;
2510   }
2511 
2512   len = StringLen (comment_start) + StringLen (comment_end) + 1;
2513   for (i = 0; i < eUnverifiedType_Max; i++) {
2514     if (unverified_flags[i]) {
2515       num_items++;
2516       len += StringLen (s_UnverifiedTypeInfo[i].comment_text);
2517     }
2518   }
2519   if (num_items > 1) {
2520     len += StringLen (and);
2521     if (num_items > 2) {
2522       len += StringLen (comma) * (num_items - 1);
2523     } else {
2524       len += 1;
2525     }
2526   } else if (num_items == 0) {
2527     return NULL;
2528   }
2529 
2530   comment = (CharPtr) MemNew (sizeof (Char) * len);
2531   StringCpy (comment, comment_start);
2532   item = 0;
2533   for (i = 0; i < eUnverifiedType_Max; i++) {
2534     if (unverified_flags[i]) {
2535       if (item > 0) {
2536         if (num_items > 2) {
2537           StringCat (comment, comma);
2538         }
2539         if (item == num_items - 1) {
2540           if (num_items == 2) {
2541             StringCat (comment, " ");
2542           }
2543           StringCat (comment, and);
2544         }
2545       }
2546       StringCat (comment, s_UnverifiedTypeInfo[i].comment_text);
2547       item++;
2548     }
2549   }
2550   StringCat (comment, comment_end);
2551   return comment;
2552 }
2553 
/*
 * GetFileTrackPoint
 *
 * Returns a point position: spp->point when a single Seq-point is given,
 * otherwise the index-th entry of the packed point set psp, otherwise 0.
 */
static Int4 GetFileTrackPoint (SeqPntPtr spp, PackSeqPntPtr psp, Int4 index)

{
  if (spp != NULL) return spp->point;

  if (psp == NULL) return 0;

  return PackSeqPntGet (psp, index);
}
2564 
CommentsAreDifferent(CharPtr str,CharPtr last_name)2565 static Boolean CommentsAreDifferent (CharPtr str, CharPtr last_name)
2566 
2567 {
2568   size_t  lens, lenl;
2569 
2570   if (str == NULL && last_name == NULL) return FALSE;
2571 
2572   if (StringCmp (str, last_name) == 0) return FALSE;
2573 
2574   lens = StringLen (str);
2575   lenl = StringLen (last_name);
2576 
2577   if (lens == lenl + 1) {
2578     if (StringNCmp (str, last_name, lenl) == 0) {
2579       if (str [lens - 1] == '.') {
2580         return FALSE;
2581       }
2582     }
2583   } else if (lenl == lens + 1) {
2584     if (StringNCmp (str, last_name, lens) == 0) {
2585       if (last_name [lenl - 1] == '.') {
2586         return FALSE;
2587       }
2588     }
2589   }
2590 
2591   return TRUE;
2592 }
2593 
AddCommentBlock(Asn2gbWorkPtr awp)2594 NLM_EXTERN void AddCommentBlock (
2595   Asn2gbWorkPtr awp
2596 )
2597 
2598 {
2599   size_t             acclen;
2600   CharPtr            accn;
2601   SeqMgrAndContext   acontext;
2602   AnnotDescPtr       adp;
2603   Boolean            annotDescCommentToComment = FALSE;
2604   IntAsn2gbJobPtr    ajp;
2605   UserFieldPtr       assm = NULL;
2606   CharPtr            authaccessvalue = NULL;
2607   Int4               authaccess_itemID = 0;
2608   BioseqPtr          bsp;
2609   Char               buf [2048];
2610   CommentBlockPtr    cbp;
2611   Char               ch;
2612   Int2               chunk;
2613   Int2               count;
2614   CharPtr PNTR       cpp;
2615   Boolean            didGenome = FALSE;
2616   Boolean            didRefTrack = FALSE;
2617   Boolean            didTPA = FALSE;
2618   DbtagPtr           dbt;
2619   SeqMgrDescContext  dcontext;
2620   DeltaSeqPtr        dsp;
2621   UserObjectPtr      encodeUop = NULL;
2622   Boolean            estEv = FALSE;
2623   BioseqPtr          farbsp;
2624   Uint2              fareid;
2625   /*
2626   SeqMgrFeatContext  fcontext;
2627   */
2628   CharPtr            field;
2629   PackSeqPntPtr      filetrackpsp = NULL;
2630   SeqPntPtr          filetrackspp = NULL;
2631   CharPtr            filetrackURL = NULL;
2632   Int4               basemodNum = 0;
2633   CharPtr PNTR       basemodURLhead = NULL;
2634   CharPtr            basemodURL = NULL;
2635   Int4               filetrack_itemID = 0;
2636   Boolean            first = TRUE;
2637   UserObjectPtr      firstGenAnnotSCAD = NULL;
2638   CharPtr            firstGenAnnotSCStr = NULL;
2639   Int4               frags;
2640   GBBlockPtr         gbp;
2641   CharPtr            geneName = NULL;
2642   CharPtr            genomeBuildNumber = NULL;
2643   CharPtr            genomeVersionNumber = NULL;
2644   BIG_ID             gi = 0;
2645   Int4               gsdbid = 0;
2646   /*
2647   Boolean            has_gaps = FALSE;
2648   */
2649   Boolean            hasRefTrackStatus = FALSE;
2650   SeqHistPtr         hist;
2651   Int4               idx;
2652   Boolean            is_collab = FALSE;
2653   Boolean            is_encode = FALSE;
2654   Boolean            is_other = FALSE;
2655   Boolean            is_tpa = FALSE;
2656   Boolean            is_wgs = FALSE;
2657   Boolean            isRefSeqStandard = FALSE;
2658   Boolean            is_unverified = FALSE;
2659   Int4               j;
2660   Int4               last;
2661   Boolean            last_had_tilde = FALSE;
2662   CharPtr            last_name;
2663   Int4               left;
2664   size_t             len;
2665   /*
2666   SeqLitPtr          litp;
2667   */
2668   ObjectIdPtr        localID = NULL;
2669   Char               locusID [32];
2670   CharPtr            method = NULL;
2671   MolInfoPtr         mip;
2672   Boolean            mrnaEv = FALSE;
2673   SeqIdPtr           msip;
2674   CharPtr            name = NULL;
2675   ObjectIdPtr        ncbifileID = NULL;
2676   CharPtr            nm;
2677   Int4               num;
2678   ObjectIdPtr        oip;
2679   Boolean            okay;
2680   CharPtr            origLocalID = NULL;
2681   /*
2682   BioseqPtr          parent;
2683   */
2684   CharPtr            pfx;
2685   CharPtr            plural;
2686   Int4               pos;
2687   Int4               right;
2688   SeqDescrPtr        sdp;
2689   SeqFeatPtr         sfp;
2690   CharPtr            sfx;
2691   Boolean            showedLocalID = FALSE;
2692   Boolean            showGBBSource = FALSE;
2693   SeqIdPtr           sip;
2694   SeqLocPtr          slp;
2695   CharPtr            str;
2696   Char               taxID [64];
2697   CharPtr            tlsaccn = NULL;
2698   CharPtr            tlsname = NULL;
2699   Char               tmp [128];
2700   CharPtr            tsaaccn = NULL;
2701   CharPtr            tsaname = NULL;
2702   TextSeqIdPtr       tsip;
2703   TextSeqIdPtr       tlstsip = NULL;
2704   UserFieldPtr       tufp;
2705   UserFieldPtr       ufp;
2706   Boolean            unordered = FALSE;
2707   Int4               unverified_itemID = 0;
2708   UserObjectPtr      uop;
2709   Int4               version;
2710   ValNodePtr         vnp;
2711   CharPtr            wgsaccn = NULL;
2712   CharPtr            wgsname = NULL;
2713   StringItemPtr      ffstring = NULL;
2714   Boolean            unverified_flags[eUnverifiedType_Max];
2715   CharPtr            unverified_comment;
2716 
2717   if (awp == NULL) return;
2718   ajp = awp->ajp;
2719   if (ajp == NULL) return;
2720   bsp = awp->bsp;
2721   if (bsp == NULL) return;
2722 
2723   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
2724       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
2725     sprintf (buf, "<a name=\"comment_%s\"></a>", awp->currAccVerLabel);
2726     DoQuickLinkFormat (awp->afp, buf);
2727   }
2728 
2729   ffstring = FFGetString(ajp);
2730   if ( ffstring ==  NULL ) return;
2731 
2732   GetUnverifiedFlags(NULL, unverified_flags);
2733   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
2734   while (sdp != NULL) {
2735     uop = (UserObjectPtr) sdp->data.ptrvalue;
2736     if (uop != NULL) {
2737       str = GetStatusForRefTrack (uop);
2738       if (str != NULL) {
2739         hasRefTrackStatus = TRUE;
2740       }
2741       if (genomeBuildNumber == NULL) {
2742         genomeBuildNumber = GetGenomeBuildNumber (uop);
2743       }
2744       if (genomeVersionNumber == NULL) {
2745         genomeVersionNumber = GetGenomeVersionNumber (uop);
2746       }
2747       oip = uop->type;
2748       if (oip != NULL) {
2749         if (StringICmp (oip->str, "Unverified") == 0) {
2750           is_unverified = TRUE;
2751           unverified_itemID = dcontext.itemID;
2752           GetUnverifiedFlags(uop, unverified_flags);
2753         }
2754         if (StringICmp (oip->str, "ENCODE") == 0) {
2755           is_encode = TRUE;
2756           encodeUop = uop;
2757         }
2758         if (StringICmp (oip->str, "FileTrack") == 0) {
2759           filetrack_itemID = dcontext.itemID;
2760           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2761             oip = ufp->label;
2762             if (oip == NULL) continue;
2763             if (StringCmp (oip->str, "FileTrackURL") == 0 || StringCmp (oip->str, "Map-FileTrackURL") == 0) {
2764               if (ufp->choice == 1 && ufp->data.ptrvalue != NULL) {
2765                 filetrackURL = (CharPtr) ufp->data.ptrvalue;
2766               } else if (ufp->choice == 7 && ufp->data.ptrvalue != NULL && ufp->num > 0) {
2767                 cpp = (CharPtr PNTR) ufp->data.ptrvalue;
2768                 if (cpp != NULL) {
2769                   filetrackURL = cpp [0];
2770                 }
2771               }
2772             } else if (StringCmp (oip->str, "BaseModification-FileTrackURL") == 0) {
2773               if (ufp->choice == 1 && ufp->data.ptrvalue != NULL) {
2774                 basemodURL = (CharPtr) ufp->data.ptrvalue;
2775                 basemodNum = 1;
2776               } else if (ufp->choice == 7 && ufp->data.ptrvalue != NULL && ufp->num > 0) {
2777                 cpp = (CharPtr PNTR) ufp->data.ptrvalue;
2778                 if (cpp != NULL) {
2779                   basemodURLhead = cpp;
2780                   basemodNum = ufp->num;
2781                 }
2782               }
2783             }
2784           }
2785         }
2786         if (StringICmp (oip->str, "AuthorizedAccess") == 0) {
2787           authaccess_itemID = dcontext.itemID;
2788           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2789             oip = ufp->label;
2790             if (oip == NULL) continue;
2791             if (StringCmp (oip->str, "Study") != 0) continue;
2792             if (ufp->choice != 1 || ufp->data.ptrvalue == NULL) continue;
2793             authaccessvalue = (CharPtr) ufp->data.ptrvalue;
2794           }
2795         }
2796       }
2797     }
2798     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
2799   }
2800 
2801   if (is_unverified) {
2802     cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
2803     if (cbp != NULL) {
2804 
2805       cbp->entityID = awp->entityID;
2806       cbp->itemID = unverified_itemID;
2807       cbp->itemtype = OBJ_SEQDESC;
2808       cbp->first = first;
2809       cbp->no_blank_before = last_had_tilde;
2810       first = FALSE;
2811 
2812       if (cbp->first) {
2813         FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
2814       } else {
2815         FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
2816       }
2817 
2818       unverified_comment = CommentTextFromUnverifiedFlags(unverified_flags);
2819       if (unverified_comment != NULL) {
2820           FFAddOneString (ffstring,
2821                           unverified_comment,
2822                           FALSE, FALSE, TILDE_IGNORE);
2823           unverified_comment = MemFree (unverified_comment);
2824       }
2825 
2826       cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
2827       FFRecycleString(ajp, ffstring);
2828       ffstring = FFGetString(ajp);
2829 
2830       last_had_tilde = FALSE;
2831       if (awp->afp != NULL) {
2832         DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
2833       }
2834     }
2835   }
2836 
2837   if (bsp->repr == Seq_repr_map && bsp->seq_ext_type == 3) {
2838     for (sfp = (SeqFeatPtr) bsp->seq_ext; sfp != NULL; sfp = sfp->next) {
2839       if (sfp->data.choice != SEQFEAT_RSITE) continue;
2840       slp = sfp->location;
2841       if (slp == NULL) continue;
2842       if (slp->choice == SEQLOC_PNT) {
2843         filetrackspp = (SeqPntPtr) slp->data.ptrvalue;
2844       } else if (slp->choice == SEQLOC_PACKED_PNT) {
2845         filetrackpsp = (PackSeqPntPtr) slp->data.ptrvalue;
2846       }
2847     }
2848   }
2849 
2850   if (authaccessvalue != NULL) {
2851     cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
2852     if (cbp != NULL) {
2853 
2854       cbp->entityID = awp->entityID;
2855       cbp->itemID = authaccess_itemID;
2856       cbp->itemtype = OBJ_SEQDESC;
2857       cbp->first = first;
2858       cbp->no_blank_before = last_had_tilde;
2859       first = FALSE;
2860 
2861       if (cbp->first) {
2862         FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
2863       } else {
2864         FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
2865       }
2866 
2867       FFAddOneString (ffstring, "These data are available through the dbGaP authorized access system. ", FALSE, FALSE, TILDE_IGNORE);
2868       if (GetWWW (ajp)) {
2869         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2870         FFAddOneString (ffstring, "https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?adddataset=", FALSE, FALSE, TILDE_IGNORE);
2871         FFAddOneString (ffstring, authaccessvalue, FALSE, FALSE, TILDE_IGNORE);
2872         FFAddOneString (ffstring, "&page=login", FALSE, FALSE, TILDE_IGNORE);
2873         FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2874         FFAddOneString (ffstring, "Request access", FALSE, FALSE, TILDE_IGNORE);
2875         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2876         FFAddOneString (ffstring, " to Study ", FALSE, FALSE, TILDE_IGNORE);
2877         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2878         FFAddOneString (ffstring, "https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=", FALSE, FALSE, TILDE_IGNORE);
2879         FFAddOneString (ffstring, authaccessvalue, FALSE, FALSE, TILDE_IGNORE);
2880         FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2881         FFAddOneString (ffstring, authaccessvalue, FALSE, FALSE, TILDE_IGNORE);
2882         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2883       } else {
2884         FFAddOneString (ffstring, "Request access to Study ", FALSE, FALSE, TILDE_IGNORE);
2885         FFAddOneString (ffstring, authaccessvalue, FALSE, FALSE, TILDE_IGNORE);
2886       }
2887       FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
2888 
2889       cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
2890       FFRecycleString(ajp, ffstring);
2891       ffstring = FFGetString(ajp);
2892 
2893       last_had_tilde = FALSE;
2894       if (awp->afp != NULL) {
2895         DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
2896       }
2897     }
2898   }
2899 
2900   /*
2901   look for Seq-annot.desc.comment on annots packaged on current bioseq,
2902   Genome-Annotation structured comment will suppress GenomeBuild user object
2903   */
2904 
2905   adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_user, &acontext);
2906   while (adp != NULL) {
2907     uop = (UserObjectPtr) adp->data.ptrvalue;
2908     if (uop != NULL) {
2909       oip = uop->type;
2910       if (oip != NULL) {
2911         if (StringCmp (oip->str, "AnnotDescCommentPolicy") == 0) {
2912           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2913             oip = ufp->label;
2914             if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
2915             if (StringCmp (oip->str, "Policy") == 0) {
2916               if (StringICmp ((CharPtr) ufp->data.ptrvalue, "ShowInComment") == 0) {
2917                 annotDescCommentToComment = TRUE;
2918               }
2919             }
2920           }
2921         } else if (StringICmp (oip->str, "StructuredComment") == 0) {
2922           if (firstGenAnnotSCAD == NULL) {
2923             for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2924               if (ufp->choice != 1) continue;
2925               oip = ufp->label;
2926               if (oip == NULL) continue;
2927               field = oip->str;
2928               if (StringHasNoText (field)) continue;
2929               if (StringCmp (field, "StructuredCommentPrefix") == 0) {
2930                 if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
2931                   firstGenAnnotSCAD = uop;
2932                   genomeBuildNumber = NULL;
2933                   genomeVersionNumber = NULL;
2934                   firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
2935                 }
2936               }
2937             }
2938           }
2939         }
2940       }
2941     }
2942     adp = SeqMgrGetNextAnnotDesc (bsp, adp, Annot_descr_user, &acontext);
2943   }
2944 
2945   /*
2946   also look on first far sequence component of NCBI_GENOMES records
2947   */
2948 
2949   if (awp->isNCBIGenomes && firstGenAnnotSCAD == NULL && bsp->repr == Seq_repr_delta && bsp->seq_ext_type == 4) {
2950     for (dsp = (DeltaSeqPtr) (bsp->seq_ext); dsp != NULL; dsp = dsp->next) {
2951       if (dsp->choice != 1) continue;
2952       slp = (SeqLocPtr) dsp->data.ptrvalue;
2953       if (slp == NULL) continue;
2954       sip = SeqLocId (slp);
2955       if (sip == NULL) continue;
2956       farbsp = BioseqLockById (sip);
2957       if (farbsp == NULL) break;
2958       fareid = ObjMgrGetEntityIDForPointer (farbsp);
2959       SeqMgrIndexFeatures (fareid, NULL);
2960       adp = SeqMgrGetNextAnnotDesc (farbsp, NULL, Annot_descr_user, &acontext);
2961       while (adp != NULL && firstGenAnnotSCAD == NULL) {
2962         uop = (UserObjectPtr) adp->data.ptrvalue;
2963         if (uop != NULL) {
2964           oip = uop->type;
2965           if (oip != NULL) {
2966             if (StringICmp (oip->str, "StructuredComment") == 0) {
2967               if (firstGenAnnotSCAD == NULL) {
2968                 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2969                   if (ufp->choice != 1) continue;
2970                   oip = ufp->label;
2971                   if (oip == NULL) continue;
2972                   field = oip->str;
2973                   if (StringHasNoText (field)) continue;
2974                   if (StringCmp (field, "StructuredCommentPrefix") == 0) {
2975                     if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
2976                       firstGenAnnotSCAD = uop;
2977                       genomeBuildNumber = NULL;
2978                       genomeVersionNumber = NULL;
2979                       firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
2980                     }
2981                   }
2982                 }
2983               }
2984             }
2985           }
2986         }
2987         adp = SeqMgrGetNextAnnotDesc (farbsp, adp, Annot_descr_user, &acontext);
2988       }
2989       if (firstGenAnnotSCAD == NULL) {
2990         sdp = SeqMgrGetNextDescriptor (farbsp, NULL, Seq_descr_user, &dcontext);
2991         while (sdp != NULL) {
2992           uop = (UserObjectPtr) sdp->data.ptrvalue;
2993           if (uop != NULL) {
2994             oip = uop->type;
2995             if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
2996               for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2997                 if (ufp->choice != 1) continue;
2998                 oip = ufp->label;
2999                 if (oip == NULL) continue;
3000                 field = oip->str;
3001                 if (StringHasNoText (field)) continue;
3002                 if (StringCmp (field, "StructuredCommentPrefix") == 0) {
3003                   if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
3004                     firstGenAnnotSCAD = uop;
3005                     genomeBuildNumber = NULL;
3006                     genomeVersionNumber = NULL;
3007                     firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
3008                   }
3009                 }
3010               }
3011             }
3012           }
3013           sdp = SeqMgrGetNextDescriptor (farbsp, sdp, Seq_descr_user, &dcontext);
3014         }
3015       }
3016       BioseqUnlock (farbsp);
3017       break;
3018     }
3019   }
3020 
3021   /*
3022   also look for Genome-Annotation structured comment descriptor to suppress GenomeBuild user object
3023   */
3024 
3025   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
3026   while (sdp != NULL) {
3027     uop = (UserObjectPtr) sdp->data.ptrvalue;
3028     if (uop != NULL) {
3029       oip = uop->type;
3030       if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
3031         for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
3032           if (ufp->choice != 1) continue;
3033           oip = ufp->label;
3034           if (oip == NULL) continue;
3035           field = oip->str;
3036           if (StringHasNoText (field)) continue;
3037           if (StringCmp (field, "StructuredCommentPrefix") == 0) {
3038             if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
3039               genomeBuildNumber = NULL;
3040               genomeVersionNumber = NULL;
3041               if (firstGenAnnotSCAD == NULL) {
3042                 firstGenAnnotSCAD = uop;
3043                 firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
3044               }
3045             }
3046           }
3047         }
3048       }
3049     }
3050     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
3051   }
3052 
3053   gi = 0;
3054   for (sip = bsp->id; sip != NULL; sip = sip->next) {
3055     tsip = NULL;
3056     if (sip->choice == SEQID_OTHER) {
3057       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3058 
3059       if (tsip != NULL) {
3060         is_other = TRUE;
3061         if (StringNCmp (tsip->accession, "NC_", 3) == 0 || StringNCmp (tsip->accession, "AC_", 3) == 0) {
3062           if (hasRefTrackStatus) {
3063             /* will print elsewhere */
3064           } else if (! StringHasNoText (genomeBuildNumber)) {
3065             cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3066             if (cbp != NULL) {
3067 
3068               cbp->entityID = awp->entityID;
3069               cbp->first = first;
3070               cbp->no_blank_before = last_had_tilde;
3071               first = FALSE;
3072 
3073               if (cbp->first) {
3074                 FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3075               } else {
3076                 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3077               }
3078 
3079               FFAddOneString (ffstring, "GENOME ANNOTATION ", FALSE, FALSE, TILDE_IGNORE);
3080 
3081               if ( GetWWW(ajp) ) {
3082                 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3083                 FF_Add_NCBI_Base_URL (ffstring, ref_link);
3084                 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3085               }
3086               FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
3087               if ( GetWWW(ajp) ) {
3088                 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3089               }
3090               FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
3091 
3092               FFAddOneString (ffstring, reftxt22, FALSE, FALSE, TILDE_EXPAND);
3093               FFAddOneString (ffstring, genomeBuildNumber, FALSE, FALSE, TILDE_EXPAND);
3094               if (StringHasNoText (genomeVersionNumber)) {
3095                 genomeVersionNumber = "1";
3096               }
3097               FFAddOneString (ffstring, " version ", FALSE, FALSE, TILDE_EXPAND);
3098               FFAddOneString (ffstring, genomeVersionNumber, FALSE, FALSE, TILDE_EXPAND);
3099               FFAddOneString (ffstring, reftxt23, FALSE, FALSE, TILDE_EXPAND);
3100 
3101               FFAddOneString (ffstring, " [see ", FALSE, FALSE, TILDE_EXPAND);
3102 
3103               if ( GetWWW(ajp) ) {
3104                 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3105                 FF_Add_NCBI_Base_URL (ffstring, doc_link);
3106                 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3107               }
3108               FFAddOneString (ffstring, "documentation", FALSE, FALSE, TILDE_IGNORE);
3109               if ( GetWWW(ajp) ) {
3110                 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3111               }
3112 
3113               FFAddOneString (ffstring, "].", FALSE, FALSE, TILDE_EXPAND);
3114 
3115               cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
3116               FFRecycleString(ajp, ffstring);
3117               ffstring = FFGetString(ajp);
3118 
3119               last_had_tilde = FALSE;
3120               if (awp->afp != NULL) {
3121                 DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3122               }
3123             }
3124           }
3125 
3126         } else if (StringNCmp(tsip->accession, "NT_", 3) == 0 || StringNCmp(tsip->accession, "NW_", 3) == 0) {
3127 
3128           if (is_encode) {
3129             cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3130             if (cbp != NULL) {
3131 
3132               cbp->entityID = awp->entityID;
3133               cbp->first = first;
3134               cbp->no_blank_before = last_had_tilde;
3135               first = FALSE;
3136 
3137               if (cbp->first) {
3138                 FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3139               } else {
3140                 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3141               }
3142 
3143               FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
3144               FFAddOneString (ffstring, ":  ", FALSE, FALSE, TILDE_IGNORE);
3145 
3146               FFAddOneString (ffstring, "This record was provided by the ", FALSE, FALSE, TILDE_EXPAND);
3147               if ( GetWWW(ajp) ) {
3148                 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3149                 FF_Add_NCBI_Base_URL (ffstring, link_encode);
3150                 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3151               }
3152               FFAddOneString (ffstring, "ENCODE", FALSE, FALSE, TILDE_EXPAND);
3153               if ( GetWWW(ajp) ) {
3154                 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3155               }
3156               FFAddOneString (ffstring, " project.", FALSE, FALSE, TILDE_EXPAND);
3157 
3158               str = GetEncodeString (encodeUop, bsp);
3159               if (str != NULL) {
3160                 FFAddOneString (ffstring, "  ", FALSE, FALSE, TILDE_EXPAND);
3161                 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
3162               }
3163               MemFree (str);
3164 
3165               cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
3166               FFRecycleString(ajp, ffstring);
3167               ffstring = FFGetString(ajp);
3168 
3169               last_had_tilde = FALSE;
3170               if (awp->afp != NULL) {
3171                 DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3172               }
3173             }
3174 
3175           } else if (! hasRefTrackStatus) {
3176 
3177             cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3178             if (cbp != NULL) {
3179 
3180               cbp->entityID = awp->entityID;
3181               cbp->first = first;
3182               cbp->no_blank_before = last_had_tilde;
3183               first = FALSE;
3184 
3185               if (cbp->first) {
3186                 FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3187               } else {
3188                 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3189               }
3190 
3191               FFAddOneString (ffstring, "GENOME ANNOTATION ", FALSE, FALSE, TILDE_IGNORE);
3192 
3193               if ( GetWWW(ajp) ) {
3194                 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3195                 FF_Add_NCBI_Base_URL (ffstring, ref_link);
3196                 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3197               }
3198               FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
3199               if ( GetWWW(ajp) ) {
3200                 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3201               }
3202               FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
3203 
3204               if (! StringHasNoText (genomeBuildNumber)) {
3205                 FFAddOneString (ffstring, reftxt22, FALSE, FALSE, TILDE_EXPAND);
3206                 FFAddOneString (ffstring, genomeBuildNumber, FALSE, FALSE, TILDE_EXPAND);
3207                 if (StringHasNoText (genomeVersionNumber)) {
3208                   genomeVersionNumber = "1";
3209                 }
3210                 FFAddOneString (ffstring, " version ", FALSE, FALSE, TILDE_EXPAND);
3211                 FFAddOneString (ffstring, genomeVersionNumber, FALSE, FALSE, TILDE_EXPAND);
3212                 FFAddOneString (ffstring, reftxt23, FALSE, FALSE, TILDE_EXPAND);
3213 
3214                 FFAddOneString (ffstring, " [see ", FALSE, FALSE, TILDE_EXPAND);
3215 
3216                 if ( GetWWW(ajp) ) {
3217                   FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3218                   FF_Add_NCBI_Base_URL (ffstring, doc_link);
3219                   FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3220                 }
3221                 FFAddOneString (ffstring, "documentation", FALSE, FALSE, TILDE_IGNORE);
3222                 if ( GetWWW(ajp) ) {
3223                   FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3224                 }
3225 
3226                 FFAddOneString (ffstring, "].", FALSE, FALSE, TILDE_EXPAND);
3227               } else {
3228 
3229                 FFAddOneString (ffstring, reftxt21, TRUE, FALSE, TILDE_EXPAND);
3230 
3231                 FFAddOneString (ffstring, "~Also see:~    ", FALSE, FALSE, TILDE_EXPAND);
3232 
3233                 if ( GetWWW(ajp) ) {
3234                   FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3235                   FF_Add_NCBI_Base_URL (ffstring, doc_link);
3236                   FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3237                 }
3238                 FFAddOneString (ffstring, "Documentation", FALSE, FALSE, TILDE_IGNORE);
3239                 if ( GetWWW(ajp) ) {
3240                   FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3241                 }
3242 
3243                 FFAddOneString (ffstring, " of NCBI's Annotation Process~    ", FALSE, FALSE, TILDE_EXPAND);
3244               }
3245 
3246               cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
3247               FFRecycleString(ajp, ffstring);
3248               ffstring = FFGetString(ajp);
3249 
3250               last_had_tilde = TRUE;
3251               if (awp->afp != NULL) {
3252                 DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3253               }
3254             }
3255           }
3256 
3257         } else if (StringNCmp(tsip->accession, "XP_", 3) == 0 ||
3258                    StringNCmp(tsip->accession, "XM_", 3) == 0 ||
3259                    StringNCmp(tsip->accession, "XR_", 3) == 0 ||
3260                    StringNCmp(tsip->accession, "ZP_", 3) == 0) {
3261 
3262           name = NULL;
3263           gi = 0;
3264           version = 0;
3265           left = 0;
3266           right = 0;
3267           method = NULL;
3268           mrnaEv = FALSE;
3269           estEv = FALSE;
3270           if (GetAnnotationComment (bsp, &name, &assm, &gi, &left, &right, &method, &mrnaEv, &estEv)) {
3271 
3272             cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3273             if (cbp != NULL) {
3274 
3275               cbp->entityID = awp->entityID;
3276               cbp->first = first;
3277               cbp->no_blank_before = last_had_tilde;
3278               first = FALSE;
3279 
3280               if (cbp->first) {
3281                 FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3282               } else {
3283                 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3284               }
3285 
3286               FFAddOneString (ffstring, "MODEL ", FALSE, FALSE, TILDE_IGNORE);
3287 
3288               if ( GetWWW(ajp) ) {
3289                 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3290                 FF_Add_NCBI_Base_URL (ffstring, ref_link);
3291                 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3292               }
3293               FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
3294               if ( GetWWW(ajp) ) {
3295                 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3296               }
3297               FFAddOneString (ffstring, ":  ", FALSE, FALSE, TILDE_IGNORE);
3298 
3299               FFAddTextToString (ffstring, NULL, reftxt11, " (", FALSE, FALSE, TILDE_IGNORE);
3300 
3301               if ( GetWWW(ajp) ) {
3302                 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3303                 if (IS_protdb_accession (name)) {
3304                   FF_Add_NCBI_Base_URL (ffstring, link_seqp);
3305                 } else {
3306                   FF_Add_NCBI_Base_URL (ffstring, link_seqn);
3307                 }
3308                 if (gi > 0) {
3309                   sprintf (tmp, "%ld", (long) gi);
3310                   FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
3311                   msip = GetSeqIdForGI (gi);
3312                   if (msip != NULL) {
3313                     switch (msip->choice) {
3314                       case SEQID_GENBANK:
3315                       case SEQID_EMBL:
3316                       case SEQID_DDBJ:
3317                       case SEQID_OTHER:
3318                       case SEQID_TPG:
3319                       case SEQID_TPE:
3320                       case SEQID_TPD:
3321                       case SEQID_PIR:
3322                       case SEQID_SWISSPROT:
3323                         tsip = (TextSeqIdPtr) msip->data.ptrvalue;
3324                         if (tsip != NULL) {
3325                           if (StringICmp (name, tsip->accession) == 0) {
3326                             version = tsip->version;
3327                           }
3328                         }
3329                         break;
3330                       default:
3331                         break;
3332                     }
3333                   }
3334                 } else if (ValidateAccnDotVer (name) == 0 && GetGiFromAccnDotVer (name, &gi)) {
3335                   sprintf (tmp, "%ld", (long) gi);
3336                   FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
3337                 } else {
3338                   FFAddOneString (ffstring, name, FALSE, FALSE, TILDE_IGNORE);
3339                 }
3340                 gi = 0;
3341                 FFAddOneString (ffstring, "?report=graph", FALSE, FALSE, TILDE_IGNORE);
3342                 if (left > 0 && right > 0) {
3343                   if (left > 500) {
3344                     left -= 500;
3345                   } else {
3346                     left = 1;
3347                   }
3348                   right += 500;
3349                   sprintf (tmp, "&v=%ld:%ld", (long) left, (long) right);
3350                   FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
3351                 }
3352                 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3353                 FFAddOneString (ffstring, name, FALSE, FALSE, TILDE_IGNORE);
3354                 if (version > 0 && StringChr (name, '.') == NULL) {
3355                   sprintf (tmp, ".%ld", (long) version);
3356                   FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
3357                 }
3358                 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3359               } else if (StringChr (name, '.') == NULL && gi > 0) {
3360                 msip = GetSeqIdForGI (gi);
3361                 if (msip != NULL) {
3362                   switch (msip->choice) {
3363                     case SEQID_GENBANK:
3364                     case SEQID_EMBL:
3365                     case SEQID_DDBJ:
3366                     case SEQID_OTHER:
3367                     case SEQID_TPG:
3368                     case SEQID_TPE:
3369                     case SEQID_TPD:
3370                     case SEQID_PIR:
3371                     case SEQID_SWISSPROT:
3372                       tsip = (TextSeqIdPtr) msip->data.ptrvalue;
3373                       if (tsip != NULL) {
3374                         if (StringICmp (name, tsip->accession) == 0) {
3375                           version = tsip->version;
3376                         }
3377                       }
3378                       break;
3379                     default:
3380                       break;
3381                   }
3382                 }
3383                 FFAddOneString (ffstring, name, FALSE, FALSE, TILDE_IGNORE);
3384                 if (version > 0) {
3385                   sprintf (tmp, ".%ld", (long) version);
3386                   FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
3387                 }
3388               } else {
3389                 FFAddOneString (ffstring, name, FALSE, FALSE, TILDE_IGNORE);
3390               }
3391 
3392               FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
3393 
3394               if (assm != NULL) {
3395 
3396                 plural = " (";
3397                 count = 0;
3398                 for (tufp = assm->data.ptrvalue; tufp != NULL; tufp = tufp->next)  {
3399                   ufp = tufp->data.ptrvalue;
3400                   if (ufp != NULL) {
3401                     oip = ufp->label;
3402                     if (oip != NULL && oip->str != NULL && StringICmp (oip->str, "accession") == 0 && ufp->choice == 1) {
3403                       accn = (CharPtr) ufp->data.ptrvalue;
3404                       if (StringDoesHaveText (accn)) {
3405                         count++;
3406                       }
3407                     }
3408                   }
3409                 }
3410                 if (count > 1) {
3411                   plural = "s (";
3412                 }
3413 
3414                 if (count > 0) {
3415                   FFAddTextToString (ffstring, " ", reftxt13, plural, FALSE, FALSE, TILDE_IGNORE);
3416 
3417                   for (tufp = assm->data.ptrvalue; tufp != NULL; tufp = tufp->next) {
3418                     accn = NULL;
3419                     ufp = tufp->data.ptrvalue;
3420                     if (ufp != NULL) {
3421                       oip = ufp->label;
3422                       if (oip != NULL && oip->str != NULL && StringICmp (oip->str, "accession") == 0 && ufp->choice == 1) {
3423                         accn = (CharPtr) ufp->data.ptrvalue;
3424                       }
3425                     }
3426                     if (StringDoesHaveText (accn)) {
3427                       if (GetWWW (ajp) && ValidateAccnDotVer (accn) == 0 && GetGiFromAccnDotVer (accn, &gi)) {
3428                         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3429                         if (IS_protdb_accession (nm)) {
3430                             FF_Add_NCBI_Base_URL (ffstring, link_seqp);
3431                         } else {
3432                             FF_Add_NCBI_Base_URL (ffstring, link_seqn);
3433                         }
3434                         if (gi > 0) {
3435                           sprintf (buf, "%ld", (long) gi);
3436                           FFAddTextToString(ffstring, /* "val=" */ NULL, buf, "\">", FALSE, FALSE, TILDE_IGNORE);
3437                         } else {
3438                           FFAddTextToString(ffstring, /* "val=" */ NULL, accn, "\">", FALSE, FALSE, TILDE_IGNORE);
3439                         }
3440                         FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
3441                         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3442                       } else if (GetWWW (ajp) && ValidateAccn (accn) == 0) {
3443                         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3444                         if (IS_protdb_accession (nm)) {
3445                             FF_Add_NCBI_Base_URL (ffstring, link_seqp);
3446                           } else {
3447                           FF_Add_NCBI_Base_URL (ffstring, link_seqn);
3448                         }
3449                         FFAddTextToString(ffstring, /* "val=" */ NULL, accn, "\">", FALSE, FALSE, TILDE_IGNORE);
3450                         FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
3451                         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3452                       } else {
3453                         FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
3454                       }
3455                     } else if (StringDoesHaveText (nm)) {
3456                       FFAddOneString (ffstring, nm, FALSE, FALSE, TILDE_IGNORE);
3457                     } else continue;
3458                     if (tufp->next != NULL) {
3459                       ufp = tufp->next;
3460                       if (ufp->next != NULL) {
3461                         FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3462                       } else {
3463                         FFAddOneString (ffstring, " and ", FALSE, FALSE, TILDE_IGNORE);
3464                       }
3465                     }
3466                   }
3467 
3468                   FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
3469                 }
3470               }
3471 
3472               if (method != NULL) {
3473                 FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE);
3474                 FFAddOneString (ffstring, reftxt12, FALSE, FALSE, TILDE_IGNORE);
3475                 FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE);
3476                 FFAddOneString (ffstring, method, FALSE, FALSE, TILDE_IGNORE);
3477               }
3478 
3479               if (mrnaEv || estEv) {
3480                 FFAddOneString (ffstring, ", supported by ", FALSE, FALSE, TILDE_IGNORE);
3481                 if (mrnaEv && estEv) {
3482                   FFAddOneString (ffstring, "mRNA and EST ", FALSE, FALSE, TILDE_IGNORE);
3483                 } else if (mrnaEv) {
3484                   FFAddOneString (ffstring, "mRNA ", FALSE, FALSE, TILDE_IGNORE);
3485                 } else {
3486                   FFAddOneString (ffstring, "EST ", FALSE, FALSE, TILDE_IGNORE);
3487                 }
3488                 geneName = NULL;
3489                 locusID [0] = '\0';
3490                 taxID [0] = '\0';
3491                 if ( GetWWW(ajp) && GetGeneAndLocus (bsp, &geneName, locusID, taxID)) {
3492                   FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3493                   FF_Add_NCBI_Base_URL (ffstring, ev_link);
3494                   FFAddTextToString (ffstring, "contig=", name, NULL, FALSE, FALSE, TILDE_IGNORE);
3495                   FFAddTextToString (ffstring, "&gene=", geneName, NULL, FALSE, FALSE, TILDE_IGNORE);
3496                   FFAddTextToString (ffstring, "&lid=", locusID, NULL, FALSE, FALSE, TILDE_IGNORE);
3497                   if (! StringHasNoText (taxID)) {
3498                     FFAddTextToString (ffstring, "&taxid=", taxID, NULL, FALSE, FALSE, TILDE_IGNORE);
3499                   }
3500                   FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3501                   FFAddOneString (ffstring, "evidence", FALSE, FALSE, TILDE_IGNORE);
3502                   FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3503                 } else {
3504                   FFAddOneString (ffstring, "evidence", FALSE, FALSE, TILDE_IGNORE);
3505                 }
3506               }
3507 
3508               FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
3509 
3510               FFAddOneString (ffstring, "~Also see:~    ", FALSE, FALSE, TILDE_EXPAND);
3511 
3512               if ( GetWWW(ajp) ) {
3513                 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3514                 FF_Add_NCBI_Base_URL (ffstring, doc_link);
3515                 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3516               }
3517               FFAddOneString (ffstring, "Documentation", FALSE, FALSE, TILDE_IGNORE);
3518               if ( GetWWW(ajp) ) {
3519                 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3520               }
3521 
3522               FFAddOneString (ffstring, " of NCBI's Annotation Process~    ", FALSE, FALSE, TILDE_EXPAND);
3523 
3524               cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
3525               FFRecycleString(ajp, ffstring);
3526               ffstring = FFGetString(ajp);
3527 
3528               last_had_tilde = TRUE;
3529               if (awp->afp != NULL) {
3530                 DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3531               }
3532             }
3533           }
3534         } else if (StringNCmp(tsip->accession, "WP_", 3) == 0) {
3535           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3536           if (cbp != NULL) {
3537 
3538             cbp->entityID = awp->entityID;
3539             cbp->itemID = unverified_itemID;
3540             cbp->itemtype = OBJ_SEQDESC;
3541             cbp->first = first;
3542             cbp->no_blank_before = last_had_tilde;
3543             first = FALSE;
3544 
3545             if (cbp->first) {
3546               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3547             } else {
3548               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3549             }
3550 
3551             FFAddOneString (ffstring, "REFSEQ:", FALSE, FALSE, TILDE_IGNORE);
3552             FFAddOneString (ffstring, reftxt51, FALSE, FALSE, TILDE_IGNORE);
3553             FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
3554 
3555             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
3556             FFRecycleString(ajp, ffstring);
3557             ffstring = FFGetString(ajp);
3558 
3559             last_had_tilde = FALSE;
3560             if (awp->afp != NULL) {
3561               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3562             }
3563           }
3564         } else if (StringNCmp(tsip->accession, "NZ_", 3) == 0) {
3565           if (StringLen (tsip->accession) == 15) {
3566             is_wgs = TRUE;
3567             if (StringCmp (tsip->accession + 9, "000000") == 0) {
3568               wgsaccn = tsip->accession;
3569               wgsname = tsip->name;
3570             }
3571           } else if (StringLen (tsip->accession) == 16) {
3572             is_wgs = TRUE;
3573             if (StringCmp (tsip->accession + 10, "000000") == 0) {
3574               wgsaccn = tsip->accession;
3575               wgsname = tsip->name;
3576             }
3577           }
3578        } else {
3579           if (StringLen (tsip->accession) == 15) {
3580             is_wgs = TRUE;
3581             if (StringCmp (tsip->accession + 9, "000000") == 0) {
3582               wgsaccn = tsip->accession;
3583               wgsname = tsip->name; /* master accession has 8 zeroes, name has project version plus 6 zeroes */
3584             }
3585           }
3586         }
3587       }
3588 
3589     } else if (sip->choice == SEQID_TPG || sip->choice == SEQID_TPE || sip->choice == SEQID_TPD) {
3590 
3591       is_tpa = TRUE;
3592 
3593       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3594       if (tsip != NULL && tsip->accession != NULL) {
3595         acclen = StringLen (tsip->accession);
3596         tsaaccn = tsip->accession;
3597         tsaname = tsip->name;
3598         if (acclen == 12) {
3599           is_wgs = TRUE;
3600           if (StringCmp (tsip->accession + 6, "000000") == 0) {
3601             wgsaccn = tsip->accession;
3602             wgsname = tsip->name; /* master accession has 8 zeroes, name has project version plus 6 zeroes */
3603           }
3604         } else if (acclen == 13) {
3605           is_wgs = TRUE;
3606           if (StringCmp (tsip->accession + 6, "0000000") == 0) {
3607             wgsaccn = tsip->accession;
3608             wgsname = tsip->name; /* master accession has 9 zeroes, name has project version plus 7 zeroes */
3609           }
3610         } else if (acclen == 14) {
3611           is_wgs = TRUE;
3612           if (StringCmp (tsip->accession + 6, "00000000") == 0) {
3613             wgsaccn = tsip->accession;
3614             wgsname = tsip->name; /* master accession has 10 zeroes, name has project version plus 8 zeroes */
3615           }
3616         } else if (ajp->newSourceOrg && StringLen (tsip->accession) == 6) {
3617           ch = tsip->accession [0];
3618           if (ch == 'J' || ch == 'K' || ch == 'L' || ch == 'M') {
3619             showGBBSource = TRUE;
3620           }
3621         }
3622       }
3623 
3624     } else if (sip->choice == SEQID_GENBANK || sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
3625 
3626       is_collab = TRUE;
3627 
3628       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3629       if (tsip != NULL && tsip->accession != NULL) {
3630         acclen = StringLen (tsip->accession);
3631         tsaaccn = tsip->accession;
3632         tsaname = tsip->name;
3633         if (acclen == 12) {
3634           is_wgs = TRUE;
3635           if (StringCmp (tsip->accession + 6, "000000") == 0) {
3636             wgsaccn = tsip->accession;
3637             wgsname = tsip->name; /* master accession has 8 zeroes, name has project version plus 6 zeroes */
3638           }
3639         } else if (acclen == 13) {
3640           is_wgs = TRUE;
3641           if (StringCmp (tsip->accession + 6, "0000000") == 0) {
3642             wgsaccn = tsip->accession;
3643             wgsname = tsip->name; /* master accession has 9 zeroes, name has project version plus 7 zeroes */
3644           }
3645         } else if (acclen == 14) {
3646           is_wgs = TRUE;
3647           if (StringCmp (tsip->accession + 6, "00000000") == 0) {
3648             wgsaccn = tsip->accession;
3649             wgsname = tsip->name; /* master accession has 10 zeroes, name has project version plus 8 zeroes */
3650           }
3651         } else if (ajp->newSourceOrg && StringLen (tsip->accession) == 6) {
3652           ch = tsip->accession [0];
3653           if (ch == 'J' || ch == 'K' || ch == 'L' || ch == 'M') {
3654             showGBBSource = TRUE;
3655           }
3656         }
3657       }
3658 
3659     } else if (sip->choice == SEQID_GENERAL) {
3660       dbt = (DbtagPtr) sip->data.ptrvalue;
3661 
3662       /* show GSDB sequence identifier */
3663 
3664       if (dbt != NULL && StringCmp (dbt->db, "GSDB") == 0 && dbt->tag != NULL) {
3665         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3666         if (cbp != NULL) {
3667 
3668           cbp->entityID = awp->entityID;
3669           cbp->first = first;
3670           cbp->no_blank_before = last_had_tilde;
3671           first = FALSE;
3672 
3673           /* string will be created after we know if there are additional comments */
3674 
3675           gsdbid = dbt->tag->id;
3676           sprintf (buf, "GSDB:S:%ld.", (long) gsdbid);
3677 
3678           if (cbp->first) {
3679             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3680           } else {
3681             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3682           }
3683 
3684           /* CheckEndPunctuation, ConvertDoubleQuotes, and ExpandTildes already taken into account */
3685 
3686           FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
3687 
3688           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
3689           FFRecycleString(ajp, ffstring);
3690           ffstring = FFGetString(ajp);
3691 
3692           last_had_tilde = FALSE;
3693           if (awp->afp != NULL) {
3694             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3695           }
3696         }
3697       } else if (dbt != NULL && StringCmp (dbt->db, "NCBIFILE") == 0 && dbt->tag != NULL) {
3698         ncbifileID = dbt->tag;
3699       }
3700 
3701     } else if (sip->choice == SEQID_GI) {
3702       gi = (BIG_ID) sip->data.intvalue;
3703 
3704     } else if (sip->choice == SEQID_LOCAL) {
3705       localID = (ObjectIdPtr) sip->data.ptrvalue;
3706     }
3707 
3708     if (tsip != NULL) {
3709       tlstsip = tsip;
3710     }
3711   }
3712 
3713   origLocalID = FastaGetOriginalId (bsp);
3714 
3715   if (localID != NULL) {
3716     if (is_tpa || is_collab) {
3717       if (awp->mode == SEQUIN_MODE || awp->mode == DUMP_MODE) {
3718         buf [0] = '\0';
3719         if (StringDoesHaveText (origLocalID)) {
3720           if (StringLen (origLocalID) < 1000) {
3721             sprintf (buf, "LocalID: %s", origLocalID);
3722             showedLocalID = TRUE;
3723           } else {
3724             sprintf (buf, "LocalID string too large");
3725           }
3726         } else if (! StringHasNoText (localID->str)) {
3727           if (StringLen (localID->str) < 1000) {
3728             sprintf (buf, "LocalID: %s", localID->str);
3729             showedLocalID = TRUE;
3730           } else {
3731             sprintf (buf, "LocalID string too large");
3732           }
3733         } else {
3734           sprintf (buf, "LocalID: %ld", (long) localID->id);
3735           showedLocalID = TRUE;
3736         }
3737 
3738         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3739         if (cbp != NULL) {
3740 
3741           cbp->entityID = awp->entityID;
3742           cbp->first = first;
3743           cbp->no_blank_before = last_had_tilde;
3744           first = FALSE;
3745 
3746           if (cbp->first) {
3747             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3748           } else {
3749             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3750           }
3751 
3752           FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
3753 
3754           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
3755           FFRecycleString(ajp, ffstring);
3756           ffstring = FFGetString(ajp);
3757 
3758           last_had_tilde = FALSE;
3759           if (awp->afp != NULL) {
3760             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3761           }
3762         }
3763       }
3764     }
3765   }
3766 
3767   if (ncbifileID != NULL) {
3768     if (is_tpa || is_collab) {
3769       if (awp->mode == SEQUIN_MODE || awp->mode == DUMP_MODE) {
3770         buf [0] = '\0';
3771         if (! StringHasNoText (ncbifileID->str)) {
3772           if (StringLen (ncbifileID->str) < 1000) {
3773             sprintf (buf, "FileID: %s", ncbifileID->str);
3774           } else {
3775             sprintf (buf, "FileID string too large");
3776           }
3777         } else {
3778           sprintf (buf, "FileID: %ld", (long) ncbifileID->id);
3779         }
3780 
3781         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3782         if (cbp != NULL) {
3783 
3784           cbp->entityID = awp->entityID;
3785           cbp->first = first;
3786           cbp->no_blank_before = last_had_tilde;
3787           first = FALSE;
3788 
3789           if (cbp->first) {
3790             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3791           } else {
3792             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3793           }
3794 
3795           FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
3796 
3797           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
3798           FFRecycleString(ajp, ffstring);
3799           ffstring = FFGetString(ajp);
3800 
3801           last_had_tilde = FALSE;
3802           if (awp->afp != NULL) {
3803             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3804           }
3805         }
3806       }
3807     }
3808   }
3809 
3810   /* RefSeq results in allocated comment string */
3811 
3812   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
3813   while (sdp != NULL) {
3814 
3815     uop = (UserObjectPtr) sdp->data.ptrvalue;
3816     if (uop != NULL) {
3817 
3818       if (! didTPA) {
3819         str = GetStrForTPA (uop, bsp);
3820         if (str != NULL) {
3821 
3822           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3823           if (cbp != NULL) {
3824 
3825             cbp->entityID = dcontext.entityID;
3826             cbp->itemID = dcontext.itemID;
3827             cbp->itemtype = OBJ_SEQDESC;
3828             cbp->first = first;
3829             cbp->no_blank_before = last_had_tilde;
3830             first = FALSE;
3831 
3832             if (cbp->first) {
3833               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3834             } else {
3835               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3836             }
3837 
3838             FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
3839 
3840             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
3841             FFRecycleString(ajp, ffstring);
3842             ffstring = FFGetString(ajp);
3843 
3844             last_had_tilde = FALSE;
3845             if (awp->afp != NULL) {
3846               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3847             }
3848           }
3849           MemFree (str);
3850           didTPA = TRUE;
3851         }
3852       }
3853 
3854       if (! ajp->flags.hideBankItComment) {
3855         str = GetStrForBankit (uop, (Boolean) (awp->mode == DUMP_MODE),
3856                                (Boolean) (showedLocalID && awp->mode == SEQUIN_MODE));
3857         if (str != NULL) {
3858 
3859           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3860           if (cbp != NULL) {
3861 
3862             cbp->entityID = dcontext.entityID;
3863             cbp->itemID = dcontext.itemID;
3864             cbp->itemtype = OBJ_SEQDESC;
3865             cbp->first = first;
3866             cbp->no_blank_before = last_had_tilde;
3867             first = FALSE;
3868 
3869             if (cbp->first) {
3870               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3871             } else {
3872               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3873             }
3874 
3875             FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_EXPAND);
3876 
3877             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
3878             FFRecycleString(ajp, ffstring);
3879             ffstring = FFGetString(ajp);
3880 
3881             last_had_tilde = FALSE;
3882             if (awp->afp != NULL) {
3883               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3884             }
3885           }
3886           MemFree (str);
3887         }
3888       }
3889 
3890       if (! didRefTrack) {
3891         str = GetStatusForRefTrack (uop);
3892         if (str != NULL) {
3893 
3894           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3895           if (cbp != NULL) {
3896 
3897             cbp->entityID = dcontext.entityID;
3898             cbp->itemID = dcontext.itemID;
3899             cbp->itemtype = OBJ_SEQDESC;
3900             cbp->first = first;
3901             cbp->no_blank_before = last_had_tilde;
3902             first = FALSE;
3903 
3904             if (cbp->first) {
3905               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3906             } else {
3907               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3908             }
3909 
3910             if (StringICmp (str, "Pipeline ") != 0) {
3911               FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
3912             }
3913 
3914             AddStrForRefTrack (ajp, ffstring, uop, ISA_na (bsp->mol), genomeBuildNumber, genomeVersionNumber);
3915 
3916             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
3917             FFRecycleString(ajp, ffstring);
3918             ffstring = FFGetString(ajp);
3919 
3920             last_had_tilde = FALSE;
3921             if (awp->afp != NULL) {
3922               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3923             }
3924           }
3925           /* do not free static str from GetStatusForRefTrack */
3926           didRefTrack = TRUE;
3927         }
3928       }
3929 
3930       if (! didGenome) {
3931         str = GetStrForGenome (uop, bsp);
3932         if (str != NULL) {
3933 
3934           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3935           if (cbp != NULL) {
3936 
3937             cbp->entityID = dcontext.entityID;
3938             cbp->itemID = dcontext.itemID;
3939             cbp->itemtype = OBJ_SEQDESC;
3940             cbp->first = first;
3941             cbp->no_blank_before = last_had_tilde;
3942             first = FALSE;
3943 
3944             if (cbp->first) {
3945               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3946             } else {
3947               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
3948             }
3949 
3950             FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
3951 
3952             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
3953             FFRecycleString(ajp, ffstring);
3954             ffstring = FFGetString(ajp);
3955 
3956             last_had_tilde = FALSE;
3957             if (awp->afp != NULL) {
3958               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
3959             }
3960           }
3961           MemFree (str);
3962           didGenome = TRUE;
3963         }
3964       }
3965     }
3966     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
3967   }
3968 
3969   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
3970   while (sdp != NULL) {
3971     uop = (UserObjectPtr) sdp->data.ptrvalue;
3972     if (uop != NULL) {
3973       oip = uop->type;
3974       if (oip != NULL && StringCmp (oip->str, "RefSeqGene") == 0) {
3975         for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
3976           oip = ufp->label;
3977           if (oip != NULL && StringCmp(oip->str, "Status") == 0 && ufp->choice == 1) {
3978             str = (CharPtr) ufp->data.ptrvalue;
3979             if (str != NULL && StringICmp (str, "Reference Standard") == 0) {
3980               isRefSeqStandard = TRUE;
3981             }
3982           }
3983         }
3984       }
3985     }
3986     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
3987   }
3988   if (isRefSeqStandard) {
3989     cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
3990     if (cbp != NULL) {
3991 
3992       cbp->entityID = awp->entityID;
3993       cbp->first = first;
3994       cbp->no_blank_before = last_had_tilde;
3995       first = FALSE;
3996 
3997       if (cbp->first) {
3998         FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
3999       } else {
4000         FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4001       }
4002 
4003         FFAddOneString (ffstring, "This sequence is a reference standard in the ",
4004                         FALSE, FALSE, TILDE_IGNORE);
4005       if ( GetWWW(ajp) ) {
4006         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4007         FF_Add_NCBI_Base_URL (ffstring, "https://www.ncbi.nlm.nih.gov/refseq/rsg/");
4008         FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
4009         FFAddOneString (ffstring, "RefSeqGene", FALSE, FALSE, TILDE_IGNORE);
4010         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4011       } else {
4012         FFAddOneString (ffstring, "RefSeqGene", FALSE, FALSE, TILDE_IGNORE);
4013       }
4014       FFAddOneString (ffstring, " project.", FALSE, FALSE, TILDE_IGNORE);
4015 
4016       cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4017       FFRecycleString(ajp, ffstring);
4018       ffstring = FFGetString(ajp);
4019 
4020       last_had_tilde = FALSE;
4021       if (awp->afp != NULL) {
4022         DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4023       }
4024     }
4025   }
4026 
4027   /*
4028   if (bsp->repr == Seq_repr_delta && bsp->seq_ext_type == 4 && is_wgs) {
4029     has_gaps = FALSE;
4030     for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp; dsp=dsp->next) {
4031       if (dsp->choice == 2) {
4032         litp = (SeqLitPtr) dsp->data.ptrvalue;
4033         if (litp != NULL) {
4034           if ((litp->seq_data == NULL || litp->seq_data_type == Seq_code_gap) &&
4035               litp->length > 0) {
4036             has_gaps = TRUE;
4037           }
4038         }
4039       }
4040     }
4041     if (has_gaps) {
4042       cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4043       if (cbp != NULL) {
4044 
4045         cbp->entityID = awp->entityID;
4046         cbp->first = first;
4047         cbp->no_blank_before = last_had_tilde;
4048         first = FALSE;
4049 
4050         if (cbp->first) {
4051           FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4052         } else {
4053           FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4054         }
4055 
4056         if (is_wgs) {
4057           FFAddOneString (ffstring, nsWGSGapsString, TRUE, FALSE, TILDE_EXPAND);
4058         } else {
4059           FFAddOneString (ffstring, nsAreGapsString, TRUE, FALSE, TILDE_EXPAND);
4060         }
4061 
4062         cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4063         FFRecycleString(ajp, ffstring);
4064         ffstring = FFGetString(ajp);
4065 
4066         last_had_tilde = FALSE;
4067         if (awp->afp != NULL) {
4068           DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4069         }
4070       }
4071     }
4072   }
4073   */
4074 
4075   /* Seq-hist results in allocated comment string */
4076 
4077   hist = bsp->hist;
4078   if (hist != NULL) {
4079 
4080     if (hist->replaced_by_ids != NULL && hist->replaced_by_date != NULL) {
4081 
4082       okay = TRUE;
4083       for (sip = hist->replaced_by_ids; sip != NULL; sip = sip->next) {
4084         if (sip->choice == SEQID_GI) {
4085           if (gi == (BIG_ID) sip->data.intvalue) {
4086             okay = FALSE;
4087           }
4088         }
4089       }
4090 
4091       if (okay) {
4092         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4093         if (cbp != NULL) {
4094 
4095           cbp->entityID = awp->entityID;
4096           cbp->first = first;
4097           cbp->no_blank_before = last_had_tilde;
4098           first = FALSE;
4099 
4100           if (cbp->first) {
4101             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4102           } else {
4103             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4104           }
4105 
4106           if (wgsaccn != NULL) {
4107             AddHistCommentString (ajp, ffstring, "[WARNING] On", "this project was updated. The new version is",
4108                                   hist->replaced_by_date, hist->replaced_by_ids, ISA_na (bsp->mol), TRUE);
4109           } else {
4110             AddHistCommentString (ajp, ffstring, "[WARNING] On", "this sequence was replaced by",
4111                                   hist->replaced_by_date, hist->replaced_by_ids, ISA_na (bsp->mol), FALSE);
4112           }
4113 
4114           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4115           FFRecycleString(ajp, ffstring);
4116           ffstring = FFGetString(ajp);
4117 
4118           last_had_tilde = FALSE;
4119           if (awp->afp != NULL) {
4120             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4121           }
4122         }
4123       }
4124     }
4125 
4126     if (hist->replace_ids != NULL && hist->replace_date != NULL && awp->mode != SEQUIN_MODE) {
4127 
4128       okay = TRUE;
4129       for (sip = hist->replace_ids; sip != NULL; sip = sip->next) {
4130         if (sip->choice == SEQID_GI) {
4131           if (gi == (BIG_ID) sip->data.intvalue) {
4132             okay = FALSE;
4133           }
4134         }
4135       }
4136 
4137       if (okay) {
4138         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4139         if (cbp != NULL) {
4140 
4141           cbp->entityID = awp->entityID;
4142           cbp->first = first;
4143           cbp->no_blank_before = last_had_tilde;
4144           first = FALSE;
4145 
4146           if (cbp->first) {
4147             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4148           } else {
4149             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4150           }
4151 
4152           AddHistCommentString (ajp, ffstring, "On", "this sequence version replaced",
4153                                 hist->replace_date, hist->replace_ids, ISA_na (bsp->mol), FALSE);
4154 
4155           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4156           FFRecycleString(ajp, ffstring);
4157           ffstring = FFGetString(ajp);
4158 
4159           last_had_tilde = FALSE;
4160           if (awp->afp != NULL) {
4161             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4162           }
4163         }
4164       }
4165     }
4166 
4167   }
4168 
4169   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
4170   while (sdp != NULL) {
4171     uop = (UserObjectPtr) sdp->data.ptrvalue;
4172     if (uop != NULL) {
4173       oip = uop->type;
4174       if (oip != NULL) {
4175         if (StringCmp (oip->str, "RefSeqGenome") == 0) {
4176           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4177           if (cbp != NULL) {
4178 
4179             cbp->entityID = dcontext.entityID;
4180             cbp->itemID = dcontext.itemID;
4181             cbp->itemtype = OBJ_SEQDESC;
4182             cbp->first = first;
4183             cbp->no_blank_before = last_had_tilde;
4184             first = FALSE;
4185 
4186             if (cbp->first) {
4187               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4188             } else {
4189               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4190             }
4191 
4192             AddStrForRefSeqGenome (ajp, ffstring, uop);
4193 
4194             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4195             FFRecycleString(ajp, ffstring);
4196             ffstring = FFGetString(ajp);
4197 
4198             last_had_tilde = FALSE;
4199             if (awp->afp != NULL) {
4200               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4201             }
4202           }
4203         }
4204       }
4205     }
4206     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
4207   }
4208 
4209 
4210 
4211   /* just save IDs for comment, maploc, and region descriptors */
4212 
4213   /*
4214   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext);
4215   while (sdp != NULL) {
4216     if (sdp->data.ptrvalue != NULL) {
4217       cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4218       if (cbp != NULL) {
4219         cbp->entityID = dcontext.entityID;
4220         cbp->itemID = dcontext.itemID;
4221         cbp->itemtype = OBJ_SEQDESC;
4222         cbp->first = first;
4223         cbp->no_blank_before = last_had_tilde;
4224         first = FALSE;
4225 
4226         last_had_tilde = FALSE;
4227         if (awp->afp != NULL) {
4228           DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4229         }
4230       }
4231     }
4232     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext);
4233   }
4234   */
4235 
  /* WGS, TSA, or TLS master comment goes before comment descriptors */
4237 
4238   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
4239   if (sdp != NULL) {
4240 
4241     mip = (MolInfoPtr) sdp->data.ptrvalue;
4242     if (mip != NULL) {
4243       if (mip->tech == MI_TECH_wgs) {
4244 
4245         if (wgsname != NULL) {
4246 
4247           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4248           if (cbp != NULL) {
4249 
4250             /*
4251             cbp->entityID = dcontext.entityID;
4252             cbp->itemID = dcontext.itemID;
4253             cbp->itemtype = OBJ_SEQDESC;
4254             */
4255             cbp->entityID = awp->entityID;
4256             cbp->first = first;
4257             cbp->no_blank_before = last_had_tilde;
4258             first = FALSE;
4259 
4260             if (cbp->first) {
4261               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4262             } else {
4263               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4264             }
4265 
4266             AddWGSMasterCommentString (ffstring, bsp, wgsaccn, wgsname);
4267 
4268             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4269             FFRecycleString(ajp, ffstring);
4270             ffstring = FFGetString(ajp);
4271 
4272             cbp->itemID = dcontext.itemID;
4273             cbp->itemtype = OBJ_SEQDESC;
4274             last_had_tilde = FALSE;
4275             if (awp->afp != NULL) {
4276               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4277             }
4278             cbp->itemID = 0;
4279             cbp->itemtype = 0;
4280           }
4281         }
4282       } else if (mip->tech == MI_TECH_tsa) {
4283 
4284         if (tsaname != NULL && bsp->repr == Seq_repr_virtual) {
4285 
4286           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4287           if (cbp != NULL) {
4288 
4289             /*
4290             cbp->entityID = dcontext.entityID;
4291             cbp->itemID = dcontext.itemID;
4292             cbp->itemtype = OBJ_SEQDESC;
4293             */
4294             cbp->entityID = awp->entityID;
4295             cbp->first = first;
4296             cbp->no_blank_before = last_had_tilde;
4297             first = FALSE;
4298 
4299             if (cbp->first) {
4300               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4301             } else {
4302               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4303             }
4304 
4305             AddTSAMasterCommentString (ffstring, bsp, tsaaccn, tsaname);
4306 
4307             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4308             FFRecycleString(ajp, ffstring);
4309             ffstring = FFGetString(ajp);
4310 
4311             cbp->itemID = dcontext.itemID;
4312             cbp->itemtype = OBJ_SEQDESC;
4313             last_had_tilde = FALSE;
4314             if (awp->afp != NULL) {
4315               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4316             }
4317             cbp->itemID = 0;
4318             cbp->itemtype = 0;
4319           }
4320         }
4321       } else if (mip->tech == MI_TECH_targeted) {
4322 
4323         if (tlstsip != NULL) {
4324           tlsaccn = tlstsip->accession;
4325           tlsname = tlstsip->name;
4326 
4327           if (tlsname != NULL && bsp->repr == Seq_repr_virtual) {
4328 
4329             cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4330             if (cbp != NULL) {
4331 
4332               /*
4333               cbp->entityID = dcontext.entityID;
4334               cbp->itemID = dcontext.itemID;
4335               cbp->itemtype = OBJ_SEQDESC;
4336               */
4337               cbp->entityID = awp->entityID;
4338               cbp->first = first;
4339               cbp->no_blank_before = last_had_tilde;
4340               first = FALSE;
4341 
4342               if (cbp->first) {
4343                 FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4344               } else {
4345                 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4346               }
4347 
4348               AddTLSMasterCommentString (ffstring, bsp, tlsaccn, tlsname);
4349 
4350               cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4351               FFRecycleString(ajp, ffstring);
4352               ffstring = FFGetString(ajp);
4353 
4354               cbp->itemID = dcontext.itemID;
4355               cbp->itemtype = OBJ_SEQDESC;
4356               last_had_tilde = FALSE;
4357               if (awp->afp != NULL) {
4358                 DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4359               }
4360               cbp->itemID = 0;
4361               cbp->itemtype = 0;
4362             }
4363           }
4364         }
4365       }
4366     }
4367   }
4368 
4369   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
4370   if (sdp != NULL) {
4371     gbp = (GBBlockPtr) sdp->data.ptrvalue;
4372     if (gbp != NULL) {
4373       unordered = FALSE;
4374       for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
4375         str = (CharPtr) vnp->data.ptrvalue;
4376         if (StringCmp (str, "UNORDERED") == 0) {
4377           unordered = TRUE;
4378         }
4379       }
4380       if (unordered) {
4381         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4382         if (cbp != NULL) {
4383 
4384           cbp->entityID = dcontext.entityID;
4385           cbp->itemID = dcontext.itemID;
4386           cbp->itemtype = OBJ_SEQDESC;
4387           cbp->first = first;
4388           cbp->no_blank_before = last_had_tilde;
4389           first = FALSE;
4390 
4391           if (cbp->first) {
4392             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4393           } else {
4394             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4395           }
4396 
4397           AddUnorderedCommentString (ffstring, bsp);
4398 
4399           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4400           FFRecycleString(ajp, ffstring);
4401           ffstring = FFGetString(ajp);
4402 
4403           last_had_tilde = FALSE;
4404           if (awp->afp != NULL) {
4405             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4406           }
4407         }
4408       }
4409     }
4410   }
4411 
4412   if (showGBBSource) {
4413     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
4414     if (sdp != NULL) {
4415       gbp = (GBBlockPtr) sdp->data.ptrvalue;
4416       if (gbp != NULL && (! StringHasNoText (gbp->source))) {
4417         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4418         if (cbp != NULL) {
4419 
4420           cbp->entityID = dcontext.entityID;
4421           cbp->itemID = dcontext.itemID;
4422           cbp->itemtype = OBJ_SEQDESC;
4423           cbp->first = first;
4424           cbp->no_blank_before = last_had_tilde;
4425           first = FALSE;
4426 
4427           if (cbp->first) {
4428             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4429           } else {
4430             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4431           }
4432 
4433           FFAddOneString (ffstring, "Original source text: ", FALSE, FALSE, TILDE_EXPAND);
4434           FFAddOneString (ffstring, gbp->source, TRUE, TRUE, TILDE_EXPAND);
4435 
4436           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4437           FFRecycleString(ajp, ffstring);
4438           ffstring = FFGetString(ajp);
4439 
4440           last_had_tilde = FALSE;
4441           if (awp->afp != NULL) {
4442             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4443           }
4444         }
4445       }
4446     }
4447   }
4448 
4449   last_name = NULL;
4450   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext);
4451   while (sdp != NULL) {
4452     str = (CharPtr) sdp->data.ptrvalue;
4453     if (StringDoesHaveText (str) && (last_name == NULL || CommentsAreDifferent (str, last_name) || awp->mode == DUMP_MODE)) {
4454       cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4455       if (cbp != NULL) {
4456 
4457         last_name = (CharPtr) str;
4458 
4459         cbp->entityID = dcontext.entityID;
4460         cbp->itemID = dcontext.itemID;
4461         cbp->itemtype = OBJ_SEQDESC;
4462         cbp->first = first;
4463         cbp->no_blank_before = last_had_tilde;
4464         first = FALSE;
4465 
4466         last_had_tilde = FALSE;
4467         len = StringLen (str);
4468         if (len > 4 && str [len - 1] == '~' && str [len - 2] == '~') {
4469           last_had_tilde = TRUE;
4470         }
4471         if (awp->afp != NULL) {
4472           DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4473         }
4474       }
4475     }
4476     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext);
4477   }
4478 
4479   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_maploc, &dcontext);
4480   while (sdp != NULL) {
4481     cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4482     if (cbp != NULL) {
4483 
4484       cbp->entityID = dcontext.entityID;
4485       cbp->itemID = dcontext.itemID;
4486       cbp->itemtype = OBJ_SEQDESC;
4487       cbp->first = first;
4488       cbp->no_blank_before = last_had_tilde;
4489       first = FALSE;
4490 
4491       last_had_tilde = FALSE;
4492       if (awp->afp != NULL) {
4493         DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4494       }
4495     }
4496     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_maploc, &dcontext);
4497   }
4498 
4499   last_name = NULL;
4500   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_region, &dcontext);
4501   while (sdp != NULL) {
4502     str = (CharPtr) sdp->data.ptrvalue;
4503     if (StringDoesHaveText (str) &&
4504         ((last_name == NULL || StringCmp (str, last_name) != 0) || awp->mode == DUMP_MODE) &&
4505         (StringCmp (str, ".") != 0 || awp->mode == DUMP_MODE)) {
4506       cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4507       if (cbp != NULL) {
4508 
4509         last_name = (CharPtr) str;
4510 
4511         cbp->entityID = dcontext.entityID;
4512         cbp->itemID = dcontext.itemID;
4513         cbp->itemtype = OBJ_SEQDESC;
4514         cbp->first = first;
4515         cbp->no_blank_before = last_had_tilde;
4516         first = FALSE;
4517 
4518         last_had_tilde = FALSE;
4519         if (awp->afp != NULL) {
4520           DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4521         }
4522       }
4523     }
4524     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_region, &dcontext);
4525   }
4526 
4527   last_name = NULL;
4528   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_name, &dcontext);
4529   while (sdp != NULL) {
4530     str = (CharPtr) sdp->data.ptrvalue;
4531     if (StringDoesHaveText (str) &&
4532         ((last_name == NULL || StringCmp (str, last_name) != 0) || awp->mode == DUMP_MODE) &&
4533         (StringCmp (str, ".") != 0 || awp->mode == DUMP_MODE)) {
4534       cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4535       if (cbp != NULL) {
4536 
4537         last_name = (CharPtr) str;
4538 
4539         cbp->entityID = dcontext.entityID;
4540         cbp->itemID = dcontext.itemID;
4541         cbp->itemtype = OBJ_SEQDESC;
4542         cbp->first = first;
4543         cbp->no_blank_before = last_had_tilde;
4544         first = FALSE;
4545 
4546         last_had_tilde = FALSE;
4547         if (awp->afp != NULL) {
4548           DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4549         }
4550       }
4551     }
4552     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_name, &dcontext);
4553   }
4554 
4555   if (basemodNum > 0 && (basemodURLhead != NULL || basemodURL != NULL)) {
4556     cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4557     if (cbp != NULL) {
4558 
4559       cbp->entityID = awp->entityID;
4560       cbp->itemID = filetrack_itemID;
4561       cbp->itemtype = OBJ_SEQDESC;
4562       cbp->first = first;
4563       cbp->no_blank_before = last_had_tilde;
4564       first = FALSE;
4565 
4566       if (cbp->first) {
4567         FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4568       } else {
4569         FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4570       }
4571 
4572       if (! last_had_tilde && ! cbp->first) {
4573         FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
4574       }
4575 
4576       if (basemodNum == 1) {
4577         FFAddOneString (ffstring, "This genome has a ", FALSE, FALSE, TILDE_IGNORE);
4578         if (GetWWW (ajp)) {
4579           str = NULL;
4580           if (basemodURL != NULL) {
4581             str = basemodURL;
4582           } else if (basemodURLhead != NULL) {
4583             str = basemodURLhead [0];
4584           }
4585           if (StringDoesHaveText (str)) {
4586             FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4587             FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
4588             FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
4589             FFAddOneString (ffstring, "base modification file", FALSE, FALSE, TILDE_IGNORE);
4590             FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4591           }
4592         } else {
4593           FFAddOneString (ffstring, "base modification file", FALSE, FALSE, TILDE_IGNORE);
4594         }
4595         FFAddOneString (ffstring, " available.", FALSE, FALSE, TILDE_IGNORE);
4596       } else {
4597         FFAddOneString (ffstring, "There are ", FALSE, FALSE, TILDE_IGNORE);
4598         sprintf (buf, "%ld", (long) basemodNum);
4599         FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4600         FFAddOneString (ffstring, " base modification files", FALSE, FALSE, TILDE_IGNORE);
4601         if (GetWWW (ajp)) {
4602           pfx = " (";
4603           sfx = "";
4604           for (j = 0; j < basemodNum; j++) {
4605             str = NULL;
4606             if (basemodURL != NULL) {
4607               str = basemodURL;
4608             } else if (basemodURLhead != NULL) {
4609               str = basemodURLhead [j];
4610             }
4611             if (StringHasNoText (str)) continue;
4612             FFAddOneString (ffstring, pfx, FALSE, FALSE, TILDE_IGNORE);
4613             FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4614             FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
4615             FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
4616             sprintf (buf, "%ld", (long) (j + 1));
4617             FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4618             FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4619             if (basemodNum == 2) {
4620               pfx = " and ";
4621             } else if (j == basemodNum - 2) {
4622               pfx = ", and ";
4623             } else {
4624               pfx = ", ";
4625             }
4626             sfx = ")";
4627           }
4628           FFAddOneString (ffstring, sfx, FALSE, FALSE, TILDE_IGNORE);
4629         }
4630         FFAddOneString (ffstring, " available for this genome.", FALSE, FALSE, TILDE_IGNORE);
4631       }
4632 
4633       cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4634       FFRecycleString(ajp, ffstring);
4635       ffstring = FFGetString(ajp);
4636 
4637       last_had_tilde = FALSE;
4638       if (awp->afp != NULL) {
4639         DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4640       }
4641     }
4642   }
4643 
4644   /* StructuredComment user object */
4645 
4646   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
4647   while (sdp != NULL) {
4648     uop = (UserObjectPtr) sdp->data.ptrvalue;
4649     if (uop != NULL) {
4650       oip = uop->type;
4651       if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
4652         for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
4653           if (ufp->choice != 1) continue;
4654           oip = ufp->label;
4655           if (oip == NULL) continue;
4656           field = oip->str;
4657           if (StringHasNoText (field)) continue;
4658           if (StringCmp (field, "StructuredCommentPrefix") == 0) {
4659             if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
4660               if (firstGenAnnotSCAD == NULL) {
4661                 firstGenAnnotSCAD = uop;
4662                 genomeBuildNumber = NULL;
4663                 genomeVersionNumber = NULL;
4664                 firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
4665                 uop = NULL;
4666               } else {
4667                 firstGenAnnotSCAD = NULL;
4668               }
4669               break;
4670             }
4671           }
4672         }
4673         if (uop != NULL) {
4674           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4675           if (cbp != NULL) {
4676 
4677             cbp->entityID = dcontext.entityID;
4678             cbp->itemID = dcontext.itemID;
4679             cbp->itemtype = OBJ_SEQDESC;
4680             cbp->first = first;
4681             cbp->no_blank_before = last_had_tilde;
4682             first = FALSE;
4683 
4684             last_had_tilde = FALSE;
4685             if (awp->afp != NULL) {
4686               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4687             }
4688           }
4689         }
4690       }
4691     }
4692     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
4693   }
4694 
  /* MolInfo completeness, HTGS, or sequencing method results in allocated comment string */
4696 
4697   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
4698   if (sdp != NULL) {
4699 
4700     mip = (MolInfoPtr) sdp->data.ptrvalue;
4701     if (mip != NULL) {
4702       if (mip->completeness != 0 && is_other) {
4703 
4704         str = GetMolInfoCommentString (bsp, mip);
4705 
4706         if (str != NULL) {
4707           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4708           if (cbp != NULL) {
4709 
4710             cbp->entityID = dcontext.entityID;
4711             cbp->itemID = dcontext.itemID;
4712             cbp->itemtype = OBJ_SEQDESC;
4713             cbp->first = first;
4714             cbp->no_blank_before = last_had_tilde;
4715             first = FALSE;
4716 
4717             if (cbp->first) {
4718               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4719             } else {
4720               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4721             }
4722 
4723             FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_EXPAND);
4724 
4725             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4726             FFRecycleString(ajp, ffstring);
4727             ffstring = FFGetString(ajp);
4728 
4729             last_had_tilde = FALSE;
4730             if (awp->afp != NULL) {
4731               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4732             }
4733           }
4734         }
4735 
4736       }
4737       if (mip->tech == MI_TECH_htgs_0 ||
4738           mip->tech == MI_TECH_htgs_1 ||
4739           mip->tech == MI_TECH_htgs_2) {
4740 
4741         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4742         if (cbp != NULL) {
4743 
4744           /*
4745           cbp->entityID = dcontext.entityID;
4746           cbp->itemID = dcontext.itemID;
4747           cbp->itemtype = OBJ_SEQDESC;
4748           */
4749           cbp->entityID = awp->entityID;
4750           cbp->first = first;
4751           cbp->no_blank_before = last_had_tilde;
4752           first = FALSE;
4753 
4754           if (cbp->first) {
4755             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4756           } else {
4757             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4758           }
4759 
4760           AddHTGSCommentString (ffstring, bsp, mip);
4761 
4762           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4763           FFRecycleString(ajp, ffstring);
4764           ffstring = FFGetString(ajp);
4765 
4766           last_had_tilde = FALSE;
4767           if (awp->afp != NULL) {
4768             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4769           }
4770         }
4771 
4772       } else {
4773         str = StringForSeqTech (mip->tech);
4774         if (! StringHasNoText (str)) {
4775 
4776           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4777           if (cbp != NULL) {
4778 
4779             /*
4780             cbp->entityID = dcontext.entityID;
4781             cbp->itemID = dcontext.itemID;
4782             cbp->itemtype = OBJ_SEQDESC;
4783             */
4784             cbp->entityID = awp->entityID;
4785             cbp->first = first;
4786             cbp->no_blank_before = last_had_tilde;
4787             first = FALSE;
4788 
4789             if (cbp->first) {
4790               FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4791             } else {
4792               FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4793             }
4794 
4795             FFAddTextToString (ffstring, "Method: ", str, NULL, TRUE, FALSE, TILDE_EXPAND);
4796 
4797             cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4798             FFRecycleString(ajp, ffstring);
4799             ffstring = FFGetString(ajp);
4800 
4801             last_had_tilde = FALSE;
4802             if (awp->afp != NULL) {
4803               DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4804             }
4805           }
4806         }
4807       }
4808     }
4809   }
4810 
4811   /* no longer adding comment features that are full length on appropriate segment */
4812 
4813   /*
4814   parent = awp->parent;
4815   if (parent == NULL) return;
4816 
4817   sfp = SeqMgrGetNextFeature (parent, NULL, SEQFEAT_COMMENT, 0, &fcontext);
4818   while (sfp != NULL) {
4819     if (fcontext.left == awp->from && fcontext.right == awp->to) {
4820       cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4821       if (cbp != NULL) {
4822 
4823         cbp->entityID = fcontext.entityID;
4824         cbp->itemID = fcontext.itemID;
4825         cbp->itemtype = OBJ_SEQFEAT;
4826         cbp->first = first;
4827         cbp->no_blank_before = last_had_tilde;
4828         first = FALSE;
4829 
4830         last_had_tilde = FALSE;
4831         if (awp->afp != NULL) {
4832           DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4833         }
4834       }
4835     }
4836     sfp = SeqMgrGetNextFeature (parent, sfp, SEQFEAT_COMMENT, 0, &fcontext);
4837   }
4838   */
4839 
4840   /*
4841   search for Seq-annot.desc.comment on annots packaged on current bioseq
4842   is now done earlier in order to suppress GenomeBuild user object comment
4843   */
4844 
4845   /*
4846   annotDescCommentToComment = FALSE;
4847   adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_user, &acontext);
4848   while (adp != NULL) {
4849     uop = (UserObjectPtr) adp->data.ptrvalue;
4850     if (uop != NULL) {
4851       oip = uop->type;
4852       if (oip != NULL) {
4853         if (StringCmp (oip->str, "AnnotDescCommentPolicy") == 0) {
4854           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
4855             oip = ufp->label;
4856             if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
4857             if (StringCmp (oip->str, "Policy") == 0) {
4858               if (StringICmp ((CharPtr) ufp->data.ptrvalue, "ShowInComment") == 0) {
4859                 annotDescCommentToComment = TRUE;
4860               }
4861             }
4862           }
4863         } else if (StringICmp (oip->str, "StructuredComment") == 0) {
4864           if (firstGenAnnotSCAD == NULL) {
4865             for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
4866               if (ufp->choice != 1) continue;
4867               oip = ufp->label;
4868               if (oip == NULL) continue;
4869               field = oip->str;
4870               if (StringHasNoText (field)) continue;
4871               if (StringCmp (field, "StructuredCommentPrefix") == 0) {
4872                 if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
4873                   firstGenAnnotSCAD = uop;
4874                 }
4875               }
4876             }
4877           }
4878         }
4879       }
4880     }
4881     adp = SeqMgrGetNextAnnotDesc (bsp, adp, Annot_descr_user, &acontext);
4882   }
4883   */
4884 
4885   if (annotDescCommentToComment) {
4886     adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_comment, &acontext);
4887     while (adp != NULL) {
4888       str = (CharPtr) adp->data.ptrvalue;
4889       if (StringDoesHaveText (str)) {
4890         cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4891         if (cbp != NULL) {
4892 
4893           cbp->entityID = awp->entityID;
4894           cbp->first = first;
4895           cbp->no_blank_before = last_had_tilde;
4896           first = FALSE;
4897 
4898           if (cbp->first) {
4899             FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4900           } else {
4901             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4902           }
4903 
4904           FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_EXPAND);
4905 
4906           cbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4907           FFRecycleString (ajp, ffstring);
4908           ffstring = FFGetString (ajp);
4909 
4910           last_had_tilde = FALSE;
4911           if (awp->afp != NULL) {
4912             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4913           }
4914         }
4915       }
4916       adp = SeqMgrGetNextAnnotDesc (bsp, adp, Annot_descr_comment, &acontext);
4917     }
4918   }
4919 
4920   if (firstGenAnnotSCAD != NULL) {
4921     if (StringDoesHaveText (firstGenAnnotSCStr)) {
4922       cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4923       if (cbp != NULL) {
4924 
4925         cbp->entityID = awp->entityID;
4926         cbp->first = first;
4927         cbp->no_blank_before = last_had_tilde;
4928 
4929         if (cbp->first) {
4930           FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4931         } else {
4932           FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4933           if (! last_had_tilde) {
4934             FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
4935           }
4936         }
4937 
4938         first = FALSE;
4939 
4940         FFAddOneString (ffstring, firstGenAnnotSCStr, FALSE, FALSE, TILDE_EXPAND);
4941 
4942         cbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
4943         FFRecycleString (ajp, ffstring);
4944         ffstring = FFGetString (ajp);
4945 
4946         if (awp->afp != NULL) {
4947           DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
4948         }
4949       }
4950     }
4951   }
4952   if (firstGenAnnotSCStr != NULL) {
4953     MemFree (firstGenAnnotSCStr);
4954   }
4955 
4956   num = 0;
4957   if (filetrackspp != NULL) {
4958     num = 1;
4959   } else if (filetrackpsp != NULL) {
4960     num = PackSeqPntNum (filetrackpsp);
4961   }
4962   if (num > 0) {
4963     cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
4964     if (cbp != NULL) {
4965 
4966       cbp->entityID = awp->entityID;
4967       cbp->itemID = filetrack_itemID;
4968       cbp->itemtype = OBJ_SEQDESC;
4969       cbp->first = first;
4970       cbp->no_blank_before = last_had_tilde;
4971       first = FALSE;
4972 
4973       if (cbp->first) {
4974         FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
4975       } else {
4976         FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
4977       }
4978 
4979       FFAddOneString (ffstring, "This ", FALSE, FALSE, TILDE_IGNORE);
4980       if (GetWWW (ajp) && filetrackURL != NULL) {
4981         FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4982         FFAddOneString (ffstring, filetrackURL, FALSE, FALSE, TILDE_IGNORE);
4983         FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
4984         FFAddOneString (ffstring, "map", FALSE, FALSE, TILDE_IGNORE);
4985         FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4986       } else {
4987         FFAddOneString (ffstring, "map", FALSE, FALSE, TILDE_IGNORE);
4988       }
4989       FFAddOneString (ffstring, " has ", FALSE, FALSE, TILDE_IGNORE);
4990       frags = num;
4991 
4992       if (bsp->topology != TOPOLOGY_CIRCULAR) {
4993         if (num > 1 && GetFileTrackPoint (filetrackspp, filetrackpsp, num - 1) < bsp->length - 1 ) {
4994           frags = num + 1;
4995         }
4996       }
4997 
4998       sprintf (tmp, "%ld", (long) frags);
4999       FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
5000       if (frags > 1) {
5001         FFAddOneString (ffstring, " pieces:", FALSE, FALSE, TILDE_IGNORE);
5002       } else if (frags == 1) {
5003         FFAddOneString (ffstring, " piece:", FALSE, FALSE, TILDE_IGNORE);
5004       }
5005 
5006       last = 1;
5007       pos = GetFileTrackPoint (filetrackspp, filetrackpsp, 0) + 1;
5008       if (bsp->topology != TOPOLOGY_CIRCULAR) {
5009 
5010         FFAddNewLine (ffstring);
5011         sprintf (tmp, "*  %7ld %7ld: fragment of %ld bp in length",
5012                  (long) last, (long) pos, (long) (pos - last + 1));
5013         FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
5014 
5015       }
5016       last = pos + 1;
5017 
5018       chunk = 0;
5019       for (idx = 1; idx < num; idx++) {
5020 
5021         chunk++;
5022         if (chunk >= 100) {
5023           chunk = 0;
5024 
5025           cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
5026           FFRecycleString(ajp, ffstring);
5027           ffstring = FFGetString(ajp);
5028 
5029           last_had_tilde = FALSE;
5030           if (awp->afp != NULL) {
5031             DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
5032           }
5033 
5034           cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
5035           if (cbp != NULL) {
5036             cbp->entityID = awp->entityID;
5037             cbp->itemID = filetrack_itemID;
5038             cbp->itemtype = OBJ_SEQDESC;
5039             cbp->first = FALSE;
5040             FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
5041           }
5042         } else {
5043           FFAddNewLine (ffstring);
5044         }
5045 
5046         pos = GetFileTrackPoint (filetrackspp, filetrackpsp, idx) + 1;
5047 
5048         sprintf (tmp, "*  %7ld %7ld: fragment of %ld bp in length",
5049                  (long) last, (long) pos, (long) (pos - last + 1));
5050         FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
5051 
5052         last = pos + 1;
5053       }
5054 
5055       if (bsp->topology != TOPOLOGY_CIRCULAR) {
5056         pos = bsp->length;
5057 
5058         if (last < pos) {
5059           FFAddNewLine (ffstring);
5060           sprintf (tmp, "*  %7ld %7ld: fragment of %ld bp in length",
5061                    (long) last, (long) pos, (long) (pos - last + 1));
5062           FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
5063         }
5064 
5065       } else {
5066         pos = GetFileTrackPoint (filetrackspp, filetrackpsp, 0) + 1;
5067 
5068         FFAddNewLine (ffstring);
5069         sprintf (tmp, "*  %7ld %7ld: fragment of %ld bp in length",
5070                  (long) last, (long) pos, (long) (bsp->length + pos - last + 1));
5071         FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
5072       }
5073 
5074       cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
5075       FFRecycleString(ajp, ffstring);
5076       ffstring = FFGetString(ajp);
5077 
5078       last_had_tilde = FALSE;
5079       if (awp->afp != NULL) {
5080         DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
5081       }
5082     }
5083   }
5084 
5085   FFRecycleString(ajp, ffstring);
5086 }
5087 
AddFeatHeaderBlock(Asn2gbWorkPtr awp)5088 NLM_EXTERN void AddFeatHeaderBlock (
5089   Asn2gbWorkPtr awp
5090 )
5091 
5092 {
5093   IntAsn2gbJobPtr ajp;
5094   BaseBlockPtr    bbp;
5095   Char            buf [128];
5096   StringItemPtr   ffstring;
5097   CharPtr         suffix = NULL;
5098 
5099   if (awp == NULL) return;
5100   ajp = awp->ajp;
5101   if (ajp == NULL) return;
5102 
5103   bbp = Asn2gbAddBlock (awp, FEATHEADER_BLOCK, sizeof (BaseBlock));
5104   if (bbp == NULL) return;
5105 
5106   bbp->entityID = awp->entityID;
5107 
5108   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
5109       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
5110     sprintf (buf, "<a name=\"feature_%s\"></a>", awp->currAccVerLabel);
5111     DoQuickLinkFormat (awp->afp, buf);
5112   }
5113 
5114   if (awp->format != FTABLE_FMT) {
5115     ffstring = FFGetString(ajp);
5116     if ( ffstring == NULL ) return;
5117 
5118     FFStartPrint (ffstring, awp->format, 0, 12, "FEATURES", 21, 5, 0, "FH", TRUE);
5119 
5120     if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
5121       FFAddOneString (ffstring, "Key", FALSE, FALSE, TILDE_IGNORE);
5122       FFAddNChar(ffstring, ' ', 13 , FALSE);
5123     }
5124 
5125     FFAddOneString (ffstring, "Location/Qualifiers", FALSE, FALSE, TILDE_TO_SPACES);
5126 
5127     if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
5128       FFAddNewLine(ffstring);
5129       FFAddNewLine(ffstring);
5130     }
5131 
5132     suffix = FFEndPrint(ajp, ffstring, awp->format, 12, 21, 5, 0, "FH");
5133     FFRecycleString(ajp, ffstring);
5134   }
5135 
5136   bbp->string = suffix;
5137 
5138   if (awp->afp != NULL) {
5139     DoImmediateFormat (awp->afp, bbp);
5140   }
5141 }
5142 
ComputeSourceHash(CharPtr key,Uint2 start)5143 static Uint2 ComputeSourceHash (
5144   CharPtr key,
5145   Uint2 start
5146 )
5147 
5148 {
5149   Uint4  h;
5150   Uint2  M;
5151   Uint2  S;
5152 
5153   if (key == NULL) return start;
5154 
5155   M = 101; /* prime key */
5156   S = 256; /* size of alphabet */
5157 
5158   for (h = start; *key != '\0'; key++) {
5159     h = (S * h + *key) % M;
5160   }
5161 
5162   return (Uint2) h;
5163 }
5164 
AddSource(Asn2gbWorkPtr awp,ValNodePtr PNTR head,BioSourcePtr biop,CharPtr comment)5165 static BaseBlockPtr AddSource (
5166   Asn2gbWorkPtr awp,
5167   ValNodePtr PNTR head,
5168   BioSourcePtr biop,
5169   CharPtr comment
5170 )
5171 
5172 {
5173   BaseBlockPtr    bbp;
5174   DbtagPtr        dbt;
5175   Uint2           hash;
5176   SourceType      idx;
5177   IntSrcBlockPtr  isp;
5178   ObjectIdPtr     oip;
5179   OrgModPtr       omp;
5180   OrgNamePtr      onp;
5181   OrgRefPtr       orp;
5182   SubSourcePtr    ssp;
5183   CharPtr         str;
5184   Uint1           subtype;
5185   Char            tmp [16];
5186   ValNodePtr      vnp;
5187 
5188   if (awp == NULL || head == NULL || biop == NULL) return NULL;
5189 
5190   bbp = (BaseBlockPtr) MemNew (sizeof (IntSrcBlock));
5191   if (bbp == NULL) return NULL;
5192   bbp->blocktype = SOURCEFEAT_BLOCK;
5193   bbp->section = awp->currsection;
5194 
5195   ValNodeAddPointer (head, 0, bbp);
5196 
5197   isp = (IntSrcBlockPtr) bbp;
5198   isp->biop = biop;
5199   isp->is_focus = biop->is_focus;
5200   if (biop->origin == 5) {
5201     isp->is_synthetic = TRUE;
5202   }
5203 
5204   orp = biop->org;
5205   if (orp == NULL) return bbp;
5206 
5207   if (StringICmp (orp->taxname, "synthetic construct") == 0) {
5208     isp->is_synthetic = TRUE;
5209   }
5210 
5211   isp->orghash = ComputeSourceHash (orp->taxname, 0);
5212   isp->taxname = orp->taxname;
5213 
5214   hash = 0;
5215   onp = orp->orgname;
5216   if (onp != NULL) {
5217     if (StringICmp (onp->div, "SYN") == 0) {
5218       isp->is_synthetic = TRUE;
5219     }
5220     isp->omp = onp->mod;
5221     for (omp = onp->mod; omp != NULL; omp = omp->next) {
5222       subtype = omp->subtype;
5223       if (subtype == 253) {
5224         subtype = 39;
5225       } else if (subtype == 254) {
5226         subtype = 40;
5227       } else if (subtype == 255) {
5228         subtype = 41;
5229       }
5230       if (subtype < 42) {
5231         idx = orgModToSourceIdx [subtype];
5232         if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
5233           str = asn2gnbk_source_quals [idx].name;
5234           hash = ComputeSourceHash (str, hash);
5235           hash = ComputeSourceHash (omp->subname, hash);
5236         }
5237       }
5238     }
5239   }
5240   if (comment != NULL) {
5241     hash = ComputeSourceHash ("note", hash);
5242     hash = ComputeSourceHash (comment, hash);
5243   }
5244   isp->modhash = hash;
5245 
5246   hash = 0;
5247   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
5248     subtype = ssp->subtype;
5249     if (subtype == 255) {
5250       subtype = 44;
5251     }
5252     if (subtype < 45) {
5253       idx = subSourceToSourceIdx [subtype];
5254       if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
5255         str = asn2gnbk_source_quals [idx].name;
5256         hash = ComputeSourceHash (str, hash);
5257         hash = ComputeSourceHash (ssp->name, hash);
5258       }
5259     }
5260   }
5261   isp->subhash = hash;
5262   isp->ssp = biop->subtype;
5263 
5264   hash = 0;
5265   for (vnp = orp->db; vnp != NULL; vnp = vnp->next) {
5266     dbt = (DbtagPtr) vnp->data.ptrvalue;
5267     if (dbt != NULL) {
5268       hash = ComputeSourceHash (dbt->db, hash);
5269       oip = dbt->tag;
5270       if (oip != NULL) {
5271         if (oip->str != NULL) {
5272           hash = ComputeSourceHash (oip->str, hash);
5273         } else {
5274           sprintf (tmp, "%ld", (long) oip->id);
5275           hash = ComputeSourceHash (tmp, hash);
5276         }
5277       }
5278     }
5279   }
5280   isp->xrfhash = hash;
5281   isp->vnp = orp->db;
5282 
5283   return bbp;
5284 }
5285 
SortSourcesByHash(VoidPtr ptr1,VoidPtr ptr2)5286 static int LIBCALLBACK SortSourcesByHash (
5287   VoidPtr ptr1,
5288   VoidPtr ptr2
5289 )
5290 
5291 {
5292   Int4            diff;
5293   IntSrcBlockPtr  isp1;
5294   IntSrcBlockPtr  isp2;
5295   ValNodePtr      vnp1;
5296   ValNodePtr      vnp2;
5297 
5298   if (ptr1 == NULL || ptr2 == NULL) return 0;
5299   vnp1 = *((ValNodePtr PNTR) ptr1);
5300   vnp2 = *((ValNodePtr PNTR) ptr2);
5301   if (vnp1 == NULL || vnp2 == NULL) return 0;
5302   isp1 = (IntSrcBlockPtr) vnp1->data.ptrvalue;
5303   isp2 = (IntSrcBlockPtr) vnp2->data.ptrvalue;
5304   if (isp1 == NULL || isp2 == NULL) return 0;
5305 
5306   if (isp1->is_focus && (! isp2->is_focus)) return -1;
5307   if (isp2->is_focus && (! isp1->is_focus)) return 1;
5308 
5309   diff = isp1->orghash - isp2->orghash;
5310   if (diff > 0) return -1;
5311   if (diff < 0) return 1;
5312 
5313   diff = isp1->xrfhash - isp2->xrfhash;
5314   if (diff > 0) return -1;
5315   if (diff < 0) return 1;
5316 
5317   /* sort so that sources with modifiers come first */
5318 
5319   diff = isp1->modhash - isp2->modhash;
5320   if (diff > 0) return -1;
5321   if (diff < 0) return 1;
5322 
5323   diff = isp1->subhash - isp2->subhash;
5324   if (diff > 0) return -1;
5325   if (diff < 0) return 1;
5326 
5327   /* if all hashes are equal, descriptor comes first */
5328 
5329   if (isp1->is_descriptor && (! isp2->is_descriptor)) {
5330     return -1;
5331   } else if (isp2->is_descriptor && (! isp1->is_descriptor)) {
5332     return 1;
5333   }
5334 
5335   /* now sort identical sources by position, to only fuse abutting ones */
5336   /* feature with smallest left extreme is first */
5337 
5338   if (isp1->left > isp2->left) {
5339     return 1;
5340   } else if (isp1->left < isp2->left) {
5341     return -1;
5342   }
5343 
5344   /* if same left extreme, shortest source feature is first just for flatfile */
5345 
5346   if (isp1->right > isp2->right) {
5347     return 1;
5348   } else if (isp1->right < isp2->right) {
5349     return -1;
5350   }
5351 
5352   return 0;
5353 }
5354 
SortSourcesByPos(VoidPtr ptr1,VoidPtr ptr2)5355 static int LIBCALLBACK SortSourcesByPos (
5356   VoidPtr ptr1,
5357   VoidPtr ptr2
5358 )
5359 
5360 {
5361   IntSrcBlockPtr  isp1;
5362   IntSrcBlockPtr  isp2;
5363   ValNodePtr      vnp1;
5364   ValNodePtr      vnp2;
5365 
5366   if (ptr1 == NULL || ptr2 == NULL) return 0;
5367   vnp1 = *((ValNodePtr PNTR) ptr1);
5368   vnp2 = *((ValNodePtr PNTR) ptr2);
5369   if (vnp1 == NULL || vnp2 == NULL) return 0;
5370   isp1 = (IntSrcBlockPtr) vnp1->data.ptrvalue;
5371   isp2 = (IntSrcBlockPtr) vnp2->data.ptrvalue;
5372   if (isp1 == NULL || isp2 == NULL) return 0;
5373 
5374   /* descriptor always goes first */
5375 
5376   if (isp1->is_descriptor && (! isp2->is_descriptor)) {
5377     return -1;
5378   } else if (isp2->is_descriptor && (! isp1->is_descriptor)) {
5379     return 1;
5380   }
5381 
5382   /* feature with smallest left extreme is first */
5383 
5384   if (isp1->left > isp2->left) {
5385     return 1;
5386   } else if (isp1->left < isp2->left) {
5387     return -1;
5388   }
5389 
5390   /* if same left extreme, shortest source feature is first just for flatfile */
5391 
5392   if (isp1->right > isp2->right) {
5393     return 1;
5394   } else if (isp1->right < isp2->right) {
5395     return -1;
5396   }
5397 
5398   return 0;
5399 }
5400 
5401 /*                                                                   */
5402 /* s_isFuzzyLoc () -- Determines is a location has fuzzy coordinates */
5403 /*                                                                   */
5404 
s_isFuzzyLoc(SeqLocPtr pLocation)5405 static Boolean s_isFuzzyLoc ( SeqLocPtr pLocation )
5406 {
5407   SeqIntPtr pIntLocation;
5408 
5409   if (pLocation == NULL)
5410     return FALSE;
5411 
5412   if (pLocation->choice != SEQLOC_INT)
5413     return FALSE;
5414 
5415   if (pLocation->data.ptrvalue == NULL)
5416     return FALSE;
5417 
5418   pIntLocation = (SeqIntPtr) pLocation->data.ptrvalue;
5419 
5420   if ((pIntLocation->if_from != NULL) && (pIntLocation->if_from->choice == 2))
5421     return TRUE;
5422 
5423   if ((pIntLocation->if_to != NULL) && (pIntLocation->if_to->choice == 2))
5424     return TRUE;
5425 
5426   return FALSE;
5427 }
5428 
/*
 * GetSourcesOnBioseq -- collects BioSource blocks onto awp->srchead.
 *
 * Three collection modes:
 *
 *  1. cds != NULL: the CDS location is copied, every interval and point
 *     in the copy is set to Seq_strand_both, and the single overlapping
 *     source feature (if any) is added.  Nothing else is collected.
 *
 *  2. Source descriptors on bsp (unless format is FTABLE_FMT outside of
 *     DUMP_MODE): each gets a full-length location.  Only the first
 *     descriptor is examined, unless bsp is a protein with a SWISS-PROT
 *     id, or a protein with a "WP_" accession whose descriptors name more
 *     than one distinct superkingdom, in which case all are examined.
 *     A descriptor already present in awp->srchead (same entityID/itemID,
 *     itemtype OBJ_SEQDESC) is not added twice.
 *
 *  3. Source features indexed on awp->parent (unless this is a contig
 *     without showconsource): a feature is taken when the stop of its
 *     last interval lies in [from, to] and, if the job has a sub-location,
 *     SeqLocCompare against that sub-location is positive.  With a
 *     sub-location present the feature location is remapped onto it.
 *
 * Returns immediately if awp, target, bsp or the job pointer is NULL.
 */
static void GetSourcesOnBioseq (
  Asn2gbWorkPtr awp,
  BioseqPtr target,
  BioseqPtr bsp,
  Int4 from,
  Int4 to,
  SeqFeatPtr cds
)

{
  IntAsn2gbJobPtr    ajp;
  Boolean            alreadyAdded;
  BaseBlockPtr       bbp;
  BioSourcePtr       biop;
  SeqMgrDescContext  dcontext;
  ValNodePtr         descList = NULL;
  SeqMgrFeatContext  fcontext;
  SeqInt             fullInt;
  ValNode            fullLoc;
  Boolean            hasNulls;
  Int2               idx;
  IntSrcBlockPtr     isp;
  Boolean            isSwissProt = FALSE;
  Boolean            isWP = FALSE;
  Int4Ptr            ivals;
  Int4               left;
  SeqLocPtr          newloc;
  Boolean            noLeft;
  Boolean            noRight;
  Int2               numivals;
  Int2               numSuperKingdom = 0;
  OrgNamePtr         onp;
  OrgRefPtr          orp;
  ObjValNodePtr      ovp;
  Int4               right;
  SeqDescrPtr        sdp;
  ValNodePtr         seen;
  SeqFeatPtr         sfp;
  SeqIntPtr          sintp;
  SeqIdPtr           sip;
  SeqLocPtr          slp, slpx;
  Boolean            split;
  SeqPntPtr          spp;
  Int4               start;
  Int4               stop;
  Uint1              strand;
  CharPtr            superKingdomName = NULL;
  Boolean            superKingdomsDiffer = FALSE;
  TaxElementPtr      tep;
  TextSeqIdPtr       tsip;
  ValNodePtr         vnp;
  Boolean            walkAll;

  if (awp == NULL || target == NULL || bsp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  /* ---- mode 1: single source overlapping a CDS ---- */

  if (cds != NULL) {

    /* work on a copy of the location with strand set to both */

    slp = AsnIoMemCopy ((Pointer) cds->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
    if (slp != NULL) {
      slpx = SeqLocFindNext (slp, NULL);
      while (slpx != NULL) {
        switch (slpx->choice) {
          case SEQLOC_INT :
            sintp = (SeqIntPtr) slpx->data.ptrvalue;
            if (sintp != NULL) {
              sintp->strand = Seq_strand_both;
            }
            break;
          case SEQLOC_PNT :
            spp = (SeqPntPtr) slpx->data.ptrvalue;
            if (spp != NULL) {
              spp->strand = Seq_strand_both;
            }
            break;
          default :
            break;
        }
        slpx = SeqLocFindNext (slp, slpx);
      }
    }

    sfp = SeqMgrGetOverlappingSource (slp, &fcontext);
    SeqLocFree (slp);
    if (sfp == NULL) return;

    biop = (BioSourcePtr) sfp->data.value.ptrvalue;
    bbp = AddSource (awp, &(awp->srchead), biop, sfp->comment);
    if (bbp == NULL) return;

    bbp->entityID = sfp->idx.entityID;
    bbp->itemID = sfp->idx.itemID;
    bbp->itemtype = OBJ_SEQFEAT;

    isp = (IntSrcBlockPtr) bbp;
    CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
    hasNulls = LocationHasNullsBetween (sfp->location);
    isp->loc = SeqLocMerge (target, sfp->location, NULL, FALSE, TRUE, hasNulls);
    SetSeqLocPartial (isp->loc, noLeft, noRight);
    isp->left = fcontext.left;
    isp->right = fcontext.right;
    isp->comment = sfp->comment;

    return;
  }

  /* ---- mode 2: source descriptors ---- */

  if (awp->format != FTABLE_FMT || awp->mode == DUMP_MODE) {

    /* full length loc for descriptors */

    fullInt.from = 0;
    fullInt.to = (ajp->ajp.slp != NULL) ? SeqLocLen (ajp->ajp.slp) - 1 : bsp->length - 1;
    fullInt.strand = Seq_strand_plus;
    fullInt.id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
    fullInt.if_from = NULL;
    fullInt.if_to = NULL;

    fullLoc.choice = SEQLOC_INT;
    fullLoc.data.ptrvalue = (Pointer) &fullInt;
    fullLoc.next = NULL;

    /* if SWISS-PROT, may have multiple source descriptors */

    if (ISA_aa (bsp->mol)) {
      for (sip = bsp->id; sip != NULL; sip = sip->next) {
        switch (sip->choice) {
          case SEQID_SWISSPROT :
            isSwissProt = TRUE;
            break;
          case SEQID_OTHER :
            tsip = (TextSeqIdPtr) sip->data.ptrvalue;
            if (tsip != NULL && StringNICmp (tsip->accession, "WP_", 3) == 0) {
              isWP = TRUE;
            }
            break;
          default :
            break;
        }
      }
    }

    /* remember every source descriptor, tallying superkingdom names as we go */

    for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
         sdp != NULL;
         sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) {

      ValNodeAddPointer (&descList, 0, (Pointer) sdp);

      biop = (BioSourcePtr) sdp->data.ptrvalue;
      if (biop == NULL) continue;
      orp = biop->org;
      if (orp == NULL) continue;
      onp = orp->orgname;
      if (onp == NULL || onp->choice != 5) continue;

      for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
        if (tep->fixed_level != 0) continue;
        if (StringICmp (tep->level, "superkingdom") != 0) continue;

        numSuperKingdom++;
        if (superKingdomName == NULL) {
          superKingdomName = tep->name;
        } else if (StringICmp (superKingdomName, tep->name) != 0) {
          superKingdomsDiffer = TRUE;
        }
      }
    }

    /* decided once: look past the first descriptor only in these cases */

    walkAll = (Boolean) ((numSuperKingdom > 1 && superKingdomsDiffer && isWP) || isSwissProt);

    for (vnp = descList; vnp != NULL; vnp = walkAll ? vnp->next : NULL) {
      sdp = (SeqDescrPtr) vnp->data.ptrvalue;
      if (sdp == NULL || sdp->extended == 0) continue;

      ovp = (ObjValNodePtr) sdp;

      /* check if descriptor on part already added on segmented bioseq */

      alreadyAdded = FALSE;
      for (seen = awp->srchead; seen != NULL; seen = seen->next) {
        bbp = (BaseBlockPtr) seen->data.ptrvalue;
        if (bbp == NULL) continue;
        if (bbp->entityID == ovp->idx.entityID &&
            bbp->itemID == ovp->idx.itemID &&
            bbp->itemtype == OBJ_SEQDESC) {
          alreadyAdded = TRUE;
          break;
        }
      }
      if (alreadyAdded) continue;

      biop = (BioSourcePtr) sdp->data.ptrvalue;
      bbp = AddSource (awp, &(awp->srchead), biop, NULL);
      if (bbp == NULL) continue;

      bbp->entityID = ovp->idx.entityID;
      bbp->itemID = ovp->idx.itemID;
      bbp->itemtype = OBJ_SEQDESC;

      isp = (IntSrcBlockPtr) bbp;
      isp->loc = SeqLocMerge (target, &fullLoc, NULL, FALSE, TRUE, FALSE);
      isp->left = 0;
      isp->right = bsp->length - 1;
      isp->is_descriptor = TRUE;
    }

    SeqIdFree (fullInt.id);
  }

  ValNodeFree (descList);

  /* ---- mode 3: source features ---- */

  if (awp->contig && (! awp->showconsource)) return;

  /* features are indexed on parent if segmented */

  bsp = awp->parent;

  for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
       sfp != NULL;
       sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_BIOSRC, 0, &fcontext)) {

    ivals = fcontext.ivals;
    numivals = fcontext.numivals;
    if (ivals == NULL || numivals <= 0) continue;

    /* the last interval decides whether the feature belongs to this range */

    idx = (numivals - 1) * 2;
    start = ivals [idx];
    stop = ivals [idx + 1];
    if (stop < from || stop > to) continue;
    if (ajp->ajp.slp != NULL && SeqLocCompare (sfp->location, ajp->ajp.slp) <= 0) continue;

    biop = (BioSourcePtr) sfp->data.value.ptrvalue;
    bbp = AddSource (awp, &(awp->srchead), biop, sfp->comment);
    if (bbp == NULL) continue;

    bbp->entityID = fcontext.entityID;
    bbp->itemID = fcontext.itemID;
    bbp->itemtype = OBJ_SEQFEAT;

    isp = (IntSrcBlockPtr) bbp;

    /* points, fuzzy intervals and id-less locations are copied verbatim; */
    /* everything else is merged onto the target with partials restored   */

    if ((sfp->location != NULL && sfp->location->choice == SEQLOC_PNT) ||
        s_isFuzzyLoc (sfp->location) ||
        SeqLocId (sfp->location) == NULL) {
      isp->loc = AsnIoMemCopy ((Pointer) sfp->location,
                               (AsnReadFunc) SeqLocAsnRead,
                               (AsnWriteFunc) SeqLocAsnWrite);
    } else {
      CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
      hasNulls = LocationHasNullsBetween (sfp->location);
      isp->loc = SeqLocMerge (target, sfp->location, NULL, FALSE, TRUE, hasNulls);
      SetSeqLocPartial (isp->loc, noLeft, noRight);
    }
    isp->left = fcontext.left;
    isp->right = fcontext.right;
    isp->comment = sfp->comment;

    if (ajp->ajp.slp == NULL) continue;

    /* remap onto the requested sub-location */

    sip = SeqIdParse ("lcl|dummy");
    left = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_LEFT_END);
    right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END);

    /* strand and split are set but not read again in this function */

    strand = SeqLocStrand (ajp->ajp.slp);
    split = FALSE;

    newloc = SeqLocReMapEx (sip, ajp->ajp.slp, isp->loc, 0, FALSE, ajp->masterStyle, ajp->relaxedMapping);
    SeqIdFree (sip);
    if (newloc != NULL) {
      A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
      SeqLocFree (isp->loc);
      isp->loc = newloc;
      isp->left = left;
      isp->right = right;
    }
  }
}
5713 
GetSourcesOnSeg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)5714 static Boolean LIBCALLBACK GetSourcesOnSeg (
5715   SeqLocPtr slp,
5716   SeqMgrSegmentContextPtr context
5717 )
5718 
5719 {
5720   Asn2gbWorkPtr  awp;
5721   BioseqPtr      bsp;
5722   Int4           from;
5723   SeqLocPtr      loc;
5724   SeqEntryPtr    oldscope;
5725   SeqEntryPtr    sep;
5726   SeqIdPtr       sip;
5727   Int4           to;
5728 
5729   if (slp == NULL || context == NULL) return FALSE;
5730   awp = (Asn2gbWorkPtr) context->userdata;
5731 
5732   from = context->cumOffset;
5733   to = from + context->to - context->from;
5734 
5735   sip = SeqLocId (slp);
5736   if (sip == NULL) {
5737     loc = SeqLocFindNext (slp, NULL);
5738     if (loc != NULL) {
5739       sip = SeqLocId (loc);
5740     }
5741   }
5742   if (sip == NULL) return TRUE;
5743 
5744   /* biosource descriptors only on parts within entity */
5745 
5746   sep = GetTopSeqEntryForEntityID (awp->entityID);
5747   oldscope = SeqEntrySetScope (sep);
5748   bsp = BioseqFind (sip);
5749   SeqEntrySetScope (oldscope);
5750 
5751   if (bsp != NULL) {
5752     GetSourcesOnBioseq (awp, awp->target, bsp, from, to, NULL);
5753     return TRUE;
5754   }
5755 
5756   /* if we ever want to fetch remote sources, code goes here */
5757 
5758 #if 0
5759   Uint2          entityID;
5760 
5761   /* may remote fetch genome component if not already in memory */
5762 
5763   bsp = BioseqLockById (sip);
5764 
5765   if (bsp == NULL) return TRUE;
5766 
5767   entityID = ObjMgrGetEntityIDForPointer (bsp);
5768 
5769   if (entityID != awp->entityID) {
5770 
5771     /* if segment not packaged in record, may need to feature index it */
5772 
5773     if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
5774       SeqMgrIndexFeatures (entityID, NULL);
5775     }
5776 
5777     /* collect features indexed on the remote bioseq */
5778 
5779     from = 0;
5780     to = bsp->length - 1;
5781   }
5782 
5783   GetSourcesOnBioseq (awp, awp->target, bsp, from, to, NULL);
5784 
5785   BioseqUnlock (bsp);
5786 #endif
5787 
5788   return TRUE;
5789 }
5790 
5791 /* isIdenticalSource() -- Checks to see if two sources are identical */
5792 /*                        by comparing the actual values in the      */
5793 /*                        fields.  This only gets called if the two  */
5794 /*                        sources hashed the same -- it's a double-  */
5795 /*                        check since two non-identical things will  */
5796 /*                        occassionally hash to the same value.      */
5797 /*                        Now checks for adjacent or overlapping.    */
5798 
isIdenticalSource(IntSrcBlockPtr isp1,IntSrcBlockPtr isp2)5799 static Boolean isIdenticalSource (IntSrcBlockPtr isp1, IntSrcBlockPtr isp2)
5800 {
5801   OrgModPtr     omp1;
5802   OrgModPtr     omp2;
5803   SubSourcePtr  ssp1;
5804   SubSourcePtr  ssp2;
5805   ValNodePtr    vnp1;
5806   ValNodePtr    vnp2;
5807   ObjectIdPtr   oip1;
5808   ObjectIdPtr   oip2;
5809   DbtagPtr      dbt1;
5810   DbtagPtr      dbt2;
5811 
5812   if (isp1->is_focus != isp2->is_focus)
5813     return FALSE;
5814 
5815   /* Compare the taxonomy names */
5816 
5817   if (StringICmp(isp1->taxname,isp2->taxname) != 0)
5818     return FALSE;
5819 
5820   /* Compare the comment */
5821 
5822   if (StringICmp(isp1->comment,isp2->comment) != 0)
5823     return FALSE;
5824 
5825   /* Compare the org mods */
5826 
5827   omp1 = isp1->omp;
5828   omp2 = isp2->omp;
5829   while (omp1 != NULL && omp2 != NULL)
5830     {
5831       if (omp1->subtype != omp2->subtype)
5832         return FALSE;
5833       if (StringICmp (omp1->subname, omp2->subname) != 0)
5834         return FALSE;
5835       omp1 = omp1->next;
5836       omp2 = omp2->next;
5837     }
5838 
5839   if (omp1 != NULL || omp2 != NULL)
5840     return FALSE;
5841 
5842   /* Compare the subtypes */
5843 
5844   ssp1 = isp1->ssp;
5845   ssp2 = isp2->ssp;
5846 
5847   while (ssp1 != NULL && ssp2 != NULL)
5848     {
5849       if (ssp1->subtype != ssp2->subtype)
5850         return FALSE;
5851       if (StringICmp(ssp1->name, ssp2->name) != 0)
5852         return FALSE;
5853       ssp1 = ssp1->next;
5854       ssp2 = ssp2->next;
5855     }
5856 
5857   if (ssp1 != NULL || ssp2 != NULL)
5858     return FALSE;
5859 
5860   /* Compare the DB tags */
5861 
5862   vnp1 = isp1->vnp;
5863   vnp2 = isp2->vnp;
5864 
5865   while (vnp1 != NULL && vnp2 != NULL)
5866     {
5867       dbt1 = (DbtagPtr) vnp1->data.ptrvalue;
5868       dbt2 = (DbtagPtr) vnp2->data.ptrvalue;
5869 
5870       if ((dbt1 != NULL) && (dbt2 != NULL)) {
5871         if (StringCmp (dbt1->db, dbt2->db) != 0)
5872           return FALSE;
5873 
5874         oip1 = dbt1->tag;
5875         oip2 = dbt2->tag;
5876         if ((oip1 != NULL) && (oip2 != NULL)) {
5877           if (oip1->str != NULL) {
5878             if (StringICmp(oip1->str, oip2->str) != 0)
5879               return FALSE;
5880           } else  {
5881             if (oip1->id != oip2->id)
5882               return FALSE;
5883           }
5884         }
5885         else if (oip1 != NULL)
5886           return FALSE;
5887         else if (oip2 != NULL)
5888           return FALSE;
5889       }
5890       else if (dbt1 != NULL)
5891         return FALSE;
5892       else if (dbt2 != NULL)
5893         return FALSE;
5894 
5895       vnp1 = vnp1->next;
5896       vnp2 = vnp2->next;
5897     }
5898 
5899   if (vnp1 != NULL || vnp2 != NULL)
5900     return FALSE;
5901 
5902   /* now check for not adjacent or overlapping */
5903 
5904   if (isp2->right + 1 < isp1->left) return FALSE;
5905 
5906   /* If it passed all checks, then they */
5907   /* are the same, so return true.      */
5908 
5909   return TRUE;
5910 }
5911 
/* CleanupPackedSeqInt rebuilds the contents of a SEQLOC_PACKED_INT in
   place from copies of the SEQLOC_INT pieces returned by SeqLocFindNext.
   If exactly one interval remains, the packed-int wrapper is collapsed so
   that location itself takes on that interval's choice and data.  Does
   nothing when location is NULL, is not a packed-int, is empty, or when
   no interval could be collected. */

static void CleanupPackedSeqInt (SeqLocPtr location)

{
  SeqLocPtr  head = NULL;
  SeqIntPtr  loc;
  SeqLocPtr  nxt;
  SeqLocPtr  old;
  SeqIntPtr  sintp;
  SeqLocPtr  slp;

  if (location == NULL || location->choice != SEQLOC_PACKED_INT || location->data.ptrvalue == NULL) return;

  /* collect a private copy of every interval */

  slp = SeqLocFindNext (location, NULL);
  while (slp != NULL) {
    if (slp->choice == SEQLOC_INT) {
      sintp = (SeqIntPtr) slp->data.ptrvalue;
      if (sintp != NULL) {
        loc = AsnIoMemCopy (sintp, (AsnReadFunc) SeqIntAsnRead,
                            (AsnWriteFunc) SeqIntAsnWrite);
        ValNodeAddPointer (&head, SEQLOC_INT, loc);
      }
    }
    slp = SeqLocFindNext (location, slp);
  }
  if (head == NULL) return;

  /* Release the old chain one node at a time.  A single SeqLocFree on the
     first node (the previous code) is believed to free only that node and
     so leak the rest; unlinking each node first keeps this loop correct
     either way. */

  old = (SeqLocPtr) location->data.ptrvalue;
  while (old != NULL) {
    nxt = old->next;
    old->next = NULL;
    SeqLocFree (old);
    old = nxt;
  }
  location->data.ptrvalue = head;

  if (head->next != NULL) return;

  /* here seqloc_packed_int points to a single location element, so no need for seqloc_packed_int parent */

  location->choice = head->choice;
  location->data.ptrvalue = (Pointer) head->data.ptrvalue;
  MemFree (head);
}
5946 
x_NotSpecialTaxName(CharPtr taxname)5947 static Boolean x_NotSpecialTaxName (
5948   CharPtr taxname
5949 )
5950 
5951 {
5952   if (StringHasNoText (taxname)) return TRUE;
5953 
5954   if (StringICmp (taxname, "synthetic construct") == 0) return FALSE;
5955   if (StringICmp (taxname, "artificial sequence") == 0) return FALSE;
5956   if (StringStr (taxname, "vector") != NULL) return FALSE;
5957   if (StringStr (taxname, "Vector") != NULL) return FALSE;
5958 
5959   return TRUE;
5960 }
5961 
AddSourceFeatBlock(Asn2gbWorkPtr awp)5962 NLM_EXTERN void AddSourceFeatBlock (
5963   Asn2gbWorkPtr awp
5964 )
5965 
5966 {
5967   IntAsn2gbJobPtr    ajp;
5968   Asn2gbSectPtr      asp;
5969   BaseBlockPtr       bbp;
5970   BioSourcePtr       biop;
5971   BioseqPtr          bsp;
5972   SeqFeatPtr         cds;
5973   SeqMgrFeatContext  context;
5974   BioseqPtr          dna;
5975   SeqLocPtr          duploc;
5976   Boolean            excise;
5977   GBFeaturePtr       gbfeat = NULL;
5978   GBSeqPtr           gbseq;
5979   ValNodePtr         head = NULL;
5980   IntSrcBlockPtr     isp;
5981   IntSrcBlockPtr     lastisp;
5982   IntSrcBlockPtr     descrIsp;
5983   ValNodePtr         next;
5984   OrgRefPtr          orp;
5985   Char               pfx [128], sfx [128];
5986   ValNodePtr         PNTR prev;
5987   SeqDescrPtr        sdp;
5988   SeqInt             sint;
5989   SeqIdPtr           sip;
5990   SeqLocPtr          slp;
5991   Int4               source_count = 0;
5992   CharPtr            str;
5993   BioseqPtr          target = NULL;
5994   CharPtr            taxname;
5995   ValNode            vn;
5996   ValNodePtr         vnp;
5997   Boolean            descHasFocus = FALSE;
5998   StringItemPtr      ffstring;
5999 
6000   if (awp == NULL) return;
6001   ajp = awp->ajp;
6002   if (ajp == NULL) return;
6003   asp = awp->asp;
6004   if (asp == NULL) return;
6005   bsp = awp->bsp;
6006   if (bsp == NULL) return;
6007 
6008   ffstring = FFGetString(ajp);
6009   if ( ffstring == NULL ) return;
6010 
6011   pfx [0] = '\0';
6012   sfx [0] = '\0';
6013 
6014   /* collect biosources on bioseq */
6015 
6016   awp->srchead = NULL;
6017 
6018   if (ISA_aa (bsp->mol)) {
6019 
6020     /* if protein, get sources applicable to DNA location of CDS */
6021 
6022     sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
6023     if (sdp != NULL && sdp->choice == Seq_descr_source) {
6024       biop = (BioSourcePtr) sdp->data.ptrvalue;
6025       if (biop != NULL) {
6026         orp = biop->org;
6027         if (orp != NULL) {
6028           taxname = orp->taxname;
6029           if (StringHasNoText (taxname) || x_NotSpecialTaxName (taxname)) {
6030             cds = SeqMgrGetCDSgivenProduct (bsp, &context);
6031             if (cds != NULL) {
6032               dna = BioseqFindFromSeqLoc (cds->location);
6033               if (dna != NULL) {
6034                 GetSourcesOnBioseq (awp, dna, dna, context.left, context.right, cds);
6035                 target = dna;
6036               }
6037             }
6038           }
6039         }
6040       }
6041     }
6042   }
6043 
6044   if (awp->srchead == NULL) {
6045     GetSourcesOnBioseq (awp, bsp, bsp, awp->from, awp->to, NULL);
6046     target = bsp;
6047   }
6048 
6049   if (bsp->repr == Seq_repr_seg) {
6050 
6051     /* collect biosource descriptors on local parts */
6052 
6053     SeqMgrExploreSegments (bsp, (Pointer) awp, GetSourcesOnSeg);
6054     target = awp->target;
6055   }
6056 
6057   head = awp->srchead;
6058   awp->srchead = NULL;
6059 
6060   if (head == NULL && (awp->format != FTABLE_FMT || awp->mode == DUMP_MODE)) {
6061 
6062     if (ajp->gbseq) {
6063       gbseq = &asp->gbseq;
6064     } else {
6065       gbseq = NULL;
6066     }
6067 
6068     sint.from = 0;
6069     sint.to = bsp->length - 1;
6070     sint.strand = Seq_strand_plus;
6071     sint.id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
6072     sint.if_from = NULL;
6073     sint.if_to = NULL;
6074 
6075     vn.choice = SEQLOC_INT;
6076     vn.data.ptrvalue = (Pointer) &sint;
6077     vn.next = NULL;
6078 
6079     FFStartPrint (ffstring, awp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE);
6080 
6081     /*
6082     for (sip = bsp->id; sip != NULL; sip = sip->next) {
6083       if (sip->choice == SEQID_GI) {
6084         currGi = (BIG_ID) sip->data.intvalue;
6085       }
6086     }
6087     */
6088     if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans &&
6089         (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
6090       sprintf (pfx, "<span id=\"feature_%s_source_0\" class=\"feature\">", awp->currAccVerLabel);
6091     }
6092 
6093     FFAddOneString(ffstring, "source", FALSE, FALSE, TILDE_IGNORE);
6094     FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE);
6095 
6096     if (gbseq != NULL) {
6097       gbfeat = GBFeatureNew ();
6098       if (gbfeat != NULL) {
6099         gbfeat->key = StringSave ("source");
6100       }
6101     }
6102 
6103     str = FFFlatLoc (ajp, bsp, &vn, (Boolean) (awp->style == MASTER_STYLE), FALSE);
6104     if ( GetWWW(ajp) ) {
6105       FF_www_featloc (ffstring, str);
6106     } else {
6107       FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
6108     }
6109 
6110     if (gbseq != NULL) {
6111       if (gbfeat != NULL) {
6112         if (! StringHasNoText (str)) {
6113           gbfeat->location = StringSave (str);
6114         } else {
6115           gbfeat->location = StringSave ("");
6116         }
6117       }
6118     }
6119 
6120     MemFree (str);
6121 
6122     if (ajp->flags.needOrganismQual) {
6123       FFAddNewLine(ffstring);
6124       FFAddTextToString (ffstring, "/organism=\"", "unknown", "\"", FALSE, TRUE, TILDE_TO_SPACES);
6125 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
6126     } else {
6127       FFAddNewLine(ffstring);
6128       FFAddTextToString (ffstring, "/organism=\"", "unknown", "\"", FALSE, TRUE, TILDE_TO_SPACES);
6129 #endif
6130     }
6131 
6132     str = GetMolTypeQual (bsp);
6133     if (StringICmp (str, "ncRNA") == 0) {
6134       str = "other RNA";
6135     }
6136     if (str == NULL) {
6137       switch (bsp->mol) {
6138         case Seq_mol_dna :
6139           str = "unassigned DNA";
6140           break;
6141         case Seq_mol_rna :
6142           str = "unassigned RNA";
6143           break;
6144         case Seq_mol_aa :
6145           break;
6146         default :
6147           str = "unassigned DNA";
6148           break;
6149       }
6150     }
6151     if (str != NULL) {
6152       FFAddNewLine(ffstring);
6153       FFAddTextToString (ffstring, "/mol_type=\"", str, "\"", FALSE, TRUE, TILDE_TO_SPACES);
6154     }
6155 
6156     if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans &&
6157         (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
6158       sprintf (sfx, "</span>");
6159     }
6160 
6161     str = FFEndPrintEx (ajp, ffstring, awp->format, 5, 21, 5, 21, "FT", pfx, sfx);
6162 
6163     bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, SOURCEFEAT_BLOCK, sizeof (IntSrcBlock));
6164     if (bbp != NULL) {
6165       bbp->section = awp->currsection;
6166       bbp->string = str;
6167     } else {
6168       MemFree(str);
6169     }
6170     FFRecycleString(ajp, ffstring);
6171 
6172     if (awp->afp != NULL) {
6173       DoImmediateFormat (awp->afp, (BaseBlockPtr) bbp);
6174     }
6175 
6176     /* optionally populate gbseq for XML-ized GenBank format */
6177 
6178     if (gbseq != NULL) {
6179       if (gbfeat != NULL) {
6180         AddFeatureToGbseq (gbseq, gbfeat, str, NULL);
6181       }
6182     }
6183 
6184     return;
6185   }
6186 
6187   if (head == NULL) return;
6188 
6189   /* sort by hash values */
6190 
6191   head = ValNodeSort (head, SortSourcesByHash);
6192 
6193   /* unique sources, excise duplicates from list */
6194 
6195   prev = &(head);
6196   vnp = head;
6197   lastisp = NULL;
6198   while (vnp != NULL) {
6199     excise = FALSE;
6200     next = vnp->next;
6201     isp = (IntSrcBlockPtr) vnp->data.ptrvalue;
6202     if (isp->is_descriptor && isp->is_focus)
6203       descHasFocus = TRUE;
6204     if (lastisp != NULL) {
6205       if (isp != NULL) {
6206         if (lastisp->is_focus == isp->is_focus &&
6207             lastisp->orghash == isp->orghash &&
6208             lastisp->xrfhash == isp->xrfhash) {
6209 
6210           /* check for identical modifiers */
6211 
6212           if (lastisp->modhash == isp->modhash &&
6213               lastisp->subhash == isp->subhash) {
6214 
6215             excise = isIdenticalSource (isp, lastisp);
6216 
6217           /* or modifiers only in lastisp (e.g., on part bioseq) */
6218 
6219           } else if (isp->modhash == 0 && isp->subhash == 0) {
6220             excise = isIdenticalSource (isp, lastisp);
6221           }
6222         }
6223       }
6224     }
6225     if (awp->mode == DUMP_MODE) {
6226       excise = FALSE;
6227     }
6228     /* does not fuse equivalent source features for local, general, refseq, and 2+6 genbank ids */
6229     if (excise && awp->sourcePubFuse) {
6230       *prev = vnp->next;
6231       vnp->next = NULL;
6232 
6233       /* combine locations of duplicate sources */
6234 
6235       if (lastisp != NULL) {
6236         slp = SeqLocMerge (target, lastisp->loc, isp->loc, FALSE, TRUE, FALSE);
6237         lastisp->loc = SeqLocFree (lastisp->loc);
6238         lastisp->loc = slp;
6239         lastisp->left = MIN (lastisp->left,isp->left);
6240         lastisp->right = MAX (lastisp->right, isp->right);
6241       }
6242 
6243       /* and remove duplicate source */
6244 
6245       SeqLocFree (isp->loc);
6246       MemFree (isp);
6247       ValNodeFree (vnp);
6248 
6249     } else {
6250 
6251       prev = &(vnp->next);
6252       lastisp = isp;
6253     }
6254     vnp = next;
6255   }
6256 
6257   /* Sort again, by location this time */
6258 
6259   head = ValNodeSort (head, SortSourcesByPos);
6260 
6261   /* If the descriptor has a focus, then subtract */
6262   /* out all the other source locations.          */
6263 
6264   descrIsp = (IntSrcBlockPtr) head->data.ptrvalue; /* Sorted 1st by now */
6265 
6266   if ((descHasFocus) && (! descrIsp->is_synthetic)) {
6267 
6268     vnp = head;
6269     duploc = AsnIoMemCopy ((Pointer) descrIsp->loc,
6270                            (AsnReadFunc) SeqLocAsnRead,
6271                            (AsnWriteFunc) SeqLocAsnWrite);
6272     vnp = vnp->next;
6273     while (vnp != NULL) {
6274       isp = (IntSrcBlockPtr) vnp->data.ptrvalue;
6275       if (SeqLocAinB (descrIsp->loc, isp->loc) >= 0) {
6276         vnp = NULL; /* break the chain */
6277         descrIsp->loc = SeqLocFree (descrIsp->loc);
6278         descrIsp->loc = duploc;
6279         duploc = NULL;
6280       } else {
6281         descrIsp->loc = SeqLocSubtract (descrIsp->loc, isp->loc);
6282         vnp = vnp->next;
6283       }
6284     }
6285     CleanupPackedSeqInt (descrIsp->loc);
6286     descrIsp->left  = SeqLocStart (descrIsp->loc);
6287     descrIsp->right = SeqLocStop (descrIsp->loc);
6288     SeqLocFree (duploc);
6289   }
6290 
6291   /* if features completely subtracted descriptor
6292      intervals, suppress in release, entrez modes */
6293 
6294   if (descrIsp->loc == NULL && ajp->flags.hideEmptySource && head->next != NULL) {
6295     vnp = head->next;
6296     head->next = NULL;
6297     ValNodeFreeData (head);
6298     head = vnp;
6299   }
6300 
6301   /* finally link into blocks for current section */
6302 
6303   ValNodeLink (&(awp->lastblock), head);
6304   vnp = awp->lastblock;
6305   if (vnp == NULL) return;
6306   while (vnp->next != NULL) {
6307     vnp = vnp->next;
6308   }
6309 
6310   awp->lastblock = vnp;
6311   if (awp->blockList == NULL) {
6312     awp->blockList = vnp;
6313   }
6314   FFRecycleString(ajp, ffstring);
6315 
6316   for (vnp = head; vnp != NULL; vnp = vnp->next) {
6317     isp = (IntSrcBlockPtr) vnp->data.ptrvalue;
6318     if (isp == NULL) continue;
6319     isp->source_count = source_count;
6320     source_count++;
6321   }
6322 
6323   if (awp->afp != NULL) {
6324     for (vnp = head; vnp != NULL; vnp = vnp->next) {
6325       isp = (IntSrcBlockPtr) vnp->data.ptrvalue;
6326       if (isp == NULL) continue;
6327       DoImmediateFormat (awp->afp, (BaseBlockPtr) isp);
6328     }
6329   }
6330 
6331 }
6332 
IsCDD(SeqFeatPtr sfp)6333 static Boolean IsCDD (
6334   SeqFeatPtr sfp
6335 )
6336 
6337 {
6338   DbtagPtr    dbt;
6339   ValNodePtr  vnp;
6340 
6341   for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
6342     dbt = (DbtagPtr) vnp->data.ptrvalue;
6343     if (dbt != NULL && StringCmp (dbt->db, "CDD") == 0) return TRUE;
6344   }
6345 
6346   return FALSE;
6347 }
6348 
SetIfpFeatCount(IntFeatBlockPtr ifp,IntAsn2gbJobPtr ajp,Asn2gbWorkPtr awp,Boolean isProt)6349 NLM_EXTERN void SetIfpFeatCount (
6350   IntFeatBlockPtr ifp,
6351   IntAsn2gbJobPtr ajp,
6352   Asn2gbWorkPtr awp,
6353   Boolean isProt
6354 )
6355 
6356 {
6357   FeatBlockPtr      fbp;
6358   Uint1             featdeftype;
6359   IntAsn2gbSectPtr  iasp;
6360   Boolean           is_other = FALSE;
6361 
6362   if (ifp == NULL || ajp == NULL || awp == NULL) return;
6363   iasp = (IntAsn2gbSectPtr) awp->asp;
6364   if (iasp == NULL) return;
6365 
6366   fbp = (FeatBlockPtr) ifp;
6367 
6368   featdeftype = fbp->featdeftype;
6369 
6370   if (featdeftype == FEATDEF_COMMENT) {
6371     featdeftype = FEATDEF_misc_feature;
6372   }
6373 
6374   if (! isProt) {
6375     if (featdeftype == FEATDEF_REGION || featdeftype == FEATDEF_BOND || featdeftype == FEATDEF_SITE) {
6376       featdeftype = FEATDEF_misc_feature;
6377     }
6378   }
6379 
6380   if (ajp->format == GENPEPT_FMT && isProt) {
6381     if (ifp->mapToPep) {
6382       if (featdeftype >= FEATDEF_preprotein && featdeftype <= FEATDEF_transit_peptide_aa) {
6383         featdeftype = FEATDEF_preprotein;
6384       }
6385     }
6386   }
6387 
6388   if (featdeftype == FEATDEF_Imp_CDS) {
6389     featdeftype = FEATDEF_CDS;
6390   }
6391   if (featdeftype == FEATDEF_preRNA) {
6392     featdeftype = FEATDEF_precursor_RNA;
6393   }
6394   if (featdeftype == FEATDEF_otherRNA) {
6395     featdeftype = FEATDEF_misc_RNA;
6396   }
6397   if (featdeftype == FEATDEF_mat_peptide_aa) {
6398     featdeftype = FEATDEF_mat_peptide;
6399   }
6400   if (featdeftype == FEATDEF_sig_peptide_aa) {
6401     featdeftype = FEATDEF_sig_peptide;
6402   }
6403   if (featdeftype == FEATDEF_transit_peptide_aa) {
6404     featdeftype = FEATDEF_transit_peptide;
6405   }
6406 
6407   if (ajp->refseqConventions || awp->isRefSeq) {
6408     is_other = TRUE;
6409   }
6410 
6411   if (! isProt) {
6412     if (featdeftype == FEATDEF_preprotein) {
6413       if (! is_other) {
6414         featdeftype = FEATDEF_misc_feature;
6415       }
6416     }
6417   }
6418 
6419   if (featdeftype == FEATDEF_CLONEREF) {
6420     if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
6421       featdeftype = FEATDEF_misc_feature;
6422     }
6423   }
6424 
6425   if (featdeftype == FEATDEF_repeat_unit && (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE)) {
6426     featdeftype = FEATDEF_repeat_region;
6427   }
6428 
6429   if (featdeftype < FEATDEF_MAX) {
6430     ifp->feat_count = iasp->feat_counts [featdeftype];
6431     (iasp->feat_counts [featdeftype])++;
6432   }
6433 }
6434 
/* GetFeatsOnCdsProduct walks the indexed features on the protein product
   pbsp of a coding region and adds a FEATURE_BLOCK, flagged mapToNuc, for
   each region, site, bond, mat/sig/transit peptide, preprotein or
   propeptide whose dnaStop lies within awp->from..awp->to.

   A feature is skipped when it is hidden by awp->hideSitesBondsRegions or
   awp->hideCddFeats, when it duplicates the previously accepted candidate
   (same subtype and extremes, compatible annotation, identical ASN.1), or,
   if ajp->ajp.slp is set, when its nucleotide mapping does not remap onto
   that sublocation.

   cds  : coding region feature whose product is pbsp
   nbsp : nucleotide bioseq the features are mapped to
   pbsp : protein product bioseq (must be amino acid)
   Returns immediately on NULL arguments or when awp->hideCdsProdFeats. */

static void GetFeatsOnCdsProduct (
  SeqFeatPtr cds,
  BioseqPtr nbsp,
  BioseqPtr pbsp,
  IntAsn2gbJobPtr ajp,
  Asn2gbWorkPtr awp
)

{
  SeqLocPtr          dnaloc;
  SeqIdPtr           dummy;
  FeatBlockPtr       fbp;
  SeqIdPtr           id;
  IntFeatBlockPtr    ifp;
  Boolean            isRefSeq = FALSE;
  SeqLocPtr          merged;
  SeqMgrFeatContext  pcontext;
  Int4               prevleft = 0;
  Int4               prevright = 0;
  SeqAnnotPtr        prevsap = NULL;
  SeqFeatPtr         prevsfp = NULL;
  SeqFeatPtr         prt;
  SeqLocPtr          remapped;
  Boolean            siteBondRegion;
  Boolean            skip;

  if (cds == NULL || ajp == NULL || awp == NULL) return;
  if (nbsp == NULL || pbsp == NULL || (! ISA_aa (pbsp->mol))) return;

  if (awp->hideCdsProdFeats) return;

  /* computed for the (currently disabled) propeptide test below */

  for (id = nbsp->id; id != NULL; id = id->next) {
    if (id->choice == SEQID_OTHER) {
      isRefSeq = TRUE;
    }
  }

  /* explore mat_peptides, sites, etc. */

  for (prt = SeqMgrGetNextFeature (pbsp, NULL, 0, 0, &pcontext);
       prt != NULL;
       prt = SeqMgrGetNextFeature (pbsp, prt, 0, 0, &pcontext)) {

    siteBondRegion = (Boolean) (pcontext.featdeftype == FEATDEF_REGION ||
                                pcontext.featdeftype == FEATDEF_SITE ||
                                pcontext.featdeftype == FEATDEF_BOND);

    /* only these product feature types are candidates */

    if ((! siteBondRegion) &&
        pcontext.featdeftype != FEATDEF_mat_peptide_aa &&
        pcontext.featdeftype != FEATDEF_sig_peptide_aa &&
        pcontext.featdeftype != FEATDEF_transit_peptide_aa &&
        pcontext.featdeftype != FEATDEF_preprotein &&
        pcontext.featdeftype != FEATDEF_propeptide_aa /* || ! isRefSeq */) {
      continue;
    }

    /* hide site, bond, and region features */

    if (awp->hideSitesBondsRegions && siteBondRegion) continue;

    /* passing this test prevents mapping of COG CDD region features */

    if (awp->hideCddFeats && pcontext.featdeftype == FEATDEF_REGION && IsCDD (prt)) continue;

    /* feature must end within the requested nucleotide range */

    if (pcontext.dnaStop < awp->from || pcontext.dnaStop > awp->to) continue;

    /* suppress duplicate features (on protein) */

    skip = FALSE;
    if (prevsfp != NULL && prevsap != NULL &&
        prevsfp->idx.subtype == prt->idx.subtype &&
        prevleft == pcontext.left &&
        prevright == pcontext.right &&
        (prevsap == pcontext.sap ||
         (prevsap->desc == NULL && pcontext.sap->desc == NULL)) &&
        AsnIoMemComp (prevsfp, prt, (AsnWriteFunc) SeqFeatAsnWrite)) {
      skip = TRUE;
    }

    /* make sure feature maps within nucleotide sublocation */

    if ((! skip) && ajp->ajp.slp != NULL) {
      dnaloc = aaFeatLoc_to_dnaFeatLoc (cds, prt->location);
      merged = SeqLocMerge (nbsp, dnaloc, NULL, FALSE, TRUE, FALSE);
      if (merged == NULL) {
        skip = TRUE;
      } else {
        dummy = SeqIdParse ("lcl|dummy");
        remapped = SeqLocReMapEx (dummy, ajp->ajp.slp, merged, 0, FALSE, ajp->masterStyle, ajp->relaxedMapping);
        SeqIdFree (dummy);
        SeqLocFree (merged);
        if (remapped == NULL) {
          skip = TRUE;
        }
        SeqLocFree (remapped);
      }
      SeqLocFree (dnaloc);
    }

    if (! skip) {

      fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntFeatBlock));
      if (fbp != NULL) {

        fbp->entityID = pcontext.entityID;
        fbp->itemID = pcontext.itemID;
        fbp->itemtype = OBJ_SEQFEAT;
        fbp->featdeftype = pcontext.featdeftype;

        ifp = (IntFeatBlockPtr) fbp;
        ifp->mapToNuc = TRUE;
        ifp->mapToProt = FALSE;
        ifp->mapToGen = FALSE;
        ifp->mapToMrna = FALSE;
        ifp->mapToPep = FALSE;
        ifp->left = 0;
        ifp->right = 0;
        SetIfpFeatCount (ifp, ajp, awp, FALSE);
        ifp->firstfeat = awp->firstfeat;
        awp->firstfeat = FALSE;

        if (awp->afp != NULL) {
          DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
        }
      }
    }

    /* remember this candidate, whether or not it was emitted */

    prevsfp = prt;
    prevsap = pcontext.sap;
    prevleft = pcontext.left;
    prevright = pcontext.right;
  }
}
6577 
/* GetRemoteFeatsOnCdsProduct fetches Bioseqs for the protein product pbsp
   through the job's remotelock callback, cleans up the features in their
   annotations, and adds a FEATURE_BLOCK, flagged mapToNuc, for each
   region, site, bond, mat/sig/transit peptide, preprotein or propeptide
   that is not hidden and (if ajp->ajp.slp is set) remaps onto that
   sublocation.

   The fetched chain is always released before returning, either through
   ajp->remotefree or, when that is NULL, by freeing the Bioseqs and the
   ValNode chain here.

   cds  : coding region feature whose product is pbsp
   nbsp : nucleotide bioseq the features are mapped to
   pbsp : protein product bioseq (must be amino acid)
   Returns immediately on NULL arguments, when awp->hideCdsProdFeats is
   set, when no remotelock callback is installed, or when nothing could be
   fetched. */

static void GetRemoteFeatsOnCdsProduct (
  SeqFeatPtr cds,
  BioseqPtr nbsp,
  BioseqPtr pbsp,
  IntAsn2gbJobPtr ajp,
  Asn2gbWorkPtr awp
)

{
  BioseqPtr        bsp;
  FeatBlockPtr     fbp;
  ValNodePtr       head = NULL;
  IntFeatBlockPtr  ifp;
  Boolean          isRefSeq;
  SeqLocPtr        location;
  SeqLocPtr        newloc;
  SeqFeatPtr       prt;
  ValNodePtr       publist;
  Asn2gbFreeFunc   remotefree;
  Asn2gbLockFunc   remotelock;
  ValNodePtr       remotevnp;
  SeqAnnotPtr      sap;
  SeqFeatPtr       sfp;
  SeqIdPtr         sip;
  SeqLocPtr        slp;
  Boolean          suppress;
  ValNodePtr       vnp;

  if (cds == NULL || ajp == NULL || awp == NULL) return;
  if (nbsp == NULL || pbsp == NULL || (! ISA_aa (pbsp->mol))) return;

  if (awp->hideCdsProdFeats) return;

  if (ajp->remotelock == NULL) return;

  remotelock = ajp->remotelock;
  remotefree = ajp->remotefree;

  sip = SeqIdFindBest (pbsp->id, SEQID_GI);
  if (sip == NULL) return;

  remotevnp = remotelock (sip, ajp->remotedata);
  if (remotevnp == NULL) return;

  /* do cleanup of remotely fetched feature tables */

  for (vnp = remotevnp; vnp != NULL; vnp = vnp->next) {
    bsp = (BioseqPtr) vnp->data.ptrvalue;
    if (bsp == NULL) continue;
    for (sap = bsp->annot; sap != NULL; sap = sap->next) {
      /* presumably type 1 is a feature table - verify against SeqAnnot */
      if (sap->type != 1) continue;
      for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
        publist = NULL;
        CleanUpSeqFeat (sfp, FALSE, FALSE, TRUE, TRUE, &publist);
        sfp->idx.subtype = FindFeatDefType (sfp);
        ValNodeFreeData (publist);
        ValNodeAddPointer (&head, 0, (Pointer) sfp);
      }
    }
  }

  /* No early return when head is NULL: the loop below simply does not
     run, and control still reaches the release of remotevnp at the end
     (the former early return leaked the fetched Bioseqs). */

  /* computed for the (currently disabled) propeptide test below */

  isRefSeq = FALSE;
  for (sip = nbsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_OTHER) {
      isRefSeq = TRUE;
    }
  }

  /* explore mat_peptides, sites, etc. */

  for (vnp = head; vnp != NULL; vnp = vnp->next) {

    prt = (SeqFeatPtr) vnp->data.ptrvalue;
    if (prt == NULL) continue;

    if (prt->idx.subtype == FEATDEF_REGION ||
        prt->idx.subtype == FEATDEF_SITE ||
        prt->idx.subtype == FEATDEF_BOND ||
        prt->idx.subtype == FEATDEF_mat_peptide_aa ||
        prt->idx.subtype == FEATDEF_sig_peptide_aa ||
        prt->idx.subtype == FEATDEF_transit_peptide_aa ||
        prt->idx.subtype == FEATDEF_preprotein ||
        (prt->idx.subtype == FEATDEF_propeptide_aa /* && isRefSeq */)) {

      if (awp->hideSitesBondsRegions && (prt->idx.subtype == FEATDEF_REGION ||
                                         prt->idx.subtype == FEATDEF_SITE ||
                                         prt->idx.subtype == FEATDEF_BOND)) {

        /* hide site, bond, and region features */

      } else if (awp->hideCddFeats && prt->idx.subtype == FEATDEF_REGION && IsCDD (prt)) {

        /* passing this test prevents mapping of COG CDD region features */

      } else {

        suppress = FALSE;

        /* make sure feature maps within nucleotide sublocation */

        if (ajp->ajp.slp != NULL) {
          location = aaFeatLoc_to_dnaFeatLoc (cds, prt->location);
          slp = SeqLocMerge (nbsp, location, NULL, FALSE, TRUE, FALSE);
          if (slp != NULL) {
            sip = SeqIdParse ("lcl|dummy");
            newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle, ajp->relaxedMapping);
            SeqIdFree (sip);
            SeqLocFree (slp);
            if (newloc == NULL) {
              suppress = TRUE;
            }
            SeqLocFree (newloc);
          } else {
            suppress = TRUE;
          }
          SeqLocFree (location);
        }

        if (! suppress) {

          fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntFeatBlock));
          if (fbp != NULL) {

            /* remote features carry no entity/item identifiers here */

            fbp->entityID = 0;
            fbp->itemID = 0;
            fbp->itemtype = OBJ_SEQFEAT;
            fbp->featdeftype = prt->idx.subtype;
            ifp = (IntFeatBlockPtr) fbp;
            ifp->mapToNuc = TRUE;
            ifp->mapToProt = FALSE;
            ifp->mapToGen = FALSE;
            ifp->mapToMrna = FALSE;
            ifp->mapToPep = FALSE;
            ifp->left = 0;
            ifp->right = 0;
            SetIfpFeatCount (ifp, ajp, awp, FALSE);
            ifp->firstfeat = awp->firstfeat;
            awp->firstfeat = FALSE;

            if (awp->afp != NULL) {
              DoImmediateRemoteFeatureFormat (awp->afp, (BaseBlockPtr) fbp, prt);
            }
          }
        }
      }
    }
  }

  /* the list only borrows the feature pointers, so free the nodes alone */

  if (head != NULL) {
    ValNodeFree (head);
  }

  if (remotefree != NULL) {
    remotefree (remotevnp, ajp->remotedata);
  } else {
    /* otherwise free Bioseqs and ValNode chain ourselves */
    for (vnp = remotevnp; vnp != NULL; vnp = vnp->next) {
      bsp = (BioseqPtr) vnp->data.ptrvalue;
      if (bsp != NULL) {
        BioseqFree (bsp);
      }
    }
    ValNodeFree (remotevnp);
  }
}
6754 
NotEMBLorDDBJ(BioseqPtr bsp)6755 static Boolean NotEMBLorDDBJ (
6756   BioseqPtr bsp
6757 )
6758 
6759 {
6760   SeqIdPtr  sip;
6761 
6762   if (bsp == NULL) return TRUE;
6763   for (sip = bsp->id; sip != NULL; sip = sip->next) {
6764     if (sip->choice == SEQID_EMBL || sip->choice == SEQID_TPE) return FALSE;
6765     if (sip->choice == SEQID_DDBJ || sip->choice == SEQID_TPD) return FALSE;
6766   }
6767   return TRUE;
6768 }
6769 
6770 /*
6771 static Boolean EquivProtFeats (
6772   SeqFeatPtr prot1,
6773   SeqFeatPtr prot2
6774 )
6775 
6776 {
6777   ProtRefPtr  prp1, prp2;
6778 
6779   if (prot1 == NULL || prot2 == NULL) return FALSE;
6780   prp1 = (ProtRefPtr) prot1->data.value.ptrvalue;
6781   prp2 = (ProtRefPtr) prot2->data.value.ptrvalue;
6782   if (prp1 == NULL || prp2 == NULL) return FALSE;
6783 
6784   if (! AsnIoMemComp (prp1, prp2, (AsnWriteFunc) ProtRefAsnWrite)) return FALSE;
6785 
6786   if (StringDoesHaveText (prot1->comment) && StringDoesHaveText (prot2->comment)) {
6787     if (StringCmp (prot1->comment, prot2->comment) != 0) return FALSE;
6788   }
6789 
6790   return TRUE;
6791 }
6792 */
6793 
6794 /*
6795 static Boolean EquivProtFeats (
6796   SeqFeatPtr prot1,
6797   SeqFeatPtr prot2
6798 )
6799 
6800 {
6801   SeqFeatPtr  cpy1, cpy2;
6802   Boolean     rsult = FALSE;
6803   SeqLocPtr   tmp;
6804 
6805   if (prot1 == NULL || prot2 == NULL) return FALSE;
6806 
6807   cpy1 = AsnIoMemCopy ((Pointer) prot1,
6808                        (AsnReadFunc) SeqFeatAsnRead,
6809                        (AsnWriteFunc) SeqFeatAsnWrite);
6810   cpy2 = AsnIoMemCopy ((Pointer) prot2,
6811                        (AsnReadFunc) SeqFeatAsnRead,
6812                        (AsnWriteFunc) SeqFeatAsnWrite);
6813   if (cpy1 == NULL || cpy2 == NULL) return FALSE;
6814 
6815   tmp = cpy1->location;
6816   cpy1->location = cpy2->location;
6817 
6818   rsult = AsnIoMemComp (cpy1, cpy2, (AsnWriteFunc) SeqFeatAsnWrite);
6819 
6820   cpy1->location = tmp;
6821   SeqFeatFree (cpy1);
6822   SeqFeatFree (cpy2);
6823 
6824   return rsult;
6825 }
6826 */
6827 
LocInBioseq(SeqLocPtr slp,BioseqPtr bsp)6828 static Boolean LocInBioseq (
6829   SeqLocPtr slp,
6830   BioseqPtr bsp
6831 )
6832 
6833 {
6834   SeqIdPtr  sip;
6835 
6836   if (slp == NULL || bsp == NULL) return FALSE;
6837   sip = SeqLocId (slp);
6838   if (sip == NULL) return FALSE;
6839   return SeqIdIn (sip, bsp->id);
6840 }
6841 
GetFeatsOnBioseq(SeqFeatPtr sfp,SeqMgrFeatContextPtr fcontext)6842 static Boolean LIBCALLBACK GetFeatsOnBioseq (
6843   SeqFeatPtr sfp,
6844   SeqMgrFeatContextPtr fcontext
6845 )
6846 
6847 {
6848   IntAsn2gbJobPtr    ajp;
6849   Asn2gbSectPtr      asp;
6850   Asn2gbWorkPtr      awp;
6851   BioseqPtr          bsp;
6852   Char               buf [41];
6853   SeqFeatPtr         cds;
6854   SeqMgrFeatContext  cdscontext;
6855   FeatBlockPtr       fbp;
6856   SeqLocPtr          firstslp;
6857   SeqFeatPtr         gap;
6858   GBQualPtr          gbq;
6859   /*
6860   SeqFeatPtr         gene;
6861   */
6862   BIG_ID             gi;
6863   GeneRefPtr         grp;
6864   Boolean            has_est_len;
6865   Boolean            has_gap_type;
6866   IntCdsBlockPtr     icp;
6867   Int2               idx;
6868   IntFeatBlockPtr    ifp;
6869   IntPrtBlockPtr     ipp;
6870   Boolean            is_whole;
6871   Int4Ptr            ivals;
6872   Int2               j;
6873   Boolean            juststop = FALSE;
6874   SeqAnnotPtr        lastsap;
6875   SeqFeatPtr         lastsfp;
6876   SeqLocPtr          lastslp;
6877   SeqLocPtr          newloc;
6878   Int2               numivals;
6879   Boolean            okay;
6880   SeqEntryPtr        oldscope;
6881   BioseqPtr          parent;
6882   Boolean            partial5;
6883   Boolean            partial3;
6884   ValNodePtr         ppr;
6885   BioseqPtr          prod;
6886   ProtRefPtr         prp;
6887   Boolean            psdo;
6888   Boolean            pseudo = FALSE;
6889   RNAGenPtr          rgp;
6890   RnaRefPtr          rrp;
6891   SeqEntryPtr        sep;
6892   SeqIntPtr          sintp;
6893   SeqIdPtr           sip;
6894   SeqLocPtr          slp;
6895   Int4               start;
6896   Int4               stop;
6897   Boolean            supr;
6898   TextSeqIdPtr       tsip;
6899   ValNodePtr         vnp;
6900   /*
6901   SeqMgrDescContext  dcontext;
6902   PubdescPtr         pdp;
6903   SeqDescrPtr        sdp;
6904   */
6905 
6906   if (sfp == NULL || fcontext == NULL) return FALSE;
6907   awp = (Asn2gbWorkPtr) fcontext->userdata;
6908   if (awp == NULL) return FALSE;
6909   ajp = awp->ajp;
6910   if (ajp == NULL) return FALSE;
6911   asp = awp->asp;
6912   if (asp == NULL) return FALSE;
6913   bsp = asp->bsp;
6914   if (bsp == NULL) return FALSE;
6915 
6916   if (fcontext->featdeftype == FEATDEF_PUB ||
6917       fcontext->featdeftype == FEATDEF_NON_STD_RESIDUE ||
6918       fcontext->featdeftype == FEATDEF_RSITE ||
6919       fcontext->featdeftype == FEATDEF_SEQ) return TRUE;
6920 
6921   if (fcontext->featdeftype == FEATDEF_BIOSRC) return TRUE;
6922 
6923   if (ajp->flags.validateFeats &&
6924       (fcontext->featdeftype == FEATDEF_BAD ||
6925        fcontext->featdeftype == FEATDEF_virion)) {
6926     return TRUE;
6927   }
6928 
6929   if (ISA_na (bsp->mol) && fcontext->featdeftype == FEATDEF_HET) return TRUE;
6930 
6931   /* check feature customization flags */
6932 
6933   if (awp->hideImpFeats && sfp->data.choice == SEQFEAT_IMP && fcontext->featdeftype != FEATDEF_operon) return TRUE;
6934   if (awp->hideVariations && fcontext->featdeftype == FEATDEF_variation) return TRUE;
6935   if (awp->hideRepeatRegions && fcontext->featdeftype == FEATDEF_repeat_region) return TRUE;
6936   if (awp->hideRepeatRegions && fcontext->featdeftype == FEATDEF_mobile_element) return TRUE;
6937   if (awp->hideGaps && fcontext->featdeftype == FEATDEF_gap) return TRUE;
6938   if (ISA_aa (bsp->mol) && fcontext->featdeftype == FEATDEF_REGION &&
6939       awp->hideCddFeats && IsCDD (sfp)) return TRUE;
6940   if (awp->hideSitesBondsRegions && (fcontext->featdeftype == FEATDEF_REGION ||
6941                                      fcontext->featdeftype == FEATDEF_SITE ||
6942                                      fcontext->featdeftype == FEATDEF_BOND)) return TRUE;
6943 
6944   /* DDBJ does not want to show gene features */
6945 
6946   if (fcontext->seqfeattype == SEQFEAT_GENE && awp->hideGeneFeats) return TRUE;
6947 
6948   /* no longer suppressing comment features that are full length */
6949 
6950   /*
6951   if (fcontext->seqfeattype == SEQFEAT_COMMENT &&
6952       fcontext->left == awp->from && fcontext->right == awp->to) return TRUE;
6953   */
6954 
6955   /*
6956   if (ISA_aa (bsp->mol) && awp->format == GENPEPT_FMT && fcontext->seqfeattype == SEQFEAT_PROT) {
6957     if (fcontext->left == awp->from && fcontext->right == awp->to) {
6958       if (awp->bestprot != sfp) {
6959         if (EquivProtFeats (awp->bestprot, sfp)) return TRUE;
6960       }
6961     }
6962   }
6963   */
6964 
6965   ivals = fcontext->ivals;
6966   numivals = fcontext->numivals;
6967 
6968   /* check to see if last interval is on this awp->from - awp->to range */
6969 
6970   if (ivals != NULL && numivals > 0) {
6971     idx = (numivals - 1) * 2;
6972     start = ivals [idx];
6973     stop = ivals [idx + 1];
6974     if (stop < awp->from || stop > awp->to) {
6975 
6976       /* may need to map sig_peptide on a different segment */
6977 
6978       if (fcontext->seqfeattype == SEQFEAT_CDREGION) {
6979         sip = SeqLocIdForProduct (sfp->product);
6980         bsp = BioseqFind (sip);
6981         GetFeatsOnCdsProduct (sfp, asp->bsp, bsp, ajp, awp);
6982       }
6983 
6984       if (! awp->showAllFeats) return TRUE;
6985 
6986       /* if showing one segment, only show features covering this segment */
6987 
6988       if (fcontext->right < awp->from || fcontext->left > awp->to) return TRUE;
6989 
6990     } else if (fcontext->farloc && NotEMBLorDDBJ (awp->bsp)) {
6991 
6992       /* last interval may not have been mapped to bioseq if far */
6993 
6994       firstslp = NULL;
6995       lastslp = NULL;
6996 
6997       slp = SeqLocFindNext (sfp->location, NULL);
6998       while (slp != NULL) {
6999         if (slp->choice != SEQLOC_NULL) {
7000           lastslp = slp;
7001           if (firstslp == NULL) {
7002             firstslp = slp;
7003           }
7004         }
7005         slp = SeqLocFindNext (sfp->location, slp);
7006       }
7007 
7008       /* !!! EMBL may have different desired behavior on where to map !!! */
7009 
7010       if (firstslp != NULL && SeqLocStrand (firstslp) == Seq_strand_minus) {
7011         slp = firstslp;
7012       } else {
7013         slp = lastslp;
7014       }
7015 
7016       if (slp != NULL) {
7017         sip = SeqLocId (slp);
7018         if (sip != NULL) {
7019           bsp = BioseqFindCore (sip);
7020           if (bsp == NULL || (bsp != awp->parent && bsp != awp->bsp)) {
7021 
7022             return TRUE;
7023           }
7024         }
7025       }
7026     }
7027   }
7028 
7029   /* make sure feature is within sublocation */
7030 
7031   if (ajp->ajp.slp != NULL) {
7032     if (SeqLocCompare (sfp->location, ajp->ajp.slp) == SLC_NO_MATCH) {
7033       slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, TRUE, FALSE);
7034       if (slp == NULL) return TRUE;
7035       sip = SeqIdParse ("lcl|dummy");
7036       newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle, ajp->relaxedMapping);
7037       SeqIdFree (sip);
7038       SeqLocFree (slp);
7039       if (newloc == NULL) return TRUE;
7040       SeqLocFree (newloc);
7041     }
7042   }
7043 
7044   /* suppress duplicate features (on nucleotide) */
7045 
7046   lastsfp = awp->lastsfp;
7047   lastsap = awp->lastsap;
7048   if (lastsfp != NULL && lastsap != NULL) {
7049     if (lastsfp->idx.subtype == sfp->idx.subtype &&
7050         awp->lastleft == fcontext->left &&
7051         awp->lastright == fcontext->right) {
7052         if (lastsap == fcontext->sap ||
7053             (lastsap->desc == NULL && fcontext->sap->desc == NULL)) {
7054         if (AsnIoMemComp (lastsfp, sfp, (AsnWriteFunc) SeqFeatAsnWrite)) {
7055           return TRUE;
7056         }
7057       }
7058     }
7059   }
7060 
7061   /* if RELEASE_MODE, verify that features have all mandatory qualifiers */
7062 
7063   if (ajp->flags.needRequiredQuals) {
7064     okay = FALSE;
7065 
7066     switch (fcontext->featdeftype) {
7067 
7068     case FEATDEF_CDS:
7069       if (ajp->flags.checkCDSproductID) {
7070         /* non-pseudo CDS must have /product */
7071         if (sfp->pseudo) {
7072           pseudo = TRUE;
7073         }
7074         /*
7075         grp = SeqMgrGetGeneXref (sfp);
7076         */
7077         grp = GetGeneByFeat (sfp, &psdo, &supr);
7078         if (psdo) {
7079           pseudo = TRUE;
7080         }
7081         /*
7082         if (grp == NULL) {
7083           sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
7084           oldscope = SeqEntrySetScope (sep);
7085           gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
7086           SeqEntrySetScope (oldscope);
7087           if (gene != NULL) {
7088             grp = (GeneRefPtr) gene->data.value.ptrvalue;
7089             if (gene->pseudo) {
7090               pseudo = TRUE;
7091             }
7092           }
7093         }
7094         */
7095         if (grp != NULL && grp->pseudo) {
7096           pseudo = TRUE;
7097         }
7098         if (sfp->location != NULL) {
7099           if (CheckSeqLocForPartial (sfp->location, &partial5, &partial3)) {
7100             if (partial5 && (! partial3)) {
7101               if (SeqLocLen (sfp->location) <= 5) {
7102                 juststop = TRUE;
7103               }
7104             }
7105           }
7106         }
7107         if (pseudo || juststop) {
7108           okay = TRUE;
7109         } else if (sfp->product != NULL) {
7110           sip = SeqLocIdForProduct (sfp->product);
7111           if (sip != NULL) {
7112             if ((sip->choice == SEQID_GI && sip->data.intvalue > 0) ||
7113                 sip->choice == SEQID_LOCAL) {
7114               sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
7115               oldscope = SeqEntrySetScope (sep);
7116               prod = BioseqFind (sip);
7117               SeqEntrySetScope (oldscope);
7118               if (prod != NULL) {
7119                 for (sip = prod->id; sip != NULL; sip = sip->next) {
7120                   if (sip->choice == SEQID_GENBANK ||
7121                      sip->choice == SEQID_EMBL ||
7122                       sip->choice == SEQID_DDBJ ||
7123                       sip->choice == SEQID_OTHER ||
7124                       sip->choice == SEQID_PATENT ||
7125                       sip->choice == SEQID_TPG ||
7126                       sip->choice == SEQID_TPE ||
7127                       sip->choice == SEQID_TPD ||
7128                       sip->choice == SEQID_GPIPE) {
7129                     tsip = (TextSeqIdPtr) sip->data.ptrvalue;
7130                     if (tsip != NULL && (StringDoesHaveText (tsip->accession))) {
7131                       if (ValidateAccn (tsip->accession) == 0)
7132                       okay = TRUE;
7133                     }
7134                   }
7135                 }
7136               } else if (sip->choice == SEQID_GI && sip->data.intvalue > 0) {
7137                 /* RELEASE_MODE requires that /protein_id is an accession */
7138                 gi = sip->data.intvalue;
7139                 if (GetAccnVerFromServer (gi, buf)) {
7140                   okay = TRUE;
7141                 } else {
7142                   sip = GetSeqIdForGI (gi);
7143                   if (sip != NULL) {
7144                     okay = TRUE;
7145                   }
7146                 }
7147               }
7148             } else if (sip->choice == SEQID_GENBANK ||
7149                        sip->choice == SEQID_EMBL ||
7150                        sip->choice == SEQID_DDBJ ||
7151                        sip->choice == SEQID_OTHER ||
7152                        sip->choice == SEQID_PATENT ||
7153                        sip->choice == SEQID_TPG ||
7154                        sip->choice == SEQID_TPE ||
7155                        sip->choice == SEQID_TPD ||
7156                        sip->choice == SEQID_GPIPE) {
7157               tsip = (TextSeqIdPtr) sip->data.ptrvalue;
7158               if (tsip != NULL && (StringDoesHaveText (tsip->accession))) {
7159                 if (ValidateAccn (tsip->accession) == 0)
7160                 okay = TRUE;
7161               }
7162             }
7163           }
7164         } else {
7165           if (sfp->excpt && (StringDoesHaveText (sfp->except_text))) {
7166             if (StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
7167               okay = TRUE;
7168             }
7169           }
7170         }
7171       } else {
7172         okay = TRUE;
7173       }
7174       if (! okay) {
7175         ajp->relModeError = TRUE;
7176       }
7177       break;
7178 
7179     case FEATDEF_conflict:
7180       if (sfp->cit == NULL) {
7181         /* RefSeq allows conflict with accession in comment instead of sfp->cit */
7182         for (sip = bsp->id; sip != NULL; sip = sip->next) {
7183           if (sip->choice == SEQID_OTHER) {
7184             if (StringDoesHaveText (sfp->comment)) {
7185               okay = TRUE;
7186             }
7187           }
7188         }
7189       }
7190       /* continue on to old_sequence */
7191     case FEATDEF_old_sequence:
7192       /* conflict and old_sequence require a publication printable on the segment */
7193       vnp = sfp->cit;
7194 
7195       if (vnp != NULL && asp->referenceArray != NULL) {
7196         for (ppr = vnp->data.ptrvalue; ppr != NULL; ppr = ppr->next) {
7197           j = MatchRef (ppr, asp->referenceArray, asp->numReferences);
7198           if (j > 0) {
7199             okay = TRUE;
7200             break;
7201           }
7202         }
7203       }
7204       if (! okay) {
7205         /* compare qualifier can now substitute for citation qualifier */
7206         gbq = sfp->qual;
7207         while (gbq != NULL) {
7208           if (StringICmp (gbq->qual, "compare") == 0 && (StringDoesHaveText (gbq->val))) {
7209             okay = TRUE;
7210             break;
7211           }
7212           gbq = gbq->next;
7213         }
7214       }
7215       break;
7216 
7217     case FEATDEF_GENE:
7218       /* gene requires /gene or /locus_tag, but desc or syn can be mapped to /gene */
7219       grp = (GeneRefPtr) sfp->data.value.ptrvalue;
7220       if (grp != NULL) {
7221         if (StringDoesHaveText (grp->locus)) {
7222           okay = TRUE;
7223         }  else if (StringDoesHaveText (grp->locus_tag)) {
7224           okay = TRUE;
7225         } else if (StringDoesHaveText (grp->desc)) {
7226           okay = TRUE;
7227         } else {
7228           vnp = grp->syn;
7229           if (vnp != NULL) {
7230             if (StringDoesHaveText (vnp->data.ptrvalue)) {
7231               okay = TRUE;
7232             }
7233           }
7234         }
7235       }
7236       break;
7237 
7238     case FEATDEF_protein_bind:
7239     case FEATDEF_misc_binding:
7240       /* protein_bind or misc_binding require FTQUAL_bound_moiety */
7241       gbq = sfp->qual;
7242       while (gbq != NULL) {
7243         if (StringICmp (gbq->qual, "bound_moiety") == 0 && (StringDoesHaveText (gbq->val))) {
7244           okay = TRUE;
7245           break;
7246         }
7247         gbq = gbq->next;
7248       }
7249       break;
7250 
7251     case FEATDEF_modified_base:
7252       /* modified_base requires FTQUAL_mod_base */
7253       gbq = sfp->qual;
7254       while (gbq != NULL) {
7255         if (StringICmp (gbq->qual, "mod_base") == 0 && (StringDoesHaveText (gbq->val))) {
7256           okay = TRUE;
7257           break;
7258         }
7259         gbq = gbq->next;
7260       }
7261       break;
7262 
7263     case FEATDEF_gap:
7264       /* gap requires FTQUAL_estimated_length */
7265       gbq = sfp->qual;
7266       while (gbq != NULL) {
7267         if (StringICmp (gbq->qual, "estimated_length") == 0 && (StringDoesHaveText (gbq->val))) {
7268           okay = TRUE;
7269           break;
7270         }
7271         gbq = gbq->next;
7272       }
7273       break;
7274 
7275     case FEATDEF_operon:
7276       /* operon requires FTQUAL_operon */
7277       gbq = sfp->qual;
7278       while (gbq != NULL) {
7279         if (StringICmp (gbq->qual, "operon") == 0 && (StringDoesHaveText (gbq->val))) {
7280           okay = TRUE;
7281           break;
7282         }
7283         gbq = gbq->next;
7284       }
7285       break;
7286 
7287     case FEATDEF_ncRNA:
7288       /* ncRNA requires FTQUAL_ncRNA_class */
7289       gbq = sfp->qual;
7290       while (gbq != NULL) {
7291         if (StringICmp (gbq->qual, "ncRNA_class") == 0 && (StringDoesHaveText (gbq->val))) {
7292           okay = TRUE;
7293           break;
7294         }
7295         gbq = gbq->next;
7296       }
7297       rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
7298       if (rrp != NULL && rrp->ext.choice == 3) {
7299         rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
7300         if (rgp != NULL) {
7301           if (StringDoesHaveText (rgp->_class)) {
7302             okay = TRUE;
7303             break;
7304           }
7305         }
7306       }
7307       break;
7308 
7309     case FEATDEF_mobile_element:
7310       /* mobile_element requires FTQUAL_mobile_element_type */
7311       gbq = sfp->qual;
7312       while (gbq != NULL) {
7313         if (StringICmp (gbq->qual, "mobile_element_type") == 0 && (StringDoesHaveText (gbq->val))) {
7314           okay = TRUE;
7315           break;
7316         }
7317         gbq = gbq->next;
7318       }
7319       break;
7320 
7321     case FEATDEF_assembly_gap:
7322       /* assembly_gap requires FTQUAL_estimated_length and FTQUAL_gap_type */
7323       has_est_len = FALSE;
7324       has_gap_type = FALSE;
7325       gbq = sfp->qual;
7326       while (gbq != NULL) {
7327         if (StringDoesHaveText (gbq->val)) {
7328           if (StringICmp (gbq->qual, "estimated_length") == 0) {
7329             has_est_len = TRUE;
7330           } else if (StringICmp (gbq->qual, "gap_type") == 0) {
7331             has_gap_type = TRUE;
7332           }
7333         }
7334         gbq = gbq->next;
7335       }
7336       if (has_est_len && has_gap_type) {
7337         okay = TRUE;
7338       }
7339       break;
7340 
7341     case FEATDEF_regulatory:
7342       /* regulatory requires FTQUAL_regulatory_class */
7343       gbq = sfp->qual;
7344       while (gbq != NULL) {
7345         if (StringICmp (gbq->qual, "regulatory_class") == 0 && (StringDoesHaveText (gbq->val))) {
7346           okay = TRUE;
7347           break;
7348         }
7349         gbq = gbq->next;
7350       }
7351       break;
7352 
7353     default:
7354       if (fcontext->featdeftype >= FEATDEF_GENE && fcontext->featdeftype < FEATDEF_MAX) {
7355         okay = TRUE;
7356       }
7357       break;
7358     }
7359 
7360     if (okay == FALSE) return TRUE;
7361   }
7362 
7363   /* if RELEASE_MODE, suppress features with location on near segmented Bioseq */
7364 
7365   if (ajp->flags.suppressSegLoc) {
7366     bsp = awp->parent;
7367     if (bsp != NULL && bsp->repr == Seq_repr_seg && SegHasParts (bsp)) {
7368       slp = SeqLocFindNext (sfp->location, NULL);
7369       while (slp != NULL) {
7370         sip = SeqLocId (slp);
7371         if (sip != NULL) {
7372           if (SeqIdIn (sip, bsp->id)) return TRUE;
7373         }
7374         slp = SeqLocFindNext (sfp->location, slp);
7375       }
7376     }
7377   }
7378 
7379   gap = awp->currfargap;
7380   if (gap != NULL && awp->afp != NULL) {
7381     while (gap != NULL && LocInBioseq (gap->location, asp->bsp) && GetOffsetInBioseq (gap->location, asp->bsp, SEQLOC_LEFT_END) < fcontext->left) {
7382 
7383       fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntFeatBlock));
7384       if (fbp != NULL) {
7385         fbp->entityID = 0;
7386         fbp->itemID = 0;
7387         fbp->itemtype = OBJ_SEQFEAT;
7388         fbp->featdeftype = FEATDEF_gap;
7389         ifp = (IntFeatBlockPtr) fbp;
7390         ifp->mapToNuc = FALSE;
7391         ifp->mapToProt = FALSE;
7392         ifp->mapToGen = FALSE;
7393         ifp->mapToMrna = FALSE;
7394         ifp->mapToPep = FALSE;
7395         ifp->left = 0;
7396         ifp->right = 0;
7397         if (bsp != NULL) {
7398           SetIfpFeatCount (ifp, ajp, awp, ISA_aa (bsp->mol));
7399         }
7400         ifp->firstfeat = awp->firstfeat;
7401         awp->firstfeat = FALSE;
7402         if (awp->afp != NULL) {
7403           DoImmediateRemoteFeatureFormat (awp->afp, (BaseBlockPtr) fbp, gap);
7404         }
7405       }
7406 
7407       awp->currfargap = gap->next;
7408       gap = awp->currfargap;
7409     }
7410   }
7411 
7412   /* check for Imp-feat gap that is same as next Seq-lit gap - but need to check against scaffold coordinate */
7413   if (! NotEMBLorDDBJ (awp->bsp)) {
7414     if (gap != NULL && LocInBioseq (gap->location, asp->bsp) && fcontext->featdeftype == FEATDEF_gap &&
7415         GetOffsetInBioseq (gap->location, asp->bsp, SEQLOC_LEFT_END) == fcontext->left &&
7416         GetOffsetInBioseq (gap->location, asp->bsp, SEQLOC_RIGHT_END) == fcontext->right) {
7417       awp->currfargap = gap->next;
7418     }
7419   }
7420 
7421   awp->lastsfp = sfp;
7422   awp->lastsap = fcontext->sap;
7423   awp->lastleft = fcontext->left;
7424   awp->lastright = fcontext->right;
7425 
7426   if (fcontext->seqfeattype == SEQFEAT_CDREGION) {
7427     fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntCdsBlock));
7428   } else if (fcontext->seqfeattype == SEQFEAT_PROT) {
7429     fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntPrtBlock));
7430   } else {
7431     fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntFeatBlock));
7432   }
7433   if (fbp == NULL) return TRUE;
7434 
7435   fbp->entityID = fcontext->entityID;
7436   fbp->itemID = fcontext->itemID;
7437   fbp->itemtype = OBJ_SEQFEAT;
7438   fbp->featdeftype = fcontext->featdeftype;
7439   ifp = (IntFeatBlockPtr) fbp;
7440   ifp->mapToNuc = FALSE;
7441   ifp->mapToProt = FALSE;
7442   ifp->mapToGen = FALSE;
7443   ifp->mapToMrna = FALSE;
7444   ifp->mapToPep = FALSE;
7445   ifp->left = 0;
7446   ifp->right = 0;
7447   if (bsp != NULL) {
7448     SetIfpFeatCount (ifp, ajp, awp, ISA_aa (bsp->mol));
7449   }
7450   ifp->firstfeat = awp->firstfeat;
7451   awp->firstfeat = FALSE;
7452 
7453   /* local centromere, telomere, rep_origin, and region features (e.g, on eukaryotic NC record) do not contribute to test for far fetch suppression */
7454   if (sfp->idx.subtype != FEATDEF_centromere &&
7455       sfp->idx.subtype != FEATDEF_telomere &&
7456       sfp->idx.subtype != FEATDEF_rep_origin &&
7457       sfp->idx.subtype != FEATDEF_REGION) {
7458 
7459     /* this allows remote SNP, CDD, MGC, etc., not to be treated as local annotation */
7460     if (awp->entityID != fbp->entityID || fbp->itemID <= awp->localFeatCount) {
7461       awp->featseen = TRUE;
7462     }
7463     awp->featjustseen = TRUE;
7464   }
7465 
7466   if (fcontext->seqfeattype == SEQFEAT_PROT) {
7467 
7468     /* set calculated molecular weight flags for proteins */
7469 
7470     ifp->isPrt = TRUE;
7471     ipp = (IntPrtBlockPtr) fbp;
7472     prp = (ProtRefPtr) sfp->data.value.ptrvalue;
7473     if (prp != NULL) {
7474       if (prp->processed < 2) {
7475         is_whole = FALSE;
7476         slp = sfp->location;
7477         if (slp != NULL) {
7478           if (slp->choice == SEQLOC_WHOLE) {
7479             is_whole = TRUE;
7480           } else if (slp->choice == SEQLOC_INT) {
7481             sintp = (SeqIntPtr) slp->data.ptrvalue;
7482             if (sintp != NULL &&
7483                 bsp != NULL &&
7484                 sintp->from == 0 &&
7485                 sintp->to == bsp->length - 1) {
7486               is_whole = TRUE;
7487             }
7488           }
7489         }
7490         if (is_whole) {
7491           ipp->is_whole_loc = TRUE;
7492           if (awp->has_sig_peptide) {
7493             if (awp->has_mat_peptide) {
7494               ipp->suppress_mol_wt = TRUE;
7495             } else if (awp->sig_pept_trim_len > 0) {
7496               ipp->sig_pept_trim_len = awp->sig_pept_trim_len;
7497             }
7498           } else {
7499             ipp->trim_initial_met = TRUE;
7500           }
7501         }
7502       }
7503     }
7504   }
7505 
7506   if (awp->afp != NULL) {
7507     DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
7508   }
7509 
7510   /* optionally map CDS from cDNA onto genomic */
7511 
7512   if (awp->isGPS && bsp != NULL && ISA_na (bsp->mol) && awp->copyGpsCdsUp &&
7513       fcontext->featdeftype == FEATDEF_mRNA) {
7514     sip = SeqLocIdForProduct (sfp->product);
7515     bsp = BioseqFind (sip);
7516     if (bsp != NULL && ISA_na (bsp->mol)) {
7517       cds = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &cdscontext);
7518       if (cds != NULL) {
7519         fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntCdsBlock));
7520         if (fbp != NULL) {
7521 
7522           fbp->entityID = cdscontext.entityID;
7523           fbp->itemID = cdscontext.itemID;
7524           fbp->itemtype = OBJ_SEQFEAT;
7525           fbp->featdeftype = cdscontext.featdeftype;
7526           ifp = (IntFeatBlockPtr) fbp;
7527           ifp->mapToNuc = FALSE;
7528           ifp->mapToProt = FALSE;
7529           ifp->mapToGen = TRUE;
7530           ifp->mapToMrna = FALSE;
7531           ifp->mapToPep = FALSE;
7532           ifp->left = 0;
7533           ifp->right = 0;
7534           SetIfpFeatCount (ifp, ajp, awp, FALSE);
7535           ifp->firstfeat = awp->firstfeat;
7536           awp->firstfeat = FALSE;
7537 
7538           if (awp->afp != NULL) {
7539             DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
7540           }
7541         }
7542       }
7543     }
7544   }
7545 
7546   if (fcontext->seqfeattype != SEQFEAT_CDREGION) return TRUE;
7547 
7548   /* if feature table format, do not get features from protein product */
7549 
7550   if (awp->format == FTABLE_FMT) return TRUE;
7551 
7552   /* if CDS, collect more information from product protein bioseq - may be part */
7553 
7554   sip = SeqLocIdForProduct (sfp->product);
7555   bsp = BioseqFind (sip);
7556   if (bsp == NULL || (! ISA_aa (bsp->mol))) return TRUE;
7557 
7558   ifp->isCDS = TRUE;
7559   icp = (IntCdsBlockPtr) ifp;
7560 
7561   /* first explore pubs to pick up figure and maploc - no longer shown */
7562 
7563   /*
7564   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
7565   while (sdp != NULL) {
7566     pdp = (PubdescPtr) sdp->data.ptrvalue;
7567     if (pdp != NULL) {
7568       if (icp->fig == NULL) {
7569         icp->fig = StringSaveNoNull (pdp->fig);
7570       }
7571       if (icp->maploc == NULL) {
7572         icp->maploc = StringSaveNoNull (pdp->maploc);
7573       }
7574     }
7575     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext);
7576   }
7577   */
7578 
7579   /* product may be segmented part, and remaining features are indexed on parent */
7580 
7581   parent = SeqMgrGetParentOfPart (bsp, NULL);
7582   if (parent != NULL) {
7583     bsp = parent;
7584   }
7585 
7586   /* then explore mat_peptides, sites, etc. */
7587 
7588   GetFeatsOnCdsProduct (sfp, asp->bsp, bsp, ajp, awp);
7589 
7590   GetRemoteFeatsOnCdsProduct (sfp, asp->bsp, bsp, ajp, awp);
7591 
7592   return TRUE;
7593 }
7594 
7595 /*
7596 static Boolean TestGetAccnVerFromServer (BIG_ID gi, CharPtr buf)
7597 
7598 {
7599   Char      accn [64];
7600   SeqIdPtr  sip;
7601 
7602   if (buf == NULL) return FALSE;
7603   *buf = '\0';
7604   sip = GetSeqIdForGI (gi);
7605   if (sip == NULL) return FALSE;
7606   SeqIdWrite (sip, accn, PRINTID_TEXTID_ACC_VER, sizeof (accn) - 1);
7607   SeqIdFree (sip);
7608   if (StringLen (accn) < 40) {
7609     StringCpy (buf, accn);
7610   }
7611   return TRUE;
7612 }
7613 */
7614 
GetWgsNode(Asn2gbWorkPtr awp,CharPtr accn)7615 static WgsAccnPtr GetWgsNode (
7616   Asn2gbWorkPtr awp,
7617   CharPtr accn
7618 )
7619 
7620 {
7621   ValNodePtr  vnp;
7622   WgsAccnPtr  wap = NULL;
7623 
7624   if (awp == NULL || StringHasNoText (accn)) return NULL;
7625 
7626   for (vnp = awp->wgsaccnlist; vnp != NULL; vnp = vnp->next) {
7627     wap = (WgsAccnPtr) vnp->data.ptrvalue;
7628     if (wap == NULL) continue;
7629     if (StringCmp (accn, wap->accn) == 0) return wap;
7630   }
7631   wap = (WgsAccnPtr) MemNew (sizeof (WgsAccn));
7632   if (wap == NULL) return NULL;
7633   StringCpy (wap->accn, accn);
7634   ValNodeAddPointer (&(awp->wgsaccnlist), 0, (Pointer) wap);
7635   return wap;
7636 }
7637 
GetFeatsOnSeg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)7638 static Boolean LIBCALLBACK GetFeatsOnSeg (
7639   SeqLocPtr slp,
7640   SeqMgrSegmentContextPtr context
7641 )
7642 
7643 {
7644   Char             accn [41];
7645   Uint4            accntype;
7646   IntAsn2gbJobPtr  ajp;
7647   Asn2gbWorkPtr    awp;
7648   BioseqPtr        bsp;
7649   time_t           currTime;
7650   Uint2            entityID;
7651   Int4             from;
7652   BIG_ID           gi;
7653   Int4             left;
7654   SeqLocPtr        loc;
7655   CharPtr          ptr;
7656   Int4             right;
7657   SeqIdPtr         sip;
7658   Int4             to;
7659   WgsAccnPtr       wap = NULL;
7660 
7661   if (slp == NULL || context == NULL) return FALSE;
7662   awp = (Asn2gbWorkPtr) context->userdata;
7663   if (awp == NULL) return FALSE;
7664   ajp = awp->ajp;
7665   if (ajp == NULL) return FALSE;
7666 
7667   /* do not fetch outside of desired component */
7668 
7669   if (ajp->ajp.slp != NULL) {
7670     left = GetOffsetInBioseq (ajp->ajp.slp, awp->parent, SEQLOC_LEFT_END);
7671     right = GetOffsetInBioseq (ajp->ajp.slp, awp->parent, SEQLOC_RIGHT_END);
7672 
7673     from = context->cumOffset;
7674     to = from + context->to - context->from;
7675 
7676     if (left > to) return TRUE;
7677     if (right < from) return TRUE;
7678   }
7679 
7680   from = awp->from;
7681   to = awp->to;
7682 
7683   sip = SeqLocId (slp);
7684   if (sip == NULL) {
7685     loc = SeqLocFindNext (slp, NULL);
7686     if (loc != NULL) {
7687       sip = SeqLocId (loc);
7688     }
7689   }
7690   if (sip == NULL) return TRUE;
7691 
7692   /* if Web Entrez WGS */
7693 
7694   if (awp->farFeatTimeLimit) {
7695     if (sip->choice == SEQID_GI) {
7696       gi = (BIG_ID) sip->data.intvalue;
7697       if (GetAccnVerFromServer (gi, accn)) {
7698         ptr = StringChr (accn, '.');
7699         if (ptr != NULL) {
7700           *ptr = '\0';
7701         }
7702         accntype = WHICH_db_accession (accn);
7703         if (ACCN_IS_WGS (accntype)) {
7704           accn [4] = '\0';
7705           wap = GetWgsNode (awp, accn);
7706           if (wap != NULL) {
7707             (wap->count)++;
7708             if (wap->count > 50) {
7709               if (! wap->hasfeats) return TRUE;
7710             }
7711           }
7712         }
7713       }
7714     }
7715     if (! awp->featseen) {
7716       currTime = GetSecs ();
7717       if (currTime - awp->farFeatStartTime > 25) return FALSE;
7718     }
7719   }
7720 
7721   /* may want to remote fetch genome component if not already in memory */
7722 
7723   bsp = BioseqLockById (sip);
7724 
7725   if (bsp == NULL) return TRUE;
7726 
7727   entityID = ObjMgrGetEntityIDForPointer (bsp);
7728 
7729   if (entityID != awp->entityID) {
7730 
7731     /* if segment not packaged in record, may need to feature index it */
7732 
7733     if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
7734       SeqMgrIndexFeatures (entityID, NULL);
7735     }
7736 
7737     /* collect features indexed on the remote bioseq */
7738 
7739     awp->from = 0;
7740     awp->to = bsp->length - 1;
7741   }
7742 
7743   awp->lastsfp = NULL;
7744   awp->lastsap = NULL;
7745   awp->lastleft = 0;
7746   awp->lastright = 0;
7747 
7748   awp->featjustseen = FALSE;
7749 
7750   if (context->strand == Seq_strand_minus) {
7751     SeqMgrExploreFeaturesRev (bsp, (Pointer) awp, GetFeatsOnBioseq, /* awp->slp */ slp, NULL, NULL);
7752   } else {
7753     SeqMgrExploreFeatures (bsp, (Pointer) awp, GetFeatsOnBioseq, /* awp->slp */ slp, NULL, NULL);
7754   }
7755 
7756   if (awp->featjustseen && wap != NULL) {
7757     wap->hasfeats = TRUE;
7758   }
7759 
7760   /* restore original from and to */
7761 
7762   awp->from = from;
7763   awp->to = to;
7764 
7765   BioseqUnlock (bsp);
7766 
7767   return TRUE;
7768 }
7769 
AddFeatureBlock(Asn2gbWorkPtr awp)7770 NLM_EXTERN void AddFeatureBlock (
7771   Asn2gbWorkPtr awp
7772 )
7773 
7774 {
7775   IntAsn2gbJobPtr    ajp;
7776   BioseqPtr          bsp;
7777   SeqFeatPtr         cds;
7778   SeqMgrDescContext  dcontext;
7779   SeqMgrFeatContext  fcontext;
7780   FeatBlockPtr       fbp;
7781   SeqFeatPtr         gene;
7782   IntFeatBlockPtr    ifp;
7783   Boolean            is_other;
7784   MolInfoPtr         mip;
7785   SeqFeatPtr         mrna;
7786   SeqMgrFeatContext  pcontext;
7787   SeqFeatPtr         prot;
7788   SeqDescrPtr        sdp;
7789   SeqIdPtr           sip;
7790   SeqLocPtr          slp;
7791 
7792   if (awp == NULL) return;
7793   ajp = awp->ajp;
7794   if (ajp == NULL) return;
7795   bsp = awp->parent;
7796   if (bsp == NULL) return;
7797 
7798   awp->lastsfp = NULL;
7799   awp->lastsap = NULL;
7800   awp->lastleft = 0;
7801   awp->lastright = 0;
7802 
7803   /* for protein molecular weight calculation, need sig_peptide, etc. */
7804 
7805   awp->has_mat_peptide = FALSE;
7806   awp->has_sig_peptide = FALSE;
7807   awp->sig_pept_trim_len = 0;
7808 
7809   if (awp->format == GENPEPT_FMT && ISA_aa (bsp->mol)) {
7810     awp->bestprot = SeqMgrGetBestProteinFeature (bsp, &pcontext);
7811 
7812     prot = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
7813     while (prot != NULL) {
7814       if (fcontext.featdeftype == FEATDEF_sig_peptide_aa ||
7815           fcontext.featdeftype == FEATDEF_transit_peptide_aa) {
7816         awp->has_sig_peptide = TRUE;
7817         if (fcontext.left == 0 && fcontext.right < bsp->length - 1) {
7818           awp->sig_pept_trim_len = fcontext.right + 1;
7819         }
7820       } else if (fcontext.featdeftype == FEATDEF_mat_peptide_aa) {
7821         awp->has_mat_peptide = TRUE;
7822       }
7823 
7824       prot = SeqMgrGetNextFeature (bsp, prot, 0, 0, &fcontext);
7825     }
7826   }
7827 
7828   /* optionally map gene from genomic onto cDNA */
7829 
7830   if (awp->isGPS && ISA_na (bsp->mol) && awp->copyGpsGeneDown) {
7831     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
7832     if (sdp != NULL && sdp->choice == Seq_descr_molinfo) {
7833       mip = (MolInfoPtr) sdp->data.ptrvalue;
7834       if (mip != NULL) {
7835         if (mip->biomol == MOLECULE_TYPE_MRNA) {
7836           mrna = SeqMgrGetRNAgivenProduct (bsp, NULL);
7837           if (mrna != NULL) {
7838             gene = SeqMgrGetOverlappingGene (mrna->location, &fcontext);
7839             if (gene != NULL && gene->data.choice == SEQFEAT_GENE) {
7840 
7841               fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntCdsBlock));
7842               if (fbp != NULL) {
7843 
7844                 fbp->entityID = fcontext.entityID;
7845                 fbp->itemID = fcontext.itemID;
7846                 fbp->itemtype = OBJ_SEQFEAT;
7847                 fbp->featdeftype = fcontext.featdeftype;
7848                 ifp = (IntFeatBlockPtr) fbp;
7849                 ifp->mapToNuc = FALSE;
7850                 ifp->mapToProt = FALSE;
7851                 ifp->mapToGen = FALSE;
7852                 ifp->mapToMrna = TRUE;
7853                 ifp->mapToPep = FALSE;
7854                 ifp->isCDS = TRUE;
7855                 ifp->left = 0;
7856                 ifp->right = 0;
7857                 SetIfpFeatCount (ifp, ajp, awp, FALSE);
7858                 ifp->firstfeat = awp->firstfeat;
7859                 awp->firstfeat = FALSE;
7860 
7861                 if (awp->afp != NULL) {
7862                   DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
7863                 }
7864               }
7865             }
7866           }
7867         }
7868       }
7869     }
7870   }
7871 
7872   awp->farFeatTimeLimit = FALSE;
7873   if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_ref) {
7874     if (awp->mode == ENTREZ_MODE) {
7875       awp->farFeatTimeLimit = TRUE;
7876     }
7877     /*
7878     if (GetWWW (ajp) && awp->mode == ENTREZ_MODE) {
7879       sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
7880       if (sdp != NULL && sdp->choice == Seq_descr_molinfo && sdp->data.ptrvalue != NULL) {
7881         mip = (MolInfoPtr) sdp->data.ptrvalue;
7882         if (mip->tech == MI_TECH_wgs || mip->tech == MI_TECH_composite_wgs_htgs) {
7883           awp->farFeatTimeLimit = TRUE;
7884         }
7885       }
7886     }
7887     */
7888   }
7889 
7890   if (! awp->onlyNearFeats) {
7891     if (awp->farFeatsSuppress) {
7892 
7893       if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_ref) {
7894 
7895         /* get start time for 25 second timeout in Web Entrez far WGS records */
7896 
7897         if (awp->farFeatTimeLimit) {
7898           awp->farFeatStartTime = GetSecs ();
7899         }
7900 
7901         /* if farFeatsSuppress first collect features on remote segments in MASTER_STYLE */
7902 
7903         SeqMgrExploreSegments (bsp, (Pointer) awp, GetFeatsOnSeg);
7904 
7905         awp->wgsaccnlist = ValNodeFreeData (awp->wgsaccnlist);
7906       }
7907     }
7908   }
7909 
7910   if ((! awp->farFeatsSuppress) || (! awp->featseen)) {
7911 
7912     /* reminder - features on near parts are indexed on segmented Bioseq */
7913 
7914     slp = ajp->ajp.slp;
7915     if (slp != NULL && SeqLocStrand (slp) == Seq_strand_minus) {
7916       SeqMgrExploreFeaturesRev (bsp, (Pointer) awp, GetFeatsOnBioseq, awp->slp, NULL, NULL);
7917     } else {
7918       SeqMgrExploreFeatures (bsp, (Pointer) awp, GetFeatsOnBioseq, awp->slp, NULL, NULL);
7919     }
7920   }
7921 
7922 
7923   if (awp->format == GENPEPT_FMT && ISA_aa (bsp->mol)) {
7924     cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
7925     if (cds != NULL && cds->data.choice == SEQFEAT_CDREGION) {
7926 
7927       if (fcontext.entityID > 0 && fcontext.itemID > 0) {
7928 
7929         fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntCdsBlock));
7930         if (fbp != NULL) {
7931 
7932           fbp->entityID = fcontext.entityID;
7933           fbp->itemID = fcontext.itemID;
7934           fbp->itemtype = OBJ_SEQFEAT;
7935           fbp->featdeftype = fcontext.featdeftype;
7936           ifp = (IntFeatBlockPtr) fbp;
7937           ifp->mapToNuc = FALSE;
7938           ifp->mapToProt = TRUE;
7939           ifp->mapToGen = FALSE;
7940           ifp->mapToMrna = FALSE;
7941           ifp->mapToPep = FALSE;
7942           ifp->isCDS = TRUE;
7943           ifp->left = 0;
7944           ifp->right = 0;
7945           SetIfpFeatCount (ifp, ajp, awp, TRUE);
7946           ifp->firstfeat = awp->firstfeat;
7947           awp->firstfeat = FALSE;
7948 
7949           if (awp->afp != NULL) {
7950             DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
7951           }
7952         }
7953       } else if (cds->idx.entityID > 0 && cds->idx.itemID > 0) {
7954 
7955         /* if protein bioseq and cds feature but no nucleotide, handle as special case */
7956 
7957         fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntCdsBlock));
7958         if (fbp != NULL) {
7959 
7960           fbp->entityID = cds->idx.entityID;
7961           fbp->itemID = cds->idx.itemID;
7962           fbp->itemtype = OBJ_SEQFEAT;
7963           fbp->featdeftype = FEATDEF_CDS;
7964           ifp = (IntFeatBlockPtr) fbp;
7965           ifp->mapToNuc = FALSE;
7966           ifp->mapToProt = TRUE;
7967           ifp->mapToGen = FALSE;
7968           ifp->mapToMrna = FALSE;
7969           ifp->mapToPep = FALSE;
7970           ifp->isCDS = TRUE;
7971           ifp->left = 0;
7972           ifp->right = 0;
7973           SetIfpFeatCount (ifp, ajp, awp, TRUE);
7974           ifp->firstfeat = awp->firstfeat;
7975           awp->firstfeat = FALSE;
7976 
7977           if (awp->afp != NULL) {
7978             DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
7979           }
7980         }
7981       }
7982     }
7983     prot = SeqMgrGetPROTgivenProduct (bsp, &fcontext);
7984     if (prot != NULL && prot->data.choice == SEQFEAT_PROT) {
7985 
7986       is_other = FALSE;
7987       for (sip = bsp->id; sip != NULL; sip = sip->next) {
7988         if (sip->choice == SEQID_OTHER) {
7989           is_other = TRUE;
7990         }
7991       }
7992 
7993       /* for RefSeq records or GenBank not release_mode */
7994       if (is_other || (! ajp->flags.forGbRelease)) {
7995 
7996         fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntCdsBlock));
7997         if (fbp != NULL) {
7998 
7999           fbp->entityID = fcontext.entityID;
8000           fbp->itemID = fcontext.itemID;
8001           fbp->itemtype = OBJ_SEQFEAT;
8002           fbp->featdeftype = fcontext.featdeftype;
8003           ifp = (IntFeatBlockPtr) fbp;
8004           ifp->mapToNuc = FALSE;
8005           ifp->mapToProt = FALSE;
8006           ifp->mapToGen = FALSE;
8007           ifp->mapToMrna = FALSE;
8008           ifp->mapToPep = TRUE;
8009           ifp->left = 0;
8010           ifp->right = 0;
8011           SetIfpFeatCount (ifp, ajp, awp, TRUE);
8012           ifp->firstfeat = awp->firstfeat;
8013           awp->firstfeat = FALSE;
8014 
8015           if (awp->afp != NULL) {
8016             DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
8017           }
8018         }
8019       }
8020     }
8021   }
8022 
8023   if (awp->onlyNearFeats) return;
8024 
8025   if (awp->nearFeatsSuppress && awp->featseen) return;
8026 
8027   if (! awp->farFeatsSuppress) {
8028 
8029     if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_ref) {
8030 
8031       /* get start time for 25 second timeout in Web Entrez far WGS records */
8032 
8033       if (awp->farFeatTimeLimit) {
8034         awp->farFeatStartTime = GetSecs ();
8035       }
8036 
8037       /* if not farFeatsSuppress now collect features on remote segments in MASTER_STYLE */
8038 
8039       SeqMgrExploreSegments (bsp, (Pointer) awp, GetFeatsOnSeg);
8040 
8041       awp->wgsaccnlist = ValNodeFreeData (awp->wgsaccnlist);
8042     }
8043   }
8044 }
8045 
AddFeatStatsBlock(Asn2gbWorkPtr awp)8046 NLM_EXTERN void AddFeatStatsBlock (
8047   Asn2gbWorkPtr awp
8048 )
8049 
8050 {
8051   IntAsn2gbJobPtr  ajp;
8052   BaseBlockPtr     bbp;
8053   BioseqPtr        bsp;
8054   StringItemPtr    ffstring;
8055 
8056   if (awp == NULL) return;
8057   ajp = awp->ajp;
8058   if ( ajp == NULL ) return;
8059   bsp = awp->bsp;
8060   if (bsp == NULL) return;
8061 
8062   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
8063 
8064   bbp = Asn2gbAddBlock (awp, FEAT_STATS_BLOCK, sizeof (BaseBlock));
8065   if (bbp != NULL) {
8066     ffstring = FFGetString (ajp);
8067     if (ffstring != NULL) {
8068       FFStartPrint (ffstring, awp->format, 0, 12, "FEATSTATS", 12, 0, 0, NULL, FALSE);
8069 
8070       FFAddOneString (ffstring, "placeholder", FALSE, FALSE, TILDE_TO_SPACES);
8071 
8072       bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
8073       FFRecycleString(ajp, ffstring);
8074     }
8075 
8076     if (awp->afp != NULL) {
8077       DoImmediateFormat (awp->afp, bbp);
8078     }
8079   }
8080 }
8081 
8082