1 /*   asn2gnb2.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2gnb2.c
27 *
28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 *          Mati Shomrat
30 *
31 * Version Creation Date:   10/21/98
32 *
33 * $Revision: 1.238 $
34 *
35 * File Description:  New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41 
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gather.h>
55 #include <gbfeat.h>
56 #include <gbftdef.h>
57 #include <edutil.h>
58 #include <validerr.h>
59 #include <objvalid.h>
60 #include <valapi.h>
61 #include <asn2gnbi.h>
62 #include <asn2gnbi.h>
63 
64 #ifdef WIN_MAC
65 #if __profile__
66 #include <Profiler.h>
67 #endif
68 #endif
69 
/* Base URLs for external resources.  Presumably the code that builds a
   hyperlink appends an identifier or query string to one of these prefixes;
   that code is not visible in this part of the file. */

/* NOTE(review): link_projid and link_bioproj currently hold the same URL. */
static CharPtr link_projid = "https://www.ncbi.nlm.nih.gov/bioproject/";

static CharPtr link_bioproj = "https://www.ncbi.nlm.nih.gov/bioproject/";

static CharPtr link_biosamp = "https://www.ncbi.nlm.nih.gov/biosample/";

static CharPtr link_assembl = "https://www.ncbi.nlm.nih.gov/assembly/";

static CharPtr link_srr = "https://www.ncbi.nlm.nih.gov/sra/";
static CharPtr link_srz = "https://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?analysis=";

static CharPtr link_accn = "https://www.ncbi.nlm.nih.gov/sites/entrez?";

/* NOTE(review): link_wgs, link_tls and link_tsa share one Traces/wgs URL. */
static CharPtr link_wgs = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";
static CharPtr link_wgsscaf = "https://www.ncbi.nlm.nih.gov/nuccore?";

static CharPtr link_tls = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";

static CharPtr link_tsa = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";

static CharPtr link_cage = "https://www.ncbi.nlm.nih.gov/sites/entrez?";

/* The only non-NCBI target in this list, and the only one using plain http. */
static CharPtr link_sp = "http://www.uniprot.org/uniprot/";

static CharPtr link_mmdb = "https://www.ncbi.nlm.nih.gov/Structure/mmdb/mmdbsrv.cgi?uid=";

/*
static CharPtr link_featn = "https://www.ncbi.nlm.nih.gov/nuccore/";
static CharPtr link_featp = "https://www.ncbi.nlm.nih.gov/protein/";
*/

/* Record-level links for nucleotide and protein sequences respectively. */
static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";

/*
static CharPtr link_omim = "https://www.ncbi.nlm.nih.gov/omim/";
*/
107 
108 
109 
110 /* ********************************************************************** */
111 
/* The Add*Block functions allocate a specific block and populate it with paragraph print info */
113 
/* Strandedness prefix printed in front of the molecule type.  AddLocusBlock
   indexes this table with the Bioseq strand value after resetting anything
   above 3 to 0, so index 0 (blank) doubles as the "do not show" entry. */
static CharPtr strd [4] = {
  "   ", "ss-", "ds-", "ms-"
};

/* Molecule type label, indexed by the imol value computed in AddLocusBlock.
   The more specific names kept in comments are reported as plain RNA.
   NOTE(review): the entries do not all have the same width ("RNA" vs "RNA ",
   "cRNA ", "tmRNA "); the GenBank path prints them with "%-4s", which pads
   the short ones but does not truncate the long ones. */
static CharPtr gnbk_mol [16] = {
  "    ", "DNA ", "RNA ", "mRNA", "rRNA", "tRNA", /* "snRNA" */ "RNA", /* "scRNA" */ "RNA",
  " AA ", "DNA ", "DNA ", "cRNA ", /* "snoRNA" */ "RNA", "RNA ", "RNA ", "tmRNA "
};

/* EMBL_FMT in RELEASE_MODE or ENTREZ_MODE, otherwise use gnbk_mol */
/* In AddLocusBlock this table is selected when ajp->flags.useEmblMolType is
   set; it uses the same imol index as gnbk_mol but collapses every RNA
   subtype to "RNA". */

static CharPtr embl_mol [16] = {
  "xxx", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA", "RNA",
  "AA ", "DNA", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA"
};
129 
130 static CharPtr embl_divs [18] = {
131   "FUN", "INV", "MAM", "ORG", "PHG", "PLN", "PRI", "PRO", "ROD"
132   "SYN", "UNA", "VRL", "VRT", "PAT", "EST", "STS", "HUM", "HTC"
133 };
134 
/* Translation table with one entry per molecule-type index (16 entries, the
   same count as gnbk_mol).  Presumably maps the imol value to a different
   molecule-type enumeration; the consumer is not visible here - confirm
   before changing any value. */
static Uint1 imolToMoltype [16] = {
  0, 1, 2, 5, 4, 3, 6, 7, 9, 1, 1, 2, 8, 2, 10, 11
};

/* Spelled-out strandedness names, parallel to strd; index 0 has no name. */
static CharPtr gbseq_strd [4] = {
  NULL, "single", "double", "mixed"
};

/* Unpadded molecule type names, parallel to gnbk_mol.
   NOTE(review): the last entry, "tmRNA ", carries a trailing blank while all
   other entries are unpadded - confirm whether that is intentional. */
static CharPtr gbseq_mol [16] = {
  "?", "DNA", "RNA", "mRNA", "rRNA", "tRNA", /* "snRNA" */ "RNA", /* "scRNA" */ "RNA",
  "AA", "DNA", "DNA", "cRNA", /* "snoRNA" */ "RNA", "RNA", /* "ncRNA" */ "RNA", "tmRNA "
};

/* Topology names; index 0 has no name. */
static CharPtr gbseq_top [3] = {
  NULL, "linear", "circular"
};
151 
/* Choose between two dates.

   If either argument is NULL the other one is returned (which may itself be
   NULL).  Otherwise the two are compared with DateMatch: a is returned when
   the comparison yields 1, b in every other case.  Presumably a result of 1
   means that a is the later date - confirm against DateMatch.

   The result is one of the two pointers passed in; nothing is copied. */
static DatePtr GetBestDate (
  DatePtr a,
  DatePtr b
)

{
  Int2  cmp;

  if (a != NULL && b != NULL) {
    cmp = DateMatch (a, b, FALSE);
    return (cmp == 1) ? a : b;
  }

  /* at least one side is missing, so hand back whichever is left */

  return (a == NULL) ? b : a;
}
168 
169 /*--------------------------------------------------------*/
170 /*                                                        */
171 /*  s_IsSeperatorNeeded()                                 */
172 /*                                                        */
173 /*--------------------------------------------------------*/
174 
s_IsSeperatorNeeded(CharPtr baseString,Int4 baseLength,Int2 suffixLength)175 static Boolean s_IsSeperatorNeeded(CharPtr baseString, Int4 baseLength, Int2 suffixLength)
176 {
177   Char lastChar;
178   Char nextToLastChar;
179 
180   lastChar = baseString[baseLength - 1];
181   nextToLastChar = baseString[baseLength - 2];
182 
183   /* This first check put here to emulate what may be a  */
184   /* bug in the original code (in CheckLocusLength() )   */
185   /* which adds an 'S' segment seperator only if it      */
186   /* DOES make the string longer than the max.           */
187 
188   if (baseLength + suffixLength < 16)
189     return FALSE;
190 
191   /* If the last character is not a digit */
192   /* then don't use a seperator.          */
193 
194   if (!IS_DIGIT(lastChar))
195     return FALSE;
196 
197   /* If the last two characters are a non-digit   */
198   /* followed by a '0', then don't use seperator. */
199 
200   if ((lastChar == '0') && (!IS_DIGIT(nextToLastChar)))
201     return FALSE;
202 
203   /* If we made it to here, use a seperator */
204 
205   return TRUE;
206 }
207 
208 /*--------------------------------------------------------*/
209 /*                                                        */
210 /*  s_LocusAddSuffix() -                                  */
211 /*                                                        */
212 /*--------------------------------------------------------*/
213 
s_LocusAddSuffix(CharPtr locus,Asn2gbWorkPtr awp)214 static Boolean s_LocusAddSuffix (CharPtr locus, Asn2gbWorkPtr awp)
215 {
216   size_t  buflen;
217   Char    ch;
218   Char    segCountStr[6];
219   Int2    segCountStrLen;
220   Char    segSuffix[5];
221 
222   buflen = StringLen (locus);
223 
224   /* If there's one or less segments, */
225   /* no suffix is needed.             */
226 
227   if (awp->numsegs <= 1)
228     return FALSE;
229 
230   /* If the basestring has one or less */
231   /* characters, no suffix is needed.  */
232 
233   if (buflen <=1)
234     return FALSE;
235 
236   /* Add the suffix */
237 
238   ch = locus[buflen-1];
239   sprintf(segCountStr,"%d",awp->numsegs);
240   segCountStrLen = StringLen(segCountStr);
241   segSuffix[0] = '\0';
242 
243   if (s_IsSeperatorNeeded(locus,buflen,segCountStrLen) == TRUE)
244     sprintf(segSuffix,"S%0*d",segCountStrLen,awp->seg);
245   else
246     sprintf(segSuffix,"%0*d",segCountStrLen,awp->seg);
247   StringCat(locus,segSuffix);
248 
249   /* Return successfully */
250 
251   return TRUE;
252 }
253 
254 /*--------------------------------------------------------*/
255 /*                                                        */
256 /*  s_LocusAdjustLength() -                               */
257 /*                                                        */
258 /*--------------------------------------------------------*/
259 
s_LocusAdjustLength(CharPtr locus,Int2 maxLength)260 static Boolean s_LocusAdjustLength(CharPtr locus, Int2 maxLength)
261 {
262   Int2     trimCount;
263   Int2     buflen;
264   CharPtr  buftmp;
265 
266   buflen = StringLen (locus);
267   if (buflen <= maxLength) return FALSE;
268 
269   buftmp = (CharPtr) MemNew(maxLength + 1);
270 
271   /* If the sequence id is an NCBI locus of the */
272   /* form HSU00001, then make sure that if      */
273   /* there is trimming the HS gets trimmed off  */
274   /* as a unit, never just the 'H'.             */
275 
276   trimCount = buflen - maxLength;
277   if (trimCount == 1)
278     if (IS_ALPHA(locus[0]) != 0 &&
279         IS_ALPHA(locus[1]) != 0 &&
280         IS_ALPHA(locus[2]) != 0 &&
281         IS_DIGIT(locus[3]) != 0 &&
282         IS_DIGIT(locus[4]) != 0 &&
283         IS_DIGIT(locus[5]) != 0 &&
284         IS_DIGIT(locus[6]) != 0 &&
285         IS_DIGIT(locus[7]) != 0 &&
286         locus[8] == 'S' &&
287         locus[9] == '\0')
288       trimCount++;
289 
290   /* Left truncate the sequence id */
291 
292   StringCpy(buftmp, &locus[trimCount]);
293   StringCpy(locus, buftmp);
294 
295   MemFree(buftmp);
296   return TRUE;
297 }
298 
299 /*--------------------------------------------------------*/
300 /*                                                        */
301 /*  AddLocusBlock() -                                     */
302 /*                                                        */
303 /*--------------------------------------------------------*/
304 
GetBestDateForBsp(BioseqPtr bsp)305 static DatePtr GetBestDateForBsp (
306   BioseqPtr bsp
307 )
308 
309 {
310   DatePtr            best_date = NULL;
311   SeqMgrDescContext  dcontext;
312   DatePtr            dp;
313   EMBLBlockPtr       ebp;
314   GBBlockPtr         gbp;
315   PdbBlockPtr        pdp;
316   PdbRepPtr          prp;
317   SeqDescrPtr        sdp;
318   SPBlockPtr         spp;
319 
320   if (bsp == NULL) return NULL;
321 
322   dp = NULL;
323   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
324   if (sdp != NULL) {
325     dp = (DatePtr) sdp->data.ptrvalue;
326     best_date = GetBestDate (dp, best_date);
327   }
328 
329   /* !!! temporarily also look at genbank block entry date !!! */
330 
331   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
332   if (sdp != NULL) {
333     gbp = (GBBlockPtr) sdp->data.ptrvalue;
334     if (gbp != NULL) {
335       dp = gbp->entry_date;
336       best_date = GetBestDate (dp, best_date);
337     }
338   }
339 
340   /* more complicated code for dates from various objects goes here */
341 
342   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_embl, &dcontext);
343   if (sdp != NULL) {
344     ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
345     if (ebp != NULL) {
346       dp = ebp->creation_date;
347       best_date = GetBestDate (dp, best_date);
348       dp = ebp->update_date;
349       best_date = GetBestDate (dp, best_date);
350     }
351   }
352 
353   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_sp, &dcontext);
354   if (sdp != NULL) {
355     spp = (SPBlockPtr) sdp->data.ptrvalue;
356     if (spp != NULL) {
357       dp = spp->created;
358       if (dp != NULL && dp->data [0] == 1) {
359         best_date = GetBestDate (dp, best_date);
360       }
361       dp = spp->sequpd;
362       if (dp != NULL && dp->data [0] == 1) {
363         best_date = GetBestDate (dp, best_date);
364       }
365       dp = spp->annotupd;
366       if (dp != NULL && dp->data [0] == 1) {
367         best_date = GetBestDate (dp, best_date);
368       }
369     }
370   }
371 
372   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pdb, &dcontext);
373   if (sdp != NULL) {
374     pdp = (PdbBlockPtr) sdp->data.ptrvalue;
375     if (pdp != NULL) {
376       dp = pdp->deposition;
377       if (dp != NULL && dp->data [0] == 1) {
378         best_date = GetBestDate (dp, best_date);
379       }
380       prp = pdp->replace;
381       if (prp != NULL) {
382         dp = prp->date;
383         if (dp != NULL && dp->data[0] == 1) {
384           best_date = GetBestDate (dp, best_date);
385         }
386       }
387     }
388   }
389 
390   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
391   if (sdp != NULL) {
392     dp = (DatePtr) sdp->data.ptrvalue;
393     if (dp != NULL) {
394       best_date = GetBestDate (dp, best_date);
395     }
396   }
397 
398   return best_date;
399 }
400 
LocusHasBadChars(CharPtr locus)401 static Boolean LocusHasBadChars (
402   CharPtr locus
403 )
404 
405 {
406   Char     ch;
407   CharPtr  ptr;
408 
409   ptr = locus;
410   ch = *ptr;
411   while (ch != '\0') {
412     if (! (IS_ALPHA(ch) || IS_DIGIT(ch) || ch == '_')) {
413       return TRUE;
414     }
415     ptr++;
416     ch = *ptr;
417   }
418   return FALSE;
419 }
420 
/* Produce the accession.version text for a gi, for use in a navigation
   link.

   gi      identifier to look up; values <= 0 skip the lookup.
   seqid   output buffer, always left NUL-terminated.
   len     size of the seqid buffer.
   dfault  text to use when the lookup yields nothing; may be NULL, in which
           case seqid is left empty.

   The lookup first asks GetAccnVerFromServer, then falls back to formatting
   the SeqId returned by GetSeqIdForGI.  Nothing is done when seqid is NULL
   or len is 0.

   The fallback copy is bounded by len, with the same "size of the buffer"
   convention that StringNCpy_0 is given elsewhere in this file, so an
   over-long dfault is truncated instead of overrunning seqid.

   NOTE(review): GetAccnVerFromServer is not given the buffer size; the
   bound on what it writes has to come from its own contract. */
static void LookupAccnForNavLink (
  BIG_ID gi,
  CharPtr seqid,
  size_t len,
  CharPtr dfault
)

{
  SeqIdPtr  sip;

  if (seqid == NULL || len == 0) return;
  *seqid = '\0';
  if (gi > 0) {
    if (GetAccnVerFromServer (gi, seqid)) return;
    sip = GetSeqIdForGI (gi);
    if (sip != NULL) {
      if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, len) != NULL) {
        SeqIdFree (sip);
        return;
      }
      SeqIdFree (sip);
    }
  }
  if (dfault == NULL) return;
  StringNCpy_0 (seqid, dfault, len);
}
447 
AddLocusBlock(Asn2gbWorkPtr awp,Boolean willshowwgs,Boolean willshowtsa,Boolean willshowtls,Boolean willshowcage,Boolean willshowgenome,Boolean willshowcontig,Boolean willshowsequence)448 NLM_EXTERN void AddLocusBlock (
449   Asn2gbWorkPtr awp,
450   Boolean willshowwgs,
451   Boolean willshowtsa,
452   Boolean willshowtls,
453   Boolean willshowcage,
454   Boolean willshowgenome,
455   Boolean willshowcontig,
456   Boolean willshowsequence
457 )
458 
459 {
460   size_t             acclen;
461   IntAsn2gbJobPtr    ajp;
462   Asn2gbSectPtr      asp;
463   BaseBlockPtr       bbp;
464   DatePtr            best_date = NULL;
465   BioSourcePtr       biop;
466   Int2               bmol = 0;
467   BioseqPtr          bsp;
468   Char               buf [1024];
469   Boolean            cagemaster = FALSE;
470   SeqFeatPtr         cds;
471   Char               ch1, ch2, ch3;
472   BIG_ID             currGi;
473   Char               dataclass [10];
474   Char               date [40];
475   SeqMgrDescContext  dcontext;
476   Char               div [10];
477   BioseqPtr          dna;
478   DatePtr            dp;
479   CharPtr            ebmol;
480   EMBLBlockPtr       ebp;
481   Char               embldiv [10];
482   SeqMgrFeatContext  fcontext;
483   StringItemPtr      ffstring;
484   GBBlockPtr         gbp;
485   Char               gene [32];
486   Boolean            genome_view;
487   GBSeqPtr           gbseq;
488   ValNodePtr         gilistpos;
489   SeqIdPtr           gpp = NULL;
490   Boolean            has_next_pref_ul = FALSE;
491   Boolean            hasComment;
492   Char               id [41];
493   Int2               imol = 0;
494   IndxPtr            index;
495   Int2               istrand;
496   Boolean            is_nm = FALSE;
497   Boolean            is_np = FALSE;
498   Boolean            is_nz = FALSE;
499   Boolean            is_env_sample = FALSE;
500   Boolean            is_transgenic = FALSE;
501   Boolean            is_tpa = FALSE;
502   Char               len [32];
503   Int4               length;
504   size_t             loclen;
505   Char               locus [41];
506   MolInfoPtr         mip;
507   Char               mol [64];
508   BIG_ID             nextGi;
509   BioseqPtr          nm = NULL;
510   BioseqPtr          nuc;
511   ObjectIdPtr        oip;
512   OrgNamePtr         onp;
513   Uint1              origin;
514   CharPtr            original_id = NULL;
515   OrgRefPtr          orp;
516   BioseqPtr          parent;
517   BIG_ID             prevGi;
518   CharPtr            ptr;
519   SeqDescrPtr        sdp;
520   Char               sect [128];
521   Char               seg [32];
522   Char               seqid [128];
523   SeqFeatPtr         sfp;
524   SeqHistPtr         hist;
525   SeqIdPtr           sip;
526   SubSourcePtr       ssp;
527   CharPtr            str;
528   CharPtr            suffix = NULL;
529   Uint1              tech;
530   Boolean            tlsmaster = FALSE;
531   Uint1              topology;
532   Boolean            tsamaster = FALSE;
533   TextSeqIdPtr       tsip;
534   UserObjectPtr      uop;
535   Char               ver [16];
536   Int2               version;
537   ValNodePtr         vnp;
538   Boolean            wgsmaster = FALSE;
539   Int2               moltype, strandedness, topol;
540   /*
541   BIG_ID             gi = 0;
542   Char               gi_buf [32];
543   Boolean            is_aa;
544   CharPtr            prefix = NULL;
545   */
546 
547   if (awp == NULL) return;
548   ajp = awp->ajp;
549   if (ajp == NULL) return;
550   bsp = awp->bsp;
551   if (bsp == NULL) return;
552   asp = awp->asp;
553   if (asp == NULL) return;
554 
555   bbp = Asn2gbAddBlock (awp, LOCUS_BLOCK, sizeof (BaseBlock));
556   if (bbp == NULL) return;
557 
558   ffstring = FFGetString(ajp);
559   if ( ffstring == NULL ) return;
560 
561   mol [0] = '\0';
562   len [0] = '\0';
563   div [0] = '\0';
564   embldiv [0] = '\0';
565   dataclass [0] = '\0';
566   date [0] = '\0';
567   gene [0] = '\0';
568   locus [0] = '\0';
569 
570   genome_view = FALSE;
571   if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
572     genome_view = TRUE;
573 
574   }
575   if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
576     genome_view = TRUE;
577   }
578 
579   /* locus id */
580 
581   if (ShouldUseOriginalID (bsp)) {
582     original_id = FastaGetOriginalId (bsp);
583   }
584 
585   sip = NULL;
586   version = 0;
587   for (sip = bsp->id; sip != NULL; sip = sip->next) {
588     if (sip->choice == SEQID_OTHER) {
589       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
590       if (tsip != NULL) {
591         version = tsip->version;
592         if (StringNCmp (tsip->accession, "NM_", 3) == 0 ||
593             StringNCmp (tsip->accession, "NR_", 3) == 0 ||
594             StringNCmp (tsip->accession, "XM_", 3) == 0 ||
595             StringNCmp (tsip->accession, "XR_", 3) == 0) {
596           is_nm = TRUE;
597           nm = bsp;
598         } else if (StringNCmp (tsip->accession, "NP_", 3) == 0  ||
599                    StringNCmp (tsip->accession, "XP_", 3) == 0) {
600           is_np = TRUE;
601         } else if (StringNCmp (tsip->accession, "NZ_", 3) == 0) {
602           is_nz = TRUE;
603         }
604       }
605       break;
606     }
607     if (sip->choice == SEQID_GENBANK ||
608         sip->choice == SEQID_EMBL ||
609         sip->choice == SEQID_DDBJ) {
610       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
611       if (tsip != NULL) {
612         version = tsip->version;
613       }
614       break;
615     }
616     if (sip->choice == SEQID_TPG ||
617         sip->choice == SEQID_TPE ||
618         sip->choice == SEQID_TPD) {
619       is_tpa = TRUE;
620       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
621       if (tsip != NULL) {
622         version = tsip->version;
623       }
624       break;
625     }
626     if (sip->choice == SEQID_PIR ||
627         sip->choice == SEQID_SWISSPROT ||
628         sip->choice == SEQID_PRF ||
629         sip->choice == SEQID_PDB) break;
630     if (sip->choice == SEQID_GPIPE) {
631       gpp = sip;
632     }
633   }
634   if (sip == NULL) {
635     sip = gpp;
636   }
637   if (sip == NULL) {
638     sip = SeqIdFindBest (bsp->id, SEQID_GENBANK);
639   }
640   sprintf (ver, "%d", (int) version);
641 
642   if (original_id != NULL) {
643     StringNCpy_0 (locus, original_id, sizeof (locus));
644   } else if (genome_view) {
645     SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
646   } else {
647     SeqIdWrite (sip, locus, PRINTID_TEXTID_LOCUS, sizeof (locus) - 1);
648     if (LocusHasBadChars (locus)) {
649       SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
650     }
651   }
652 
653   if (sip != NULL && sip->choice == SEQID_PDB) {
654     ptr = StringChr (locus, '_');
655     if (ptr != NULL) {
656       ch1 = ptr [1];
657       if (ch1 != '\0') {
658         ch2 = ptr [2];
659         if (ch2 != '\0') {
660           ch3 = ptr [3];
661           if (ch3 == '\0') {
662             if (ch1 == ch2) {
663               if (IS_UPPER (ch1)) {
664                 ptr [1] = TO_LOWER (ch1);
665                 ptr [2] = '\0';
666               }
667             }
668           }
669         }
670       }
671     }
672   }
673 
674   if (is_np) {
675     sfp = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
676     if (sfp != NULL && fcontext.bsp != NULL) {
677       nm = fcontext.bsp;
678       for (sip = nm->id; sip != NULL; sip = sip->next) {
679         if (sip->choice == SEQID_OTHER) {
680           tsip = (TextSeqIdPtr) sip->data.ptrvalue;
681           if (tsip != NULL) {
682             if (StringNCmp (tsip->accession, "NM_", 3) == 0 ||
683                 StringNCmp (tsip->accession, "XM_", 3) == 0) {
684               is_nm = TRUE;
685             }
686           }
687         }
688       }
689       if (! is_nm) {
690         nm = NULL;
691       }
692     }
693   }
694   if (nm != NULL) {
695     /*
696     sfp = SeqMgrGetNextFeature (nm, NULL, SEQFEAT_GENE, 0, &fcontext);
697     if (sfp != NULL) {
698       StringNCpy_0 (gene, fcontext.label, sizeof (gene));
699       if (SeqMgrGetNextFeature (nm, sfp, SEQFEAT_GENE, 0, &fcontext) != NULL) {
700         gene [0] = '\0';
701       }
702       if (StringLen (gene) > 15) {
703         gene [0] = '\0';
704       }
705     }
706     */
707   }
708 
709   /* more complicated code to get parent locus, if segmented, goes here */
710 
711   if (awp->slp != NULL) {
712     length = SeqLocLen (awp->slp);
713   } else {
714     length = bsp->length;
715   }
716 
717   mip = NULL;
718   tech = MI_TECH_standard;
719   origin = 0;
720   bmol = bsp->mol;
721   if (bmol > Seq_mol_aa) {
722     bmol = 0;
723   }
724   istrand = bsp->strand;
725   if (istrand > 3) {
726     istrand = 0;
727   }
728 
729   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
730   if (sdp != NULL) {
731     bbp->entityID = dcontext.entityID;
732     bbp->itemID = dcontext.itemID;
733     bbp->itemtype = OBJ_SEQDESC;
734 
735     mip = (MolInfoPtr) sdp->data.ptrvalue;
736     if (mip != NULL) {
737       if (mip->biomol <= MOLECULE_TYPE_TMRNA) {
738         imol = (Int2) mip->biomol;
739       }
740       tech = mip->tech;
741 
742       if (tech == MI_TECH_wgs && bsp->repr == Seq_repr_virtual) {
743 
744         /* check for WGS master record */
745 
746         for (sip = bsp->id; sip != NULL; sip = sip->next) {
747           switch (sip->choice) {
748             case SEQID_GENBANK :
749             case SEQID_EMBL :
750             case SEQID_DDBJ :
751               tsip = (TextSeqIdPtr) sip->data.ptrvalue;
752               if (tsip != NULL && tsip->accession != NULL) {
753                 acclen = StringLen (tsip->accession);
754                 if (acclen == 12) {
755                   if (StringCmp (tsip->accession + 6, "000000") == 0) {
756                     wgsmaster = TRUE;
757                   }
758                 } else if (acclen == 13) {
759                   if (StringCmp (tsip->accession + 6, "0000000") == 0) {
760                     wgsmaster = TRUE;
761                   }
762                 } else if (acclen == 14) {
763                   if (StringCmp (tsip->accession + 6, "00000000") == 0) {
764                     wgsmaster = TRUE;
765                   }
766                 }
767               }
768               break;
769             case SEQID_OTHER :
770               tsip = (TextSeqIdPtr) sip->data.ptrvalue;
771               if (tsip != NULL && tsip->accession != NULL) {
772                 if (StringLen (tsip->accession) == 15) {
773                   if (StringCmp (tsip->accession + 9, "000000") == 0) {
774                     wgsmaster = TRUE;
775                   }
776                 }
777               }
778               break;
779             default :
780               break;
781           }
782         }
783       }
784 
785       if (tech == MI_TECH_tsa && bsp->repr == Seq_repr_virtual) {
786 
787         /* check for TSA master record */
788 
789         for (sip = bsp->id; sip != NULL; sip = sip->next) {
790           switch (sip->choice) {
791             case SEQID_GENBANK :
792             case SEQID_EMBL :
793             case SEQID_DDBJ :
794               tsip = (TextSeqIdPtr) sip->data.ptrvalue;
795               if (tsip != NULL && tsip->accession != NULL) {
796                 acclen = StringLen (tsip->accession);
797                 if (acclen == 12) {
798                   if (StringCmp (tsip->accession + 6, "000000") == 0) {
799                     tsamaster = TRUE;
800                   }
801                 } else if (acclen == 13) {
802                   if (StringCmp (tsip->accession + 6, "0000000") == 0) {
803                     tsamaster = TRUE;
804                   }
805                 } else if (acclen == 14) {
806                   if (StringCmp (tsip->accession + 6, "00000000") == 0) {
807                     tsamaster = TRUE;
808                   }
809                 }
810               }
811               break;
812             default :
813               break;
814           }
815         }
816       }
817 
818       if (tech == MI_TECH_other && willshowcage && bsp->repr == Seq_repr_virtual) {
819 
820         /* check for TAG master record */
821 
822         for (sip = bsp->id; sip != NULL; sip = sip->next) {
823           switch (sip->choice) {
824             case SEQID_GENBANK :
825             case SEQID_EMBL :
826             case SEQID_DDBJ :
827               tsip = (TextSeqIdPtr) sip->data.ptrvalue;
828               if (tsip != NULL && tsip->accession != NULL) {
829                 acclen = StringLen (tsip->accession);
830                 if (acclen == 12) {
831                   if (StringCmp (tsip->accession + 5, "0000000") == 0) {
832                     cagemaster = TRUE;
833                   }
834                 }
835               }
836               break;
837             default :
838               break;
839           }
840         }
841       }
842 
843       if (tech == MI_TECH_targeted && bsp->repr == Seq_repr_virtual) {
844         tlsmaster = TRUE;
845       }
846     }
847   }
848 
849   /* check inst.mol if mol-type is not-set or genomic */
850 
851   if (imol <= MOLECULE_TYPE_GENOMIC) {
852     if (bmol == Seq_mol_aa) {
853       imol = MOLECULE_TYPE_PEPTIDE;
854     } else if (bmol == Seq_mol_na) {
855       imol = 0;
856     } else if (bmol == Seq_mol_rna) {
857       imol = 2;
858     } else {
859       imol = 1;
860     }
861   } else if (imol == MOLECULE_TYPE_OTHER_GENETIC_MATERIAL) {
862     if (bmol == Seq_mol_aa) {
863       imol = MOLECULE_TYPE_PEPTIDE;
864     } else if (bmol == Seq_mol_rna) {
865       imol = 2;
866     }
867   }
868 
869   /* if ds-DNA don't show ds */
870 
871   if (bmol == Seq_mol_dna && istrand == 2) {
872     istrand = 0;
873   }
874 
875   /* ss=any RNA don't show ss */
876 
877   if ((bmol > Seq_mol_rna ||
878       (imol >= MOLECULE_TYPE_MRNA && imol <= MOLECULE_TYPE_PEPTIDE) ||
879       (imol >= MOLECULE_TYPE_CRNA && imol <= MOLECULE_TYPE_TMRNA)) &&
880       istrand == 1) {
881     istrand = 0;
882   }
883 
884   topology = bsp->topology;
885   if (awp->slp != NULL) {
886     topology = TOPOLOGY_LINEAR;
887   }
888 
889   /* length, topology, and molecule type */
890 
891   if (awp->format == GENBANK_FMT) {
892 
893     if (awp->newLocusLine) {
894 
895       if (wgsmaster && (! is_nz)) {
896         sprintf (len, "%ld rc", (long) length);
897       } else if (tsamaster) {
898         sprintf (len, "%ld rc", (long) length);
899       } else if (cagemaster) {
900         sprintf (len, "%ld rc", (long) length);
901       } else if (tlsmaster) {
902         sprintf (len, "%ld rc", (long) length);
903       } else {
904         sprintf (len, "%ld bp", (long) length);
905       }
906       sprintf (mol, "%s%-4s", strd [istrand], gnbk_mol [imol]);
907 
908     } else {
909 
910       if (topology == TOPOLOGY_CIRCULAR) {
911         sprintf (len, "%7ld bp", (long) length);
912         sprintf (mol, "%s%-4s  circular", strd [istrand], gnbk_mol [imol]);
913       } else {
914         sprintf (len, "%7ld bp", (long) length);
915         sprintf (mol, "%s%-4s          ", strd [istrand], gnbk_mol [imol]);
916       }
917     }
918 
919   } else if (awp->format == GENPEPT_FMT) {
920 
921     if (awp->newLocusLine) {
922       sprintf (len, "%ld aa", (long) length);
923     } else {
924       sprintf (len, "%7ld aa", (long) length);
925     }
926 
927   } else if (awp->format == EMBL_FMT) {
928 
929     if (imol < MOLECULE_TYPE_PEPTIDE) {
930       if (ajp->flags.useEmblMolType) {
931         ebmol = embl_mol [imol];
932       } else {
933         ebmol = gnbk_mol [imol];
934       }
935 
936       if (topology == TOPOLOGY_CIRCULAR) {
937         sprintf (mol, "circular %s", ebmol);
938         sprintf (len, "%ld BP.", (long) length);
939       } else {
940         sprintf (mol, "%s", ebmol);
941         sprintf (len, "%ld BP.", (long) length);
942       }
943     }
944   }
945 
946   /* division */
947 
948   biop = NULL;
949   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
950   if (sdp != NULL) {
951     biop = (BioSourcePtr) sdp->data.ptrvalue;
952   } else {
953     sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
954     if (sfp != NULL) {
955       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
956     } else if (ISA_aa (bsp->mol)) {
957 
958       /* if protein with no sources, get sources applicable to DNA location of CDS */
959 
960       cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
961       if (cds != NULL) {
962         sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
963         if (sfp != NULL) {
964           biop = (BioSourcePtr) sfp->data.value.ptrvalue;
965         } else {
966           dna = BioseqFindFromSeqLoc (cds->location);
967           if (dna != NULL) {
968             sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
969             if (sdp != NULL) {
970               biop = (BioSourcePtr) sdp->data.ptrvalue;
971             }
972           }
973         }
974       }
975     }
976   }
977   if (biop != NULL) {
978     origin = biop->origin;
979     orp = biop->org;
980     if (orp != NULL) {
981       onp = orp->orgname;
982       if (onp != NULL) {
983         StringNCpy_0 (div, onp->div, sizeof (div));
984         StringNCpy_0 (embldiv, onp->div, sizeof (embldiv));
985       }
986     }
987     for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
988       if (ssp->subtype == SUBSRC_transgenic) {
989         is_transgenic = TRUE;
990       } else if (ssp->subtype == SUBSRC_environmental_sample) {
991         is_env_sample = TRUE;
992       }
993     }
994   }
995 
996   StringCpy (dataclass, "STD");
997   if (is_tpa) {
998     StringCpy (dataclass, "TPA");
999   }
1000 
1001   switch (tech) {
1002     case MI_TECH_est :
1003       StringCpy (div, "EST");
1004       StringCpy (dataclass, "EST");
1005       break;
1006     case MI_TECH_sts :
1007       StringCpy (div, "STS");
1008       StringCpy (dataclass, "STS");
1009       break;
1010     case MI_TECH_survey :
1011       StringCpy (div, "GSS");
1012       StringCpy (dataclass, "GSS");
1013       break;
1014     case MI_TECH_htgs_0 :
1015     case MI_TECH_htgs_1 :
1016     case MI_TECH_htgs_2 :
1017       StringCpy (div, "HTG");
1018       StringCpy (dataclass, "HTG");
1019       break;
1020     case MI_TECH_htc :
1021       StringCpy (div, "HTC");
1022       StringCpy (dataclass, "HTC");
1023       break;
1024     case MI_TECH_tsa :
1025       StringCpy (div, "TSA");
1026       StringCpy (dataclass, "TSA");
1027       break;
1028     default :
1029       break;
1030   }
1031 
1032   if (origin == ORG_MUT ||
1033       origin == ORG_ARTIFICIAL ||
1034       origin == ORG_SYNTHETIC ||
1035       is_transgenic) {
1036     StringCpy (div, "SYN");
1037     StringCpy (embldiv, "SYN");
1038   } else if (is_env_sample) {
1039     if (tech == MI_TECH_unknown ||
1040         tech == MI_TECH_standard ||
1041         tech == MI_TECH_other ||
1042         tech == MI_TECH_wgs ||
1043         tech == MI_TECH_htgs_3) {
1044       StringCpy (div, "ENV");
1045       StringCpy (embldiv, "ENV");
1046     }
1047   }
1048 
1049   if (is_transgenic && tech == MI_TECH_survey) {
1050     StringCpy (div, "GSS");
1051     StringCpy (dataclass, "GSS");
1052   }
1053 
1054   sip = SeqIdFindBest (bsp->id, SEQID_PATENT);
1055   if (sip != NULL && sip->choice == SEQID_PATENT) {
1056     StringCpy (div, "PAT");
1057     StringCpy (dataclass, "PAT");
1058   }
1059 
1060   /* if protein is encoded by a patent nucleotide, use PAT division */
1061 
1062   if (ISA_aa (bsp->mol)) {
1063     cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
1064     if (cds != NULL) {
1065       nuc = BioseqFindFromSeqLoc (cds->location);
1066       if (nuc != NULL) {
1067         for (sip = nuc->id; sip != NULL; sip = sip->next) {
1068           if (sip->choice == SEQID_PATENT) {
1069             StringCpy (div, "PAT");
1070             StringCpy (dataclass, "PAT");
1071           }
1072         }
1073       }
1074     }
1075   }
1076 
1077   /* more complicated code for division, if necessary, goes here */
1078 
1079   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
1080   while (sdp != NULL) {
1081     gbp = (GBBlockPtr) sdp->data.ptrvalue;
1082     if (gbp != NULL) {
1083       if (StringHasNoText (div) && gbp->div != NULL) {
1084         StringCpy (div, gbp->div);
1085         StringCpy (embldiv, gbp->div);
1086       } else if (StringCmp(gbp->div, "PAT") == 0) {
1087         StringCpy (div, gbp->div);
1088         StringCpy (dataclass, gbp->div);
1089       } else if (StringCmp(gbp->div, "SYN") == 0 ) {
1090         StringCpy (div, gbp->div);
1091         StringCpy (embldiv, gbp->div);
1092       }
1093     }
1094     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &dcontext);
1095   }
1096 
1097   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1098 
1099     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_embl, &dcontext);
1100     if (sdp != NULL) {
1101       ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
1102       if (ebp != NULL) {
1103         if (ebp->div == 255) {
1104           if (mip == NULL) {
1105             StringCpy (div, "HUM");
1106             StringCpy (embldiv, "HUM");
1107           }
1108         } else if (ebp->div < 18)  {
1109           StringCpy (div, embl_divs [ebp->div]);
1110           StringCpy (embldiv, embl_divs [ebp->div]);
1111         }
1112       }
1113     }
1114 
1115     if (StringHasNoText (div)) {
1116       StringCpy (div, "UNA");
1117       StringCpy (embldiv, "UNA");
1118     }
1119   }
1120 
1121   /* empty division field if unable to find anything */
1122 
1123   if (StringHasNoText (div)) {
1124     StringCpy (div, "   ");
1125   }
1126   if (StringHasNoText (embldiv)) {
1127     StringCpy (embldiv, "   ");
1128   }
1129 
1130   /* contig style (old genome_view flag) forces CON division */
1131 
1132   if (awp->contig) {
1133     StringCpy (div, "CON");
1134     StringCpy (dataclass, "CON");
1135   }
1136 
1137   if (genome_view) {
1138     StringCpy (div, "CON");
1139     StringCpy (dataclass, "CON");
1140   }
1141 
1142   if (StringCmp (dataclass, "CON") == 0) {
1143     if (DeltaLitOnly (bsp)) {
1144       if (SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext) != NULL) {
1145         StringCpy (dataclass, "ANN");
1146       }
1147     }
1148   }
1149 
1150   /* date */
1151 
1152   best_date = GetBestDateForBsp (bsp);
1153 
1154   if (best_date == NULL) {
1155 
1156     /* if bsp is product of CDS or mRNA feature, get date from sfp->location bsp */
1157 
1158     sfp = NULL;
1159     if (ISA_na (bsp->mol)) {
1160       sfp = SeqMgrGetRNAgivenProduct (bsp, NULL);
1161     } else if (ISA_aa (bsp->mol)) {
1162       sfp = SeqMgrGetCDSgivenProduct (bsp, NULL);
1163     }
1164     if (sfp != NULL) {
1165       parent = BioseqFindFromSeqLoc (sfp->location);
1166       if (parent != NULL) {
1167         best_date = GetBestDateForBsp (parent);
1168       }
1169     }
1170   }
1171 
1172   /* convert best date */
1173 
1174   if (best_date != NULL) {
1175     DateToFF (date, best_date, FALSE);
1176   }
1177   if (StringHasNoText (date)) {
1178     StringCpy (date, "01-JAN-1900");
1179   }
1180 
1181   if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
1182 
1183     /* Create the proper locus name */
1184 
1185     parent = awp->parent;
1186     if (parent->repr == Seq_repr_seg) {
1187 
1188       if (! StringHasNoText (awp->basename)) {
1189         StringCpy (locus, awp->basename);
1190         s_LocusAddSuffix (locus, awp);
1191       }
1192     }
1193 
1194     /* Print the "LOCUS_NEW" line, if requested */
1195 
1196     if (awp->newLocusLine) {
1197 
1198       FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
1199       parent = awp->parent;
1200 
1201       if (parent->repr == Seq_repr_seg)
1202         s_LocusAdjustLength (locus,16);
1203 
1204       if (is_nm && (! StringHasNoText (gene))) {
1205         FFAddOneString (ffstring, gene, FALSE, FALSE, TILDE_IGNORE);
1206       } else {
1207         FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1208       }
1209       FFAddNChar(ffstring, ' ', 43 - StringLen(len)- ffstring->curr->pos, FALSE);
1210       FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1211       FFAddNChar(ffstring, ' ', 44 - ffstring->curr->pos, FALSE);
1212       FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1213       FFAddNChar(ffstring, ' ', 55 - ffstring->curr->pos, FALSE);
1214       if (topology == TOPOLOGY_CIRCULAR) {
1215         FFAddOneString (ffstring, "circular", FALSE, FALSE, TILDE_IGNORE);
1216       } else {
1217         FFAddOneString (ffstring, "linear  ", FALSE, FALSE, TILDE_IGNORE);
1218       }
1219       FFAddNChar(ffstring, ' ', 64 - ffstring->curr->pos, FALSE);
1220       FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1221       FFAddNChar(ffstring, ' ', 68 - ffstring->curr->pos, FALSE);
1222       FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
1223     }
1224 
1225     /* Else print the "LOCUS" line */
1226 
1227     else {
1228 
1229       FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
1230 
1231       if (parent->repr == Seq_repr_seg)
1232         s_LocusAdjustLength (locus,16);
1233 
1234       FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1235       FFAddNChar(ffstring, ' ', 32 - StringLen(len) - ffstring->curr->pos, FALSE);
1236       FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1237       FFAddNChar(ffstring, ' ', 33 - ffstring->curr->pos, FALSE);
1238       FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1239       FFAddNChar(ffstring, ' ', 52 - ffstring->curr->pos, FALSE);
1240       FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1241       FFAddNChar(ffstring, ' ', 62 - ffstring->curr->pos, FALSE);
1242       FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
1243     }
1244 
1245   } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1246 
1247     if (awp->newLocusLine) {
1248 
1249       str = GetMolTypeQual (bsp);
1250       if (str == NULL) {
1251         switch (bsp->mol) {
1252           case Seq_mol_dna :
1253             str = "unassigned DNA";
1254             break;
1255           case Seq_mol_rna :
1256             str = "unassigned RNA";
1257             break;
1258           case Seq_mol_aa :
1259             break;
1260           default :
1261             str = "unassigned DNA";
1262             break;
1263         }
1264       }
1265       if (StringCmp (str, "viral cRNA") == 0) {
1266         str = "other RNA";
1267       }
1268       if (StringICmp (str, "ncRNA") == 0) {
1269         str = "RNA";
1270       }
1271       StringNCpy_0 (mol, str, sizeof (mol));
1272 
1273       FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
1274 
1275       FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1276       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1277       FFAddOneString (ffstring, "SV ", FALSE, FALSE, TILDE_IGNORE);
1278       FFAddOneString (ffstring, ver, FALSE, FALSE, TILDE_IGNORE);
1279       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1280       if (topology == TOPOLOGY_CIRCULAR) {
1281         FFAddOneString (ffstring, "circular", FALSE, FALSE, TILDE_IGNORE);
1282       } else {
1283         FFAddOneString (ffstring, "linear", FALSE, FALSE, TILDE_IGNORE);
1284       }
1285       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1286       FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1287       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1288       FFAddOneString (ffstring, dataclass, FALSE, FALSE, TILDE_IGNORE);
1289       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1290       FFAddOneString (ffstring, embldiv, FALSE, FALSE, TILDE_IGNORE);
1291       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1292       FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1293 
1294     } else {
1295 
1296       FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
1297 
1298       FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1299       loclen = StringLen(locus);
1300       if (14 - 5 - loclen > 0) {
1301         FFAddNChar(ffstring, ' ', 14 - 5 - loclen, FALSE);
1302       }
1303       if (awp->hup) {
1304         FFAddOneString (ffstring, " confidential; ", FALSE, FALSE, TILDE_IGNORE);
1305       } else {
1306         FFAddOneString (ffstring, " standard; ", FALSE, FALSE, TILDE_IGNORE);
1307       }
1308       FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1309       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1310 
1311       /* conditional code to make div "UNA" goes here */
1312 
1313       FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1314       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1315       FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1316     }
1317   }
1318 
1319   /* optionally populate indexes for NCBI internal database */
1320 
1321   if (ajp->index) {
1322     index = &asp->index;
1323   } else {
1324     index = NULL;
1325   }
1326 
1327   if (index != NULL) {
1328     Char  tmp [20];
1329     index->locus = StringSave (locus);
1330     index->div = StringSave (div);
1331     sprintf (tmp, "%ld", (long) length);
1332     index->base_cnt = StringSave (tmp);
1333   }
1334 
1335   /* optionally populate gbseq for XML-ized GenBank format */
1336 
1337   if (ajp->gbseq) {
1338     gbseq = &asp->gbseq;
1339   } else {
1340     gbseq = NULL;
1341   }
1342 
1343   if (gbseq != NULL) {
1344     gbseq->locus = StringSave (locus);
1345     gbseq->length = length;
1346     gbseq->division = StringSave (div);
1347 
1348     gbseq->moltype = StringSave (gbseq_mol [imol]);
1349 
1350     strandedness = (Int2) bsp->strand;
1351     if (strandedness < 0 || strandedness > 3) {
1352       strandedness = 0;
1353     }
1354     if (strandedness == 0) {
1355       moltype = (Int2) imolToMoltype [imol];
1356       if (moltype < 0 || moltype > 11) {
1357         moltype = 0;
1358       }
1359       if (moltype == 1) {
1360         strandedness = 2; /* default to double strand for DNA */
1361       } else if ((moltype >= 2 && moltype <= 8) || moltype >= 10 && moltype <= 11) {
1362         strandedness = 1; /* default to single strand for RNA */
1363       }
1364     }
1365     gbseq->strandedness = StringSaveNoNull (gbseq_strd [strandedness]);
1366 
1367     topol = (Int2) bsp->topology;
1368     if (topol < 0 || topol > 2) {
1369       topol = 0;
1370     }
1371     if (topol == 0) {
1372       topol = 1; /* default to linear if not set */
1373     }
1374     gbseq->topology = StringSaveNoNull (gbseq_top [topol]);
1375 
1376     for (sip = bsp->id; sip != NULL; sip = sip->next) {
1377       SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id));
1378       ValNodeCopyStr (&gbseq->other_seqids, 0, id);
1379     }
1380 
1381     date [0] = '\0';
1382     dp = NULL;
1383     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
1384     if (sdp != NULL) {
1385       dp = (DatePtr) sdp->data.ptrvalue;
1386     }
1387     if (dp != NULL) {
1388       DateToFF (date, dp, FALSE);
1389       if (StringDoesHaveText (date)) {
1390         gbseq->create_date = StringSave (date);
1391       }
1392     }
1393     /*
1394     if (StringHasNoText (date)) {
1395       StringCpy (date, "01-JAN-1900");
1396     }
1397     gbseq->create_date = StringSave (date);
1398     */
1399 
1400     date [0] = '\0';
1401     dp = NULL;
1402     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
1403     if (sdp != NULL) {
1404       dp = (DatePtr) sdp->data.ptrvalue;
1405     }
1406     if (dp != NULL) {
1407       DateToFF (date, dp, FALSE);
1408     }
1409     if (StringHasNoText (date)) {
1410       StringCpy (date, "01-JAN-1900");
1411     }
1412     gbseq->update_date = StringSave (date);
1413   }
1414 
1415   suffix = FFEndPrint(ajp, ffstring, awp->format, 12, 0, 5, 0, "ID");
1416   FFRecycleString(ajp, ffstring);
1417 
1418   bbp->string = suffix;
1419 
1420   /*
1421   if (awp->contig && (! awp->showconfeats) && awp->smartconfeats && GetWWW (ajp) &&
1422       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
1423     is_aa = ISA_aa (bsp->mol);
1424     gi = 0;
1425     for (sip = bsp->id; sip != NULL; sip = sip->next) {
1426       if (sip->choice == SEQID_GI) {
1427         gi = (BIG_ID) sip->data.intvalue;
1428       }
1429     }
1430     if (gi > 0) {
1431       ffstring = FFGetString(ajp);
1432 
1433       sprintf(gi_buf, "%ld", (long) gi);
1434       FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1435       if (is_aa) {
1436         FF_Add_NCBI_Base_URL(ffstring, link_featp);
1437       } else {
1438         FF_Add_NCBI_Base_URL(ffstring, link_featn);
1439       }
1440       FFAddOneString(ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
1441       if ( is_aa ) {
1442         FFAddOneString(ffstring, "?report=gpwithparts", FALSE, FALSE, TILDE_IGNORE);
1443       } else {
1444         FFAddOneString(ffstring, "?report=gbwithparts", FALSE, FALSE, TILDE_IGNORE);
1445       }
1446       FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1447       if (bsp->length > 1000000) {
1448         FFAddOneString(ffstring, "Click here to see all features and the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
1449       } else {
1450         FFAddOneString(ffstring, "Click here to see the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
1451       }
1452       FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1453 
1454       prefix = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "??");
1455 
1456       FFRecycleString(ajp, ffstring);
1457 
1458       if (awp->afp != NULL) {
1459         DoQuickLinkFormat (awp->afp, prefix);
1460       }
1461       MemFree (prefix);
1462     }
1463   }
1464   */
1465 
1466   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
1467       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
1468 
1469     sprintf (buf, "<a name=\"locus_%s\"></a>", awp->currAccVerLabel);
1470     DoQuickLinkFormat (awp->afp, buf);
1471 
1472     buf [0] = '\0';
1473     hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext) != NULL);
1474     if (! hasComment) {
1475       hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_region, &dcontext) != NULL);
1476     }
1477     if (! hasComment) {
1478       hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_maploc, &dcontext) != NULL);
1479     }
1480     if (! hasComment) {
1481       sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
1482       while (sdp != NULL) {
1483         uop = (UserObjectPtr) sdp->data.ptrvalue;
1484         if (uop != NULL) {
1485           oip = uop->type;
1486           if (oip != NULL) {
1487             if (StringCmp (oip->str, "RefGeneTracking") == 0) {
1488               hasComment = TRUE;
1489             } else if (StringCmp (oip->str, "GenomeBuild") == 0) {
1490               hasComment = TRUE;
1491             } else if (StringCmp (oip->str, "ENCODE") == 0) {
1492               hasComment = TRUE;
1493             }
1494           }
1495         }
1496         sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
1497       }
1498     }
1499     if (! hasComment) {
1500       hist = bsp->hist;
1501       if (hist != NULL) {
1502         if (hist->replaced_by_ids != NULL && hist->replaced_by_date != NULL) {
1503           hasComment = TRUE;
1504         } else if (hist->replace_ids != NULL && hist->replace_date != NULL) {
1505           hasComment = TRUE;
1506         }
1507       }
1508     }
1509 
1510     buf [0] = '\0';
1511     StringCpy (buf, "<div class=\"localnav\"><ul class=\"locals\">");
1512 
1513     if (hasComment) {
1514       sprintf (sect, "<li><a href=\"#comment_%s\" title=\"Jump to the comment section of this record\">Comment</a></li>",
1515                awp->currAccVerLabel);
1516       StringCat (buf, sect);
1517     }
1518     sprintf (sect, "<li><a href=\"#feature_%s\" title=\"Jump to the feature table of this record\">Features</a></li>",
1519              awp->currAccVerLabel);
1520     StringCat (buf, sect);
1521     if (willshowwgs) {
1522       sprintf (sect, "<li><a href=\"#wgs_%s\" title=\"Jump to WGS section of this record\">WGS</a></li>",
1523                awp->currAccVerLabel);
1524       StringCat (buf, sect);
1525     }
1526     if (willshowgenome) {
1527       sprintf (sect, "<li><a href=\"#genome_%s\" title=\"Jump to the genome section of this record\">Genome</a></li>",
1528                awp->currAccVerLabel);
1529       StringCat (buf, sect);
1530     }
1531     if (willshowcontig) {
1532       sprintf (sect, "<li><a href=\"#contig_%s\" title=\"Jump to the contig section of this record\">Contig</a></li>",
1533                awp->currAccVerLabel);
1534       StringCat (buf, sect);
1535     }
1536     if (willshowsequence) {
1537       sprintf (sect, "<li><a href=\"#sequence_%s\" title=\"Jump to the sequence of this record\">Sequence</a></li>",
1538                awp->currAccVerLabel);
1539       StringCat (buf, sect);
1540     }
1541 
1542     StringCat (buf, "</ul>");
1543 
1544     prevGi = 0;
1545     currGi = 0;
1546     nextGi = 0;
1547     gilistpos = awp->gilistpos;
1548     if (gilistpos == NULL) {
1549       gilistpos = ajp->gihead;
1550     }
1551     do {
1552       vnp = gilistpos;
1553       if (vnp != NULL) {
1554         prevGi = (BIG_ID) vnp->data.intvalue;
1555         vnp = vnp->next;
1556         gilistpos = vnp;
1557         if (vnp != NULL) {
1558           currGi = (BIG_ID) vnp->data.intvalue;
1559           vnp = vnp->next;
1560           if (vnp != NULL) {
1561             nextGi = (BIG_ID) vnp->data.intvalue;
1562           }
1563         }
1564       }
1565     } while (gilistpos != NULL && currGi != awp->currGi);
1566 
1567     has_next_pref_ul = FALSE;
1568 
1569     if (currGi == awp->currGi && nextGi > 0 && awp->sectionCount < awp->sectionMax) {
1570       if (! has_next_pref_ul) {
1571         StringCat (buf, "<ul class=\"nextprevlinks\">");
1572         has_next_pref_ul = TRUE;
1573       }
1574       LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
1575       if (awp->seg + 1 > 0 && awp->numsegs > 0 && awp->seg + 1 <= awp->numsegs) {
1576         sprintf (seg, " (segment %d of %ld)", (int) (awp->seg + 1), (long) awp->numsegs);
1577         StringCat (seqid, seg);
1578       }
1579       sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) nextGi, seqid);
1580       StringCat (buf, sect);
1581     } else if (awp->nextGi > 0) {
1582       if (! has_next_pref_ul) {
1583         StringCat (buf, "<ul class=\"nextprevlinks\">");
1584         has_next_pref_ul = TRUE;
1585       }
1586       LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
1587       sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) awp->nextGi, seqid);
1588       StringCat (buf, sect);
1589     }
1590     if (currGi == awp->currGi && prevGi > 0 && awp->sectionCount > 1) {
1591       if (! has_next_pref_ul) {
1592         StringCat (buf, "<ul class=\"nextprevlinks\">");
1593         has_next_pref_ul = TRUE;
1594       }
1595       LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
1596       if (awp->seg - 1 > 0 && awp->numsegs > 0 && awp->seg - 1 <= awp->numsegs) {
1597         sprintf (seg, " (segment %d of %ld)", (int) (awp->seg - 1), (long) awp->numsegs);
1598         StringCat (seqid, seg);
1599       }
1600       sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) prevGi, seqid);
1601       StringCat (buf, sect);
1602     } else if (awp->prevGi > 0) {
1603       if (! has_next_pref_ul) {
1604         StringCat (buf, "<ul class=\"nextprevlinks\">");
1605         has_next_pref_ul = TRUE;
1606       }
1607       LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
1608       sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) awp->prevGi, seqid);
1609       StringCat (buf, sect);
1610     }
1611     if (has_next_pref_ul) {
1612       StringCat (buf, "</ul>");
1613     }
1614     StringCat (buf, "</div>\n");
1615     StringCat (buf, "<pre class=\"genbank\">");
1616     DoQuickLinkFormat (awp->afp, buf);
1617   } else if (GetWWW (ajp)) {
1618     buf [0] = '\0';
1619     StringCat (buf, "<pre>");
1620     DoQuickLinkFormat (awp->afp, buf);
1621   }
1622 
1623   if (awp->afp != NULL) {
1624     DoImmediateFormat (awp->afp, bbp);
1625   }
1626 }
1627 
AddDeflineBlock(Asn2gbWorkPtr awp)1628 NLM_EXTERN void AddDeflineBlock (
1629   Asn2gbWorkPtr awp
1630 )
1631 
1632 {
1633   IntAsn2gbJobPtr  ajp;
1634   Asn2gbSectPtr    asp;
1635   BaseBlockPtr     bbp;
1636   BioseqPtr        bsp;
1637   GBSeqPtr         gbseq;
1638   ItemInfo         ii;
1639   StringItemPtr    ffstring;
1640   CharPtr          title;
1641 
1642   if (awp == NULL) return;
1643   ajp = awp->ajp;
1644   if (ajp == NULL) return;
1645   bsp = awp->bsp;
1646   if (bsp == NULL) return;
1647   asp = awp->asp;
1648   if (asp == NULL) return;
1649 
1650   bbp = Asn2gbAddBlock (awp, DEFLINE_BLOCK, sizeof (BaseBlock));
1651   if (bbp == NULL) return;
1652 
1653   ffstring = FFGetString(ajp);
1654   if ( ffstring == NULL ) return;
1655 
1656   MemSet ((Pointer) (&ii), 0, sizeof (ItemInfo));
1657 
1658   /* create default defline */
1659 
1660   title = NewCreateDefLineEx (&ii, bsp, ajp->gpipdDeflines, FALSE, ajp->gpipdDeflines);
1661 
1662   FFStartPrint (ffstring, awp->format, 0, 12, "DEFINITION", 12, 5, 5, "DE", TRUE);
1663 
1664   if (StringDoesHaveText (title)) {
1665     bbp->entityID = ii.entityID;
1666     bbp->itemID = ii.itemID;
1667     bbp->itemtype = ii.itemtype;
1668 
1669     FFAddOneString (ffstring, title, TRUE, TRUE, TILDE_IGNORE);
1670   } else {
1671     FFAddOneChar (ffstring, '.', FALSE);
1672   }
1673 
1674   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "DE");
1675 
1676   /* optionally populate gbseq for XML-ized GenBank format */
1677 
1678   if (ajp->gbseq) {
1679     gbseq = &asp->gbseq;
1680   } else {
1681     gbseq = NULL;
1682   }
1683 
1684   if (gbseq != NULL) {
1685     gbseq->definition = StringSave (title);
1686   }
1687 
1688   MemFree (title);
1689 
1690   FFRecycleString(ajp, ffstring);
1691 
1692   /*
1693   if (bbp->itemtype == 0) {
1694     bbp->entityID = bsp->idx.entityID;
1695     bbp->itemtype = bsp->idx.itemtype;
1696     bbp->itemID = bsp->idx.itemID;
1697   }
1698   */
1699 
1700   if (awp->afp != NULL) {
1701     DoImmediateFormat (awp->afp, bbp);
1702   }
1703 }
1704 
/*
 * FF_www_accession
 *
 * Appends the accession text in cstring to ffstring.  When GetWWW (ajp)
 * is true the text is wrapped in an HTML anchor whose href is built from
 * the base URL for link_seqn (is_na TRUE) or link_seqp (is_na FALSE)
 * followed by the accession; otherwise the text is appended unchanged.
 *
 * Does nothing if cstring or ffstring is NULL.
 */
static void FF_www_accession (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  CharPtr cstring,
  Boolean is_na
)
{
  if (ffstring == NULL || cstring == NULL) return;

  /* plain-text output: just the accession itself */

  if (! GetWWW (ajp)) {
    FFAddOneString (ffstring, cstring, FALSE, FALSE, TILDE_IGNORE);
    return;
  }

  /* HTML output: <a href="BASE-URL + accession">accession</a> */

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);

  if (is_na) {
    FF_Add_NCBI_Base_URL (ffstring, link_seqn);
  } else {
    FF_Add_NCBI_Base_URL (ffstring, link_seqp);
  }

  /* no "val=" prefix is emitted; the accession directly follows the base URL */

  FFAddTextToString (ffstring, NULL, cstring, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, cstring, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
1729 
1730 /* Check if acc directly follows prev */
IsSuccessor(CharPtr acc,CharPtr prev)1731 static Boolean IsSuccessor(CharPtr acc, CharPtr prev)
1732 {
1733   CharPtr accp, prevp;
1734   Int4 acc_num, prev_num;
1735 
1736   if (acc == NULL  ||  prev == NULL) return FALSE;
1737 
1738   if (StringLen(acc) != StringLen(prev)) return FALSE;
1739 
1740   accp = acc;
1741   prevp = prev;
1742   while (accp != '\0'  &&  prevp != '\0') {
1743     if (*accp != *prevp) return FALSE;
1744     if (IS_DIGIT(*accp)) {
1745       acc_num = (Int4)atol(accp);
1746       prev_num = (Int4)atol(prevp);
1747       return (acc_num == prev_num + 1);
1748     }
1749     ++accp;
1750     ++prevp;
1751   }
1752   return FALSE;
1753 }
1754 
1755 
IsProjectAccn(CharPtr acc)1756 static Boolean IsProjectAccn(CharPtr acc)
1757 {
1758     Int2 letters;
1759     Char digits[3];
1760     CharPtr ptr;
1761 
1762     if (acc == NULL) {
1763         return FALSE;
1764     }
1765     digits[0] = '\0';
1766 
1767     for (ptr = acc, letters = 0; ptr != '\0'  &&  IS_ALPHA(*ptr); ++ptr, ++letters) continue;
1768     if (letters != 4  ||  StringLen(ptr) < 2) {
1769         return FALSE;
1770     }
1771     digits[0] = *ptr++;
1772     digits[1] = *ptr++;
1773     digits[2] = '\0';
1774     if (atoi(digits) < 1) {
1775         return FALSE;
1776     }
1777     while (*ptr != '\0') {
1778         if (*ptr != '0') {
1779             return FALSE;
1780         }
1781         ++ptr;
1782     }
1783     return TRUE;
1784 }
1785 
1786 
/*
 * GetSecondaryAccessions
 *
 * Builds a new ValNode list of strings (via ValNodeCopyStr) from the
 * accessions in extra_access.
 *
 * With fewer than EXTRA_ACCESSION_CUTOFF entries, every entry is copied
 * as is.  Otherwise entries rejected by ValidateAccn are dropped and the
 * rest are grouped into bins: each project accession (IsProjectAccn) gets
 * a bin of its own placed at the front of the bin list, and the remaining
 * accessions are appended to the current bin for as long as each one is
 * the successor (IsSuccessor) of the previously binned one.  A bin holding
 * more than BIN_ACCESSION_CUTOFF accessions is emitted as a single
 * "first-last" range string; smaller bins are emitted one accession at a
 * time.
 *
 * Returns the head of the resulting list, or NULL if nothing was added.
 */
static ValNodePtr GetSecondaryAccessions(ValNodePtr extra_access)
{
#define EXTRA_ACCESSION_CUTOFF 20
#define BIN_ACCESSION_CUTOFF   5

  ValNodePtr  bin_list = NULL;   /* one node per bin; ptrvalue heads that bin's accession list */
  ValNodePtr  open_bin = NULL;   /* bin currently receiving successive accessions */
  ValNodePtr  output = NULL;
  ValNodePtr  members, node, project_bin;
  CharPtr     accn, range_start, range_stop;
  CharPtr     previous = NULL;   /* last non-project accession that passed validation */
  Char        range [40];

  /* short lists are passed through without any grouping */

  if (ValNodeLen (extra_access) < EXTRA_ACCESSION_CUTOFF) {
    node = extra_access;
    while (node != NULL) {
      ValNodeCopyStr (&output, 0, (CharPtr) node->data.ptrvalue);
      node = node->next;
    }
    return output;
  }

  /* distribute the valid accessions over bins of successive accessions */

  for (node = extra_access; node != NULL; node = node->next) {
    accn = (CharPtr) node->data.ptrvalue;

    if (ValidateAccn (accn) == 0) {

      if (IsProjectAccn (accn)) {

        /* project accessions stand alone, in a bin pushed onto the front */

        project_bin = ValNodeNew (NULL);
        members = (ValNodePtr) project_bin->data.ptrvalue;
        ValNodeAddStr (&members, 0, accn);
        project_bin->data.ptrvalue = members;
        project_bin->next = bin_list;
        bin_list = project_bin;

      } else {

        /* a break in the sequence starts a new bin at the end of the list */

        if (! IsSuccessor (accn, previous)) {
          open_bin = ValNodeAdd (&bin_list);
        }
        if (open_bin != NULL) {
          members = (ValNodePtr) open_bin->data.ptrvalue;
          ValNodeAddStr (&members, 0, accn);
          open_bin->data.ptrvalue = members;
        }

        previous = accn;
      }
    }
  }

  /* turn each bin into either a range or its individual accessions */

  for (open_bin = bin_list; open_bin != NULL; open_bin = open_bin->next) {
    members = (ValNodePtr) open_bin->data.ptrvalue;

    if (ValNodeLen (members) <= BIN_ACCESSION_CUTOFF) {

      for (node = members; node != NULL; node = node->next) {
        ValNodeCopyStr (&output, 0, (CharPtr) node->data.ptrvalue);
      }

    } else {

      /* locate the first and last accession held by this bin */

      range_start = NULL;
      range_stop = NULL;
      for (node = members; node != NULL; node = node->next) {
        range_stop = (CharPtr) node->data.ptrvalue;
        if (range_start == NULL) {
          range_start = range_stop;
        }
      }

      range [0] = '\0';
      StringCat (range, range_start);
      StringCat (range, "-");
      StringCat (range, range_stop);
      ValNodeCopyStr (&output, 0, range);
    }

    /* release the bin's node chain and clear the bin's pointer to it */

    open_bin->data.ptrvalue = ValNodeFree (members);
  }

  ValNodeFreeData (bin_list);

  return output;
}
1857 
1858 
1859 /* !!! this definitely needs more work to support all classes, use proper SeqId !!! */
1860 
AddAccessionBlock(Asn2gbWorkPtr awp)1861 NLM_EXTERN void AddAccessionBlock (
1862   Asn2gbWorkPtr awp
1863 )
1864 
1865 {
1866   size_t             acclen;
1867   SeqIdPtr           accn = NULL;
1868   IntAsn2gbJobPtr    ajp;
1869   Asn2gbSectPtr      asp;
1870   BaseBlockPtr       bbp;
1871   BioseqPtr          bsp;
1872   Char               buf [41];
1873   Char               ch1, ch2, ch3;
1874   SeqMgrDescContext  dcontext;
1875   EMBLBlockPtr       ebp;
1876   ValNodePtr         extra_access;
1877   CharPtr            flatloc;
1878   GBBlockPtr         gbp;
1879   SeqIdPtr           gi = NULL;
1880   GBSeqPtr           gbseq;
1881   SeqIdPtr           gnl = NULL;
1882   SeqIdPtr           gpp = NULL;
1883   IndxPtr            index;
1884   Boolean            is_na;
1885   SeqIdPtr           lcl = NULL;
1886   size_t             len = 0;
1887   MolInfoPtr         mip;
1888   CharPtr            ptr;
1889   SeqDescrPtr        sdp;
1890   ValNodePtr         secondary_acc;
1891   CharPtr            separator = " ";
1892   SeqIdPtr           sip;
1893   TextSeqIdPtr       tsip;
1894   ValNodePtr         vnp;
1895   CharPtr            wgsaccn = NULL;
1896   CharPtr            xtra;
1897   StringItemPtr      ffstring;
1898 
1899   if (awp == NULL) return;
1900   ajp = awp->ajp;
1901   if (ajp == NULL) return;
1902   bsp = awp->bsp;
1903   if (bsp == NULL) return;
1904   asp = awp->asp;
1905   if (asp == NULL) return;
1906 
1907   ffstring = FFGetString(ajp);
1908   if ( ffstring == NULL ) return;
1909 
1910   is_na = ISA_na (bsp->mol);
1911 
1912   for (sip = bsp->id; sip != NULL; sip = sip->next) {
1913     switch (sip->choice) {
1914       case SEQID_GI :
1915         gi = sip;
1916         break;
1917       case SEQID_GENBANK :
1918       case SEQID_EMBL :
1919       case SEQID_DDBJ :
1920       case SEQID_TPG :
1921       case SEQID_TPE :
1922       case SEQID_TPD :
1923         accn = sip;
1924         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1925         if (tsip != NULL) {
1926           acclen = StringLen (tsip->accession);
1927           if (acclen == 12) {
1928             wgsaccn = tsip->accession;
1929             len = 12;
1930           } else if (acclen == 13) {
1931             wgsaccn = tsip->accession;
1932             len = 13;
1933           } else if (acclen == 14) {
1934             wgsaccn = tsip->accession;
1935             len = 14;
1936           } else if (acclen == 15) {
1937             wgsaccn = tsip->accession;
1938             len = 15;
1939           }
1940         }
1941         break;
1942       case SEQID_OTHER :
1943         accn = sip;
1944         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1945         if (tsip != NULL) {
1946           if (StringLen (tsip->accession) == 15) {
1947             wgsaccn = tsip->accession;
1948             len = 15;
1949           }
1950         }
1951         break;
1952       case SEQID_PIR :
1953       case SEQID_SWISSPROT :
1954       case SEQID_PRF :
1955       case SEQID_PDB :
1956         accn = sip;
1957         break;
1958       case SEQID_GPIPE :
1959         /* should not override better accession */
1960         gpp = sip;
1961         break;
1962       case SEQID_GENERAL :
1963         /* should not override better accession */
1964         gnl = sip;
1965         break;
1966       case SEQID_LOCAL :
1967         lcl = sip;
1968         break;
1969       default :
1970         break;
1971     }
1972   }
1973 
1974   sip = NULL;
1975   if (accn == NULL) {
1976     accn = gpp;
1977     gpp = NULL;
1978   }
1979   if (accn != NULL) {
1980     sip = accn;
1981   } else if (gnl != NULL) {
1982     sip = gnl;
1983   } else if (lcl != NULL) {
1984     sip = lcl;
1985   } else if (gi != NULL) {
1986     sip = gi;
1987   }
1988 
1989   if (sip == NULL) return;
1990 
1991   SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
1992 
1993   if (sip->choice == SEQID_PDB) {
1994     ptr = StringChr (buf, '_');
1995     if (ptr != NULL) {
1996       ch1 = ptr [1];
1997       if (ch1 != '\0') {
1998         ch2 = ptr [2];
1999         if (ch2 != '\0') {
2000           ch3 = ptr [3];
2001           if (ch3 == '\0') {
2002             if (ch1 == ch2) {
2003               if (IS_UPPER (ch1)) {
2004                 ptr [1] = TO_LOWER (ch1);
2005                 ptr [2] = '\0';
2006               }
2007             }
2008           }
2009         }
2010       }
2011     }
2012   }
2013 
2014   bbp = Asn2gbAddBlock (awp, ACCESSION_BLOCK, sizeof (BaseBlock));
2015   if (bbp == NULL) return;
2016 
2017   bbp->entityID = awp->entityID;
2018 
2019   if (accn == NULL) {
2020 
2021     /* if no accession, do not show local or general in ACCESSION */
2022 
2023     if (ajp->mode == ENTREZ_MODE || ajp->mode == SEQUIN_MODE) {
2024       buf [0] = '\0';
2025     }
2026   }
2027 
2028   FFStartPrint (ffstring, awp->format, 0, 12, "ACCESSION", 12, 5, 5, "AC", TRUE);
2029 
2030   if (awp->hup && accn != NULL) {
2031     FFAddOneString (ffstring, ";", FALSE, FALSE, TILDE_TO_SPACES);
2032 
2033   } else if (ajp->ajp.slp != NULL) {
2034     FF_www_accession (ajp, ffstring, buf, is_na);
2035     flatloc =  FFFlatLoc (ajp, bsp, ajp->ajp.slp, ajp->masterStyle, FALSE);
2036     FFAddTextToString (ffstring, " REGION: ", flatloc, NULL, FALSE, FALSE, TILDE_TO_SPACES);
2037     MemFree (flatloc);
2038   } else {
2039     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
2040     if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2041         FFAddOneChar(ffstring, ';', FALSE);
2042     }
2043   }
2044 
2045   /* optionally populate indexes for NCBI internal database */
2046 
2047   if (ajp->index) {
2048     index = &asp->index;
2049   } else {
2050     index = NULL;
2051   }
2052 
2053   if (index != NULL) {
2054     index->accession = StringSave (buf);
2055   }
2056 
2057   /* optionally populate gbseq for XML-ized GenBank format */
2058 
2059   if (ajp->gbseq) {
2060     gbseq = &asp->gbseq;
2061   } else {
2062     gbseq = NULL;
2063   }
2064 
2065   if (gbseq != NULL) {
2066     gbseq->primary_accession = StringSave (buf);
2067   }
2068 
2069   if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
2070     separator = " ";
2071   } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2072     separator = " ";
2073   }
2074 
2075   if (gpp != NULL) {
2076     SeqIdWrite (gpp, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
2077     FFAddTextToString(ffstring, separator, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
2078     if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2079       FFAddOneChar(ffstring, ';', FALSE);
2080     }
2081   }
2082 
2083   if (ajp->ajp.slp == NULL) {
2084     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
2085     if (sdp != NULL && wgsaccn != NULL) {
2086       mip = (MolInfoPtr) sdp->data.ptrvalue;
2087       if (mip != NULL && mip->tech == MI_TECH_wgs) {
2088         StringNCpy_0 (buf, wgsaccn, sizeof (buf));
2089         acclen = StringLen (buf);
2090         if (acclen == 12 && StringCmp (buf + len - 6, "000000") != 0) {
2091           StringCpy (buf + len - 6, "000000");
2092         } else if (acclen == 13 && StringCmp (buf + len - 7, "0000000") != 0) {
2093           StringCpy (buf + len - 7, "0000000");
2094         } else if (acclen == 14 && StringCmp (buf + len - 8, "00000000") != 0) {
2095           StringCpy (buf + len - 8, "00000000");
2096         } else if (acclen == 15 && StringCmp (buf + len - 8, "00000000") != 0) {
2097           StringCpy (buf + len - 8, "00000000");
2098         } else {
2099           buf [0] = '\0';
2100         }
2101         if (! StringHasNoText (buf)) {
2102           if ( GetWWW(ajp) ) {
2103             FFAddTextToString(ffstring, separator, "<a href=\"", NULL, FALSE, FALSE, TILDE_IGNORE);
2104             FF_Add_NCBI_Base_URL (ffstring, link_accn);
2105             FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", buf, "\">", FALSE, FALSE, TILDE_IGNORE);
2106             FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
2107             FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
2108           } else {
2109             FFAddTextToString(ffstring, separator, buf, NULL, FALSE, FALSE, TILDE_TO_SPACES);
2110           }
2111           if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2112             FFAddOneChar(ffstring, ';', FALSE);
2113           }
2114         }
2115       }
2116     }
2117 
2118     sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
2119     while (sdp != NULL) {
2120 
2121       extra_access = NULL;
2122 
2123       switch (dcontext.seqdesctype) {
2124         case Seq_descr_genbank :
2125           gbp = (GBBlockPtr) sdp->data.ptrvalue;
2126           if (gbp != NULL) {
2127             extra_access = gbp->extra_accessions;
2128           }
2129           break;
2130         case Seq_descr_embl :
2131           ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
2132           if (ebp != NULL) {
2133             extra_access = ebp->extra_acc;
2134           }
2135           break;
2136         default :
2137           break;
2138       }
2139 
2140       if (extra_access != NULL) {
2141         bbp->entityID = dcontext.entityID;
2142         bbp->itemID = dcontext.itemID;
2143         bbp->itemtype = OBJ_SEQDESC;
2144 
2145 
2146         secondary_acc = GetSecondaryAccessions(extra_access);
2147         for (vnp = secondary_acc; vnp != NULL; vnp = vnp->next) {
2148           xtra = (CharPtr)vnp->data.ptrvalue;
2149           FFAddTextToString(ffstring, separator, xtra, NULL, FALSE, FALSE, TILDE_IGNORE);
2150           if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2151             FFAddOneChar(ffstring, ';', FALSE);
2152           }
2153 
2154           /* optionally populate indexes for NCBI internal database */
2155 
2156           if (index != NULL) {
2157             ValNodeCopyStrToHead (&(index->secondaries), 0, xtra);
2158           }
2159 
2160           /* optionally populate gbseq for XML-ized GenBank format */
2161 
2162           if (gbseq != NULL) {
2163               ValNodeCopyStr (&(gbseq->secondary_accessions), 0, xtra);
2164           }
2165         }
2166         ValNodeFreeData(secondary_acc);
2167       }
2168 
2169       sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &dcontext);
2170     }
2171   }
2172 
2173   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "AC");
2174   FFRecycleString(ajp, ffstring);
2175 
2176   if (bbp->itemtype == 0) {
2177     bbp->entityID = bsp->idx.entityID;
2178     bbp->itemtype = bsp->idx.itemtype;
2179     bbp->itemID = bsp->idx.itemID;
2180   }
2181 
2182   if (awp->afp != NULL) {
2183     DoImmediateFormat (awp->afp, bbp);
2184   }
2185 }
2186 
AddVersionBlock(Asn2gbWorkPtr awp)2187 NLM_EXTERN void AddVersionBlock (
2188   Asn2gbWorkPtr awp
2189 )
2190 
2191 {
2192   SeqIdPtr          accn = NULL;
2193   IntAsn2gbJobPtr   ajp;
2194   Asn2gbSectPtr     asp;
2195   BaseBlockPtr      bbp;
2196   BioseqPtr         bsp;
2197   Char              buf [41];
2198   Char              ch1, ch2, ch3;
2199   Uint1             format = PRINTID_TEXTID_ACC_VER;
2200   GBSeqPtr          gbseq;
2201   BIG_ID            gi = -1;
2202   SeqIdPtr          gpp = NULL;
2203   IntAsn2gbSectPtr  iasp;
2204   IndxPtr           index;
2205   CharPtr           ptr;
2206   SeqIdPtr          sip;
2207   Char              tmp [41];
2208   Char              version [64];
2209   StringItemPtr     ffstring;
2210 
2211   if (awp == NULL) return;
2212   ajp = awp->ajp;
2213   if (ajp == NULL) return;
2214   bsp = awp->bsp;
2215   if (bsp == NULL) return;
2216   asp = awp->asp;
2217   if (asp == NULL) return;
2218 
2219   ffstring = FFGetString(ajp);
2220   if ( ffstring == NULL ) return;
2221 
2222   iasp = (IntAsn2gbSectPtr) asp;
2223 
2224   for (sip = bsp->id; sip != NULL; sip = sip->next) {
2225     switch (sip->choice) {
2226       case SEQID_GI :
2227         gi = (BIG_ID) sip->data.intvalue;
2228         break;
2229       case SEQID_GENBANK :
2230       case SEQID_EMBL :
2231       case SEQID_DDBJ :
2232       case SEQID_OTHER :
2233         accn = sip;
2234         break;
2235       case SEQID_PIR :
2236       case SEQID_SWISSPROT :
2237       case SEQID_PRF :
2238       case SEQID_PDB :
2239         accn = sip;
2240         break;
2241       case SEQID_TPG :
2242       case SEQID_TPE :
2243       case SEQID_TPD :
2244         accn = sip;
2245         break;
2246       case SEQID_GPIPE :
2247         /* should not override better accession */
2248         gpp = sip;
2249         break;
2250       default :
2251         break;
2252     }
2253   }
2254 
2255   if (accn == NULL) {
2256     accn = gpp;
2257     /*
2258     format = PRINTID_TEXTID_ACC_ONLY;
2259     */
2260   }
2261 
2262   /* if (gi < 1 && accn == NULL) return; */
2263 
2264   /* display of GI in VERSION line is now under control of HIDE_GI_NUMBERS bit in flags argument */
2265 
2266   if (ajp->hideGI) {
2267     gi = 0;
2268   }
2269 
2270   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2271     if ( accn == NULL ) return;
2272     if (awp->newLocusLine) return;
2273   }
2274 
2275   bbp = Asn2gbAddBlock (awp, VERSION_BLOCK, sizeof (BaseBlock));
2276   if (bbp == NULL) return;
2277 
2278   bbp->entityID = awp->entityID;
2279 
2280   /* no longer displaying NID */
2281 
2282   /*
2283   if (gi > 0) {
2284     sprintf (version, "g%ld", (long) gi);
2285 
2286     gb_StartPrint (awp->format, needInitBuff, 0, 12, "NID", 13, 5, 5, "NI", TRUE);
2287     needInitBuff = FALSE;
2288 
2289     gb_AddString (NULL, version, NULL, FALSE, FALSE, TILDE_TO_SPACES);
2290 
2291     ff_EndPrint();
2292     needEndPrint = FALSE;
2293   }
2294   */
2295 
2296   version [0] = '\0';
2297 
2298   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2299     SeqIdWrite (accn, version, format, sizeof (version) - 1);
2300 
2301     FFStartPrint (ffstring, awp->format, 0, 12, "VERSION", 12, 5, 5, "SV", TRUE);
2302 
2303     FFAddOneString (ffstring, version, FALSE, FALSE, TILDE_TO_SPACES);
2304 
2305     FFAddOneChar(ffstring, '\n', FALSE);
2306 
2307     bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
2308     FFRecycleString(ajp, ffstring);
2309 
2310     if (awp->afp != NULL) {
2311       DoImmediateFormat (awp->afp, bbp);
2312     }
2313 
2314     return;
2315   }
2316 
2317   if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) {
2318     sprintf (version, "%ld", (long) gi);
2319     iasp->gi = StringSave (version);
2320   }
2321 
2322   if (accn != NULL) {
2323 
2324     buf [0] = '\0';
2325     SeqIdWrite (accn, buf, format, sizeof (buf) - 1);
2326 
2327     if (accn->choice == SEQID_PDB) {
2328       ptr = StringChr (buf, '_');
2329       if (ptr != NULL) {
2330         ch1 = ptr [1];
2331         if (ch1 != '\0') {
2332           ch2 = ptr [2];
2333           if (ch2 != '\0') {
2334             ch3 = ptr [3];
2335             if (ch3 == '\0') {
2336               if (ch1 == ch2) {
2337                 if (IS_UPPER (ch1)) {
2338                   ptr [1] = TO_LOWER (ch1);
2339                   ptr [2] = '\0';
2340                 }
2341               }
2342             }
2343           }
2344         }
2345       }
2346     }
2347 
2348     if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) {
2349       iasp->acc = StringSave (buf);
2350       ptr = StringChr (iasp->acc, '.');
2351       if (ptr != NULL) {
2352         *ptr = '\0';
2353       }
2354     }
2355 
2356     if (gi > 0) {
2357       sprintf (version, "%s  GI:%lld", buf, (long long) gi);
2358     } else {
2359       sprintf (version, "%s", buf);
2360     }
2361 
2362     FFStartPrint (ffstring, awp->format, 0, 12, "VERSION", 12, 5, 5, "SV", TRUE);
2363 
2364     FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
2365     /* optionally populate indexes for NCBI internal database */
2366 
2367     if (ajp->index) {
2368       index = &asp->index;
2369     } else {
2370       index = NULL;
2371     }
2372 
2373     if (index != NULL) {
2374       ptr = StringChr (buf, '.');
2375       if (ptr != NULL) {
2376         ptr++;
2377         index->version = StringSave (ptr);
2378       }
2379       if (gi > 0) {
2380         sprintf (tmp, "%lld", (long long) gi);
2381         index->gi = StringSave (tmp);
2382       }
2383     }
2384 
2385     /* optionally populate gbseq for XML-ized GenBank format */
2386 
2387     if (ajp->gbseq) {
2388       gbseq = &asp->gbseq;
2389     } else {
2390       gbseq = NULL;
2391     }
2392 
2393     if (gbseq != NULL) {
2394       ptr = StringChr (buf, '.');
2395       if (ptr != NULL) {
2396         gbseq->accession_version = StringSave (buf);
2397       } else if (StringDoesHaveText (gbseq->primary_accession)) {
2398         gbseq->accession_version = StringSave (gbseq->primary_accession);
2399       }
2400     }
2401 
2402   } else if (gi > 0) {
2403 
2404     FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 12, 5, 5, "SV", TRUE);
2405 
2406     sprintf (version, "  GI:%lld", (long long) gi);
2407 
2408     FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
2409 
2410   } else {
2411 
2412     FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 0, 5, 5, "SV", TRUE);
2413     FFAddOneChar(ffstring, '\n', FALSE);
2414   }
2415 
2416   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
2417   FFRecycleString(ajp, ffstring);
2418 
2419   if (bbp->itemtype == 0) {
2420     bbp->itemtype = bsp->idx.itemtype;
2421     bbp->itemID = bsp->idx.itemID;
2422   }
2423 
2424   if (awp->afp != NULL) {
2425     DoImmediateFormat (awp->afp, bbp);
2426   }
2427 }
2428 
/*
 * FF_asn2gb_www_projID
 *
 *   Appends an HTML anchor for a project ID to ffstring: the opening tag,
 *   the link_projid base URL, the ID as the link target, and the ID again
 *   as the visible link text.
 */
static void FF_asn2gb_www_projID (
  StringItemPtr ffstring,
  CharPtr projID
)

{
  CharPtr  pieces [4];
  int      idx;

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (ffstring, link_projid);

  /* everything after the base URL is added with identical options */

  pieces [0] = projID;
  pieces [1] = "\">";
  pieces [2] = projID;
  pieces [3] = "</a>";

  for (idx = 0; idx < 4; idx++) {
    FFAddOneString (ffstring, pieces [idx], FALSE, FALSE, TILDE_IGNORE);
  }
}
2442 
/*
 * FF_asn2gb_www_SRR
 *
 *   Builds an HTML anchor for a Sequence Read Archive identifier and
 *   appends it to buf with StringCat.  Identifiers starting with "SRZ",
 *   "DRZ" or "ERZ" use the link_srz base URL; all others use link_srr.
 *   Newlines, carriage returns and tabs in the formatted text become
 *   spaces, and surrounding spaces are trimmed, before the text is appended.
 *
 *   buf must already hold a NUL-terminated string and be large enough for
 *   the appended link; no size is checked here.
 */
static void FF_asn2gb_www_SRR (
  IntAsn2gbJobPtr ajp,
  CharPtr buf,
  CharPtr str
)

{
  StringItemPtr  item;
  CharPtr        link;
  CharPtr        scan;

  if (ajp == NULL || buf == NULL) return;
  if (StringHasNoText (str)) return;

  item = FFGetString (ajp);
  if (item == NULL) return;

  FFAddOneString (item, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  if (StringNCmp (str, "SRZ", 3) != 0 &&
      StringNCmp (str, "DRZ", 3) != 0 &&
      StringNCmp (str, "ERZ", 3) != 0) {
    FF_Add_NCBI_Base_URL (item, link_srr);
  } else {
    FF_Add_NCBI_Base_URL (item, link_srz);
  }
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "</a>", FALSE, FALSE, TILDE_IGNORE);

  link = FFEndPrint (ajp, item, ajp->format, 21, 21, 21, 21, NULL);
  FFRecycleString (ajp, item);

  if (link == NULL) return;

  /* flatten line breaks and tabs so the link stays on one line */

  for (scan = link; *scan != '\0'; scan++) {
    switch (*scan) {
      case '\n' :
      case '\r' :
      case '\t' :
        *scan = ' ';
        break;
      default :
        break;
    }
  }

  TrimSpacesAroundString (link);
  StringCat (buf, link);
  MemFree (link);
}
2489 
/*
 * FF_asn2gb_www_BP
 *
 *   Builds an HTML anchor for a BioProject identifier (base URL
 *   link_bioproj) and appends it to buf with StringCat.  Newlines,
 *   carriage returns and tabs in the formatted text become spaces, and
 *   surrounding spaces are trimmed, before the text is appended.
 *
 *   buf must already hold a NUL-terminated string and be large enough for
 *   the appended link; no size is checked here.
 */
static void FF_asn2gb_www_BP (
  IntAsn2gbJobPtr ajp,
  CharPtr buf,
  CharPtr str
)

{
  StringItemPtr  item;
  CharPtr        link;
  CharPtr        scan;

  if (ajp == NULL || buf == NULL) return;
  if (StringHasNoText (str)) return;

  item = FFGetString (ajp);
  if (item == NULL) return;

  FFAddOneString (item, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (item, link_bioproj);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "</a>", FALSE, FALSE, TILDE_IGNORE);

  link = FFEndPrint (ajp, item, ajp->format, 21, 21, 21, 21, NULL);
  FFRecycleString (ajp, item);

  if (link == NULL) return;

  /* flatten line breaks and tabs so the link stays on one line */

  for (scan = link; *scan != '\0'; scan++) {
    if (*scan == '\n' || *scan == '\r' || *scan == '\t') {
      *scan = ' ';
    }
  }

  TrimSpacesAroundString (link);
  StringCat (buf, link);
  MemFree (link);
}
2532 
/*
 * FF_asn2gb_www_BS
 *
 *   Builds an HTML anchor for a BioSample identifier (base URL
 *   link_biosamp) and appends it to buf with StringCat.  Newlines,
 *   carriage returns and tabs in the formatted text become spaces, and
 *   surrounding spaces are trimmed, before the text is appended.
 *
 *   buf must already hold a NUL-terminated string and be large enough for
 *   the appended link; no size is checked here.
 */
static void FF_asn2gb_www_BS (
  IntAsn2gbJobPtr ajp,
  CharPtr buf,
  CharPtr str
)

{
  Char           cur;
  StringItemPtr  item;
  CharPtr        link;
  size_t         pos;

  if (ajp == NULL || buf == NULL) return;
  if (StringHasNoText (str)) return;

  item = FFGetString (ajp);
  if (item == NULL) return;

  FFAddOneString (item, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (item, link_biosamp);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "</a>", FALSE, FALSE, TILDE_IGNORE);

  link = FFEndPrint (ajp, item, ajp->format, 21, 21, 21, 21, NULL);
  FFRecycleString (ajp, item);

  if (link == NULL) return;

  /* flatten line breaks and tabs so the link stays on one line */

  for (pos = 0; link [pos] != '\0'; pos++) {
    cur = link [pos];
    if (cur == '\n' || cur == '\r' || cur == '\t') {
      link [pos] = ' ';
    }
  }

  TrimSpacesAroundString (link);
  StringCat (buf, link);
  MemFree (link);
}
2575 
/*
 * FF_asn2gb_www_AS
 *
 *   Builds an HTML anchor for an Assembly identifier (base URL
 *   link_assembl) and appends it to buf with StringCat.  Newlines,
 *   carriage returns and tabs in the formatted text become spaces, and
 *   surrounding spaces are trimmed, before the text is appended.
 *
 *   buf must already hold a NUL-terminated string and be large enough for
 *   the appended link; no size is checked here.
 */
static void FF_asn2gb_www_AS (
  IntAsn2gbJobPtr ajp,
  CharPtr buf,
  CharPtr str
)

{
  StringItemPtr  item;
  CharPtr        link;
  CharPtr        scan;

  if (ajp == NULL || buf == NULL) return;
  if (StringHasNoText (str)) return;

  item = FFGetString (ajp);
  if (item == NULL) return;

  FFAddOneString (item, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (item, link_assembl);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "</a>", FALSE, FALSE, TILDE_IGNORE);

  link = FFEndPrint (ajp, item, ajp->format, 21, 21, 21, 21, NULL);
  FFRecycleString (ajp, item);

  if (link == NULL) return;

  /* flatten line breaks and tabs so the link stays on one line */

  scan = link;
  while (*scan != '\0') {
    if (*scan == '\n' || *scan == '\r' || *scan == '\t') {
      *scan = ' ';
    }
    scan++;
  }

  TrimSpacesAroundString (link);
  StringCat (buf, link);
  MemFree (link);
}
2618 
/*
 * AddGbseqXref
 *
 *   Creates a GBXref with database name db and appends it to the chain
 *   tracked by *headP / *tailP.  The xref id is a copy of str when str has
 *   text, otherwise the decimal form of the numeric id.
 *
 *   Nothing is added when either list pointer is NULL, when db has no text,
 *   or when str has no text and id is 0.  On return *tailP is the new node
 *   and *headP is set only if the chain was empty.
 */
static void AddGbseqXref (
  GBXrefPtr PNTR headP,
  GBXrefPtr PNTR tailP,
  CharPtr db,
  CharPtr str,
  Int4 id
)

{
  GBXrefPtr  last;
  GBXrefPtr  node;
  Char       numbuf [32];

  if (headP == NULL || tailP == NULL) return;
  if (StringHasNoText (db)) return;
  if (id == 0 && StringHasNoText (str)) return;

  node = GBXrefNew ();
  if (node == NULL) return;

  node->dbname = StringSave (db);
  if (! StringDoesHaveText (str)) {
    sprintf (numbuf, "%ld", (long) id);
    node->id =  StringSave (numbuf);
  } else {
    node->id = StringSave (str);
  }

  if (*headP == NULL) {
    *headP = node;
  }

  /* attach after the true end of the chain, in case *tailP lags behind */

  last = *tailP;
  if (last != NULL) {
    for ( ; last->next != NULL; last = last->next) {
      continue;
    }
    last->next = node;
  }

  *tailP = node;
}
2658 
/* Selects which caller-supplied output, if any, GetDBLinkString also
   fills in for a string-valued DBLink field. */
typedef enum {
    eDbLinkStrOutputDest_Nothing,      /* no extra output */
    eDbLinkStrOutputDest_bioProjectIDP /* store the value through bioProjectIDP */
} EDbLinkStrOutputDest;

/* Formatter that appends an HTML link for an identifier to a text buffer.
   Arguments are (job, destination buffer, identifier), matching the
   FF_asn2gb_www_* functions in this file. */
typedef void (*TDbLinkWWWFormatter)(IntAsn2gbJobPtr, CharPtr, CharPtr);

/* One row of the table describing a string-valued DBLink field.  The table
   in GetDBLinkString uses positional initializers, so field order matters. */
typedef struct dblinkinfo {
    EDbLinkStrOutputDest output_dest;
    CharPtr              pchName; /* e.g. "Assembly" */
    TDbLinkWWWFormatter  pWWWFormatFunc; /* e.g. & FF_asn2gb_www_BP */
    Uint4                uBufIdx; /* index into bufs array in GetDBLinkString */
} DbLinkInfoForStr;
2672 
/*
 * GetDBLinkString
 *
 *   Formats the fields of a DBLink user object into display text, one
 *   "Name: value, value" line per recognized field, joined with newlines.
 *   Lines are emitted in buffer-index order (BioProject, BioSample,
 *   ProbeDB, Sequence Read Archive, Trace Assembly Archive, Assembly), not
 *   in the order the fields occur in uop.
 *
 *   ajp            job; when non-NULL and WWW output is on, values of fields
 *                  that have a formatter are written as HTML links.
 *   uop            the DBLink user object (NULL yields NULL).
 *   bioProjectIDP  optional; set to NULL, then to the BioProject value when
 *                  that field holds a single value.  The pointer refers to
 *                  the string inside uop; it is not a copy.
 *   dblinkP        optional; receives the head of a GBXref chain with one
 *                  entry per value.
 *
 *   Returns a newly allocated string, or NULL when nothing was formatted or
 *   allocation failed.
 */
static CharPtr GetDBLinkString (
  IntAsn2gbJobPtr ajp,
  UserObjectPtr uop,
  CharPtr PNTR bioProjectIDP,
  GBXrefPtr PNTR dblinkP
)

{
  Char          tmp [256];
  CharPtr       bufs[6];
  CharPtr PNTR  cpp;
  GBXrefPtr     dbhead = NULL, dbtail = NULL;
  ValNodePtr    head, tail;
  Int4          i, j;
  Int4Ptr       ip;
  CharPtr       item;
  Uint4         k;
  size_t        len;
  Int4          nvals;
  ObjectIdPtr   oip;
  CharPtr       prefix;
  CharPtr       str;
  UserFieldPtr  ufp;
  Uint4         slot;
  Int4          val;

  const Uint4 num_bufs = sizeof(bufs)/sizeof(bufs[0]);

  static const DbLinkInfoForStr str_dblink_infos[] = {
      /* Yes, 4 is missing for uBufIdx because that's
         handled by "Trace Assembly Archive" which is an
         int. */
      { eDbLinkStrOutputDest_Nothing,       "Assembly",              & FF_asn2gb_www_AS,  5 },
      { eDbLinkStrOutputDest_bioProjectIDP, "BioProject",            & FF_asn2gb_www_BP,  0 },
      { eDbLinkStrOutputDest_Nothing,       "BioSample",             & FF_asn2gb_www_BS,  1 },
      { eDbLinkStrOutputDest_Nothing,       "ProbeDB",               NULL,                2 },
      { eDbLinkStrOutputDest_Nothing,       "Sequence Read Archive", & FF_asn2gb_www_SRR, 3 }
  };

  const Uint4 num_link_infos_for_str =
      sizeof(str_dblink_infos) /
      sizeof(str_dblink_infos[0]);

  if (bioProjectIDP != NULL) {
    *bioProjectIDP = NULL;
  }
  if (dblinkP != NULL) {
    *dblinkP = NULL;
  }
  if (uop == NULL) return NULL;

  for (k = 0; k < num_bufs; ++k) {
      bufs[k] = NULL;
  }

  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
    oip = ufp->label;
    if (oip == NULL || oip->str == NULL) continue;

    cpp = NULL;
    str = NULL;
    head = NULL;
    tail = NULL;

    /* choice 1 is read as a single string, choice 7 as an array of strings */

    if (ufp->choice == 1) {
      str = (CharPtr) ufp->data.ptrvalue;
    } else if (ufp->choice == 7) {
      cpp = (CharPtr PNTR) ufp->data.ptrvalue;
      if (cpp != NULL && ufp->num > 0) {
        str = cpp [0];
      }
    }

    if (StringICmp (oip->str, "Trace Assembly Archive") == 0) {

      /* choice 2 is read as a single integer, choice 8 as an array of integers */

      if (ufp->choice == 2) {
        val = (Int4) ufp->data.intvalue;
        if (val > 0) {
          sprintf (tmp, "Trace Assembly Archive: %ld", (long) val);
          /* a repeated field replaces the earlier text; release it first */
          bufs[4] = MemFree (bufs[4]);
          bufs[4] = StringSave (tmp);
          AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
        }
      } else if (ufp->choice == 8) {
        ip = (Int4Ptr) ufp->data.ptrvalue;
        if (ufp->num > 0 && ip != NULL) {
          val = ip [0];
          if (val > 0) {
            /* only the first entry carries the field name */
            sprintf (tmp, "Trace Assembly Archive: %ld", (long) val);
            ValNodeCopyStrEx (&head, &tail, 0, tmp);
            AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
            for (i = 1; i < ufp->num; i++) {
              val = ip [i];
              if (val > 0) {
                sprintf (tmp, "%ld", (long) val);
                ValNodeCopyStrEx (&head, &tail, 0, tmp);
                AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
              }
            }
            bufs[4] = MemFree (bufs[4]);
            bufs[4] = ValNodeMergeStrsEx (head, ", ");
            ValNodeFreeData (head);
          }
        }
      }
    } else if ( StringDoesHaveText(str) || cpp != NULL ) {

      /* this handles DBLink entries where User-field.data is "str" or "strs" */

      /* check which DBLink this is (for ones that are strings) */
      for( i = 0; i < (Int4) num_link_infos_for_str ; ++i ) {
        /* check if this is the DBLink name that matches */
        if( 0 != StringICmp(oip->str, str_dblink_infos[i].pchName) ) {
          continue;
        }

        /* the first value is str; any further values come from cpp [1..] */
        nvals = 1;
        if (cpp != NULL && ufp->num > 1) {
          nvals = ufp->num;
        }

        for (j = 0; j < nvals; j++) {
          item = (j == 0) ? str : cpp [j];
          if (! StringDoesHaveText (item)) continue;

          if (ajp != NULL && GetWWW (ajp) &&
              str_dblink_infos[i].pWWWFormatFunc )
          {
            /* NOTE(review): the formatter appends into this fixed 256 byte
               buffer without a size limit, so a very long identifier can
               still overrun it; fixing that needs a formatter interface
               that takes the buffer size */
            tmp [0] = '\0';
            (*str_dblink_infos[i].pWWWFormatFunc) (ajp, tmp, item);
            ValNodeCopyStrEx (&head, &tail, 0, tmp);
          } else {
            /* plain text is stored directly, so its length is unrestricted */
            ValNodeCopyStrEx (&head, &tail, 0, item);
          }
          AddGbseqXref (&dbhead, &dbtail, str_dblink_infos[i].pchName, item, 0);
        }

        /* load output variable for some types */
        switch( str_dblink_infos[i].output_dest ) {
        case eDbLinkStrOutputDest_Nothing:
          /* nothing to do */
          break;
        case eDbLinkStrOutputDest_bioProjectIDP:
          /* reported only when the field holds a single value */
          if (cpp == NULL || ufp->num == 1) {
            if (bioProjectIDP != NULL) {
              *bioProjectIDP = str;
            }
          }
          break;
        }

        /* write output buf */
        if( head != NULL ) {
          slot = str_dblink_infos[i].uBufIdx;
          ASSERT( slot < num_bufs );
          sprintf (tmp, "%s: ", str_dblink_infos[i].pchName);
          /* a repeated field replaces the earlier text; release it first */
          bufs[slot] = MemFree (bufs[slot]);
          bufs[slot] = ValNodeMergeStrsExEx (head, ", ", tmp, NULL);
          ValNodeFreeData (head);
        }

        /* table names are unique, so no later row can match */
        break;
      }
    }
  }

  if (dblinkP != NULL) {
    *dblinkP = dbhead;
  }

  len = 0;
  for (k = 0; k < num_bufs; ++k) {
    len += StringLen(bufs[k]);
  }

  str = NULL;
  if (len > 0) {
    /* one newline per buffer plus the terminator would be enough; the
       factor of 4 is kept from the original as headroom */
    str = (CharPtr) MemNew (sizeof (Char) * (len + (num_bufs * 4)));
  }

  if (str != NULL) {
    prefix = "";
    for (k = 0; k < num_bufs; ++k) {
      if( StringDoesHaveText(bufs[k]) ) {
        StringCat (str, prefix);
        StringCat (str, bufs[k]);
        prefix = "\n";
      }
    }
  }

  /* release every per-field buffer, including unused and blank ones */

  for (k = 0; k < num_bufs; ++k) {
    bufs[k] = MemFree (bufs[k]);
  }

  return str;
}
2868 
AddDblinkBlock(Asn2gbWorkPtr awp)2869 NLM_EXTERN void AddDblinkBlock (
2870   Asn2gbWorkPtr awp
2871 )
2872 
2873 {
2874   IntAsn2gbJobPtr    ajp;
2875   Asn2gbSectPtr      asp;
2876   BaseBlockPtr       bbp;
2877   CharPtr            bioProjectID;
2878   BioseqPtr          bsp;
2879   Char               buf [32];
2880   UserFieldPtr       curr;
2881   Uint4              dbitemID = 0;
2882   GBXrefPtr          dblinkHead = NULL;
2883   UserObjectPtr      dbuop = NULL;
2884   SeqMgrDescContext  dcontext;
2885   Boolean            first = TRUE;
2886   StringItemPtr      ffstring;
2887   GBSeqPtr           gbseq;
2888   Uint4              gpitemID = 0;
2889   UserObjectPtr      gpuop = NULL;
2890   ValNodePtr         head = NULL;
2891   ObjectIdPtr        oip;
2892   Int4               parentID;
2893   CharPtr            prefix;
2894   Int4               projectID;
2895   SeqDescrPtr        sdp;
2896   CharPtr            str;
2897   UserObjectPtr      uop;
2898   Int4               val;
2899 
2900   if (awp == NULL) return;
2901   ajp = awp->ajp;
2902   if (ajp == NULL) return;
2903   bsp = awp->bsp;
2904   if (bsp == NULL) return;
2905   asp = awp->asp;
2906   if (asp == NULL) return;
2907 
2908   if (ISA_na (bsp->mol) && awp->format != GENBANK_FMT) return;
2909   if (ISA_aa (bsp->mol) && awp->format != GENPEPT_FMT) return;
2910 
2911   /*
2912   if (! ISA_na (bsp->mol)) return;
2913   if (awp->format != GENBANK_FMT) return;
2914   */
2915 
2916   if (ajp->gbseq) {
2917     gbseq = &asp->gbseq;
2918   } else {
2919     gbseq = NULL;
2920   }
2921 
2922   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
2923   while (sdp != NULL) {
2924     uop = (UserObjectPtr) sdp->data.ptrvalue;
2925     if (uop != NULL) {
2926       oip = uop->type;
2927       if (oip != NULL && StringICmp (oip->str, "GenomeProjectsDB") == 0) {
2928         gpuop = uop;
2929         gpitemID = dcontext.itemID;
2930       }
2931       if (oip != NULL && StringICmp (oip->str, "DBLink") == 0) {
2932         dbuop = uop;
2933         dbitemID = dcontext.itemID;
2934       }
2935     }
2936     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
2937   }
2938   if (gpuop == NULL && dbuop == NULL) return;
2939 
2940   ffstring = FFGetString (ajp);
2941   if ( ffstring == NULL ) return;
2942 
2943   if (gpuop != NULL) {
2944     bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
2945     if (bbp == NULL) return;
2946 
2947     bbp->entityID = awp->entityID;
2948     bbp->itemID = gpitemID;
2949     bbp->itemtype = OBJ_SEQDESC;
2950 
2951     if (first) {
2952       FFStartPrint (ffstring, awp->format, 0, 12, "DBLINK", 12, 5, 5, "XX", TRUE);
2953     } else {
2954       FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "XX", TRUE);
2955     }
2956     first = FALSE;
2957 
2958     prefix = "Project: ";
2959     projectID = 0;
2960     parentID = 0;
2961     for (curr = gpuop->data; curr != NULL; curr = curr->next) {
2962       oip = curr->label;
2963       if (oip == NULL) continue;
2964       if (StringICmp (oip->str, "ProjectID") == 0) {
2965         if (curr->choice == 2) {
2966           val = (Int4) curr->data.intvalue;
2967           if (projectID > 0) {
2968             sprintf (buf, "%ld", (long) projectID);
2969             FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2970             if (GetWWW (ajp)) {
2971               FF_asn2gb_www_projID (ffstring, buf);
2972             } else {
2973               FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
2974             }
2975             /*
2976             FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
2977             */
2978             if (gbseq != NULL) {
2979               if (head == NULL) {
2980                 sprintf (buf, "%ld", (long) projectID);
2981               } else {
2982                 sprintf (buf, ", %ld", (long) projectID);
2983               }
2984               ValNodeCopyStr (&head, 0, buf);
2985             }
2986             prefix = ", ";
2987             parentID = 0;
2988           }
2989           projectID = val;
2990         }
2991       } else if (StringICmp (oip->str, "ParentID") == 0) {
2992         if (curr->choice == 2) {
2993           val = (Int4) curr->data.intvalue;
2994           parentID = val;
2995         }
2996       }
2997     }
2998     if (projectID > 0) {
2999       sprintf (buf, "%ld", (long) projectID);
3000       FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3001       if (GetWWW (ajp)) {
3002         FF_asn2gb_www_projID (ffstring, buf);
3003       } else {
3004         FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
3005       }
3006       /*
3007       FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
3008       */
3009       if (gbseq != NULL) {
3010         if (head == NULL) {
3011           sprintf (buf, "%ld", (long) projectID);
3012         } else {
3013           sprintf (buf, ", %ld", (long) projectID);
3014         }
3015         ValNodeCopyStr (&head, 0, buf);
3016       }
3017     }
3018 
3019     bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
3020     FFRecycleString (ajp, ffstring);
3021     ffstring = FFGetString (ajp);
3022 
3023     if (awp->afp != NULL) {
3024       DoImmediateFormat (awp->afp, bbp);
3025     }
3026   }
3027 
3028   if (dbuop != NULL) {
3029     str = GetDBLinkString (ajp, dbuop, &bioProjectID, &dblinkHead);
3030     if (StringDoesHaveText (str)) {
3031       bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
3032       if (bbp == NULL) return;
3033 
3034       bbp->entityID = awp->entityID;
3035       bbp->itemID = dbitemID;
3036       bbp->itemtype = OBJ_SEQDESC;
3037 
3038       if (first) {
3039         FFStartPrint (ffstring, awp->format, 0, 12, "DBLINK", 12, 5, 5, "XX", TRUE);
3040       } else {
3041         FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "XX", TRUE);
3042       }
3043       first = FALSE;
3044 
3045       FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
3046 
3047       if (gbseq != NULL && StringDoesHaveText (bioProjectID)) {
3048         ValNodeCopyStr (&head, 0, bioProjectID);
3049       }
3050 
3051       bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
3052       MemFree (str);
3053 
3054       if (awp->afp != NULL) {
3055         DoImmediateFormat (awp->afp, bbp);
3056       }
3057     }
3058   }
3059 
3060   FFRecycleString (ajp, ffstring);
3061 
3062   if (gbseq != NULL) {
3063     if (head != NULL) {
3064       gbseq->project = MergeFFValNodeStrs (head);
3065       ValNodeFreeData (head);
3066     }
3067   }
3068 
3069   if (dblinkHead != NULL) {
3070     if (gbseq != NULL) {
3071       gbseq->xrefs = dblinkHead;
3072     } else {
3073       AsnGenericUserSeqOfFree (dblinkHead, (AsnOptFreeFunc) GBXrefFree);
3074     }
3075   }
3076 }
3077 
3078 /* only displaying PID in GenPept format */
3079 
3080 /*
3081 static void AddPidBlock (Asn2gbWorkPtr awp)
3082 
3083 {
3084   IntAsn2gbJobPtr  ajp;
3085   BaseBlockPtr  bbp;
3086   BioseqPtr     bsp;
3087   BIG_ID        gi = -1;
3088   SeqIdPtr      sip;
3089   Char          version [64];
3090   StringItemPtr ffstring;
3091 
3092   if (awp == NULL) return;
3093   ajp = awp->ajp;
3094   if (ajp == NULL) return;
3095   bsp = awp->bsp;
3096   if (bsp == NULL) return;
3097 
3098   for (sip = bsp->id; sip != NULL; sip = sip->next) {
3099     switch (sip->choice) {
3100       case SEQID_GI :
3101         gi = (BIG_ID) sip->data.intvalue;
3102         break;
3103       default :
3104         break;
3105     }
3106   }
3107 
3108   if (gi < 1) return;
3109 
3110   bbp = Asn2gbAddBlock (awp, PID_BLOCK, sizeof (BaseBlock));
3111   if (bbp == NULL) return;
3112 
3113   ffstring = FFGetString(ajp);
3114   if ( ffstring == NULL ) return;
3115 
3116   FFStartPrint (ffstring, awp->format, 0, 12, "PID", 12, 5, 5, NULL, TRUE);
3117 
3118   sprintf (version, "g%ld", (long) gi);
3119   FFAddOneString (ffstring, version, FALSE, FALSE, TILDE_TO_SPACES);
3120 
3121   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, NULL);
3122   FFRecycleString(ajp, ffstring);
3123 }
3124 */
3125 
/*
 * Ranking value for each Seq-id type, indexed by the Seq-id choice number
 * shown in the comment beside every entry.
 *
 * NOTE(review): the code that reads this table is not in this part of the
 * file.  Presumably it is handed to a "select best id" routine in which a
 * smaller value means a more preferred id type - confirm against the caller.
 *
 * Only entries 0 through 20 are spelled out; should NUM_SEQID be larger
 * than 21, the remaining elements are zero-initialized by the C rules for
 * partially initialized static arrays.
 */
static Uint1 dbsource_fasta_order [NUM_SEQID] = {
  33, /* 0 = not set */
  20, /* 1 = local Object-id */
  15, /* 2 = gibbsq */
  16, /* 3 = gibbmt */
  30, /* 4 = giim Giimport-id */
  10, /* 5 = genbank */
  10, /* 6 = embl */
  10, /* 7 = pir */
  10, /* 8 = swissprot */
  15, /* 9 = patent */
  10, /* 10 = other = refseq */
  20, /* 11 = general Dbtag */
  31, /* 12 = gi */
  10, /* 13 = ddbj */
  10, /* 14 = prf */
  12, /* 15 = pdb */
  10, /* 16 = tpg */
  10, /* 17 = tpe */
  10, /* 18 = tpd */
  15, /* 19 = gpp */
  15  /* 20 = nat */
};
3149 
/*
 * AddToUniqueSipList
 *
 * Appends sip to the ValNode chain at *list unless the chain already holds
 * a Seq-id for which SeqIdMatch reports a match.  The pointer itself is
 * stored, not a copy of the Seq-id.  A NULL list or NULL sip is ignored.
 */
static void AddToUniqueSipList (
  ValNodePtr PNTR list,
  SeqIdPtr sip
)

{
  ValNodePtr  node;

  if (list != NULL && sip != NULL) {
    node = *list;
    while (node != NULL) {
      if (SeqIdMatch (sip, (SeqIdPtr) node->data.ptrvalue)) {
        /* already present, nothing to add */
        return;
      }
      node = node->next;
    }
    ValNodeAddPointer (list, 0, (Pointer) sip);
  }
}
3164 
WriteDbsourceID(SeqIdPtr sip,CharPtr str,BoolPtr is_na_p)3165 static Boolean WriteDbsourceID (
3166   SeqIdPtr sip,
3167   CharPtr str,
3168   BoolPtr is_na_p
3169 )
3170 
3171 {
3172   Boolean       check_na = FALSE;
3173   DbtagPtr      db;
3174   CharPtr       dt;
3175   BIG_ID        gi;
3176   ObjectIdPtr   oip;
3177   CharPtr       pfx;
3178   PDBSeqIdPtr   psip = NULL;
3179   CharPtr       prefix;
3180   Boolean       rsult = FALSE;
3181   CharPtr       sfx;
3182   CharPtr       suffix;
3183   Char          tmp [32];
3184   TextSeqIdPtr  tsip = NULL;
3185 
3186   if (is_na_p != NULL) {
3187     *is_na_p = FALSE;
3188   }
3189   if (sip == NULL || str == NULL) return FALSE;
3190   *str = '\0';
3191   switch (sip->choice) {
3192     case SEQID_LOCAL :
3193       oip = (ObjectIdPtr) sip->data.ptrvalue;
3194       if (oip == NULL) return FALSE;
3195       if (! StringHasNoText (oip->str)) {
3196         StringCat (str, oip->str);
3197         return TRUE;
3198       } else if (oip->id > 0) {
3199         sprintf (tmp, "%ld", (long) oip->id);
3200         StringCat (str, tmp);
3201         return TRUE;
3202       }
3203       return FALSE;
3204     case SEQID_GI :
3205       gi = (BIG_ID) sip->data.intvalue;
3206       if (gi == 0) return FALSE;
3207       sprintf (tmp, "gi: %lld", (long long) gi);
3208       StringCat (str, tmp);
3209       return TRUE;
3210     case SEQID_GENERAL :
3211       db = (DbtagPtr) sip->data.ptrvalue;
3212       if (db == NULL) return FALSE;
3213       /* !!! still need to implement this !!! */
3214       return FALSE;
3215     case SEQID_GENBANK :
3216     case SEQID_EMBL :
3217     case SEQID_DDBJ :
3218     case SEQID_OTHER :
3219     case SEQID_TPG :
3220     case SEQID_TPE :
3221     case SEQID_TPD :
3222     case SEQID_GPIPE :
3223       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3224       if (tsip == NULL) return FALSE;
3225       check_na = TRUE;
3226       break;
3227     case SEQID_PIR :
3228     case SEQID_SWISSPROT :
3229     case SEQID_PRF :
3230       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3231       if (tsip == NULL) return FALSE;
3232       break;
3233     case SEQID_PDB :
3234       psip = (PDBSeqIdPtr) sip->data.ptrvalue;
3235       if (psip == NULL) return FALSE;
3236       break;
3237     default :
3238       break;
3239   }
3240   prefix = " ";
3241   suffix = NULL;
3242   switch (sip->choice) {
3243     case SEQID_EMBL :
3244       StringCat (str, "embl ");
3245       suffix = ",";
3246       break;
3247     case SEQID_OTHER :
3248       StringCat (str, "REFSEQ: ");
3249       break;
3250     case SEQID_SWISSPROT :
3251       StringCat (str, "UniProtKB: ");
3252       suffix = ",";
3253       break;
3254     case SEQID_PIR :
3255       StringCat (str, "UniProtKB: ");
3256       break;
3257     case SEQID_PRF :
3258       StringCat (str, "prf: ");
3259       break;
3260     case SEQID_PDB :
3261       StringCat (str, "pdb: ");
3262       suffix = ",";
3263       break;
3264     default :
3265       break;
3266   }
3267   pfx = NULL;
3268   sfx = NULL;
3269   if (tsip != NULL) {
3270     if (! StringHasNoText (tsip->name)) {
3271       StringCat (str, sfx);
3272       StringCat (str, pfx);
3273       StringCat (str, "locus ");
3274       StringCat (str, tsip->name);
3275       sfx = suffix;
3276       pfx = prefix;
3277       rsult = TRUE;
3278     }
3279     if (! StringHasNoText (tsip->accession)) {
3280       StringCat (str, sfx);
3281       StringCat (str, pfx);
3282       StringCat (str, "accession ");
3283       StringCat (str, tsip->accession);
3284       sfx = suffix;
3285       pfx = prefix;
3286       rsult = TRUE;
3287       if (check_na && is_na_p != NULL) {
3288         *is_na_p = IS_ntdb_accession (tsip->accession);
3289       }
3290     }
3291     if (tsip->version > 0 && sip->choice != SEQID_SWISSPROT) {
3292       sprintf (tmp, ".%d", (int) tsip->version);
3293       StringCat (str, tmp);
3294       sfx = suffix;
3295       pfx = prefix;
3296     }
3297     if (! StringHasNoText (tsip->release) && sip->choice != SEQID_SWISSPROT) {
3298       StringCat (str, pfx);
3299       StringCat (str, "release ");
3300       StringCat (str, tsip->release);
3301       sfx = suffix;
3302       pfx = prefix;
3303     }
3304     if (sip->choice == SEQID_SWISSPROT || sip->choice == SEQID_PIR || sip->choice == SEQID_PRF) {
3305       StringCat (str, ";");
3306     }
3307     return rsult;
3308   }
3309   if (psip != NULL) {
3310     if (! StringHasNoText (psip->mol)) {
3311       StringCat (str, "molecule ");
3312       StringCat (str, psip->mol);
3313       sfx = suffix;
3314       pfx = prefix;
3315       rsult = TRUE;
3316     }
3317     if (psip->chain > 0) {
3318       StringCat (str, sfx);
3319       StringCat (str, pfx);
3320       sprintf (tmp, "chain %d", (int) psip->chain);
3321       StringCat (str, tmp);
3322       sfx = suffix;
3323       pfx = prefix;
3324       rsult = TRUE;
3325     }
3326     if (psip->rel != NULL) {
3327       StringCat (str, sfx);
3328       StringCat (str, pfx);
3329       StringCat (str, "release ");
3330       dt = asn2gb_PrintDate (psip->rel);
3331       StringCat (str, dt);
3332       MemFree (dt);
3333       sfx = suffix;
3334       pfx = prefix;
3335       rsult = TRUE;
3336     }
3337     StringCat (str, ";");
3338     return rsult;
3339   }
3340   return rsult;
3341 }
3342 
3343 
/*
 * AddSPBlock
 *
 * Appends the contents of the first Seq_descr_sp descriptor found for bsp
 * to ffstring: class, extra accessions, the initial-Met flag, plasmid
 * names, the created / sequence-updated / annotation-updated dates, the
 * sequence cross-references and finally the non-sequence database
 * cross-references.
 *
 * Parameters:
 *   ajp      - job; consulted through GetWWW to decide whether
 *              cross-references are wrapped in HTML links, and passed on to
 *              FF_www_db_xref.
 *   ffstring - string item that receives the text; NULL is a no-op.
 *   bsp      - Bioseq whose descriptor is printed; NULL is a no-op.
 */
static void AddSPBlock (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp
)

{
  CharPtr            acc;
  Char               buf [64];
  DbtagPtr           db;
  SeqMgrDescContext  dcontext;
  Boolean            first;
  BIG_ID             gi;
  Boolean            has_link;
  Char               id [42];
  Char               numbuf [40];
  ObjectIdPtr        oip;
  CharPtr            ptr;
  SeqDescrPtr        sdp;
  SeqIdPtr           sid;
  SeqIdPtr           sif;
  SeqIdPtr           sip;
  SPBlockPtr         spb;
  CharPtr            str;
  CharPtr            string;
  ValNodePtr         vnp;

  if (bsp == NULL) return;
  if (ffstring == NULL) return;
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_sp, &dcontext);
  if (sdp == NULL) return;
  spb = (SPBlockPtr) sdp->data.ptrvalue;
  if (spb == NULL) return;

  if (spb->_class == 1) {
    FFAddOneString (ffstring, "class: standard.", FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine(ffstring);
  } else if (spb->_class == 2) {
    FFAddOneString (ffstring, "class: preliminary.", FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine(ffstring);
  }

  if (spb->extra_acc) {
    FFAddOneString (ffstring, "extra accessions:", FALSE, FALSE, TILDE_IGNORE);
    for (vnp = spb->extra_acc; vnp != NULL; vnp = vnp->next) {
      FFAddOneString (ffstring, (CharPtr) vnp->data.ptrvalue, FALSE, FALSE, TILDE_IGNORE);
      if (vnp->next != NULL) {
        FFAddOneChar (ffstring, ',', FALSE );
      }
    }
    FFAddNewLine(ffstring);
  }

  if (spb->imeth) {
    FFAddOneString (ffstring, "seq starts with Met", FALSE, FALSE, TILDE_IGNORE);
  }

  if (spb->plasnm != NULL) {
    FFAddOneString (ffstring, "plasmid:", FALSE, FALSE, TILDE_IGNORE);
    /* unlike the extra accessions, every plasmid name is followed by a
       comma, the last one included */
    for (vnp = spb->plasnm; vnp != NULL; vnp = vnp->next) {
      FFAddOneString (ffstring, (CharPtr) vnp->data.ptrvalue, FALSE, FALSE, TILDE_IGNORE);
      FFAddOneChar (ffstring, ',', FALSE );
    }
  }

  if (spb->created) {
    string = PrintDate (spb->created);
    FFAddOneString (ffstring, "created: ", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
    MemFree (string);
  }

  if (spb->sequpd) {
    string = PrintDate (spb->sequpd);
    FFAddOneString (ffstring, "sequence updated: ", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
    MemFree (string);
  }

  if (spb->annotupd) {
    string = PrintDate (spb->annotupd);
    FFAddOneString (ffstring, "annotation updated: ", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
    MemFree (string);
  }

  /* cross-references to other sequences */
  if (spb->seqref) {
    FFAddOneString (ffstring, "xrefs: ", FALSE, FALSE, TILDE_IGNORE);
    first = TRUE;
    for (sid = spb->seqref; sid != NULL; sid = sid->next) {
      acc = NULL;
      has_link = FALSE;
      if (first == FALSE) {
        FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
      }
      first = FALSE;
      sip = sid;
      sif = NULL;
      id [0] = '\0';
      if (sip->choice == SEQID_GI) {
        /* try to turn the gi into accession.version text; when the first
           lookup fails, fall back to a Seq-id obtained for the gi */
        gi = (BIG_ID) sid->data.intvalue;
        if (! GetAccnVerFromServer (gi, id)) {
          sif = GetSeqIdForGI (gi);
          if (sif != NULL) {
            sip = sif;
          }
        }
      }
      if (id [0] == '\0') {
        SeqIdWrite (sip, id, PRINTID_TEXTID_ACC_VER, sizeof (id) - 1);
      }
      /* only references stored as a gi are turned into links */
      if (sid->choice == SEQID_GI) {
        has_link = TRUE;
      }
      if (StringDoesHaveText (id)) {
        acc = id;
      }
      if (acc != NULL) {
        if ( GetWWW(ajp) && has_link ) {
          sprintf(numbuf, "%ld", (long) sid->data.intvalue);
          FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
          /* the accession without its version decides between the
             nucleotide and the protein link */
          StringNCpy_0 (buf, acc, sizeof (buf));
          ptr = StringChr (buf, '.');
          if (ptr != NULL) {
            *ptr = '\0';
          }
          if (IS_ntdb_accession (buf)) {
            FF_Add_NCBI_Base_URL (ffstring, link_seqn);
          } else {
            FF_Add_NCBI_Base_URL (ffstring, link_seqp);
          }
          FFAddTextToString(ffstring, /* "val=" */ NULL, numbuf, "\">", FALSE, FALSE, TILDE_IGNORE);
          FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
          FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
        } else {
          FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
        }
      }
      if (sif != NULL) {
        SeqIdFree (sif);
      }
    }
  }

  /* cross-references to non-sequence databases; the heading is written
     only once a usable entry turns up */
  first = TRUE;
  for (vnp = spb->dbref; vnp != NULL; vnp = vnp->next) {
    db = (DbtagPtr) vnp->data.ptrvalue;
    if (db == NULL) continue;
    oip = db->tag;
    if (oip == NULL) continue;
    if (first) {
      FFAddNewLine(ffstring);
      FFAddOneString (ffstring, "xrefs (non-sequence databases): ", FALSE, FALSE, TILDE_IGNORE);
      first = FALSE;
    } else {
      FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
    }

    str = NULL;
    if ( oip->str != NULL ) {
      str = oip->str;
      /* drop a database prefix that is repeated inside the tag text */
      if (StringCmp (db->db, "GO") == 0 && StringNCmp (str, "GO:", 3) == 0) {
        str += 3;
      } else if (StringNCmp (str, "MGI:", 4) == 0) {
        str += 4;
      } else if (StringCmp (db->db, "HGNC") == 0 && StringNCmp (str, "HGNC:", 5) == 0) {
        str += 5;
      } else if (StringCmp (db->db, "VGNC") == 0 && StringNCmp (str, "VGNC:", 5) == 0) {
        str += 5;
      } else if (StringCmp (db->db, "DIP") == 0 && StringNCmp (str, "DIP:", 4) == 0) {
        str += 4;
      }
    } else if ( oip->id > 0 ) {
      /* print through long, as the other integer ids in this file are */
      sprintf (numbuf, "%ld", (long) oip->id);
      str = numbuf;
    }

    /* the formatting of the database name and tag is left to FF_www_db_xref */
    FF_www_db_xref (ajp, ffstring, db->db, str, bsp);
  }
}
3566 
/*
 * AddPIRBlock
 *
 * Appends the contents of the first Seq_descr_pir descriptor found for bsp
 * to ffstring.  Each text field that is present becomes a "label: value"
 * paragraph; every paragraph after the first one written is preceded by
 * ";" and a line break.  The sequence cross-references follow, and a "."
 * closes the text.
 *
 * ajp is not used by this function.
 */
static void AddPIRBlock (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp
)

{
  SeqMgrDescContext  dcontext;
  CharPtr            dbname;
  int                i;
  Char               id [41];
  CharPtr            labels [8];
  Boolean            need_comma;
  PirBlockPtr        pbp;
  SeqDescrPtr        sdp;
  CharPtr            sep = NULL;
  SeqIdPtr           sid;
  CharPtr            values [8];

  if (bsp == NULL) return;
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pir, &dcontext);
  if (sdp == NULL) return;
  pbp = (PirBlockPtr) sdp->data.ptrvalue;
  if (pbp == NULL) return;

  /* the host comes first and therefore never needs a separator */
  if (pbp->host != NULL) {
    FFAddTextToString (ffstring, "host:", pbp->host, "\n", FALSE, TRUE, TILDE_IGNORE);
    sep = ";";
  }

  /* the remaining text fields, in output order */
  labels [0] = "source: ";       values [0] = pbp->source;
  labels [1] = "summary: ";      values [1] = pbp->summary;
  labels [2] = "genetic: ";      values [2] = pbp->genetic;
  labels [3] = "includes: ";     values [3] = pbp->includes;
  labels [4] = "placement: ";    values [4] = pbp->placement;
  labels [5] = "superfamily: ";  values [5] = pbp->superfamily;
  labels [6] = "xref: ";         values [6] = pbp->cross_reference;
  labels [7] = "PIR dates: ";    values [7] = pbp->date;

  for (i = 0; i < 8; i++) {
    if (values [i] == NULL) continue;
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddTextToString (ffstring, labels [i], values [i], "\n", FALSE, TRUE, TILDE_IGNORE);
    sep = ";";
  }

  if (pbp->had_punct) {
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddOneString (ffstring, "punctuation in sequence", FALSE, FALSE, TILDE_IGNORE);
    sep = ";";
  }

  if (pbp->seqref != NULL) {
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddOneString (ffstring, "xrefs: ", FALSE, FALSE, TILDE_IGNORE);

    need_comma = FALSE;
    for (sid = pbp->seqref; sid != NULL; sid = sid->next) {
      /* the comma is written ahead of every reference but the first,
         including references whose type ends up not being printed */
      if (need_comma) {
        FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
      }
      need_comma = TRUE;

      SeqIdWrite (sid, id, PRINTID_TEXTID_ACC_VER, sizeof (id) - 1);

      /* choose the database word; id types without one are skipped */
      switch (sid->choice) {
        case SEQID_GENBANK:
          dbname = "genbank ";
          break;
        case SEQID_EMBL:
          dbname = "embl ";
          break;
        case SEQID_PIR:
        case SEQID_SWISSPROT:
          dbname = "UniProtKB ";
          break;
        case SEQID_DDBJ:
          dbname = "ddbj ";
          break;
        case SEQID_PRF:
          dbname = "prf ";
          break;
        case SEQID_GI:
          dbname = "gi: ";
          break;
        default:
          dbname = NULL;
          break;
      }

      if (dbname != NULL) {
        FFAddOneString (ffstring, dbname, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
      }
    }
  }

  FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
}
3705 
/*
 * AddPRFBlock
 *
 * Appends the extra source information (host, part, state, strain,
 * taxonomy) of the first Seq_descr_prf descriptor found for bsp to
 * ffstring.  Fields that are present are separated by ";" plus a line
 * break, and a "." closes the text.  Nothing is written when the
 * descriptor has no extra source information.
 *
 * ajp is not used by this function.
 */
static void AddPRFBlock (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp
)

{
  SeqMgrDescContext  dcontext;
  PrfExtSrcPtr       extra;
  int                i;
  CharPtr            labels [4];
  PrfBlockPtr        prf;
  SeqDescrPtr        sdp;
  CharPtr            sep = NULL;
  CharPtr            values [4];

  if (bsp == NULL) return;
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_prf, &dcontext);
  if (sdp == NULL) return;
  prf = (PrfBlockPtr) sdp->data.ptrvalue;
  if (prf == NULL) return;
  if (ffstring == NULL) return;

  extra = prf->extra_src;
  if (extra == NULL) return;

  /* the host comes first and therefore never needs a separator */
  if (extra->host != NULL) {
    FFAddTextToString (ffstring, "host:", extra->host, NULL, FALSE, TRUE, TILDE_IGNORE);
    sep = ";\n";
  }

  /* the remaining fields, in output order */
  labels [0] = "part: ";      values [0] = extra->part;
  labels [1] = "state: ";     values [1] = extra->state;
  labels [2] = "strain: ";    values [2] = extra->strain;
  labels [3] = "taxonomy: ";  values [3] = extra->taxon;

  for (i = 0; i < 4; i++) {
    if (values [i] == NULL) continue;
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddTextToString (ffstring, labels [i], values [i], NULL, FALSE, TRUE, TILDE_IGNORE);
    sep = ";\n";
  }

  FFAddOneChar (ffstring, '.', FALSE);
}
3758 
/*
 * AddPDBBlock
 *
 * Appends the contents of the first Seq_descr_pdb descriptor found for bsp
 * to ffstring: deposition date, class, source list, experimental method
 * and, when replacement data is present, the replaced ids and the
 * replacement date.  Paragraphs are separated by ";" and a line break,
 * list items by ", ", and a "." closes the text.
 *
 * When GetWWW (ajp) is true, a source item of the form "Mmdb_id:" followed
 * by optional blanks and nothing but digits has its number wrapped in a
 * link built from link_mmdb.
 */
static void AddPDBBlock (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp
)

{
  SeqMgrDescContext  dcontext;
  CharPtr            datestr;
  CharPtr            digits;
  CharPtr            item;
  PdbBlockPtr        pdb;
  PdbRepPtr          replace;
  SeqDescrPtr        sdp;
  CharPtr            sep = NULL;
  ValNodePtr         vnp;

  if (bsp == NULL) return;
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pdb, &dcontext);
  if (sdp == NULL) return;
  pdb = (PdbBlockPtr) sdp->data.ptrvalue;
  if (pdb == NULL) return;

  if (pdb->deposition != NULL) {
    datestr = asn2gb_PrintDate (pdb->deposition);
    FFAddTextToString (ffstring, "deposition: ", datestr, NULL, FALSE, TRUE, TILDE_IGNORE);
    MemFree (datestr);
    sep = ";";
  }

  if (pdb->pdbclass != NULL) {
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddTextToString (ffstring, "class: ", pdb->pdbclass, NULL, FALSE, TRUE, TILDE_IGNORE);
    sep = ";";
  }

  if (pdb->source != NULL) {
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddOneString (ffstring, "source: ", FALSE, TRUE, TILDE_IGNORE);

    /* inside the list the separator is ", ", starting after the first
       item that is actually written */
    sep = NULL;
    for (vnp = pdb->source; vnp != NULL; vnp = vnp->next) {
      item = (CharPtr) vnp->data.ptrvalue;
      if (StringHasNoText (item)) continue;

      /* digits ends up non-NULL only for a linkable "Mmdb_id: <number>" */
      digits = NULL;
      if (GetWWW (ajp) && StringNICmp (item, "Mmdb_id:", 8) == 0) {
        digits = item + 8;
        while (*digits == ' ') {
          digits++;
        }
        if (! StringIsAllDigits (digits)) {
          digits = NULL;
        }
      }

      if (digits == NULL) {
        FFAddTextToString (ffstring, sep, item, NULL, FALSE, TRUE, TILDE_IGNORE);
      } else {
        FFAddTextToString (ffstring, sep, "Mmdb_id:", NULL, FALSE, TRUE, TILDE_IGNORE);
        FFAddTextToString (ffstring, NULL, " ", NULL, FALSE, TRUE, TILDE_IGNORE);
        FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, link_mmdb, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, digits, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, digits, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
      }
      sep = ", ";
    }
    sep = ";";
  }

  if (pdb->exp_method != NULL) {
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddTextToString (ffstring, "Exp. method: ", pdb->exp_method, NULL, FALSE, TRUE, TILDE_IGNORE);
    sep = ";";
  }

  replace = pdb->replace;

  if (replace != NULL && replace->ids != NULL) {
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddOneString (ffstring, "ids replaced: ", FALSE, TRUE, TILDE_IGNORE);

    sep = NULL;
    for (vnp = replace->ids; vnp != NULL; vnp = vnp->next) {
      item = (CharPtr) vnp->data.ptrvalue;
      if (StringHasNoText (item)) continue;
      FFAddTextToString (ffstring, sep, item, NULL, FALSE, TRUE, TILDE_IGNORE);
      sep = ", ";
    }
    sep = ";";
  }

  if (replace != NULL && replace->date != NULL) {
    FFAddOneString (ffstring, sep, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);

    datestr = asn2gb_PrintDate (replace->date);
    FFAddTextToString (ffstring, "replacement date: ", datestr, NULL, FALSE, TRUE, TILDE_IGNORE);
    MemFree (datestr);
    sep = ";";
  }

  FFAddOneChar (ffstring, '.', FALSE);
}
3868 
/*
 * TxtSave
 *
 * Copies the first len characters of text into a buffer of len + 1
 * characters obtained from MemNew and returns that buffer.  Returns NULL
 * when text is NULL or len is 0.
 *
 * Only len characters are copied; the terminating '\0' is whatever MemNew
 * left in the extra slot (presumably zero-filled memory - confirm against
 * the toolkit's MemNew).  The caller owns the returned buffer.
 */
static CharPtr TxtSave (CharPtr text, size_t len)

{
  CharPtr  copy;

  if (text == NULL || len == 0) return NULL;

  copy = (CharPtr) MemNew (len + 1);
  MemCopy (copy, text, len);

  return copy;
}
3882 
/*
 * FF_www_dbsource
 *
 * Appends one DBSOURCE entry (str) to ffstring.
 *
 *   ajp      - job; GetWWW (ajp) decides whether HTML links are produced
 *   ffstring - output accumulator
 *   str      - already formatted entry text
 *   first    - when FALSE, ", " is written before the entry on the
 *              link-capable paths
 *   sip      - Seq-id the entry was built from; picks the link target
 *   is_na    - chooses link_seqn over link_seqp for the nucleotide-style ids
 *
 * Returns FALSE only when sip is NULL; every other path returns TRUE.
 *
 * Without WWW mode, or for an id type that has no link, str is passed to
 * AddStringWithTildes unchanged.  Otherwise, if str contains the word
 * "accession", the text after it (minus one trailing ';') is wrapped in
 * <a href="...">...</a>.  The href ends in the GI when GetGIForSeqId finds
 * one (never tried for Swiss-Prot ids), else in the accession text itself.
 */
static Boolean FF_www_dbsource (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  CharPtr str,
  Boolean first,
  SeqIdPtr sip,
  Boolean is_na
)

{
  Uint1    choice;
  CharPtr  cursor;
  BIG_ID   gi = 0;
  Char     gibuf [32];
  CharPtr  label;         /* start of the word "accession" within str */
  CharPtr  lead;          /* copy of the text that precedes label */
  CharPtr  link = NULL;
  Int2     skipped;       /* length of "accession" plus following blanks */
  CharPtr  stop;          /* one past the last character of the value */
  CharPtr  value;         /* first character of the accession value */

  if (sip == NULL) return FALSE;
  choice = sip->choice;

  if (! GetWWW (ajp)) {
    AddStringWithTildes (ffstring, str);
    return TRUE;
  }

  /* pick the link target for this id type, or emit plain text if none */
  switch (choice) {
    case SEQID_PIR :
    case SEQID_PDB :
    case SEQID_PRF :
      link = link_seqp;
      break;
    case SEQID_SWISSPROT :
      link = link_sp;
      break;
    case SEQID_EMBL :
    case SEQID_GENBANK :
    case SEQID_DDBJ :
    case SEQID_GIBBSQ :
    case SEQID_GIBBMT :
    case SEQID_GI :
    case SEQID_GIIM :
    case SEQID_OTHER :
    case SEQID_TPG :
    case SEQID_TPE :
    case SEQID_TPD :
    case SEQID_GPIPE :
      if (is_na) {
        link = link_seqn;
      } else {
        link = link_seqp;
      }
      break;
    default :
      AddStringWithTildes (ffstring, str);
      return TRUE;
  }

  label = StringStr (str, "accession");
  if (label == NULL) {
    /* nothing to hyperlink - write the entry as is */
    if (! first) {
      FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
    }
    FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
    return TRUE;
  }

  /* step over the word "accession" (9 characters) and trailing blanks */
  value = label + 9;
  skipped = 9;
  while (*value == ' ') {
    value++;
    skipped++;
  }

  if (! first) {
    FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
  }

  /* text in front of "accession"; TxtSave yields NULL when there is none */
  lead = TxtSave (str, value - str - skipped);
  FFAddOneString (ffstring, lead, FALSE, FALSE, TILDE_IGNORE);
  MemFree (lead);

  /* the word "accession" and the blanks after it, character by character */
  for (cursor = label; cursor != value; cursor++) {
    FFAddOneChar (ffstring, *cursor, FALSE);
  }

  /* the value runs to the end of str, not counting one trailing ';' */
  stop = value + StringLen (value) - 1;
  if (*stop != ';') {
    stop++;
  }

  /* opening tag: base URL followed by either the GI or the accession */
  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (ffstring, link);
  if (choice != SEQID_SWISSPROT) {
    gi = GetGIForSeqId (sip);
  }
  if (gi > 0) {
    sprintf (gibuf, "%ld", (long) gi);
    FFAddOneString (ffstring, gibuf, FALSE, FALSE, TILDE_IGNORE);
  } else {
    for (cursor = value; cursor != stop; cursor++) {
      FFAddOneChar (ffstring, *cursor, FALSE);
    }
  }
  FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);

  /* visible link text is always the accession value */
  for (cursor = value; cursor != stop; cursor++) {
    FFAddOneChar (ffstring, *cursor, FALSE);
  }
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);

  /* put back the ';' that was kept out of the link */
  if (*stop == ';') {
    FFAddOneChar (ffstring, ';', FALSE);
  }

  return TRUE;
}
3987 
AddDbsourceBlock(Asn2gbWorkPtr awp)3988 NLM_EXTERN void AddDbsourceBlock (
3989   Asn2gbWorkPtr awp
3990 )
3991 
3992 {
3993   IntAsn2gbJobPtr  ajp;
3994   Asn2gbSectPtr    asp;
3995   BaseBlockPtr     bbp;
3996   BioseqPtr        bsp;
3997   Char             buf [256];
3998   SeqFeatPtr       cds;
3999   DbtagPtr         db;
4000   GBSeqPtr         gbseq;
4001   SeqIdPtr         id;
4002   Boolean          is_na;
4003   ValNodePtr       list = NULL;
4004   BioseqPtr        nuc;
4005   SeqEntryPtr      sep;
4006   SeqIdPtr         sip;
4007   SeqLocPtr        slp;
4008   CharPtr          str;
4009   TextSeqIdPtr     tsip;
4010   Boolean          unknown = TRUE;
4011   ValNodePtr       vnp;
4012   StringItemPtr    ffstring;
4013 
4014   if (awp == NULL) return;
4015   ajp = awp->ajp;
4016   if (ajp == NULL) return;
4017   asp = awp->asp;
4018   if (asp == NULL) return;
4019   bsp = awp->bsp;
4020   if (bsp == NULL) return;
4021 
4022   for (sip = bsp->id; sip != NULL; sip = sip->next) {
4023     if (sip->choice != SEQID_OTHER) continue;
4024     tsip = (TextSeqIdPtr) sip->data.ptrvalue;
4025     if (tsip == NULL) continue;
4026     if (StringNCmp (tsip->accession, "WP_", 3) == 0) return;
4027   }
4028 
4029   bbp = Asn2gbAddBlock (awp, DBSOURCE_BLOCK, sizeof (BaseBlock));
4030   if (bbp == NULL) return;
4031 
4032   bbp->entityID = awp->entityID;
4033 
4034   ffstring = FFGetString(ajp);
4035   if ( ffstring == NULL ) return;
4036 
4037   FFStartPrint (ffstring, awp->format, 0, 12, "DBSOURCE", 12, 5, 5, NULL, TRUE);
4038 
4039   sip = SeqIdSelect (bsp->id, dbsource_fasta_order, NUM_SEQID);
4040 
4041   if (sip != NULL) {
4042 
4043     switch (sip->choice) {
4044       case SEQID_PIR :
4045       case SEQID_SWISSPROT :
4046       case SEQID_PRF :
4047       case SEQID_PDB :
4048         if (WriteDbsourceID (sip, buf, &is_na)) {
4049           FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
4050           FFAddNewLine(ffstring);
4051           unknown = FALSE;
4052         }
4053         break;
4054       case SEQID_GENERAL :
4055         db = (DbtagPtr) sip->data.ptrvalue;
4056         if (db == NULL) {
4057           break;
4058         }
4059         if (StringNCmp (db->db, "PIDe", 4) != 0 &&
4060             StringNCmp (db->db, "PIDd", 4) != 0 &&
4061             StringNCmp (db->db, "PID", 3) != 0) {
4062           break;
4063         }
4064         /* if (ChoicePID) found, continue on to next set of cases */
4065       case SEQID_EMBL :
4066       case SEQID_GENBANK :
4067       case SEQID_DDBJ :
4068       case SEQID_GIBBSQ :
4069       case SEQID_GIBBMT :
4070       case SEQID_OTHER :
4071       case SEQID_TPG :
4072       case SEQID_TPE :
4073       case SEQID_TPD :
4074       case SEQID_GPIPE :
4075       case SEQID_GI :
4076       case SEQID_GIIM :
4077         cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
4078         if (cds == NULL) {
4079           /* now may also be protein product of mature peptide feature */
4080           cds = SeqMgrGetPROTgivenProduct (bsp, NULL);
4081         }
4082         if (cds != NULL) {
4083           nuc = BioseqFindFromSeqLoc (cds->location);
4084           if (nuc != NULL) {
4085             slp = SeqLocFindNext (cds->location, NULL);
4086             while (slp != NULL) {
4087               sip = SeqLocId (slp);
4088               AddToUniqueSipList (&list, sip);
4089               slp = SeqLocFindNext (cds->location, slp);
4090             }
4091             for (vnp = list; vnp != NULL; vnp = vnp->next) {
4092               id = (SeqIdPtr) vnp->data.ptrvalue;
4093               nuc = BioseqFindCore (id);
4094               sip = NULL;
4095               if (nuc != NULL) {
4096                 sip = SeqIdSelect (nuc->id, dbsource_fasta_order, NUM_SEQID);
4097               } else if (id != NULL && id->choice == SEQID_GI) {
4098                 sip = GetSeqIdForGI (id->data.intvalue);
4099               }
4100               if (sip == NULL) {
4101                 sip = id;
4102               }
4103               if (sip != NULL) {
4104                 if (WriteDbsourceID (sip, buf, &is_na)) {
4105                   FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
4106                   FFAddNewLine(ffstring);
4107                   unknown = FALSE;
4108                 }
4109               }
4110             }
4111             ValNodeFree (list);
4112           } else {
4113             sep = GetTopSeqEntryForEntityID (awp->entityID);
4114             if (sep != NULL && IS_Bioseq (sep)) {
4115               /* special case for coded_by CDS packed on retcode 1 protein */
4116               id = SeqLocId (cds->location);
4117               if (id != NULL && id->choice == SEQID_GI) {
4118                 sip = GetSeqIdForGI (id->data.intvalue);
4119                 if (sip == NULL) {
4120                   sip = id;
4121                 }
4122               }
4123               if (WriteDbsourceID (sip, buf, &is_na)) {
4124                 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
4125                 FFAddNewLine(ffstring);
4126                 unknown = FALSE;
4127               }
4128             }
4129           }
4130         } else {
4131           if (WriteDbsourceID (sip, buf, &is_na)) {
4132             FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
4133             FFAddNewLine(ffstring);
4134             unknown = FALSE;
4135           }
4136         }
4137         break;
4138       default :
4139         break;
4140     }
4141 
4142     if (sip != NULL) {
4143       switch (sip->choice) {
4144         case SEQID_PIR :
4145           AddPIRBlock (ajp, ffstring, bsp);
4146           break;
4147         case SEQID_SWISSPROT :
4148           AddSPBlock (ajp, ffstring, bsp);
4149           break;
4150         case SEQID_PRF :
4151           AddPRFBlock (ajp, ffstring, bsp);
4152           break;
4153         case SEQID_PDB :
4154           AddPDBBlock (ajp, ffstring, bsp);
4155           break;
4156         default :
4157           break;
4158       }
4159     }
4160   }
4161 
4162   if (unknown) {
4163     FFAddOneString (ffstring, "UNKNOWN", FALSE, FALSE, TILDE_TO_SPACES);
4164   }
4165 
4166   str = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, NULL);
4167 
4168   /* optionally populate gbseq for XML-ized GenBank format */
4169 
4170   if (ajp->gbseq) {
4171     gbseq = &asp->gbseq;
4172   } else {
4173     gbseq = NULL;
4174   }
4175 
4176   if (gbseq != NULL) {
4177     if (StringNCmp (str, "DBSOURCE    ", 12) == 0) {
4178       gbseq->source_db = StringSave (str + 12);
4179     } else {
4180       gbseq->source_db = StringSave (str);
4181     }
4182     CleanQualValue (gbseq->source_db);
4183     Asn2gnbkCompressSpaces (gbseq->source_db);
4184   }
4185 
4186   bbp->string = str;
4187   FFRecycleString(ajp, ffstring);
4188 
4189   if (awp->afp != NULL) {
4190     DoImmediateFormat (awp->afp, bbp);
4191   }
4192 }
4193 
AddDateBlock(Asn2gbWorkPtr awp)4194 NLM_EXTERN void AddDateBlock (
4195   Asn2gbWorkPtr awp
4196 )
4197 
4198 {
4199   IntAsn2gbJobPtr    ajp;
4200   BaseBlockPtr       bbp;
4201   BioseqPtr          bsp;
4202   Char               date [40];
4203   SeqMgrDescContext  dcontext;
4204   DatePtr            dp;
4205   SeqDescrPtr        sdp;
4206   StringItemPtr      ffstring;
4207 
4208   if (awp == NULL) return;
4209   ajp = awp->ajp;
4210   if (ajp == NULL) return;
4211   bsp = awp->bsp;
4212   if (bsp == NULL) return;
4213 
4214   ffstring = FFGetString(ajp);
4215   if ( ffstring == NULL ) return;
4216 
4217   bbp = Asn2gbAddBlock (awp, DATE_BLOCK, sizeof (BaseBlock));
4218   if (bbp == NULL) return;
4219 
4220   date [0] = '\0';
4221 
4222   dp = NULL;
4223   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
4224   if (sdp != NULL) {
4225     dp = (DatePtr) sdp->data.ptrvalue;
4226   }
4227   if (dp != NULL) {
4228     DateToFF (date, dp, FALSE);
4229   }
4230   if (StringHasNoText (date)) {
4231     StringCpy (date, "01-JAN-1900");
4232   }
4233 
4234   FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 5, "DT", TRUE);
4235   FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
4236 
4237   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
4238   FFRecycleString(ajp, ffstring);
4239 
4240   bbp = Asn2gbAddBlock (awp, DATE_BLOCK, sizeof (BaseBlock));
4241   if (bbp == NULL) return;
4242 
4243   ffstring = FFGetString(ajp);
4244 
4245   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
4246   if (sdp != NULL) {
4247     dp = (DatePtr) sdp->data.ptrvalue;
4248   }
4249   if (dp != NULL) {
4250     DateToFF (date, dp, FALSE);
4251   }
4252 
4253   FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 5, "DT", FALSE);
4254   FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
4255 
4256   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
4257   FFRecycleString(ajp, ffstring);
4258 
4259   if (awp->afp != NULL) {
4260     DoImmediateFormat (awp->afp, bbp);
4261   }
4262 }
4263 
4264 
4265 #define TOTAL_ESTKW 11
4266 #define TOTAL_GSSKW 2
4267 #define TOTAL_STSKW 5
4268 
4269 static CharPtr EST_kw_array[ TOTAL_ESTKW] = {
4270   "EST", "EST PROTO((expressed sequence tag)", "expressed sequence tag",
4271   "EST (expressed sequence tag)", "EST(expressed sequence tag)",
4272   "partial cDNA sequence", "transcribed sequence fragment", "TSR",
4273   "putatively transcribed partial sequence", "UK putts"
4274 };
4275 
4276 static CharPtr GSS_kw_array [TOTAL_GSSKW] = {
4277   "GSS", "trapped exon"
4278 };
4279 
4280 static CharPtr STS_kw_array[TOTAL_STSKW] = {
4281   "STS", "STS(sequence tagged site)", "STS (sequence tagged site)",
4282   "STS sequence", "sequence tagged site"
4283 };
4284 
/*
 * MatchArrayString
 *
 * Looks for text among the first totalstr entries of array_string using a
 * case-sensitive StringCmp.  Returns the index of the first entry that
 * compares equal, or -1 when text is NULL or no entry matches.
 */
static Int2 MatchArrayString (
  CharPtr array_string [],
  Int2 totalstr,
  CharPtr text
)

{
  Int2  idx;

  if (text == NULL) return -1;

  idx = 0;
  while (idx < totalstr) {
    if (StringCmp (array_string [idx], text) == 0) {
      return idx;
    }
    idx++;
  }

  return -1;
}
4302 
/*
 * CheckSpecialKeyword
 *
 * Decides whether kwd is acceptable for a record flagged as EST, STS
 * and/or GSS.  Returns FALSE when kwd is NULL, or when kwd appears in the
 * keyword list of a category other than one the record is flagged with
 * (for example an STS or GSS keyword on an EST record).  Returns TRUE
 * otherwise, including when none of the three flags is set.
 */
static Boolean CheckSpecialKeyword (
  Boolean is_est,
  Boolean is_sts,
  Boolean is_gss,
  CharPtr kwd
)

{
  Boolean  est_kw;
  Boolean  gss_kw;
  Boolean  sts_kw;

  if (kwd == NULL) return FALSE;

  /* which of the three vocabularies contain this keyword */
  est_kw = (Boolean) (MatchArrayString (EST_kw_array, TOTAL_ESTKW, kwd) != -1);
  sts_kw = (Boolean) (MatchArrayString (STS_kw_array, TOTAL_STSKW, kwd) != -1);
  gss_kw = (Boolean) (MatchArrayString (GSS_kw_array, TOTAL_GSSKW, kwd) != -1);

  /* a keyword from one of the other two categories is rejected */
  if (is_est && (sts_kw || gss_kw)) return FALSE;
  if (is_sts && (est_kw || gss_kw)) return FALSE;
  if (is_gss && (sts_kw || est_kw)) return FALSE;

  return TRUE;
}
4330 
KeywordAlreadyInList(ValNodePtr head,CharPtr kwd)4331 static Boolean KeywordAlreadyInList (
4332   ValNodePtr head,
4333   CharPtr kwd
4334 )
4335 
4336 {
4337   ValNodePtr  vnp;
4338 
4339   for (vnp = head; vnp != NULL; vnp = vnp->next) {
4340     if (StringICmp ((CharPtr) vnp->data.ptrvalue, kwd) == 0) return TRUE;
4341   }
4342 
4343   return FALSE;
4344 }
4345 
4346 typedef struct finstatdata {
4347   CharPtr  inuserobj;
4348   CharPtr  inkeyword;
4349 } FinStatData, PNTR FinStatPtr;
4350 
4351 static FinStatData finStatKywds [] = {
4352   {"Standard-draft",                  "STANDARD_DRAFT"},
4353   {"High-quality-draft",              "HIGH_QUALITY_DRAFT"},
4354   {"Improved-high-quality-draft",     "IMPROVED_HIGH_QUALITY_DRAFT"},
4355   {"Annotation-directed-improvement", "ANNOTATION_DIRECTED_IMPROVEMENT"},
4356   {"Noncontiguous-finished",          "NONCONTIGUOUS_FINISHED"},
4357   /*
4358   {"Finished",                        "FINISHED"},
4359   */
4360   {NULL, NULL}
4361 };
4362 
/*
 * GetFinishingStatus
 *
 * Maps a finishing-status value to its keyword.  A copy of str, limited to
 * the 64-character local buffer, has every blank replaced by '-' and is
 * then compared case-insensitively with the inuserobj column of
 * finStatKywds.
 *
 * Returns the inkeyword string of the matching row (a pointer into the
 * table, not a copy), or NULL when str has no text or nothing matches.
 */
static CharPtr GetFinishingStatus (
  CharPtr str
)

{
  Char        buf [64];
  FinStatPtr  entry;
  CharPtr     ptr;

  if (StringHasNoText (str)) return NULL;

  StringNCpy_0 (buf, str, sizeof (buf));

  /* table entries are hyphenated, so turn blanks into hyphens */
  for (ptr = buf; *ptr != '\0'; ptr++) {
    if (*ptr == ' ') {
      *ptr = '-';
    }
  }

  for (entry = finStatKywds; entry->inuserobj != NULL; entry++) {
    if (StringICmp (buf, entry->inuserobj) == 0) {
      return entry->inkeyword;
    }
  }

  return NULL;
}
4394 
AddKeywordsBlock(Asn2gbWorkPtr awp)4395 NLM_EXTERN void AddKeywordsBlock (
4396   Asn2gbWorkPtr awp
4397 )
4398 
4399 {
4400   Boolean            add_encode = FALSE;
4401   IntAsn2gbJobPtr    ajp;
4402   Asn2gbSectPtr      asp;
4403   BaseBlockPtr       bbp;
4404   BioSourcePtr       biop;
4405   BioseqPtr          bsp;
4406   BioseqSetPtr       bssp;
4407   UserFieldPtr       curr;
4408   SeqMgrDescContext  dcontext;
4409   EMBLBlockPtr       ebp;
4410   CharPtr            field;
4411   CharPtr            finishing_status = NULL;
4412   GBBlockPtr         gbp;
4413   GBSeqPtr           gbseq;
4414   ValNodePtr         head = NULL;
4415   IndxPtr            index;
4416   Boolean            is_cross_kingdom = FALSE;
4417   Boolean            is_est = FALSE;
4418   Boolean            is_gss = FALSE;
4419   Boolean            is_sts = FALSE;
4420   Boolean            is_env_sample = FALSE;
4421   Boolean            is_genome_assembly = FALSE;
4422   Boolean            is_tsa = FALSE;
4423   Boolean            is_unverified = FALSE;
4424   Boolean            is_unv_organism = FALSE;
4425   Boolean            is_unv_misassembled = FALSE;
4426   Boolean            is_wp = FALSE;
4427   Boolean            this_is_gen_asm;
4428   ValNodePtr         keywords;
4429   CharPtr            kwd;
4430   ValNodePtr         ky_head;
4431   MolInfoPtr         mip;
4432   BioseqPtr          nbsp;
4433   Int2               num_super_kingdom = 0;
4434   ObjectIdPtr        oip;
4435   OrgNamePtr         onp;
4436   OrgRefPtr          orp;
4437   PirBlockPtr        pir;
4438   PrfBlockPtr        prf;
4439   CharPtr            sc_keyword;
4440   SeqDescrPtr        sdp;
4441   SeqEntryPtr        sep;
4442   SeqIdPtr           sip;
4443   SPBlockPtr         sp;
4444   SubSourcePtr       ssp;
4445   CharPtr            str;
4446   Boolean            super_kingdoms_different = FALSE;
4447   CharPtr            super_kingdom_name = NULL;
4448   TaxElementPtr      tep;
4449   TextSeqIdPtr       tsip;
4450   UserFieldPtr       ufp;
4451   UserObjectPtr      uop;
4452   ValNodePtr         vnp;
4453   StringItemPtr      ffstring;
4454 
4455   if (awp == NULL) return;
4456   ajp = awp->ajp;
4457   if (ajp == NULL) return;
4458   bsp = awp->bsp;
4459   if (bsp == NULL) return;
4460   asp = awp->asp;
4461   if (asp == NULL) return;
4462 
4463   bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, KEYWORDS_BLOCK, sizeof (BaseBlock));
4464   if (bbp == NULL) return;
4465 
4466   ffstring = FFGetString(ajp);
4467   if ( ffstring == NULL ) return;
4468 
4469   for (sip = bsp->id; sip != NULL; sip = sip->next) {
4470     if (sip->choice == SEQID_OTHER) {
4471       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
4472       if (tsip != NULL && StringNICmp (tsip->accession, "WP_", 3) == 0) {
4473         is_wp = TRUE;
4474       }
4475     }
4476   }
4477 
4478   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
4479   while (sdp != NULL) {
4480     biop = (BioSourcePtr) sdp->data.ptrvalue;
4481     if (biop != NULL) {
4482       orp = biop->org;
4483       if (orp != NULL) {
4484         onp = orp->orgname;
4485         if (onp != NULL) {
4486           if (onp->choice == 5) {
4487             for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
4488               if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
4489                 num_super_kingdom++;
4490                 if (super_kingdom_name == NULL) {
4491                   super_kingdom_name = tep->name;
4492                 } else if (StringICmp (super_kingdom_name, tep->name) != 0) {
4493                   super_kingdoms_different = TRUE;
4494                 }
4495               }
4496             }
4497           }
4498         }
4499       }
4500       for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
4501         if (ssp->subtype == SUBSRC_environmental_sample) {
4502           is_env_sample = TRUE;
4503         }
4504       }
4505     }
4506     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext);
4507   }
4508 
4509   if (num_super_kingdom > 1 && super_kingdoms_different) {
4510     is_cross_kingdom = TRUE;
4511   }
4512 
4513   if (bsp->repr == Seq_repr_map) {
4514     if (head != NULL) {
4515       ValNodeCopyStr (&head, 0, "; ");
4516     }
4517     ValNodeCopyStr (&head, 0, "Whole_Genome_Map");
4518   }
4519 
4520   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
4521   if (sdp != NULL) {
4522     bbp->entityID = dcontext.entityID;
4523     bbp->itemID = dcontext.itemID;
4524     bbp->itemtype = OBJ_SEQDESC;
4525 
4526     mip = (MolInfoPtr) sdp->data.ptrvalue;
4527     if (mip != NULL) {
4528       switch (mip->tech) {
4529         case MI_TECH_htgs_1 :
4530           if (head != NULL) {
4531             ValNodeCopyStr (&head, 0, "; ");
4532           }
4533           ValNodeCopyStr (&head, 0, "HTG");
4534           ValNodeCopyStr (&head, 0, "; ");
4535           ValNodeCopyStr (&head, 0, "HTGS_PHASE1");
4536           break;
4537         case MI_TECH_htgs_2 :
4538           if (head != NULL) {
4539             ValNodeCopyStr (&head, 0, "; ");
4540           }
4541           ValNodeCopyStr (&head, 0, "HTG");
4542           ValNodeCopyStr (&head, 0, "; ");
4543           ValNodeCopyStr (&head, 0, "HTGS_PHASE2");
4544           break;
4545         case MI_TECH_htgs_3 :
4546           if (head != NULL) {
4547             ValNodeCopyStr (&head, 0, "; ");
4548           }
4549           ValNodeCopyStr (&head, 0, "HTG");
4550           break;
4551         case MI_TECH_est :
4552           if (head != NULL) {
4553             ValNodeCopyStr (&head, 0, "; ");
4554           }
4555           is_est = TRUE;
4556           ValNodeCopyStr (&head, 0, "EST");
4557           if (is_env_sample) {
4558             if (head != NULL) {
4559               ValNodeCopyStr (&head, 0, "; ");
4560             }
4561             ValNodeCopyStr (&head, 0, "ENV");
4562           }
4563           break;
4564         case MI_TECH_sts :
4565           if (head != NULL) {
4566             ValNodeCopyStr (&head, 0, "; ");
4567           }
4568           is_sts = TRUE;
4569           ValNodeCopyStr (&head, 0, "STS");
4570           break;
4571         case MI_TECH_survey :
4572           if (head != NULL) {
4573             ValNodeCopyStr (&head, 0, "; ");
4574           }
4575           is_gss = TRUE;
4576           ValNodeCopyStr (&head, 0, "GSS");
4577           if (is_env_sample) {
4578             if (head != NULL) {
4579               ValNodeCopyStr (&head, 0, "; ");
4580             }
4581             ValNodeCopyStr (&head, 0, "ENV");
4582           }
4583           break;
4584         case MI_TECH_fli_cdna :
4585           if (head != NULL) {
4586             ValNodeCopyStr (&head, 0, "; ");
4587           }
4588           ValNodeCopyStr (&head, 0, "FLI_CDNA");
4589           break;
4590         case MI_TECH_htgs_0 :
4591           if (head != NULL) {
4592             ValNodeCopyStr (&head, 0, "; ");
4593           }
4594           ValNodeCopyStr (&head, 0, "HTG");
4595           ValNodeCopyStr (&head, 0, "; ");
4596           ValNodeCopyStr (&head, 0, "HTGS_PHASE0");
4597           break;
4598         case MI_TECH_htc :
4599           if (head != NULL) {
4600             ValNodeCopyStr (&head, 0, "; ");
4601           }
4602           ValNodeCopyStr (&head, 0, "HTC");
4603           break;
4604         case MI_TECH_wgs :
4605           if (head != NULL) {
4606             ValNodeCopyStr (&head, 0, "; ");
4607           }
4608           ValNodeCopyStr (&head, 0, "WGS");
4609           break;
4610         /*
4611         case MI_TECH_barcode :
4612           if (head != NULL) {
4613             ValNodeCopyStr (&head, 0, "; ");
4614           }
4615           ValNodeCopyStr (&head, 0, "BARCODE");
4616           break;
4617         */
4618         case MI_TECH_tsa :
4619           if (head != NULL) {
4620             ValNodeCopyStr (&head, 0, "; ");
4621           }
4622           ValNodeCopyStr (&head, 0, "TSA");
4623           ValNodeCopyStr (&head, 0, "; ");
4624           ValNodeCopyStr (&head, 0, "Transcriptome Shotgun Assembly");
4625           is_tsa = TRUE;
4626           break;
4627         case MI_TECH_targeted :
4628           if (head != NULL) {
4629             ValNodeCopyStr (&head, 0, "; ");
4630           }
4631           ValNodeCopyStr (&head, 0, "TLS");
4632           ValNodeCopyStr (&head, 0, "; ");
4633           ValNodeCopyStr (&head, 0, "Targeted Locus Study");
4634           break;
4635         case MI_TECH_unknown :
4636         case MI_TECH_standard :
4637         case MI_TECH_other :
4638           if (is_env_sample) {
4639             if (head != NULL) {
4640               ValNodeCopyStr (&head, 0, "; ");
4641             }
4642             ValNodeCopyStr (&head, 0, "ENV");
4643           }
4644           break;
4645         default :
4646           break;
4647       }
4648     }
4649   }
4650 
4651   if (ISA_aa (bsp->mol) && (! is_tsa)) {
4652     if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
4653       bssp = (BioseqSetPtr) bsp->idx.parentptr;
4654       if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {
4655         sep = bssp->seq_set;
4656         if (sep != NULL && IS_Bioseq (sep)) {
4657           nbsp = (BioseqPtr) sep->data.ptrvalue;
4658           if (nbsp != NULL) {
4659             sdp = SeqMgrGetNextDescriptor (nbsp, NULL, Seq_descr_molinfo, &dcontext);
4660             if (sdp != NULL) {
4661               mip = (MolInfoPtr) sdp->data.ptrvalue;
4662               if (mip != NULL) {
4663                 if (mip->tech == MI_TECH_tsa) {
4664                   if (head != NULL) {
4665                     ValNodeCopyStr (&head, 0, "; ");
4666                   }
4667                   ValNodeCopyStr (&head, 0, "TSA");
4668                   ValNodeCopyStr (&head, 0, "; ");
4669                   ValNodeCopyStr (&head, 0, "Transcriptome Shotgun Assembly");
4670                 }
4671               }
4672             }
4673           }
4674         }
4675       }
4676     }
4677   }
4678 
4679   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
4680   while (sdp != NULL) {
4681     uop = (UserObjectPtr) sdp->data.ptrvalue;
4682     if (uop != NULL) {
4683       oip = uop->type;
4684       if (oip != NULL && StringICmp (oip->str, "ENCODE") == 0) {
4685         add_encode = TRUE;
4686       } else if (oip != NULL && StringICmp (oip->str, "StructuredComment") == 0) {
4687         this_is_gen_asm = FALSE;
4688         for (curr = uop->data; curr != NULL; curr = curr->next) {
4689           if (curr->choice != 1) continue;
4690           oip = curr->label;
4691           if (oip == NULL) continue;
4692           field = oip->str;
4693           if (StringHasNoText (field)) continue;
4694           if (StringCmp (field, "StructuredCommentPrefix") == 0) {
4695             if (StringCmp ((CharPtr) curr->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) {
4696               is_genome_assembly = TRUE;
4697               this_is_gen_asm = TRUE;
4698             }
4699           }
4700         }
4701         if (this_is_gen_asm) {
4702           for (curr = uop->data; curr != NULL; curr = curr->next) {
4703             if (curr->choice != 1) continue;
4704             oip = curr->label;
4705             if (oip == NULL) continue;
4706             field = oip->str;
4707             if (StringHasNoText (field)) continue;
4708             if (StringCmp (field, "Current Finishing Status") == 0) {
4709               finishing_status = GetFinishingStatus ((CharPtr) curr->data.ptrvalue);
4710             }
4711           }
4712         }
4713         sc_keyword = KeywordForStructuredCommentName (uop);
4714         if (sc_keyword != NULL) {
4715           if (IsStructuredCommentValid (uop, NULL, NULL) == eFieldValid_Valid) {
4716             ky_head = SplitStringAtSemicolon (sc_keyword);
4717             if (ky_head != NULL) {
4718               for (vnp = ky_head; vnp != NULL; vnp = vnp->next) {
4719                 kwd = (CharPtr) vnp->data.ptrvalue;
4720                 if (StringHasNoText (kwd)) continue;
4721                 if (head != NULL) {
4722                   ValNodeCopyStr (&head, 0, "; ");
4723                 }
4724                 ValNodeCopyStr (&head, 0, kwd);
4725               }
4726               ValNodeFreeData (ky_head);
4727             }
4728           }
4729           MemFree (sc_keyword);
4730         }
4731       } else if (oip != NULL && StringICmp (oip->str, "Unverified") == 0) {
4732         is_unverified = TRUE;
4733         for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
4734           oip = ufp->label;
4735           if (oip != NULL && StringCmp (oip->str, "Type") == 0 && ufp->choice == 1) {
4736             str = (CharPtr) ufp->data.ptrvalue;
4737             if (StringICmp (str, "Organism") == 0) {
4738               is_unv_organism = TRUE;
4739             } else if (StringICmp (str, "Misassembled") == 0) {
4740               is_unv_misassembled = TRUE;
4741             }
4742           }
4743         }
4744       }
4745     }
4746     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
4747   }
4748   if (is_unverified) {
4749     if (head != NULL) {
4750       ValNodeCopyStr (&head, 0, "; ");
4751     }
4752     if (is_unv_organism) {
4753       ValNodeCopyStr (&head, 0, "UNVERIFIED_ORGANISM");
4754     } else if (is_unv_misassembled) {
4755       ValNodeCopyStr (&head, 0, "UNVERIFIED_MISASSEMBLY");
4756     } else {
4757       ValNodeCopyStr (&head, 0, "UNVERIFIED");
4758     }
4759   }
4760   if (add_encode) {
4761     if (head != NULL) {
4762       ValNodeCopyStr (&head, 0, "; ");
4763     }
4764     ValNodeCopyStr (&head, 0, "ENCODE");
4765   }
4766   if (is_genome_assembly && StringDoesHaveText (finishing_status)) {
4767     if (head != NULL) {
4768       ValNodeCopyStr (&head, 0, "; ");
4769     }
4770     ValNodeCopyStr (&head, 0, finishing_status);
4771   }
4772 
4773   for (sip = bsp->id; sip != NULL; sip = sip->next) {
4774     if (sip->choice == SEQID_TPG || sip->choice == SEQID_TPE || sip->choice == SEQID_TPD) {
4775       if (head != NULL) {
4776         ValNodeCopyStr (&head, 0, "; ");
4777       }
4778       ValNodeCopyStr (&head, 0, "Third Party Data");
4779       ValNodeCopyStr (&head, 0, "; ");
4780       ValNodeCopyStr (&head, 0, "TPA");
4781     } else if (sip->choice == SEQID_OTHER) {
4782       if (head != NULL) {
4783         ValNodeCopyStr (&head, 0, "; ");
4784       }
4785       ValNodeCopyStr (&head, 0, "RefSeq");
4786     }
4787   }
4788 
4789   if (is_cross_kingdom && is_wp) {
4790     if (head != NULL) {
4791       ValNodeCopyStr (&head, 0, "; ");
4792     }
4793     ValNodeCopyStr (&head, 0, "CROSS_KINGDOM");
4794   }
4795 
4796   sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
4797   while (sdp != NULL) {
4798 
4799     keywords = NULL;
4800 
4801     switch (dcontext.seqdesctype) {
4802       case Seq_descr_genbank :
4803         gbp = (GBBlockPtr) sdp->data.ptrvalue;
4804         if (gbp != NULL) {
4805           keywords = gbp->keywords;
4806         }
4807         break;
4808       case Seq_descr_embl :
4809         ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
4810         if (ebp != NULL) {
4811           keywords = ebp->keywords;
4812         }
4813         break;
4814       case Seq_descr_pir :
4815         pir = (PirBlockPtr) sdp->data.ptrvalue;
4816         if (pir != NULL) {
4817           keywords = pir->keywords;
4818         }
4819         break;
4820       case Seq_descr_prf :
4821         prf = (PrfBlockPtr) sdp->data.ptrvalue;
4822         if (prf != NULL) {
4823           keywords = prf->keywords;
4824         }
4825         break;
4826       case Seq_descr_sp :
4827         sp = (SPBlockPtr) sdp->data.ptrvalue;
4828         if (sp != NULL) {
4829           keywords = sp->keywords;
4830         }
4831         break;
4832       default :
4833         break;
4834     }
4835 
4836     if (keywords != NULL) {
4837       bbp->entityID = dcontext.entityID;
4838       bbp->itemID = dcontext.itemID;
4839       bbp->itemtype = OBJ_SEQDESC;
4840     }
4841 
4842     for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
4843       kwd = (CharPtr) vnp->data.ptrvalue;
4844       if (CheckSpecialKeyword (is_est, is_sts, is_gss, kwd)) {
4845         if (! KeywordAlreadyInList (head, kwd)) {
4846           if (head != NULL) {
4847             ValNodeCopyStr (&head, 0, "; ");
4848           }
4849           ValNodeCopyStr (&head, 0, kwd);
4850         }
4851       }
4852     }
4853 
4854     sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &dcontext);
4855   }
4856 
4857   FFStartPrint( ffstring, awp->format, 0, 12, "KEYWORDS", 12, 5, 5, "KW", TRUE);
4858   str = MergeFFValNodeStrs (head);
4859 
4860   /* if no keywords were found, period will still be added by this call */
4861   if ( str != NULL ) {
4862     FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_TO_SPACES);
4863   } else {
4864     FFAddOneChar(ffstring, '.', FALSE);
4865   }
4866 
4867   MemFree (str);
4868 
4869   /* optionally populate indexes for NCBI internal database */
4870 
4871   if (ajp->index) {
4872     index = &asp->index;
4873   } else {
4874     index = NULL;
4875   }
4876 
4877   if (index != NULL) {
4878     for (vnp = head; vnp != NULL; vnp = vnp->next) {
4879       kwd = (CharPtr) vnp->data.ptrvalue;
4880       if (StringCmp (kwd, "; ") == 0) continue;
4881       ValNodeCopyStrToHead (&(index->keywords), 0, kwd);
4882     }
4883   }
4884 
4885   /* optionally populate gbseq for XML-ized GenBank format */
4886 
4887   if (ajp->gbseq) {
4888     gbseq = &asp->gbseq;
4889   } else {
4890     gbseq = NULL;
4891   }
4892 
4893   if (gbseq != NULL) {
4894     for (vnp = head; vnp != NULL; vnp = vnp->next) {
4895       kwd = (CharPtr) vnp->data.ptrvalue;
4896       if (StringCmp (kwd, "; ") == 0) continue;
4897       ValNodeCopyStr (&(gbseq->keywords), 0, kwd);
4898     }
4899   }
4900 
4901   ValNodeFreeData (head);
4902 
4903   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "KW");
4904 
4905   FFRecycleString(ajp, ffstring);
4906 
4907   if (awp->afp != NULL) {
4908     DoImmediateFormat (awp->afp, bbp);
4909   }
4910 }
4911 
AddSegmentBlock(Asn2gbWorkPtr awp,Boolean onePartOfSeg,Boolean is_na)4912 NLM_EXTERN void AddSegmentBlock (
4913   Asn2gbWorkPtr awp,
4914   Boolean onePartOfSeg,
4915   Boolean is_na
4916 )
4917 
4918 {
4919   Char             acc [41];
4920   IntAsn2gbJobPtr  ajp;
4921   Asn2gbSectPtr    asp;
4922   BaseBlockPtr     bbp;
4923   Char             buf [32];
4924   GBSeqPtr         gbseq;
4925   StringItemPtr    ffstring;
4926 
4927   if (awp == NULL) return;
4928   ajp = awp->ajp;
4929   if (ajp == NULL) return;
4930   asp = awp->asp;
4931   if (asp == NULL) return;
4932 
4933   if (awp->seg < 1 || awp->numsegs < 1) return;
4934 
4935   bbp = Asn2gbAddBlock (awp, SEGMENT_BLOCK, sizeof (BaseBlock));
4936   if (bbp == NULL) return;
4937 
4938   ffstring = FFGetString(ajp);
4939   if ( ffstring == NULL ) return;
4940 
4941 
4942   FFStartPrint (ffstring, awp->format, 0, 12, "SEGMENT", 12, 5, 5, "XX", FALSE);
4943 
4944   if ( GetWWW(ajp) && awp->parent != NULL && onePartOfSeg) {
4945     sprintf (buf, "%d of ", (int) awp->seg);
4946     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4947     SeqIdWrite (awp->parent->id, acc, PRINTID_TEXTID_ACC_VER, sizeof (acc) - 1);
4948 
4949     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4950     if (is_na) {
4951       FF_Add_NCBI_Base_URL (ffstring, link_seqn);
4952     } else {
4953       FF_Add_NCBI_Base_URL (ffstring, link_seqp);
4954     }
4955     FFAddTextToString(ffstring, /* "val=" */ NULL, acc, "\">", FALSE, FALSE, TILDE_IGNORE);
4956 
4957     sprintf (buf, "%ld", (long) awp->numsegs);
4958     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4959     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4960   } else {
4961     sprintf (buf, "%d of %ld", (int) awp->seg, (long) awp->numsegs);
4962     FFAddOneString (ffstring, buf, FALSE, TRUE, TILDE_TO_SPACES);
4963   }
4964 
4965   /* optionally populate gbseq for XML-ized GenBank format */
4966 
4967   if (ajp->gbseq) {
4968     gbseq = &asp->gbseq;
4969   } else {
4970     gbseq = NULL;
4971   }
4972 
4973   if (gbseq != NULL) {
4974     sprintf (buf, "%d of %ld", (int) awp->seg, (long) awp->numsegs);
4975     gbseq->segment = StringSave (buf);
4976   }
4977 
4978   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
4979   FFRecycleString(ajp, ffstring);
4980 
4981   if (awp->afp != NULL) {
4982     DoImmediateFormat (awp->afp, bbp);
4983   }
4984 }
4985 
/*
 * AddSrcBlk
 *
 * Queues a SOURCE_BLOCK on awp and records the entityID / itemID / itemtype
 * triple on it.  If awp->afp is set the block is handed to DoImmediateFormat
 * right away.  Does nothing when awp is NULL or the block cannot be added.
 */
static void AddSrcBlk (
  Asn2gbWorkPtr awp,
  Uint2 entityID,
  Uint4 itemID,
  Uint2 itemtype
)

{
  BaseBlockPtr  blk = NULL;

  if (awp != NULL) {
    blk = Asn2gbAddBlock (awp, SOURCE_BLOCK, sizeof (BaseBlock));
  }
  if (blk == NULL) return;

  blk->itemtype = itemtype;
  blk->itemID = itemID;
  blk->entityID = entityID;

  if (awp->afp == NULL) return;
  DoImmediateFormat (awp->afp, blk);
}
5008 
/*
 * AddOrgBlk
 *
 * Queues an ORGANISM_BLOCK on awp and records the entityID / itemID /
 * itemtype triple on it.  If awp->afp is set the block is handed to
 * DoImmediateFormat right away.  Does nothing when awp is NULL or the block
 * cannot be added.
 */
static void AddOrgBlk (
  Asn2gbWorkPtr awp,
  Uint2 entityID,
  Uint4 itemID,
  Uint2 itemtype
)

{
  BaseBlockPtr  blk = NULL;

  if (awp != NULL) {
    blk = Asn2gbAddBlock (awp, ORGANISM_BLOCK, sizeof (BaseBlock));
  }
  if (blk == NULL) return;

  blk->itemtype = itemtype;
  blk->itemID = itemID;
  blk->entityID = entityID;

  if (awp->afp == NULL) return;
  DoImmediateFormat (awp->afp, blk);
}
5031 
x_NotSpecialTaxName(CharPtr taxname)5032 static Boolean x_NotSpecialTaxName (
5033   CharPtr taxname
5034 )
5035 
5036 {
5037   if (StringHasNoText (taxname)) return TRUE;
5038 
5039   if (StringICmp (taxname, "synthetic construct") == 0) return FALSE;
5040   if (StringICmp (taxname, "artificial sequence") == 0) return FALSE;
5041   if (StringStr (taxname, "vector") != NULL) return FALSE;
5042   if (StringStr (taxname, "Vector") != NULL) return FALSE;
5043 
5044   return TRUE;
5045 }
5046 
AddSourceOrganismBlock(Asn2gbWorkPtr awp)5047 NLM_EXTERN void AddSourceOrganismBlock (
5048   Asn2gbWorkPtr awp
5049 )
5050 
5051 {
5052   IntAsn2gbJobPtr    ajp;
5053   BioSourcePtr       biop;
5054   BioseqPtr          bsp;
5055   SeqFeatPtr         cds;
5056   CharPtr            common;
5057   SeqMgrDescContext  dcontext;
5058   BioseqPtr          dna;
5059   SeqMgrFeatContext  fcontext;
5060   GBBlockPtr         gbp = NULL;
5061   SeqDescrPtr        gbsdp = NULL;
5062   Boolean            is_wp = FALSE;
5063   Boolean            loop = FALSE;
5064   Int2               num_super_kingdom = 0;
5065   Boolean            okay = FALSE;
5066   OrgNamePtr         onp;
5067   OrgRefPtr          orp;
5068   ObjValNodePtr      ovp;
5069   SeqDescrPtr        sdp;
5070   ValNodePtr         sdplist = NULL;
5071   SeqFeatPtr         sfp;
5072   SeqIntPtr          sintp;
5073   SeqIdPtr           sip;
5074   SeqLocPtr          slp, slpx;
5075   SeqPntPtr          spp;
5076   Boolean            super_kingdoms_different = FALSE;
5077   CharPtr            super_kingdom_name = NULL;
5078   CharPtr            taxname;
5079   TaxElementPtr      tep;
5080   TextSeqIdPtr       tsip;
5081   ValNodePtr         vnp;
5082 
5083   if (awp == NULL) return;
5084   ajp = awp->ajp;
5085   if (ajp == NULL) return;
5086   bsp = awp->bsp;
5087   if (bsp == NULL) return;
5088 
5089   for (sip = bsp->id; sip != NULL; sip = sip->next) {
5090     if (sip->choice == SEQID_SWISSPROT) {
5091       loop = TRUE;
5092     } else if (sip->choice == SEQID_OTHER) {
5093       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
5094       if (tsip != NULL && StringNICmp (tsip->accession, "WP_", 3) == 0) {
5095         is_wp = TRUE;
5096       }
5097     }
5098   }
5099 
5100   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
5101   if (sdp != NULL && (! ajp->newSourceOrg)) {
5102     gbp = (GBBlockPtr) sdp->data.ptrvalue;
5103     if (gbp != NULL && StringDoesHaveText (gbp->source)) {
5104       gbsdp = sdp;
5105     }
5106   }
5107 
5108   if (ISA_aa (bsp->mol)) {
5109 
5110     /* if protein, get sources applicable to DNA location of CDS */
5111 
5112     sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
5113     if (sdp != NULL && sdp->choice == Seq_descr_source) {
5114       biop = (BioSourcePtr) sdp->data.ptrvalue;
5115       if (biop != NULL) {
5116         orp = biop->org;
5117         if (orp != NULL) {
5118           taxname = orp->taxname;
5119           if (StringHasNoText (taxname) || x_NotSpecialTaxName (taxname)) {
5120             cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
5121             if (cds != NULL) {
5122               dna = BioseqFindFromSeqLoc (cds->location);
5123               if (dna != NULL) {
5124                 slp = AsnIoMemCopy ((Pointer) cds->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
5125                 if (slp != NULL) {
5126                   for (slpx = SeqLocFindNext (slp, NULL); slpx != NULL; slpx = SeqLocFindNext (slp, slpx)) {
5127                     if (slpx->choice == SEQLOC_INT) {
5128                       sintp = (SeqIntPtr) slpx->data.ptrvalue;
5129                       if (sintp != NULL) {
5130                         sintp->strand = Seq_strand_both;
5131                       }
5132                     } else if (slpx->choice == SEQLOC_PNT) {
5133                       spp = (SeqPntPtr) slpx->data.ptrvalue;
5134                       if (spp != NULL) {
5135                         spp->strand = Seq_strand_both;
5136                       }
5137                     }
5138                   }
5139                 }
5140                 sfp = SeqMgrGetOverlappingSource (slp, &fcontext);
5141                 SeqLocFree (slp);
5142                 if (sfp != NULL) {
5143                   AddSrcBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5144                   AddOrgBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5145                   return;
5146                 }
5147               }
5148             }
5149           }
5150         }
5151       }
5152     }
5153   }
5154 
5155   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
5156   while (sdp != NULL) {
5157     ValNodeAddPointer (&sdplist, 0, (Pointer) sdp);
5158     biop = (BioSourcePtr) sdp->data.ptrvalue;
5159     if (biop != NULL) {
5160       orp = biop->org;
5161       if (orp != NULL) {
5162         taxname = orp->taxname;
5163         common = orp->common;
5164         onp = orp->orgname;
5165         if (onp != NULL) {
5166           if (onp->choice == 5) {
5167             for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
5168               if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
5169                 num_super_kingdom++;
5170                 if (super_kingdom_name == NULL) {
5171                   super_kingdom_name = tep->name;
5172                 } else if (StringICmp (super_kingdom_name, tep->name) != 0) {
5173                   super_kingdoms_different = TRUE;
5174                 }
5175               }
5176             }
5177           }
5178         }
5179       }
5180     }
5181     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext);
5182   }
5183 
5184   if (sdplist != NULL && ((num_super_kingdom > 1 && super_kingdoms_different && is_wp) || loop)) {
5185 
5186     for (vnp = sdplist; vnp != NULL; vnp = vnp->next) {
5187       sdp = (SeqDescrPtr) vnp->data.ptrvalue;
5188 
5189       if (gbsdp != NULL) {
5190         if (gbsdp->extended != 0) {
5191           ovp = (ObjValNodePtr) gbsdp;
5192           AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5193           okay = TRUE;
5194         }
5195       } else if (sdp->extended != 0) {
5196         ovp = (ObjValNodePtr) sdp;
5197         AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5198         okay = TRUE;
5199       }
5200 
5201       if (sdp->extended != 0) {
5202         ovp = (ObjValNodePtr) sdp;
5203         AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5204         okay = TRUE;
5205       }
5206     }
5207 
5208   } else if (sdplist != NULL) {
5209 
5210     sdp = (SeqDescrPtr) sdplist->data.ptrvalue;
5211 
5212     if (gbsdp != NULL) {
5213       if (gbsdp->extended != 0) {
5214         ovp = (ObjValNodePtr) gbsdp;
5215         AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5216         okay = TRUE;
5217       }
5218     } else if (sdp->extended != 0) {
5219       ovp = (ObjValNodePtr) sdp;
5220       AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5221       okay = TRUE;
5222     }
5223 
5224     if (sdp->extended != 0) {
5225       ovp = (ObjValNodePtr) sdp;
5226       AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5227     }
5228 
5229   } else {
5230 
5231     sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
5232     if (sfp != NULL) {
5233       AddSrcBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5234       AddOrgBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5235       okay = TRUE;
5236 
5237     } else if (ISA_aa (bsp->mol)) {
5238 
5239       /* if protein with no sources, get sources applicable to DNA location of CDS */
5240 
5241       cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
5242       if (cds != NULL) {
5243         sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
5244         if (sfp != NULL) {
5245           AddSrcBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5246           AddOrgBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5247           okay = TRUE;
5248         } else {
5249           dna = BioseqFindFromSeqLoc (cds->location);
5250           if (dna != NULL) {
5251             sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
5252             if (sdp != NULL) {
5253               if (sdp->extended != 0) {
5254                 ovp = (ObjValNodePtr) sdp;
5255                 AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5256                 AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5257                 okay = TRUE;
5258               }
5259             }
5260           }
5261         }
5262       }
5263     }
5264   }
5265 
5266   if (! okay) {
5267     AddSrcBlk (awp, 0, 0, 0);
5268     AddOrgBlk (awp, 0, 0, 0);
5269   }
5270 
5271   ValNodeFree (sdplist);
5272 }
5273 
AddPub(Asn2gbWorkPtr awp,ValNodePtr PNTR head,PubdescPtr pdp)5274 static RefBlockPtr AddPub (
5275   Asn2gbWorkPtr awp,
5276   ValNodePtr PNTR head,
5277   PubdescPtr pdp
5278 )
5279 
5280 {
5281   Char            buf [521]; /* increased for consortium in citsub */
5282   CitArtPtr       cap;
5283   CitBookPtr      cbp;
5284   CitGenPtr       cgp;
5285   CitJourPtr      cjp;
5286   CitPatPtr       cpp;
5287   CitSubPtr       csp;
5288   DatePtr         dp = NULL;
5289   Boolean         justuids = TRUE;
5290   ImprintPtr      imp = NULL;
5291   IntRefBlockPtr  irp;
5292   RefBlockPtr     rbp;
5293   ValNodePtr      vnp;
5294   ArticleIdPtr    aip;
5295 
5296   if (awp == NULL || head == NULL || pdp == NULL) return NULL;
5297 
5298   if (awp->hideGeneRIFs) {
5299     if (StringISearch (pdp->comment, "GeneRIF") != NULL) return NULL;
5300   } else if (awp->onlyGeneRIFs) {
5301     if (StringISearch (pdp->comment, "GeneRIF") == NULL) return NULL;
5302   } else if (awp->onlyReviewPubs) {
5303     if (StringISearch (pdp->comment, "Review Article") == NULL) return NULL;
5304   }
5305 
5306   rbp = (RefBlockPtr) MemNew (sizeof (IntRefBlock));
5307   if (rbp == NULL) return NULL;
5308   rbp->blocktype = REFERENCE_BLOCK;
5309   rbp->section = awp->currsection;
5310 
5311   rbp->serial = INT2_MAX;
5312 
5313   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
5314     switch (vnp->choice) {
5315       case PUB_Gen :
5316         /* may be unpublished, or may be serial number of swiss-prot reference */
5317         cgp = (CitGenPtr) vnp->data.ptrvalue;
5318         if (cgp != NULL) {
5319           if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
5320             rbp->category = REF_CAT_UNP;
5321             if (dp == NULL) {
5322               dp = cgp->date;
5323             }
5324             if (cgp->serial_number > 0) {
5325               rbp->serial = cgp->serial_number;
5326             }
5327             if (cgp->cit != NULL) {
5328               if (StringNICmp ("unpublished", cgp->cit, 11) != 0 &&
5329                   StringNICmp ("submitted", cgp->cit, 8) != 0 &&
5330                   StringNICmp ("to be published", cgp->cit, 15) != 0 &&
5331                   StringNICmp ("in press", cgp->cit, 8) != 0 &&
5332                   StringStr (cgp->cit, "Journal") == NULL) {
5333                 if (cgp->serial_number == 0) {
5334                   MemFree (rbp);
5335                   return NULL;
5336                 }
5337               }
5338             } else if (cgp->journal == NULL || cgp->date == NULL) {
5339               if (cgp->serial_number == 0) {
5340                 MemFree (rbp);
5341                 return NULL;
5342               }
5343             }
5344           }
5345         }
5346         break;
5347       case PUB_Sub :
5348         rbp->category = REF_CAT_SUB;
5349         csp = (CitSubPtr) vnp->data.ptrvalue;
5350         if (csp != NULL) {
5351           imp = csp->imp;
5352           if (imp != NULL) {
5353             if (dp == NULL) {
5354               dp = imp->date;
5355             }
5356           }
5357           if (csp->date != NULL) {
5358             if (dp == NULL) {
5359               dp = csp->date;
5360             }
5361           }
5362         }
5363         break;
5364       case PUB_Article:
5365         cap = (CitArtPtr) vnp->data.ptrvalue;
5366         if (cap != NULL) {
5367           switch (cap->from) {
5368             case 1:
5369               cjp = (CitJourPtr) cap->fromptr;
5370               if (cjp != NULL) {
5371                 imp = (ImprintPtr) cjp->imp;
5372                 if (imp != NULL) {
5373                   if (dp == NULL) {
5374                     dp = imp->date;
5375                   }
5376                 }
5377               }
5378               break;
5379             case 2:
5380               cbp = (CitBookPtr) cap->fromptr;
5381               if (cbp != NULL) {
5382                 imp = (ImprintPtr) cbp->imp;
5383                 if (imp != NULL) {
5384                   if (dp == NULL) {
5385                     dp = imp->date;
5386                   }
5387                 }
5388               }
5389               break;
5390             case 3:
5391               cbp = (CitBookPtr) cap->fromptr;
5392               if (cbp != NULL) {
5393                 imp = (ImprintPtr) cbp->imp;
5394                 if (imp != NULL) {
5395                   if (dp == NULL) {
5396                     dp = imp->date;
5397                   }
5398                 }
5399               }
5400               break;
5401             default:
5402               break;
5403           }
5404           /*  look for PMID and MUID in the Cit-art article ids set */
5405           if (cap->ids != NULL) {
5406             for (aip = cap->ids; aip != NULL; aip = aip->next) {
5407               if (aip->choice == ARTICLEID_PUBMED && rbp->pmid == 0) {
5408                 rbp->pmid = aip->data.intvalue;
5409                 rbp->category = REF_CAT_PUB;
5410               } else if (aip->choice == ARTICLEID_MEDLINE && rbp->muid == 0) {
5411                 rbp->muid = aip->data.intvalue;
5412                 rbp->category = REF_CAT_PUB;
5413               }
5414             }
5415           }
5416         }
5417         break;
5418       case PUB_Book:
5419         cbp = (CitBookPtr) vnp->data.ptrvalue;
5420         if (cbp != NULL) {
5421           imp = (ImprintPtr) cbp->imp;
5422           if (imp != NULL) {
5423             if (dp == NULL) {
5424               dp = imp->date;
5425             }
5426           }
5427         }
5428         break;
5429       case PUB_Proc:
5430         cbp = (CitBookPtr) vnp->data.ptrvalue;
5431         if (cbp != NULL) {
5432           imp = (ImprintPtr) cbp->imp;
5433           if (imp != NULL) {
5434             if (dp == NULL) {
5435               dp = imp->date;
5436             }
5437           }
5438         }
5439         break;
5440       case PUB_Patent :
5441         rbp->category = REF_CAT_PUB;
5442         cpp = (CitPatPtr) vnp->data.ptrvalue;
5443         if (cpp != NULL) {
5444           if (cpp->date_issue != NULL) {
5445             if (dp == NULL) {
5446               dp = (DatePtr) cpp->date_issue;
5447             }
5448           } else if (cpp->app_date != NULL) {
5449             if (dp == NULL) {
5450               dp = (DatePtr) cpp->app_date;
5451             }
5452           }
5453         }
5454         break;
5455       case PUB_Man:
5456         cbp = (CitBookPtr) vnp->data.ptrvalue;
5457         if (cbp != NULL) {
5458           imp = (ImprintPtr) cbp->imp;
5459           if (imp != NULL) {
5460             if (dp == NULL) {
5461               dp = imp->date;
5462             }
5463           }
5464         }
5465         break;
5466       case PUB_Muid :
5467         if (rbp->muid == 0) {
5468           rbp->muid = vnp->data.intvalue;
5469           rbp->category = REF_CAT_PUB;
5470         }
5471         break;
5472       case PUB_PMid :
5473         if (rbp->pmid == 0) {
5474           rbp->pmid = vnp->data.intvalue;
5475           rbp->category = REF_CAT_PUB;
5476         }
5477         break;
5478       default :
5479         break;
5480     }
5481     if (vnp->choice != PUB_Muid && vnp->choice != PUB_PMid) {
5482       justuids = FALSE;
5483     }
5484   }
5485 
5486   /* check for submitted vs. in-press */
5487 
5488   if (imp != NULL) {
5489     rbp->category = REF_CAT_PUB;
5490     switch (imp->prepub) {
5491       case 1 :
5492         rbp->category = REF_CAT_UNP;
5493         break;
5494       case 2 :
5495         rbp->category = REF_CAT_PUB;
5496         break;
5497       default :
5498         break;
5499     }
5500   }
5501 
5502   /* check for sites reftype */
5503 
5504   if (pdp->reftype != 0) {
5505     rbp->sites = pdp->reftype;
5506   }
5507 
5508   if (rbp->muid == 0 && rbp->pmid == 0) {
5509     vnp = pdp->pub;
5510 
5511     /* skip over just serial number */
5512 
5513     if (vnp != NULL && vnp->choice == PUB_Gen && vnp->next != NULL) {
5514       cgp = (CitGenPtr) vnp->data.ptrvalue;
5515       if (cgp != NULL) {
5516         if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
5517           if (cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
5518             vnp = vnp->next;
5519           }
5520         }
5521       }
5522     }
5523 
5524     if (PubLabelUnique (vnp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT, TRUE) > 0) {
5525       rbp->uniquestr = StringSaveNoNull (buf);
5526     }
5527   }
5528 
5529   irp = (IntRefBlockPtr) rbp;
5530   irp->date = DateDup (dp);
5531   irp->justuids = justuids;
5532   /* if (justuids) { */
5533     irp->fig = StringSaveNoNull (pdp->fig);
5534     irp->maploc = StringSaveNoNull (pdp->maploc);
5535     irp->poly_a = pdp->poly_a;
5536   /* } */
5537 
5538   /* if not rejected by now, link in */
5539 
5540   ValNodeAddPointer (head, 0, rbp);
5541 
5542   return rbp;
5543 }
5544 
SortReferences(VoidPtr ptr1,VoidPtr ptr2,Boolean serialFirst,Boolean isRefSeq)5545 static int LIBCALLBACK SortReferences (
5546   VoidPtr ptr1,
5547   VoidPtr ptr2,
5548   Boolean serialFirst,
5549   Boolean isRefSeq
5550 )
5551 
5552 {
5553   int             compare;
5554   IntRefBlockPtr  irp1;
5555   IntRefBlockPtr  irp2;
5556   RefBlockPtr     rbp1;
5557   RefBlockPtr     rbp2;
5558   Int2            status;
5559   RefBlockPtr     temp;
5560   ValNodePtr      vnp1;
5561   ValNodePtr      vnp2;
5562 
5563   if (ptr1 == NULL || ptr2 == NULL) return 0;
5564   vnp1 = *((ValNodePtr PNTR) ptr1);
5565   vnp2 = *((ValNodePtr PNTR) ptr2);
5566   if (vnp1 == NULL || vnp2 == NULL) return 0;
5567   rbp1 = (RefBlockPtr) vnp1->data.ptrvalue;
5568   rbp2 = (RefBlockPtr) vnp2->data.ptrvalue;
5569   if (rbp1 == NULL || rbp2 == NULL) return 0;
5570 
5571   if (serialFirst) {
5572     if (rbp1->serial > rbp2->serial) {
5573       return 1;
5574     } else if (rbp1->serial < rbp2->serial) {
5575       return -1;
5576     }
5577   }
5578 
5579   /* usual first sort by published, unpublished, and cit-subs */
5580 
5581   if (rbp1->category > rbp2->category) {
5582     return 1;
5583   } else if (rbp1->category < rbp2->category) {
5584     return -1;
5585   }
5586 
5587   /* for RefSeq, newer publications first, so temporarily swap pointers */
5588 
5589   if (isRefSeq) {
5590     temp = rbp1;
5591     rbp1 = rbp2;
5592     rbp2 = temp;
5593   }
5594 
5595   /* within class, sort by date, older publications first (except RefSeq) */
5596 
5597   irp1 = (IntRefBlockPtr) rbp1;
5598   irp2 = (IntRefBlockPtr) rbp2;
5599 
5600   if ( irp1->date != 0  &&  irp2->date == 0 ) {
5601       return 1;
5602   } else if ( irp1->date == 0  &&  irp2->date != 0 ) {
5603       return -1;
5604   }
5605 
5606   status = DateMatch (irp1->date, irp2->date, TRUE);
5607   if (status == 1 || status == -1) return status;
5608   /* if dates incomparable, do other comparisons */
5609   if ( status != 0 ) {
5610     if( (NULL != irp1->date) && (NULL != irp2->date ) ) {
5611       /* std date comes before str date */
5612       return ( irp2->date->data[0] - irp1->date->data[0] );
5613     }
5614   }
5615 
5616   /* if dates (e.g., years) match, try to distinguish by uids */
5617 
5618   if (rbp1->pmid != 0 && rbp2->pmid != 0) {
5619     if (rbp1->pmid > rbp2->pmid) {
5620       return 1;
5621     } else if (rbp1->pmid < rbp2->pmid) {
5622       return -1;
5623     }
5624   }
5625 
5626   if (rbp1->muid != 0 && rbp2->muid != 0) {
5627     if (rbp1->muid > rbp2->muid) {
5628       return 1;
5629     } else if (rbp1->muid < rbp2->muid) {
5630       return -1;
5631     }
5632   }
5633 
5634   /* restore sort order after date and pmid/muid matching */
5635 
5636   if (isRefSeq) {
5637     temp = rbp1;
5638     rbp1 = rbp2;
5639     rbp2 = temp;
5640 
5641     irp1 = (IntRefBlockPtr) rbp1;
5642     irp2 = (IntRefBlockPtr) rbp2;
5643   }
5644 
5645   /* if same uid, one with just uids goes last to be excised but remembered */
5646 
5647   if ((rbp1->pmid != 0 && rbp2->pmid != 0) || (rbp1->muid != 0 && rbp2->muid != 0)) {
5648     if (irp1->justuids && (! irp2->justuids)) {
5649       return 1;
5650     } else if ((! irp1->justuids) && irp2->justuids) {
5651       return -1;
5652     }
5653   }
5654 
5655   /* put sites after pubs that refer to all or a range of bases */
5656 
5657   if (rbp1->sites > rbp2->sites) {
5658     return 1;
5659   } else if (rbp2->sites > rbp1->sites) {
5660     return -1;
5661   }
5662 
5663   /* next use author string */
5664 
5665   if (irp1->authstr != NULL && irp2->authstr != NULL) {
5666     compare = StringICmp (irp1->authstr, irp2->authstr);
5667     if (compare > 0) {
5668       return 1;
5669     } else if (compare < 0) {
5670       return -1;
5671     }
5672   }
5673 
5674   /* use unique label string to determine sort order */
5675 
5676   if (rbp1->uniquestr != NULL && rbp2->uniquestr != NULL) {
5677     compare = StringICmp (rbp1->uniquestr, rbp2->uniquestr);
5678     if (compare > 0) {
5679       return 1;
5680     } else if (compare < 0) {
5681       return -1;
5682     }
5683   }
5684 
5685   /* for publication features, sort in explore index order - probably superset of itemID below */
5686 
5687   if (irp1->index > irp2->index) {
5688     return 1;
5689   } else if (irp1->index < irp2->index) {
5690     return -1;
5691   }
5692 
5693   /* last resort for equivalent publication descriptors, sort in itemID order */
5694 
5695   if (rbp1->itemtype == OBJ_SEQDESC && rbp2->itemtype == OBJ_SEQDESC) {
5696     if (rbp1->itemID > rbp2->itemID) {
5697       return 1;
5698     } else if (rbp1->itemID < rbp2->itemID) {
5699       return -1;
5700     }
5701   }
5702 
5703   if (rbp1->itemtype == OBJ_ANNOTDESC && rbp2->itemtype == OBJ_ANNOTDESC) {
5704     if (rbp1->itemID > rbp2->itemID) {
5705       return 1;
5706     } else if (rbp1->itemID < rbp2->itemID) {
5707       return -1;
5708     }
5709   }
5710 
5711   if (! serialFirst) {
5712     if (rbp1->serial > rbp2->serial) {
5713       return 1;
5714     } else if (rbp1->serial < rbp2->serial) {
5715       return -1;
5716     }
5717   }
5718 
5719   return 0;
5720 }
5721 
SortReferencesA(VoidPtr ptr1,VoidPtr ptr2)5722 static int LIBCALLBACK SortReferencesA (
5723   VoidPtr ptr1,
5724   VoidPtr ptr2
5725 )
5726 
5727 {
5728   return SortReferences (ptr1, ptr2, FALSE, FALSE);
5729 }
5730 
SortReferencesB(VoidPtr ptr1,VoidPtr ptr2)5731 static int LIBCALLBACK SortReferencesB (
5732   VoidPtr ptr1,
5733   VoidPtr ptr2
5734 )
5735 
5736 {
5737   return SortReferences (ptr1, ptr2, TRUE, FALSE);
5738 }
5739 
SortReferencesAR(VoidPtr ptr1,VoidPtr ptr2)5740 static int LIBCALLBACK SortReferencesAR (
5741   VoidPtr ptr1,
5742   VoidPtr ptr2
5743 )
5744 
5745 {
5746   return SortReferences (ptr1, ptr2, FALSE, TRUE);
5747 }
5748 
SortReferencesBR(VoidPtr ptr1,VoidPtr ptr2)5749 static int LIBCALLBACK SortReferencesBR (
5750   VoidPtr ptr1,
5751   VoidPtr ptr2
5752 )
5753 
5754 {
5755   return SortReferences (ptr1, ptr2, TRUE, TRUE);
5756 }
5757 
GetAuthorsPlusConsortium(FmtType format,AuthListPtr alp)5758 static CharPtr GetAuthorsPlusConsortium (
5759   FmtType format,
5760   AuthListPtr alp
5761 )
5762 
5763 {
5764   CharPtr  consortium;
5765   CharPtr  str;
5766   CharPtr  tmp;
5767 
5768   consortium = NULL;
5769   str = GetAuthorsString (format, alp, &consortium, NULL, NULL);
5770   if (str == NULL) return consortium;
5771   if (consortium == NULL) return str;
5772   tmp = (CharPtr) MemNew (StringLen (str) + StringLen (consortium) + 5);
5773   if (tmp == NULL) return NULL;
5774   StringCpy (tmp, str);
5775   StringCat (tmp, "; ");
5776   StringCat (tmp, consortium);
5777   MemFree (str);
5778   MemFree (consortium);
5779   return tmp;
5780 }
5781 
HasNoPmidOrMuid(PubdescPtr pdp)5782 static Boolean HasNoPmidOrMuid (
5783   PubdescPtr pdp
5784 )
5785 
5786 {
5787   ValNodePtr  vnp;
5788 
5789   if (pdp == NULL) return TRUE;
5790   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
5791     if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return FALSE;
5792   }
5793   return TRUE;
5794 }
5795 
/* user data handed to the GetRefsOnCDS callback through context->userdata */

typedef struct cdspubs {
  Asn2gbWorkPtr  awp;     /* work structure; pubs found are added to awp->pubhead */
  BioseqPtr      target;  /* bioseq passed to SeqLocMerge; its length gives the pub's left/right range */
  ValNodePtr     vnp;     /* location merged onto target to form each pub's loc */
} CdsPubs, PNTR CdsPubsPtr;
5801 
GetRefsOnCDS(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5802 static Boolean LIBCALLBACK GetRefsOnCDS (
5803   SeqFeatPtr sfp,
5804   SeqMgrFeatContextPtr context
5805 )
5806 
5807 {
5808   AuthListPtr     alp;
5809   Asn2gbWorkPtr   awp;
5810   CdsPubsPtr      cpp;
5811   IntRefBlockPtr  irp;
5812   Boolean         okay;
5813   PubdescPtr      pdp;
5814   RefBlockPtr     rbp;
5815   BioseqPtr       target;
5816 
5817   if (sfp == NULL || context == NULL) return TRUE;
5818   cpp = (CdsPubsPtr) context->userdata;
5819   awp = cpp->awp;
5820   if (awp == NULL) return TRUE;
5821   target = cpp->target;
5822 
5823   okay = TRUE;
5824   pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5825   if (awp->format == FTABLE_FMT) {
5826     if (HasNoPmidOrMuid (pdp)) {
5827       okay = FALSE;
5828     }
5829   }
5830 
5831   if (okay) {
5832     rbp = AddPub (awp, &(awp->pubhead), pdp);
5833     if (rbp != NULL) {
5834 
5835       rbp->entityID = context->entityID;
5836       rbp->itemID = context->itemID;
5837       rbp->itemtype = OBJ_SEQFEAT;
5838 
5839       irp = (IntRefBlockPtr) rbp;
5840       irp->loc = SeqLocMerge (cpp->target, cpp->vnp, NULL, FALSE, TRUE, FALSE);
5841       if (target != NULL) {
5842         irp->left = 0;
5843         irp->right = target->length - 1;
5844       }
5845       alp = GetAuthListPtr (pdp, NULL);
5846       if (alp != NULL) {
5847         irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5848       }
5849       irp->index = 0;
5850     }
5851   }
5852 
5853   return TRUE;
5854 }
5855 
/*
 * GetRefsOnBioseq
 *
 * Collects publications into awp->pubhead (through AddPub) from four
 * sources, in this order:
 *
 *   1. pub descriptors found by SeqMgrGetNextDescriptor on target
 *   2. when cdsloc is not NULL, pub features overlapping cdsloc
 *      (handled by the GetRefsOnCDS callback)
 *   3. pub AnnotDescs found by SeqMgrGetNextAnnotDesc on target
 *   4. pub features indexed on awp->parent with an interval that
 *      overlaps the from..to range
 *
 * For every reference added, the entityID/itemID/itemtype, the location,
 * the left/right range, the author string and the index are filled in.
 *
 * In FTABLE_FMT, descriptor and feature pubs without PMID or MUID are
 * skipped; AnnotDesc pubs are not filtered.
 *
 * If ajp->ajp.slp is set, the incoming from and to are replaced by the
 * start and stop of that location.
 *
 * Does nothing if awp, target, bsp or awp->ajp is NULL.
 */

static void GetRefsOnBioseq (
  Asn2gbWorkPtr awp,
  BioseqPtr target,
  BioseqPtr bsp,
  Int4 from,
  Int4 to,
  SeqLocPtr cdsloc,
  BioseqPtr cdsbsp
)

{
  SeqMgrAndContext   acontext;
  AnnotDescPtr       adp;
  IntAsn2gbJobPtr    ajp;
  AuthListPtr        alp;
  CdsPubs            cp;
  SeqMgrDescContext  dcontext;
  SeqMgrFeatContext  fcontext;
  Int2               i;
  Int2               idx;
  IntRefBlockPtr     irp;
  Int4Ptr            ivals;
  Int4               left;
  SeqLocPtr          newloc;
  Int2               numivals;
  Boolean            okay;
  PubdescPtr         pdp;
  RefBlockPtr        rbp;
  Int4               right;
  SeqDescrPtr        sdp;
  SeqFeatPtr         sfp;
  SeqInt             sint;
  SeqIntPtr          sintp;
  SeqIdPtr           sip;
  Boolean            split;
  Int4               start;
  Int4               stop;
  Uint1              strand;
  Boolean            takeIt;
  ValNode            vn;
  ValNodePtr         vnp;

  if (awp == NULL || target == NULL || bsp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  /* full length loc for descriptors */

  /* sint and vn live on the stack; only sint.id is heap allocated and is freed below */

  sint.from = 0;
  if (ajp->ajp.slp != NULL) {
    from = SeqLocStart (ajp->ajp.slp); /* other features use awp->slp for from and to */
  }
  if (ajp->ajp.slp != NULL) {
    sint.to = SeqLocLen (ajp->ajp.slp) - 1;
    to = SeqLocStop (ajp->ajp.slp); /* other features use awp->slp for from and to */
  } else {
    sint.to = bsp->length - 1;
  }
  sint.strand = Seq_strand_plus;
  sint.id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
  sint.if_from = NULL;
  sint.if_to = NULL;

  vn.choice = SEQLOC_INT;
  vn.data.ptrvalue = (Pointer) &sint;
  vn.next = NULL;

  /* source 1 - publication descriptors on target */

  sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_pub, &dcontext);
  while (sdp != NULL) {

    /* check if descriptor on part already added on segmented bioseq */

    okay = TRUE;
    for (vnp = awp->pubhead; vnp != NULL && okay; vnp = vnp->next) {
      rbp = (RefBlockPtr) vnp->data.ptrvalue;
      if (rbp != NULL) {
        if (rbp->entityID == dcontext.entityID &&
            rbp->itemID == dcontext.itemID &&
            rbp->itemtype == OBJ_SEQDESC) {
          okay = FALSE;
        }
      }
    }

    /* feature table format only takes pubs that carry a PMID or MUID */

    if (awp->format == FTABLE_FMT) {
      pdp = (PubdescPtr) sdp->data.ptrvalue;
      if (HasNoPmidOrMuid (pdp)) {
        okay = FALSE;
      }
    }

    if (okay) {
      pdp = (PubdescPtr) sdp->data.ptrvalue;
      rbp = AddPub (awp, &(awp->pubhead), pdp);
      if (rbp != NULL) {

        rbp->entityID = dcontext.entityID;
        rbp->itemID = dcontext.itemID;
        rbp->itemtype = OBJ_SEQDESC;

        irp = (IntRefBlockPtr) rbp;
        if (cdsloc != NULL && cdsbsp != NULL) {

          /* pubs reached through a CDS are placed on the full length of cdsbsp */

          sintp = SeqIntNew ();
          sintp->from = 0;
          sintp->to = cdsbsp->length - 1;
          sintp->id = SeqIdDup (cdsbsp->id);
          irp->loc = ValNodeAddPointer (NULL, SEQLOC_INT, (Pointer) sintp);
          /*
          irp->loc = SeqLocWholeNew (cdsbsp);
          */
          irp->left = 0;
          irp->right = cdsbsp->length - 1;
        } else {
          irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
          irp->left = from;
          irp->right = to;
        }
        alp = GetAuthListPtr (pdp, NULL);
        if (alp != NULL) {
          irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
        }
        irp->index = 0;
      }
    }
    sdp = SeqMgrGetNextDescriptor (target, sdp, Seq_descr_pub, &dcontext);
  }

  /* if protein with no pubs, get pubs applicable to DNA location of CDS */

  /* source 2 - cp is passed to GetRefsOnCDS as its userdata */

  if (cdsloc != NULL) {
    cp.awp = awp;
    cp.target = cdsbsp;
    cp.vnp = &vn;
    SeqMgrGetAllOverlappingFeatures (cdsloc, FEATDEF_PUB, NULL, 0, LOCATION_SUBSET, (Pointer) &cp, GetRefsOnCDS);
  }

  /* also get publications from AnnotDesc on SeqAnnot */

  /* source 3 - no duplicate or PMID/MUID filtering is applied here (okay is always TRUE) */

  adp = SeqMgrGetNextAnnotDesc (target, NULL, Annot_descr_pub, &acontext);
  while (adp != NULL) {

    okay = TRUE;

    if (okay) {
      pdp = (PubdescPtr) adp->data.ptrvalue;
      rbp = AddPub (awp, &(awp->pubhead), pdp);
      if (rbp != NULL) {

        rbp->entityID = acontext.entityID;
        rbp->itemID = acontext.itemID;
        rbp->itemtype = OBJ_ANNOTDESC;

        irp = (IntRefBlockPtr) rbp;
        irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
        irp->left = from;
        irp->right = to;
        alp = GetAuthListPtr (pdp, NULL);
        if (alp != NULL) {
          irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
        }
        irp->index = 0;
      }
    }
    adp = SeqMgrGetNextAnnotDesc (target, adp, Annot_descr_pub, &acontext);
  }

  /* vn/sint are not used past this point, so the duplicated id can be released */

  SeqIdFree (sint.id);

  /* features are indexed on parent if segmented */

  /* source 4 - from here on bsp refers to awp->parent, not to the bsp argument */

  bsp = awp->parent;

  sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
  while (sfp != NULL) {
    ivals = fcontext.ivals;
    numivals = fcontext.numivals;
    if (ivals != NULL && numivals > 0) {

      /*
      idx = (numivals - 1) * 2;
      start = ivals [idx];
      stop = ivals [idx + 1];
      */

      /* ivals holds start/stop pairs; take the pub if any pair overlaps from..to */

      takeIt = FALSE;
      for (i = 0, idx = 0; i < numivals; i++, idx += 2) {
        start = ivals [idx];
        stop = ivals [idx + 1];
        if ((start <= from && stop > from) ||
            (start < to && stop >= to) ||
            (start >= from && stop <= to)) {
          takeIt = TRUE;
        }
      }

      /* feature table format only takes pubs that carry a PMID or MUID */

      if (awp->format == FTABLE_FMT) {
        pdp = (PubdescPtr) sfp->data.value.ptrvalue;
        if (HasNoPmidOrMuid (pdp)) {
          takeIt = FALSE;
        }
      }

      if (takeIt /* stop >= from && stop <= to */) {

        /*
        start = ivals [0] + 1;
        stop = ivals [idx + 1] + 1;
        */
        pdp = (PubdescPtr) sfp->data.value.ptrvalue;
        rbp = AddPub (awp, &(awp->pubhead), pdp);
        if (rbp != NULL) {

          rbp->entityID = fcontext.entityID;
          rbp->itemID = fcontext.itemID;
          rbp->itemtype = OBJ_SEQFEAT;

          irp = (IntRefBlockPtr) rbp;
          irp->loc = SeqLocMerge (target, sfp->location, NULL, FALSE, TRUE, FALSE);
          irp->left = fcontext.left;
          irp->right = fcontext.right;
          if (ajp->ajp.slp != NULL) {

            /* remap the pub location onto the requested sub-location */
            /* NOTE(review): left, right, strand and split are only consumed by the commented-out SeqLocCopyRegion call */

            sip = SeqIdParse ("lcl|dummy");
            left = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_LEFT_END);
            right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END);
            strand = SeqLocStrand (ajp->ajp.slp);
            split = FALSE;
            newloc = SeqLocReMapEx (sip, ajp->ajp.slp, irp->loc, 0, FALSE, ajp->masterStyle, ajp->relaxedMapping);
            /*
            newloc = SeqLocCopyRegion (sip, irp->loc, bsp, left, right, strand, &split);
            */
            SeqIdFree (sip);
            if (newloc != NULL) {

              /* the remapped location replaces the merged one, which is freed */

              A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
              irp->loc = SeqLocFree (irp->loc);
              irp->loc = newloc;
            }
          }
          alp = GetAuthListPtr (pdp, NULL);
          if (alp != NULL) {
            irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
          }

          /* only feature pubs carry a non-zero index, taken from the feature context */

          irp->index = fcontext.index;
        }
      }
    }

    sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_PUB, 0, &fcontext);
  }
}
6103 
GetRefsOnSeg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)6104 static Boolean LIBCALLBACK GetRefsOnSeg (
6105   SeqLocPtr slp,
6106   SeqMgrSegmentContextPtr context
6107 )
6108 
6109 {
6110   Asn2gbWorkPtr  awp;
6111   BioseqPtr      bsp;
6112   Int4           from;
6113   SeqLocPtr      loc;
6114   SeqEntryPtr    oldscope;
6115   SeqEntryPtr    sep;
6116   SeqIdPtr       sip;
6117   Int4           to;
6118 
6119   if (slp == NULL || context == NULL) return FALSE;
6120   awp = (Asn2gbWorkPtr) context->userdata;
6121 
6122   from = context->cumOffset;
6123   to = from + context->to - context->from;
6124 
6125   sip = SeqLocId (slp);
6126   if (sip == NULL) {
6127     loc = SeqLocFindNext (slp, NULL);
6128     if (loc != NULL) {
6129       sip = SeqLocId (loc);
6130     }
6131   }
6132   if (sip == NULL) return TRUE;
6133 
6134   /* reference descriptors only on parts within entity */
6135 
6136   sep = GetTopSeqEntryForEntityID (awp->entityID);
6137   oldscope = SeqEntrySetScope (sep);
6138   bsp = BioseqFind (sip);
6139   SeqEntrySetScope (oldscope);
6140 
6141   if (bsp != NULL) {
6142     GetRefsOnBioseq (awp, awp->refs, bsp, from, to, NULL, NULL);
6143     return TRUE;
6144   }
6145 
6146   /* if we ever want to fetch remote references, code goes here */
6147 
6148   return TRUE;
6149 }
6150 
AddReferenceBlock(Asn2gbWorkPtr awp,Boolean isRefSeq)6151 NLM_EXTERN Boolean AddReferenceBlock (
6152   Asn2gbWorkPtr awp,
6153   Boolean isRefSeq
6154 )
6155 
6156 {
6157   IntAsn2gbJobPtr    ajp;
6158   AuthListPtr        alp;
6159   Asn2gbSectPtr      asp;
6160   BioseqPtr          bsp;
6161   SeqFeatPtr         cds;
6162   Boolean            combine;
6163   SeqMgrFeatContext  context;
6164   CitSubPtr          csp;
6165   BioseqPtr          dna;
6166   Boolean            excise;
6167   Int2               firstserial;
6168   ValNodePtr         head = NULL;
6169   Int2               i = 0;
6170   IntRefBlockPtr     irp;
6171   Boolean            is_aa;
6172   Boolean            is_ddbj = FALSE;
6173   Boolean            is_embl = FALSE;
6174   Boolean            is_patent = FALSE;
6175   Int2               j;
6176   IntRefBlockPtr     lastirp;
6177   RefBlockPtr        lastrbp;
6178   ValNodePtr         next;
6179   Int2               numReferences;
6180   ValNodePtr         PNTR prev;
6181   RefBlockPtr        rbp;
6182   RefBlockPtr        PNTR referenceArray;
6183   BioseqPtr          refs;
6184   SubmitBlockPtr     sbp;
6185   SeqIdPtr           sip;
6186   SeqLocPtr          slp;
6187   BioseqPtr          target;
6188   ValNodePtr         vnp;
6189 
6190   if (awp == NULL) return FALSE;
6191   ajp = awp->ajp;
6192   if (ajp == NULL) return FALSE;
6193   asp = awp->asp;
6194   if (asp == NULL) return FALSE;
6195   bsp = awp->bsp;
6196   refs = awp->refs;
6197   if (bsp == NULL || refs == NULL) return FALSE;
6198 
6199   /* collect publications on bioseq */
6200 
6201   awp->pubhead = NULL;
6202   GetRefsOnBioseq (awp, bsp, refs, awp->from, awp->to, NULL, NULL);
6203   target = bsp;
6204 
6205   for (sip = bsp->id; sip != NULL; sip = sip->next) {
6206     if (sip->choice == SEQID_EMBL) {
6207       is_embl = TRUE;
6208     } else if (sip->choice == SEQID_DDBJ) {
6209       is_ddbj = TRUE;
6210     } else if (sip->choice == SEQID_PATENT) {
6211       is_patent = TRUE;
6212     }
6213   }
6214 
6215   is_aa = (Boolean) ISA_aa (bsp->mol);
6216 
6217   if (bsp->repr == Seq_repr_seg) {
6218 
6219     /* collect publication descriptors on local parts */
6220 
6221     SeqMgrExploreSegments (bsp, (Pointer) awp, GetRefsOnSeg);
6222     target = awp->refs;
6223   }
6224 
6225   if (awp->pubhead == NULL && ISA_aa (bsp->mol)) {
6226 
6227     /* if protein with no pubs, get pubs applicable to DNA location of CDS */
6228 
6229     cds = SeqMgrGetCDSgivenProduct (bsp, &context);
6230     if (cds != NULL) {
6231       dna = BioseqFindFromSeqLoc (cds->location);
6232       if (dna != NULL) {
6233         GetRefsOnBioseq (awp, dna, dna, context.left, context.right, cds->location, bsp);
6234         target = dna;
6235       }
6236     }
6237   }
6238 
6239   head = awp->pubhead;
6240   awp->pubhead = NULL;
6241 
6242   if (head == NULL && awp->ssp == NULL) return FALSE;
6243 
6244   /* sort by pub/unpub/sites/sub, then date, finally existing serial */
6245 
6246   if (isRefSeq) {
6247     head = ValNodeSort (head, SortReferencesAR);
6248   } else {
6249     head = ValNodeSort (head, SortReferencesA);
6250   }
6251 
6252   if (awp->ssp != NULL && (! awp->onlyGeneRIFs) && (! awp->onlyReviewPubs)) {
6253 
6254     /* add seq-submit citation */
6255 
6256     rbp = (RefBlockPtr) MemNew (sizeof (IntRefBlock));
6257     if (rbp != NULL) {
6258       irp = (IntRefBlockPtr) rbp;
6259 
6260       rbp->blocktype = REFERENCE_BLOCK;
6261       rbp->section = awp->currsection;
6262       rbp->serial = INT2_MAX;
6263       rbp->category = REF_CAT_SUB;
6264 
6265       rbp->entityID = ajp->ajp.entityID;
6266       rbp->itemID = 1;
6267       rbp->itemtype = OBJ_SEQSUB_CIT;
6268 
6269       sbp = awp->ssp->sub;
6270       if (sbp != NULL) {
6271         csp = sbp->cit;
6272         if (csp != NULL) {
6273           alp = GetAuthListPtr (NULL, csp);
6274           if (alp != NULL) {
6275             irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
6276           }
6277           if (csp->date != NULL) {
6278             irp->date = DateDup (csp->date);
6279           }
6280         }
6281       }
6282 
6283       if (awp->citSubsFirst) {
6284 
6285         /* for DDBJ, add seq-submit citation to beginning of list */
6286 
6287         vnp = ValNodeNew (NULL);
6288         if (vnp != NULL) {
6289           vnp->choice = 0;
6290           vnp->data.ptrvalue = (VoidPtr) rbp;
6291           vnp->next = head;
6292           head = vnp;
6293         }
6294 
6295       } else {
6296 
6297         /* for GENBANK and EMBL add seq-submit citation to end of list */
6298 
6299         ValNodeAddPointer (&head, 0, rbp);
6300       }
6301     }
6302   }
6303 
6304   /* unique references, excise duplicates from list */
6305 
6306   prev = &(head);
6307   vnp = head;
6308   lastrbp = NULL;
6309   while (vnp != NULL) {
6310     excise = FALSE;
6311     combine = TRUE;
6312     next = vnp->next;
6313     rbp = (RefBlockPtr) vnp->data.ptrvalue;
6314     if (lastrbp != NULL) {
6315       lastirp = (IntRefBlockPtr) lastrbp;
6316       if (rbp != NULL) {
6317         irp = (IntRefBlockPtr) rbp;
6318         if (lastrbp->pmid != 0 && rbp->pmid != 0) {
6319           if (lastrbp->pmid == rbp->pmid) {
6320             if (lastirp->right + 1 >= irp->left) {
6321               excise = TRUE;
6322             }
6323           }
6324         } else if (lastrbp->muid != 0 && rbp->muid != 0) {
6325           if (lastrbp->muid == rbp->muid) {
6326             if (lastirp->right + 1 >= irp->left) {
6327               excise = TRUE;
6328             }
6329           }
6330         } else if (lastrbp->uniquestr != NULL && rbp->uniquestr != NULL) {
6331           if (StringICmp (lastrbp->uniquestr, rbp->uniquestr) == 0) {
6332             if (SeqLocCompare (irp->loc, lastirp->loc) == SLC_A_EQ_B) {
6333               if (StringICmp (irp->authstr, lastirp->authstr) == 0) {
6334 
6335                 /* L76496.1 - removing duplicate submission pubs */
6336                 if (lastirp->right + 1 >= irp->left) {
6337                   excise = TRUE;
6338                 }
6339               }
6340             }
6341           }
6342         }
6343         if (excise && lastrbp->sites == 0 && rbp->sites > 0) {
6344           /* real range trumps sites */
6345           combine = FALSE;
6346         }
6347       }
6348     }
6349     if (rbp != NULL) {
6350       irp = (IntRefBlockPtr) rbp;
6351       if (irp->justuids) {
6352         if (isRefSeq && is_aa) {
6353           /* if allowing justuid in protein RefSeq, try to look up dynamically */
6354           excise = TRUE; /* Back to old behavior, do not fetch */
6355         } else {
6356           /* do not allow justuids reference to appear by itself - S79174.1 */
6357           excise = TRUE;
6358           /* justuids should still combine, even if no authors - S67070.1 */
6359         }
6360       } else if (is_embl && is_patent) {
6361         /* EMBL patent records do not need author or title - A29528.1 */
6362       } else if (StringHasNoText (irp->authstr)) {
6363         /* do not allow no author reference to appear by itself - U07000.1 */
6364         excise = TRUE;
6365         combine = FALSE;
6366       } else if (isRefSeq && is_aa && rbp->category == REF_CAT_SUB) {
6367         /* GenPept RefSeq suppresses cit-subs */
6368         excise = TRUE;
6369         combine = FALSE;
6370       }
6371     }
6372     if (awp->mode == DUMP_MODE) {
6373       excise = FALSE;
6374     }
6375     /* do not hide duplicate EMBL and DDBJ publications */
6376     if (is_embl || is_ddbj) {
6377       excise = FALSE;
6378       combine = TRUE;
6379     }
6380     /* does not fuse equivalent publication features for local, general, refseq, and 2+6 genbank ids */
6381     if (excise && awp->sourcePubFuse) {
6382       *prev = vnp->next;
6383       vnp->next = NULL;
6384 
6385       /* combine locations of duplicate references */
6386 
6387       irp = (IntRefBlockPtr) rbp;
6388       lastirp = (IntRefBlockPtr) lastrbp;
6389       if (combine) {
6390         if (lastirp != NULL) {
6391           slp = SeqLocMerge (target, lastirp->loc, irp->loc, FALSE, TRUE, FALSE);
6392           lastirp->loc = SeqLocFree (lastirp->loc);
6393           lastirp->loc = slp;
6394         }
6395         if (irp != NULL && lastirp != NULL) {
6396           if ((rbp->muid == lastrbp->muid && rbp->muid != 0) ||
6397               (rbp->pmid == lastrbp->pmid && rbp->pmid != 0)) {
6398             if (lastirp->fig == NULL) {
6399               lastirp->fig = StringSaveNoNull (irp->fig);
6400             }
6401             if (lastirp->maploc == NULL) {
6402               lastirp->maploc = StringSaveNoNull (irp->maploc);
6403             }
6404             lastirp->poly_a = irp->poly_a;
6405           }
6406         }
6407       }
6408 
6409       /* and remove duplicate reference */
6410 
6411       MemFree (rbp->uniquestr);
6412       DateFree (irp->date);
6413       SeqLocFree (irp->loc);
6414       MemFree (irp->authstr);
6415       MemFree (irp->fig);
6416       MemFree (irp->maploc);
6417       MemFree (rbp);
6418       ValNodeFree (vnp);
6419 
6420     } else {
6421 
6422       prev = &(vnp->next);
6423       lastrbp = rbp;
6424     }
6425     vnp = next;
6426   }
6427 
6428   /* resort by existing serial, then pub/unpub/sites/sub, then date */
6429 
6430   if (isRefSeq) {
6431     head = ValNodeSort (head, SortReferencesBR);
6432   } else {
6433     head = ValNodeSort (head, SortReferencesB);
6434   }
6435 
6436   if (head == NULL) return FALSE;
6437 
6438   /* if taking newest publications, free remainder */
6439 
6440   if (awp->newestPubs) {
6441     for (vnp = head, i = 1; vnp != NULL && i < 5; vnp = vnp->next, i++) continue;
6442     if (vnp != NULL) {
6443       next = vnp->next;
6444       vnp->next = NULL;
6445       for (vnp = next; vnp != NULL; vnp = vnp->next) {
6446         rbp = (RefBlockPtr) vnp->data.ptrvalue;
6447         MemFree (rbp->uniquestr);
6448         irp = (IntRefBlockPtr) rbp;
6449         DateFree (irp->date);
6450         SeqLocFree (irp->loc);
6451         MemFree (irp->authstr);
6452         MemFree (irp->fig);
6453         MemFree (irp->maploc);
6454         MemFree (rbp);
6455       }
6456     }
6457 
6458   /* if taking oldest publications, free remainder */
6459 
6460   } else if (awp->oldestPubs) {
6461     for (vnp = head, j = 0; vnp != NULL; vnp = vnp->next, j++) continue;
6462     if (j > 5) {
6463       for (vnp = head, i = 0; vnp != NULL && i < j - 6; vnp = vnp->next, i++) continue;
6464       if (vnp != NULL) {
6465         next = vnp->next;
6466         vnp->next = NULL;
6467         for (vnp = head; vnp != NULL; vnp = vnp->next) {
6468           rbp = (RefBlockPtr) vnp->data.ptrvalue;
6469           MemFree (rbp->uniquestr);
6470           irp = (IntRefBlockPtr) rbp;
6471           DateFree (irp->date);
6472           SeqLocFree (irp->loc);
6473           MemFree (irp->authstr);
6474           MemFree (irp->fig);
6475           MemFree (irp->maploc);
6476           MemFree (rbp);
6477         }
6478         head = next;
6479       }
6480     }
6481   }
6482 
6483   /* assign serial numbers */
6484 
6485   firstserial = 1;
6486 
6487   /* first find highest one assigned by EMBL/SWISS-PROT */
6488 
6489   for (vnp = head; vnp != NULL; vnp = vnp->next) {
6490     rbp = (RefBlockPtr) vnp->data.ptrvalue;
6491     if (rbp == NULL) continue;
6492     if (rbp->serial > 0 && rbp->serial < INT2_MAX) {
6493       firstserial = rbp->serial + 1;
6494     }
6495   }
6496 
6497   /* then give increasing serial numbers to unassigned publications */
6498 
6499   for (vnp = head; vnp != NULL; vnp = vnp->next) {
6500     rbp = (RefBlockPtr) vnp->data.ptrvalue;
6501     if (rbp == NULL) continue;
6502     if (rbp->serial > 0 && rbp->serial < INT2_MAX) continue;
6503     rbp->serial = firstserial;
6504     firstserial++;
6505   }
6506 
6507   /* allocate reference array for this section */
6508 
6509   numReferences = ValNodeLen (head);
6510   asp->numReferences = numReferences;
6511 
6512   if (numReferences > 0) {
6513     referenceArray = (RefBlockPtr PNTR) MemNew (sizeof (RefBlockPtr) * (numReferences + 1));
6514     asp->referenceArray = referenceArray;
6515 
6516     if (referenceArray != NULL) {
6517 
6518       /* fill in reference array */
6519 
6520       for (vnp = head, i = 0; vnp != NULL && i < numReferences; vnp = vnp->next, i++) {
6521         referenceArray [i] = (RefBlockPtr) vnp->data.ptrvalue;
6522       }
6523     }
6524   }
6525 
6526   /* finally link into blocks for current section */
6527 
6528   ValNodeLink (&(awp->lastblock), head);
6529   vnp = awp->lastblock;
6530   if (vnp == NULL) return FALSE;
6531   while (vnp->next != NULL) {
6532     vnp = vnp->next;
6533   }
6534 
6535   awp->lastblock = vnp;
6536   if (awp->blockList == NULL) {
6537     awp->blockList = vnp;
6538   }
6539 
6540   if (awp->afp != NULL) {
6541     for (vnp = head; vnp != NULL; vnp = vnp->next) {
6542       rbp = (RefBlockPtr) vnp->data.ptrvalue;
6543       if (rbp == NULL) continue;
6544       DoImmediateFormat (awp->afp, (BaseBlockPtr) rbp);
6545     }
6546   }
6547 
6548   return TRUE;
6549 }
6550 
AddRefStatsBlock(Asn2gbWorkPtr awp)6551 NLM_EXTERN void AddRefStatsBlock (
6552   Asn2gbWorkPtr awp
6553 )
6554 
6555 {
6556   IntAsn2gbJobPtr  ajp;
6557   BaseBlockPtr     bbp;
6558   BioseqPtr        bsp;
6559   StringItemPtr    ffstring;
6560 
6561   if (awp == NULL) return;
6562   ajp = awp->ajp;
6563   if ( ajp == NULL ) return;
6564   bsp = awp->bsp;
6565   if (bsp == NULL) return;
6566 
6567   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6568 
6569   bbp = Asn2gbAddBlock (awp, REF_STATS_BLOCK, sizeof (BaseBlock));
6570   if (bbp != NULL) {
6571     ffstring = FFGetString (ajp);
6572     if (ffstring != NULL) {
6573       FFStartPrint (ffstring, awp->format, 0, 12, "REFSTATS", 12, 0, 0, NULL, FALSE);
6574 
6575       FFAddOneString (ffstring, "placeholder", FALSE, FALSE, TILDE_TO_SPACES);
6576 
6577       bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
6578       FFRecycleString(ajp, ffstring);
6579     }
6580 
6581     if (awp->afp != NULL) {
6582       DoImmediateFormat (awp->afp, bbp);
6583     }
6584   }
6585 }
6586 
AddWGSBlock(Asn2gbWorkPtr awp)6587 NLM_EXTERN void AddWGSBlock (
6588   Asn2gbWorkPtr awp
6589 )
6590 
6591 {
6592   IntAsn2gbJobPtr    ajp;
6593   Asn2gbSectPtr      asp;
6594   BaseBlockPtr       bbp;
6595   BioseqPtr          bsp;
6596   Char               buf [128];
6597   SeqMgrDescContext  dcontext;
6598   CharPtr            first;
6599   GBAltSeqItemPtr    gbaip;
6600   GBAltSeqDataPtr    gbasp, asphead = NULL, asplast = NULL;
6601   GBSeqPtr           gbseq;
6602   CharPtr            last;
6603   ObjectIdPtr        oip;
6604   SeqDescrPtr        sdp;
6605   UserFieldPtr       ufp;
6606   UserObjectPtr      uop;
6607   Int2               wgstype;
6608   StringItemPtr      ffstring;
6609 
6610   if (awp == NULL) return;
6611   ajp = awp->ajp;
6612   if ( ajp == NULL ) return;
6613   bsp = awp->bsp;
6614   if (bsp == NULL) return;
6615   asp = awp->asp;
6616   if (asp == NULL) return;
6617 
6618   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6619 
6620   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6621       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6622     sprintf (buf, "<a name=\"wgs_%s\"></a>", awp->currAccVerLabel);
6623     DoQuickLinkFormat (awp->afp, buf);
6624   }
6625 
6626   if (ajp->gbseq) {
6627     gbseq = &asp->gbseq;
6628   } else {
6629     gbseq = NULL;
6630   }
6631 
6632   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
6633   while (sdp != NULL) {
6634     uop = (UserObjectPtr) sdp->data.ptrvalue;
6635     if (uop != NULL) {
6636       oip = uop->type;
6637       first = NULL;
6638       last = NULL;
6639       wgstype = 0;
6640       if (oip != NULL) {
6641         if (StringICmp (oip->str, "WGSProjects") == 0) {
6642           wgstype = 1;
6643         } else if (StringICmp (oip->str, "WGS-Scaffold-List") == 0) {
6644           wgstype = 2;
6645         } else if (StringICmp (oip->str, "WGS-Contig-List") == 0) {
6646           wgstype = 3;
6647         }
6648         if (wgstype != 0) {
6649           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6650             oip = ufp->label;
6651             if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
6652             if (StringICmp (oip->str, "WGS_accession_first") == 0) {
6653               first = (CharPtr) ufp->data.ptrvalue;
6654             } else if (StringICmp (oip->str, "WGS_accession_last") == 0) {
6655               last = (CharPtr) ufp->data.ptrvalue;
6656             } else if (StringICmp (oip->str, "Accession_first") == 0) {
6657               first = (CharPtr) ufp->data.ptrvalue;
6658             } else if (StringICmp (oip->str, "Accession_last") == 0) {
6659               last = (CharPtr) ufp->data.ptrvalue;
6660             }
6661           }
6662           if (first != NULL && last != NULL) {
6663             if (gbseq != NULL) {
6664               gbasp = (GBAltSeqDataPtr) MemNew (sizeof (GBAltSeqData));
6665               if (gbasp != NULL) {
6666                 if (asphead == NULL) {
6667                   asphead = gbasp;
6668                 }
6669                 if (asplast != NULL) {
6670                   asplast->next = gbasp;
6671                 }
6672                 asplast = gbasp;
6673                 if (wgstype == 1) {
6674                   gbasp->name = StringSave ("WGS");
6675                 } else if (wgstype == 2) {
6676                   gbasp->name = StringSave ("WGS_SCAFLD");
6677                 } else if (wgstype == 3) {
6678                   gbasp->name = StringSave ("WGS_CONTIG");
6679                 }
6680                 gbaip = GBAltSeqItemNew ();
6681                 if (gbaip != NULL) {
6682                   gbaip->first_accn = StringSave (first);
6683                   if (StringCmp (first, last) != 0) {
6684                     gbaip->last_accn = StringSave (last);
6685                   }
6686                   gbasp->items = gbaip;
6687                 }
6688               }
6689             }
6690             bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
6691             if (bbp != NULL) {
6692               ffstring = FFGetString (ajp);
6693               if (ffstring != NULL) {
6694                 if (wgstype == 1) {
6695                   FFStartPrint (ffstring, awp->format, 0, 12, "WGS", 12, 0, 0, NULL, FALSE);
6696                 } else if (wgstype == 2) {
6697                   FFStartPrint (ffstring, awp->format, 0, 12, "WGS_SCAFLD", 12, 0, 0, NULL, FALSE);
6698                 } else if (wgstype == 3) {
6699                   FFStartPrint (ffstring, awp->format, 0, 12, "WGS_CONTIG", 12, 0, 0, NULL, FALSE);
6700                 }
6701 
6702                 if ( GetWWW(ajp) ) {
6703                   if (StringCmp (first, last) != 0) {
6704                     if (wgstype == 1) {
6705                       FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6706                       FF_Add_NCBI_Base_URL (ffstring, link_wgs);
6707                       StringCpy (buf, first);
6708                       if (buf [2] == '_') {
6709                         buf [9] = '\0';
6710                       } else {
6711                         buf [6] = '\0';
6712                       }
6713                       FFAddTextToString(ffstring, "val=", buf, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
6714                       sprintf (buf, "%s-%s", first, last);
6715                       FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6716                       FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6717                     } else if (wgstype == 2) {
6718                       FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6719                       if (StringLen (first) > 7 && first [6] == 'S') {
6720                         FF_Add_NCBI_Base_URL (ffstring, link_wgs);
6721                         StringCpy (buf, first);
6722                         if (buf [2] == '_') {
6723                           buf [9] = '\0';
6724                         } else {
6725                           buf [6] = '\0';
6726                         }
6727                         FFAddTextToString(ffstring, "val=", buf, "#scaffolds\">", FALSE, FALSE, TILDE_IGNORE);
6728                         sprintf (buf, "%s-%s", first, last);
6729                         FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6730                       } else {
6731                         FF_Add_NCBI_Base_URL (ffstring, link_wgsscaf);
6732                         sprintf (buf, "%s:%s", first, last);
6733                         FFAddTextToString(ffstring, "term=", buf, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
6734                         sprintf (buf, "%s-%s", first, last);
6735                         FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6736                       }
6737                       FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6738                     } else if (wgstype == 3) {
6739                       FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6740                       FF_Add_NCBI_Base_URL (ffstring, link_wgsscaf);
6741                       sprintf (buf, "%s:%s", first, last);
6742                       FFAddTextToString(ffstring, "term=", buf, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
6743                       sprintf (buf, "%s-%s", first, last);
6744                       FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6745                       FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6746                     }
6747                   } else {
6748                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6749                     FF_Add_NCBI_Base_URL (ffstring, link_seqn);
6750                     FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
6751                     sprintf (buf, "%s", first);
6752                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6753                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6754                   }
6755                 } else {
6756                   if (StringCmp (first, last) != 0) {
6757                     sprintf (buf, "%s-%s", first, last);
6758                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6759                   } else {
6760                     sprintf (buf, "%s", first);
6761                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6762                   }
6763                 }
6764 
6765                 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
6766                 FFRecycleString(ajp, ffstring);
6767               }
6768 
6769               bbp->entityID = dcontext.entityID;
6770               bbp->itemtype = OBJ_SEQDESC;
6771               bbp->itemID = dcontext.itemID;
6772               if (awp->afp != NULL) {
6773                 DoImmediateFormat (awp->afp, bbp);
6774               }
6775             }
6776           }
6777         }
6778       }
6779     }
6780     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
6781   }
6782 
6783   if (gbseq != NULL) {
6784     gbseq->alt_seq = asphead;
6785   }
6786 }
6787 
AddTLSBlock(Asn2gbWorkPtr awp)6788 NLM_EXTERN void AddTLSBlock (
6789   Asn2gbWorkPtr awp
6790 )
6791 
6792 {
6793   IntAsn2gbJobPtr    ajp;
6794   Asn2gbSectPtr      asp;
6795   BaseBlockPtr       bbp;
6796   BioseqPtr          bsp;
6797   Char               buf [128];
6798   SeqMgrDescContext  dcontext;
6799   CharPtr            first;
6800   CharPtr            last;
6801   ObjectIdPtr        oip;
6802   SeqDescrPtr        sdp;
6803   Char               tls [32];
6804   UserFieldPtr       ufp;
6805   UserObjectPtr      uop;
6806   StringItemPtr      ffstring;
6807 
6808   if (awp == NULL) return;
6809   ajp = awp->ajp;
6810   if ( ajp == NULL ) return;
6811   bsp = awp->bsp;
6812   if (bsp == NULL) return;
6813   asp = awp->asp;
6814   if (asp == NULL) return;
6815 
6816   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6817 
6818   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
6819   while (sdp != NULL) {
6820     uop = (UserObjectPtr) sdp->data.ptrvalue;
6821     if (uop != NULL) {
6822       oip = uop->type;
6823       first = NULL;
6824       last = NULL;
6825       if (oip != NULL) {
6826         if (StringICmp (oip->str, "TLSProjects") == 0) {
6827           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6828             oip = ufp->label;
6829             if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
6830             if (StringICmp (oip->str, "TLS_accession_first") == 0) {
6831               first = (CharPtr) ufp->data.ptrvalue;
6832             } else if (StringICmp (oip->str, "TLS_accession_last") == 0) {
6833               last = (CharPtr) ufp->data.ptrvalue;
6834             }
6835           }
6836           if (first != NULL && last != NULL) {
6837             bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
6838             if (bbp != NULL) {
6839               ffstring = FFGetString (ajp);
6840               if (ffstring != NULL) {
6841                 FFStartPrint (ffstring, awp->format, 0, 12, "TLS", 12, 0, 0, NULL, FALSE);
6842 
6843                 if ( GetWWW(ajp) ) {
6844                   StringNCpy_0 (tls, first, sizeof (tls));
6845                   tls [6] = '\0';
6846                   if (StringCmp (first, last) != 0) {
6847                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6848                     FF_Add_NCBI_Base_URL (ffstring, link_tls);
6849                     FFAddTextToString(ffstring, "val=", tls, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
6850                     sprintf (buf, "%s-%s", first, last);
6851                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6852                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6853                   } else {
6854                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6855                     FF_Add_NCBI_Base_URL (ffstring, link_tls);
6856                     FFAddTextToString(ffstring, "val=", tls, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
6857                     sprintf (buf, "%s", first);
6858                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6859                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6860                   }
6861                 } else {
6862                   if (StringCmp (first, last) != 0) {
6863                     sprintf (buf, "%s-%s", first, last);
6864                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6865                   } else {
6866                     sprintf (buf, "%s", first);
6867                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6868                   }
6869                 }
6870 
6871                 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
6872                 FFRecycleString(ajp, ffstring);
6873               }
6874 
6875               bbp->entityID = dcontext.entityID;
6876               bbp->itemtype = OBJ_SEQDESC;
6877               bbp->itemID = dcontext.itemID;
6878               if (awp->afp != NULL) {
6879                 DoImmediateFormat (awp->afp, bbp);
6880               }
6881             }
6882           }
6883         }
6884       }
6885     }
6886     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
6887   }
6888 }
6889 
AddTSABlock(Asn2gbWorkPtr awp)6890 NLM_EXTERN void AddTSABlock (
6891   Asn2gbWorkPtr awp
6892 )
6893 
6894 {
6895   IntAsn2gbJobPtr    ajp;
6896   Asn2gbSectPtr      asp;
6897   BaseBlockPtr       bbp;
6898   BioseqPtr          bsp;
6899   Char               buf [128];
6900   SeqMgrDescContext  dcontext;
6901   CharPtr            first;
6902   GBAltSeqItemPtr    gbaip;
6903   GBAltSeqDataPtr    gbasp, asphead = NULL, asplast = NULL;
6904   GBSeqPtr           gbseq;
6905   CharPtr            last;
6906   ObjectIdPtr        oip;
6907   SeqDescrPtr        sdp;
6908   Int2               tsatype;
6909   UserFieldPtr       ufp;
6910   UserObjectPtr      uop;
6911   StringItemPtr      ffstring;
6912 
6913   if (awp == NULL) return;
6914   ajp = awp->ajp;
6915   if ( ajp == NULL ) return;
6916   bsp = awp->bsp;
6917   if (bsp == NULL) return;
6918   asp = awp->asp;
6919   if (asp == NULL) return;
6920 
6921   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6922 
6923   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6924       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6925     sprintf (buf, "<a name=\"wgs_%s\"></a>", awp->currAccVerLabel);
6926     DoQuickLinkFormat (awp->afp, buf);
6927   }
6928 
6929   if (ajp->gbseq) {
6930     gbseq = &asp->gbseq;
6931   } else {
6932     gbseq = NULL;
6933   }
6934 
6935   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
6936   while (sdp != NULL) {
6937     uop = (UserObjectPtr) sdp->data.ptrvalue;
6938     if (uop != NULL) {
6939       oip = uop->type;
6940       first = NULL;
6941       last = NULL;
6942       tsatype = 0;
6943       if (oip != NULL) {
6944         if (StringICmp (oip->str, "TSA-mRNA-List") == 0 || StringICmp (oip->str, "TSA-RNA-List") == 0) {
6945           tsatype = 1;
6946         }
6947         if (tsatype != 0) {
6948           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6949             oip = ufp->label;
6950             if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
6951             if (StringICmp (oip->str, "TSA_accession_first") == 0) {
6952               first = (CharPtr) ufp->data.ptrvalue;
6953             } else if (StringICmp (oip->str, "TSA_accession_last") == 0) {
6954               last = (CharPtr) ufp->data.ptrvalue;
6955             }
6956           }
6957           if (first != NULL && last != NULL) {
6958             if (gbseq != NULL) {
6959               gbasp = (GBAltSeqDataPtr) MemNew (sizeof (GBAltSeqData));
6960               if (gbasp != NULL) {
6961                 if (asphead == NULL) {
6962                   asphead = gbasp;
6963                 }
6964                 if (asplast != NULL) {
6965                   asplast->next = gbasp;
6966                 }
6967                 asplast = gbasp;
6968                 if (tsatype == 1) {
6969                   gbasp->name = StringSave ("TSA");
6970                 }
6971                 gbaip = GBAltSeqItemNew ();
6972                 if (gbaip != NULL) {
6973                   gbaip->first_accn = StringSave (first);
6974                   if (StringCmp (first, last) != 0) {
6975                     gbaip->last_accn = StringSave (last);
6976                   }
6977                   gbasp->items = gbaip;
6978                 }
6979               }
6980             }
6981             bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
6982             if (bbp != NULL) {
6983               ffstring = FFGetString (ajp);
6984               if (ffstring != NULL) {
6985                 if (tsatype == 1) {
6986                   FFStartPrint (ffstring, awp->format, 0, 12, "TSA", 12, 0, 0, NULL, FALSE);
6987                 }
6988 
6989                 if ( GetWWW(ajp) ) {
6990                   if (StringCmp (first, last) != 0) {
6991                     /*
6992                     sprintf (buf, "%s-%s", first, last);
6993                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6994                     */
6995                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6996                     FF_Add_NCBI_Base_URL (ffstring, link_tsa);
6997                     StringCpy (buf, first);
6998                     if (buf [2] == '_') {
6999                       buf [9] = '\0';
7000                     } else {
7001                       buf [6] = '\0';
7002                     }
7003                     FFAddTextToString(ffstring, "val=", buf, "\">", FALSE, FALSE, TILDE_IGNORE);
7004                     sprintf (buf, "%s-%s", first, last);
7005                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7006                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
7007                   } else {
7008                     /*
7009                     sprintf (buf, "%s", first);
7010                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7011                     */
7012                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
7013                     FF_Add_NCBI_Base_URL (ffstring, link_seqn);
7014                     FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
7015                     sprintf (buf, "%s", first);
7016                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7017                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
7018                   }
7019                 } else {
7020                   if (StringCmp (first, last) != 0) {
7021                     sprintf (buf, "%s-%s", first, last);
7022                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7023                   } else {
7024                     sprintf (buf, "%s", first);
7025                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7026                   }
7027                 }
7028 
7029                 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
7030                 FFRecycleString(ajp, ffstring);
7031               }
7032 
7033               bbp->entityID = dcontext.entityID;
7034               bbp->itemtype = OBJ_SEQDESC;
7035               bbp->itemID = dcontext.itemID;
7036               if (awp->afp != NULL) {
7037                 DoImmediateFormat (awp->afp, bbp);
7038               }
7039             }
7040           }
7041         }
7042       }
7043     }
7044     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
7045   }
7046 
7047   if (gbseq != NULL) {
7048     gbseq->alt_seq = asphead;
7049   }
7050 }
7051 
AddCAGEBlock(Asn2gbWorkPtr awp)7052 NLM_EXTERN void AddCAGEBlock (
7053   Asn2gbWorkPtr awp
7054 )
7055 
7056 {
7057   IntAsn2gbJobPtr    ajp;
7058   Asn2gbSectPtr      asp;
7059   BaseBlockPtr       bbp;
7060   BioseqPtr          bsp;
7061   Char               buf [128];
7062   Int2               cagetype;
7063   SeqMgrDescContext  dcontext;
7064   CharPtr            first;
7065   GBAltSeqItemPtr    gbaip;
7066   GBAltSeqDataPtr    gbasp, asphead = NULL, asplast = NULL;
7067   GBSeqPtr           gbseq;
7068   CharPtr            last;
7069   ObjectIdPtr        oip;
7070   SeqDescrPtr        sdp;
7071   UserFieldPtr       ufp;
7072   UserObjectPtr      uop;
7073   StringItemPtr      ffstring;
7074 
7075   if (awp == NULL) return;
7076   ajp = awp->ajp;
7077   if ( ajp == NULL ) return;
7078   bsp = awp->bsp;
7079   if (bsp == NULL) return;
7080   asp = awp->asp;
7081   if (asp == NULL) return;
7082 
7083   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
7084 
7085   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7086       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7087     sprintf (buf, "<a name=\"wgs_%s\"></a>", awp->currAccVerLabel);
7088     DoQuickLinkFormat (awp->afp, buf);
7089   }
7090 
7091   if (ajp->gbseq) {
7092     gbseq = &asp->gbseq;
7093   } else {
7094     gbseq = NULL;
7095   }
7096 
7097   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
7098   while (sdp != NULL) {
7099     uop = (UserObjectPtr) sdp->data.ptrvalue;
7100     if (uop != NULL) {
7101       oip = uop->type;
7102       first = NULL;
7103       last = NULL;
7104       cagetype = 0;
7105       if (oip != NULL) {
7106         if (StringICmp (oip->str, "CAGE-Tag-List") == 0) {
7107           cagetype = 1;
7108         }
7109         if (cagetype != 0) {
7110           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
7111             oip = ufp->label;
7112             if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
7113             if (StringICmp (oip->str, "CAGE_accession_first") == 0) {
7114               first = (CharPtr) ufp->data.ptrvalue;
7115             } else if (StringICmp (oip->str, "CAGE_accession_last") == 0) {
7116               last = (CharPtr) ufp->data.ptrvalue;
7117             }
7118           }
7119           if (first != NULL && last != NULL) {
7120             if (gbseq != NULL) {
7121               gbasp = (GBAltSeqDataPtr) MemNew (sizeof (GBAltSeqData));
7122               if (gbasp != NULL) {
7123                 if (asphead == NULL) {
7124                   asphead = gbasp;
7125                 }
7126                 if (asplast != NULL) {
7127                   asplast->next = gbasp;
7128                 }
7129                 asplast = gbasp;
7130                 if (cagetype == 1) {
7131                   gbasp->name = StringSave ("TAG");
7132                 }
7133                 gbaip = GBAltSeqItemNew ();
7134                 if (gbaip != NULL) {
7135                   gbaip->first_accn = StringSave (first);
7136                   if (StringCmp (first, last) != 0) {
7137                     gbaip->last_accn = StringSave (last);
7138                   }
7139                   gbasp->items = gbaip;
7140                 }
7141               }
7142             }
7143             bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
7144             if (bbp != NULL) {
7145               ffstring = FFGetString (ajp);
7146               if (ffstring != NULL) {
7147                 if (cagetype == 1) {
7148                   FFStartPrint (ffstring, awp->format, 0, 12, "TAG", 12, 0, 0, NULL, FALSE);
7149                 }
7150 
7151                 if ( GetWWW(ajp) ) {
7152                   if (StringCmp (first, last) != 0) {
7153                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
7154                     FF_Add_NCBI_Base_URL (ffstring, link_cage);
7155                     FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", first, NULL, FALSE, FALSE, TILDE_IGNORE);
7156                     FFAddTextToString(ffstring, ":", last, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
7157                     sprintf (buf, "%s-%s", first, last);
7158                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7159                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
7160                   } else {
7161                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
7162                     FF_Add_NCBI_Base_URL (ffstring, link_seqn);
7163                     FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
7164                     sprintf (buf, "%s", first);
7165                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7166                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
7167                   }
7168                 } else {
7169                   if (StringCmp (first, last) != 0) {
7170                     sprintf (buf, "%s-%s", first, last);
7171                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7172                   } else {
7173                     sprintf (buf, "%s", first);
7174                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7175                   }
7176                 }
7177 
7178                 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
7179                 FFRecycleString(ajp, ffstring);
7180               }
7181 
7182               bbp->entityID = dcontext.entityID;
7183               bbp->itemtype = OBJ_SEQDESC;
7184               bbp->itemID = dcontext.itemID;
7185               if (awp->afp != NULL) {
7186                 DoImmediateFormat (awp->afp, bbp);
7187               }
7188             }
7189           }
7190         }
7191       }
7192     }
7193     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
7194   }
7195 
7196   if (gbseq != NULL) {
7197     gbseq->alt_seq = asphead;
7198   }
7199 }
7200 
AddGenomeBlock(Asn2gbWorkPtr awp)7201 NLM_EXTERN void AddGenomeBlock (
7202   Asn2gbWorkPtr awp
7203 )
7204 
7205 {
7206   CharPtr            accn;
7207   IntAsn2gbJobPtr    ajp;
7208   Asn2gbSectPtr      asp;
7209   BaseBlockPtr       bbp;
7210   BioseqPtr          bsp;
7211   Char               buf [128];
7212   SeqMgrDescContext  dcontext;
7213   Boolean            first = TRUE;
7214   GBAltSeqItemPtr    gbaip;
7215   GBAltSeqDataPtr    gbasp, asphead = NULL, asplast = NULL;
7216   GBSeqPtr           gbseq;
7217   CharPtr            moltype;
7218   ObjectIdPtr        oip;
7219   SeqDescrPtr        sdp;
7220   UserFieldPtr       ufp;
7221   UserObjectPtr      uop;
7222   UserFieldPtr       urf;
7223   StringItemPtr      ffstring;
7224 
7225   if (awp == NULL) return;
7226   ajp = awp->ajp;
7227   if ( ajp == NULL ) return;
7228   bsp = awp->bsp;
7229   if (bsp == NULL) return;
7230   asp = awp->asp;
7231   if (asp == NULL) return;
7232 
7233   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
7234 
7235   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7236       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7237     sprintf (buf, "<a name=\"genome_%s\"></a>", awp->currAccVerLabel);
7238     DoQuickLinkFormat (awp->afp, buf);
7239   }
7240 
7241   if (ajp->gbseq) {
7242     gbseq = &asp->gbseq;
7243   } else {
7244     gbseq = NULL;
7245   }
7246 
7247   bbp = Asn2gbAddBlock (awp, GENOME_BLOCK, sizeof (BaseBlock));
7248   if (bbp == NULL) return;
7249 
7250   ffstring = FFGetString(ajp);
7251   if ( ffstring == NULL ) return;
7252 
7253   FFStartPrint (ffstring, awp->format, 0, 12, "GENOME", 12, 0, 0, NULL, FALSE);
7254 
7255   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
7256   while (sdp != NULL) {
7257     uop = (UserObjectPtr) sdp->data.ptrvalue;
7258     if (uop != NULL) {
7259       oip = uop->type;
7260       if (oip != NULL && StringICmp (oip->str, "GenomeProject") == 0) {
7261         for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
7262           oip = ufp->label;
7263           if (oip == NULL || oip->str == NULL || ufp->choice != 11) continue;
7264           if (StringICmp (oip->str, "Chromosome") != 0) continue;
7265           accn = NULL;
7266           moltype = NULL;
7267           for (urf = (UserFieldPtr) ufp->data.ptrvalue; urf != NULL; urf = urf->next) {
7268             oip = urf->label;
7269             if (oip == NULL || oip->str == NULL || urf->choice != 1) continue;
7270             if (StringICmp (oip->str, "accession") == 0) {
7271               accn = (CharPtr) urf->data.ptrvalue;
7272             } else if (StringICmp (oip->str, "Moltype") == 0) {
7273               moltype = (CharPtr) urf->data.ptrvalue;
7274             }
7275           }
7276           if (! StringHasNoText (accn)) {
7277             if (gbseq != NULL) {
7278               gbasp = (GBAltSeqDataPtr) MemNew (sizeof (GBAltSeqData));
7279               if (gbasp != NULL) {
7280                 if (asphead == NULL) {
7281                   asphead = gbasp;
7282                 }
7283                 if (asplast != NULL) {
7284                   asplast->next = gbasp;
7285                 }
7286                 asplast = gbasp;
7287                 gbasp->name = StringSave ("GENOME");
7288                 gbaip = GBAltSeqItemNew ();
7289                 if (gbaip != NULL) {
7290                   if (! StringHasNoText (moltype)) {
7291                     sprintf (buf, "%s (%s)", accn, moltype);
7292                     gbaip->value = StringSave (buf);
7293                   } else {
7294                     sprintf (buf, "%s", accn);
7295                     gbaip->value = StringSave (buf);
7296                   }
7297                   gbasp->items = gbaip;
7298                 }
7299               }
7300             }
7301             if (! first) {
7302               FFAddNewLine(ffstring);
7303             }
7304             first = FALSE;
7305             FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
7306             if (! StringHasNoText (moltype)) {
7307               FFAddTextToString (ffstring, " (", moltype, ")", FALSE, FALSE, TILDE_TO_SPACES);
7308             }
7309           }
7310         }
7311       }
7312     }
7313     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
7314   }
7315 
7316   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
7317   FFRecycleString(ajp, ffstring);
7318 
7319   if (awp->afp != NULL) {
7320     DoImmediateFormat (awp->afp, bbp);
7321   }
7322 
7323   if (gbseq != NULL) {
7324     gbseq->alt_seq = asphead;
7325   }
7326 }
7327 
AddBasecountBlock(Asn2gbWorkPtr awp)7328 NLM_EXTERN void AddBasecountBlock (
7329   Asn2gbWorkPtr awp
7330 )
7331 
7332 {
7333   IntAsn2gbJobPtr  ajp;
7334   BaseBlockPtr     bbp;
7335   BioseqPtr        bsp;
7336 
7337   if (awp == NULL) return;
7338   ajp = awp->ajp;
7339   if (ajp == NULL) return;
7340   bsp = awp->bsp;
7341   if (bsp == NULL) return;
7342 
7343   bbp = Asn2gbAddBlock (awp, BASECOUNT_BLOCK, sizeof (BaseBlock));
7344   if (bbp == NULL) return;
7345 
7346   bbp->entityID = awp->entityID;
7347   bbp->itemtype = bsp->idx.itemtype;
7348   bbp->itemID = bsp->idx.itemID;
7349 
7350   if (awp->afp != NULL) {
7351     DoImmediateFormat (awp->afp, bbp);
7352   }
7353 }
7354 
AddOriginBlock(Asn2gbWorkPtr awp)7355 NLM_EXTERN void AddOriginBlock (
7356   Asn2gbWorkPtr awp
7357 )
7358 
7359 {
7360   IntAsn2gbJobPtr    ajp;
7361   BaseBlockPtr       bbp;
7362   BioseqPtr          bsp;
7363   Char               buf [67];
7364   SeqMgrDescContext  dcontext;
7365   GBBlockPtr         gbp;
7366   SeqDescrPtr        sdp;
7367   StringItemPtr      ffstring;
7368 
7369   if (awp == NULL) return;
7370   ajp = awp->ajp;
7371   if (ajp == NULL) return;
7372   bsp = awp->bsp;
7373   if (bsp == NULL) return;
7374 
7375   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
7376 
7377   ffstring = FFGetString(ajp);
7378   if ( ffstring == NULL ) return;
7379 
7380   bbp = Asn2gbAddBlock (awp, ORIGIN_BLOCK, sizeof (BaseBlock));
7381   if (bbp == NULL) return;
7382 
7383   bbp->entityID = awp->entityID;
7384 
7385   if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
7386 
7387     buf [0] = '\0';
7388 
7389     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
7390     if (sdp != NULL) {
7391       gbp = (GBBlockPtr) sdp->data.ptrvalue;
7392       if (gbp != NULL && (! StringHasNoText (gbp->origin))) {
7393         StringNCpy_0 (buf, gbp->origin, sizeof (buf));
7394         bbp->entityID = dcontext.entityID;
7395         bbp->itemID = dcontext.itemID;
7396         bbp->itemtype = OBJ_SEQDESC;
7397       }
7398     }
7399 
7400     FFStartPrint (ffstring, awp->format, 0, 12, "ORIGIN", 12, 0, 0, NULL, FALSE);
7401 
7402     if (! StringHasNoText (buf)) {
7403       FFAddOneString (ffstring, buf, TRUE, FALSE, TILDE_TO_SPACES);
7404     }
7405   }
7406 
7407   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 12, 0, 0, NULL);
7408   FFRecycleString(ajp, ffstring);
7409 
7410   if (awp->afp != NULL) {
7411     DoImmediateFormat (awp->afp, bbp);
7412   }
7413 }
7414 
7415 #define BASES_PER_BLOCK 1200
7416 
AddSequenceBlock(Asn2gbWorkPtr awp)7417 NLM_EXTERN void AddSequenceBlock (
7418   Asn2gbWorkPtr awp
7419 )
7420 
7421 {
7422   IntAsn2gbJobPtr  ajp;
7423   BioseqPtr        bsp;
7424   Char             buf [128];
7425   Int4             extend;
7426   Int4             len;
7427   SeqBlockPtr      sbp;
7428   Int4             start;
7429   Int4             stop;
7430 
7431   if (awp == NULL) return;
7432   ajp = awp->ajp;
7433   if (ajp == NULL) return;
7434   bsp = awp->bsp;
7435   if (bsp == NULL) return;
7436 
7437   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7438       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7439     sprintf (buf, "<a name=\"sequence_%s\"></a>", awp->currAccVerLabel);
7440     DoQuickLinkFormat (awp->afp, buf);
7441   }
7442 
7443   if (awp->slp != NULL) {
7444     len = SeqLocLen (awp->slp);
7445   } else {
7446     len = bsp->length;
7447   }
7448 
7449   /* if generating GBSeq XML, populate single sequence block */
7450 
7451   if (ajp->gbseq) {
7452     sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
7453     if (sbp == NULL) return;
7454 
7455     sbp->entityID = bsp->idx.entityID;
7456     sbp->itemID = bsp->idx.itemID;
7457     sbp->itemtype = OBJ_BIOSEQ;
7458 
7459     sbp->start = 0;
7460     sbp->stop = len;
7461 
7462     if (awp->afp != NULL) {
7463       DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
7464     }
7465 
7466     return;
7467   }
7468 
7469   /* otherwise populate individual sequence blocks for given range */
7470 
7471   for (start = 0; start < len; start += BASES_PER_BLOCK) {
7472     sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
7473     if (sbp == NULL) continue;
7474 
7475     sbp->entityID = bsp->idx.entityID;
7476     sbp->itemID = bsp->idx.itemID;
7477     sbp->itemtype = OBJ_BIOSEQ;
7478 
7479     stop = start + BASES_PER_BLOCK;
7480     if (stop >= len) {
7481       stop = len;
7482     }
7483     extend = start + BASES_PER_BLOCK + 60;
7484     if (extend >= len) {
7485       extend = len;
7486     }
7487 
7488     sbp->start = start;
7489     sbp->stop = stop;
7490     sbp->extend = extend;
7491 
7492     if (awp->afp != NULL) {
7493       DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
7494     }
7495   }
7496 }
7497 
AddContigBlock(Asn2gbWorkPtr awp)7498 NLM_EXTERN void AddContigBlock (
7499   Asn2gbWorkPtr awp
7500 )
7501 
7502 {
7503   IntAsn2gbJobPtr  ajp;
7504   BaseBlockPtr     bbp;
7505   Char             buf [128];
7506 
7507   if (awp == NULL) return;
7508   ajp = awp->ajp;
7509   if ( ajp == NULL ) return;
7510 
7511   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7512       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7513     sprintf (buf, "<a name=\"contig_%s\"></a>", awp->currAccVerLabel);
7514     DoQuickLinkFormat (awp->afp, buf);
7515   }
7516   bbp = Asn2gbAddBlock (awp, CONTIG_BLOCK, sizeof (BaseBlock));
7517 
7518   if (awp->afp != NULL) {
7519     DoImmediateFormat (awp->afp, bbp);
7520   }
7521 }
7522 
AddSlashBlock(Asn2gbWorkPtr awp)7523 NLM_EXTERN void AddSlashBlock (
7524   Asn2gbWorkPtr awp
7525 )
7526 
7527 {
7528   IntAsn2gbJobPtr  ajp;
7529   BaseBlockPtr     bbp;
7530   Char             buf [128];
7531   CharPtr          str;
7532 
7533   if (awp == NULL) return;
7534   ajp = awp->ajp;
7535   if (ajp == NULL) return;
7536 
7537   /*
7538   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7539       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7540     sprintf (buf, "<a name=\"slash_%s\"></a>", awp->currAccVerLabel);
7541     DoQuickLinkFormat (awp->afp, buf);
7542   }
7543   */
7544 
7545   bbp = Asn2gbAddBlock (awp, SLASH_BLOCK, sizeof (BaseBlock));
7546   if (bbp == NULL) return;
7547 
7548   bbp->entityID = awp->entityID;
7549 
7550   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7551       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7552     sprintf (buf, "//</pre>\n<a name=\"slash_%s\"></a>", awp->currAccVerLabel);
7553     str = StringSave (buf);
7554   } else if (GetWWW (ajp)) {
7555     sprintf (buf, "//</pre>\n");
7556     str = StringSave (buf);
7557   } else {
7558     str = (CharPtr) MemNew(sizeof(Char) * 4);
7559     StringNCpy(str, "//\n", 4);
7560   }
7561 
7562   bbp->string = str;
7563 
7564   if (awp->afp != NULL) {
7565     DoImmediateFormat (awp->afp, bbp);
7566   }
7567 }
7568 
7569