1 /*   asn2gnb1.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2gnb1.c
27 *
28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 *          Mati Shomrat
30 *
31 *
32 * Version Creation Date:   10/21/98
33 *
34 * $Revision: 1.285 $
35 *
36 * File Description:  New GenBank flatfile generator - work in progress
37 *
38 * Modifications:
39 * --------------------------------------------------------------------------
40 * ==========================================================================
41 */
42 
43 #include <ncbi.h>
44 #include <objall.h>
45 #include <objsset.h>
46 #include <objsub.h>
47 #include <objfdef.h>
48 #include <objpubme.h>
49 #include <seqport.h>
50 #include <sequtil.h>
51 #include <sqnutils.h>
52 #include <subutil.h>
53 #include <tofasta.h>
54 #include <explore.h>
55 #include <gbfeat.h>
56 #include <gbftdef.h>
57 #include <edutil.h>
58 #include <alignmgr2.h>
59 #include <asn2gnbi.h>
60 
61 #ifdef WIN_MAC
62 #if __profile__
63 #include <Profiler.h>
64 #endif
65 #endif
66 
GetAccVerForBioseq(BioseqPtr bsp,CharPtr buf,size_t buflen,Boolean hideGi,Boolean isSpan)67 NLM_EXTERN Boolean GetAccVerForBioseq (BioseqPtr bsp, CharPtr buf, size_t buflen, Boolean hideGi, Boolean isSpan) {
68 
69   Char      ch;
70   BIG_ID    gi;
71   SeqIdPtr  sip;
72   CharPtr   tmp;
73   Boolean   acc_found = FALSE;
74 
75   if (buf == NULL || buflen < 1) {
76     return FALSE;
77   }
78   *buf = '\0';
79   if (bsp == NULL) {
80     return FALSE;
81   }
82 
83   if (! hideGi) {
84     for (sip = bsp->id; sip != NULL; sip = sip->next) {
85       if (sip->choice == SEQID_GI) {
86         gi = (BIG_ID) sip->data.intvalue;
87         if (gi > 0) {
88           sprintf (buf, "%lld", (long long) gi);
89           return TRUE;
90         }
91       }
92     }
93   }
94   for (sip = bsp->id; sip != NULL; sip = sip->next) {
95     if (sip->choice == SEQID_GENBANK ||
96                sip->choice == SEQID_EMBL ||
97                sip->choice == SEQID_DDBJ ||
98                sip->choice == SEQID_GPIPE ||
99                sip->choice == SEQID_TPG ||
100                sip->choice == SEQID_TPE ||
101                sip->choice == SEQID_TPD ||
102                sip->choice == SEQID_PIR ||
103                sip->choice == SEQID_SWISSPROT ||
104                sip->choice == SEQID_PRF ||
105                sip->choice == SEQID_PDB ||
106                sip->choice == SEQID_OTHER) {
107       SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, buflen - 1);
108       acc_found = TRUE;
109     }
110     /* RefSeq, GPipe and PDB accessions can contain an underscore character '_',
111      * which is used as delimiter in the Javascript functions processing the
112      * HTML markup, hence change '_' to '-'.
113      */
114     if (sip->choice == SEQID_OTHER ||
115         sip->choice == SEQID_PDB ||
116         sip->choice == SEQID_GPIPE) {
117       if (isSpan) {
118         tmp = buf;
119         ch = *tmp;
120         while (ch != '\0') {
121           if (ch == '_') {
122             *tmp = '-';
123           }
124           tmp++;
125           ch = *tmp;
126         }
127       }
128     }
129     if (acc_found)
130        return TRUE;
131   }
132 
133   return FALSE;
134 }
135 
136 static Boolean FFIsStartOfLinkEx (StringItemPtr iter, Int4 pos, Int4Ptr lenP);
137 
138 /* utility functions */
139 
ValNodeCopyStrToHead(ValNodePtr PNTR head,Int2 choice,CharPtr str)140 NLM_EXTERN ValNodePtr ValNodeCopyStrToHead (ValNodePtr PNTR head, Int2 choice, CharPtr str)
141 
142 {
143   ValNodePtr newnode;
144 
145   if (head == NULL || str == NULL) return NULL;
146 
147   newnode = ValNodeNew (NULL);
148   if (newnode == NULL) return NULL;
149 
150   newnode->choice = (Uint1) choice;
151   newnode->data.ptrvalue = StringSave (str);
152 
153   newnode->next = *head;
154   *head = newnode;
155 
156   return newnode;
157 }
158 
159 /* the val node strings mechanism will be replaced by a more efficient method later  */
160 
MergeFFValNodeStrs(ValNodePtr list)161 NLM_EXTERN CharPtr MergeFFValNodeStrs (
162   ValNodePtr list
163 )
164 
165 {
166   size_t      len;
167   CharPtr     ptr;
168   CharPtr     str;
169   CharPtr     tmp;
170   ValNodePtr  vnp;
171 
172 
173   if (list == NULL) return NULL;
174 
175   for (vnp = list, len = 0; vnp != NULL; vnp = vnp->next) {
176     str = (CharPtr) vnp->data.ptrvalue;
177     len += StringLen (str);
178   }
179   if (len == 0) return NULL;
180 
181   ptr = MemNew (sizeof (Char) * (len + 2));
182   if (ptr == NULL) return NULL;
183 
184   for (vnp = list, tmp = ptr; vnp != NULL; vnp = vnp->next) {
185     str = (CharPtr) vnp->data.ptrvalue;
186     tmp = StringMove (tmp, str);
187   }
188 
189   return ptr;
190 }
191 
192 
AddValNodeString(ValNodePtr PNTR head,CharPtr prefix,CharPtr string,CharPtr suffix)193 NLM_EXTERN void AddValNodeString (
194   ValNodePtr PNTR head,
195   CharPtr prefix,
196   CharPtr string,
197   CharPtr suffix
198 )
199 
200 {
201   Char     buf [256];
202   CharPtr  freeme = NULL;
203   size_t   len;
204   CharPtr  newstr;
205   CharPtr  strptr;
206 
207   len = StringLen (prefix) + StringLen (string) + StringLen (suffix);
208   if (len == 0) return;
209 
210   if (len < sizeof (buf)) {
211 
212     /* if new string fits in stack buffer, no need to allocate */
213 
214     MemSet ((Pointer) buf, 0, sizeof (buf));
215     newstr = buf;
216 
217   } else {
218 
219     /* new string bigger than stack buffer, so allocate sufficient string */
220 
221     newstr = (CharPtr) MemNew (sizeof (Char) * (len + 2));
222     if (newstr == NULL) return;
223 
224     /* allocated string will be freed at end of function */
225 
226     freeme = newstr;
227   }
228 
229   strptr = newstr;
230 
231   if (prefix != NULL) {
232     strptr = StringMove (strptr, prefix);
233   }
234 
235   if (string != NULL) {
236     strptr = StringMove (strptr, string);
237   }
238 
239   if (suffix != NULL) {
240     strptr = StringMove (strptr, suffix);
241   }
242 
243   /* currently just makes a valnode list, to be enhanced later */
244 
245   ValNodeCopyStr (head, 0, newstr);
246 
247   /* if large string was allocated, free it now */
248 
249   if (freeme != NULL) {
250     MemFree (freeme);
251   }
252 }
253 
254 
FFAddString_NoRedund(StringItemPtr unique,CharPtr prefix,CharPtr string,CharPtr suffix,Boolean convertQuotes)255 NLM_EXTERN void FFAddString_NoRedund (
256   StringItemPtr unique,
257   CharPtr prefix,
258   CharPtr string,
259   CharPtr suffix,
260   Boolean convertQuotes
261 )
262 {
263   CharPtr    str = string;
264   Int4       foundPos = 0;
265   Boolean    wholeWord = FALSE;
266 
267   if ( StringHasNoText(prefix)  &&
268        StringHasNoText(string)  &&
269        StringHasNoText(suffix)  ) return;
270 
271   if (StringNICmp (string, "tRNA-", 5) == 0) {
272     str = string+5;
273     }
274 
275   while ( foundPos >= 0 && !wholeWord ) {
276     foundPos = FFStringSearch(unique, str, foundPos);
277     if ( foundPos >= 0 ) {
278       wholeWord = IsWholeWordSubstr(unique, foundPos, str);
279       foundPos += StringLen(str);
280     }
281   }
282 
283   if ( foundPos < 0 || !wholeWord ) {
284       FFAddTextToString(unique, prefix, string, suffix, FALSE, convertQuotes, TILDE_IGNORE);
285   }
286 }
287 
288 NLM_EXTERN void FFAddString_NoRedundEx (
289   StringItemPtr unique,
290   CharPtr prefix,
291   CharPtr string,
292   CharPtr suffix,
293   Boolean convertQuotes
294 );
295 
FFAddString_NoRedundEx(StringItemPtr unique,CharPtr prefix,CharPtr string,CharPtr suffix,Boolean convertQuotes)296 NLM_EXTERN void FFAddString_NoRedundEx (
297   StringItemPtr unique,
298   CharPtr prefix,
299   CharPtr string,
300   CharPtr suffix,
301   Boolean convertQuotes
302 )
303 {
304   CharPtr    str = string;
305   Int4       foundPos = 0;
306   Boolean    wholeWord = FALSE;
307 
308   if ( StringHasNoText(prefix)  &&
309        StringHasNoText(string)  &&
310        StringHasNoText(suffix)  ) return;
311 
312   if (StringNICmp (string, "tRNA-", 5) == 0) {
313     str = string+5;
314     }
315 
316   while ( foundPos >= 0 && !wholeWord ) {
317     foundPos = FFStringSearch(unique, str, foundPos);
318     if ( foundPos >= 0 ) {
319       wholeWord = IsWholeWordSubstr(unique, foundPos, str);
320       foundPos += StringLen(str);
321     }
322   }
323 
324   if ( foundPos < 0 || !wholeWord ) {
325       FFAddTextToString(unique, prefix, string, suffix, FALSE, convertQuotes, TILDE_EXPAND);
326   }
327 }
328 
329 
330 
331 /* s_AddPeriodToEnd () -- Adds a '.' to the end of a given string if */
332 /*                        there is not already one there.            */
333 /*                                                                   */
334 /*                        Note that this adds one character to the   */
335 /*                        length of the string, leading to a         */
336 /*                        memory overrun if space was not previously */
337 /*                        allocated for this.                        */
338 
s_AddPeriodToEnd(CharPtr someString)339 NLM_EXTERN void s_AddPeriodToEnd (CharPtr someString)
340 {
341   Int4  len;
342 
343   if (StringHasNoText (someString)) return;
344   len = StringLen (someString);
345   if (len < 1) return;
346   if (someString[len-1] != '.')
347     {
348       someString[len] = '.';
349       someString[len+1] = '\0';
350     }
351 }
352 
353 /* s_RemovePeriodFromEnd () -- If the last character in a given      */
354 /*                             string is a '.', removes it.          */
355 
s_RemovePeriodFromEnd(CharPtr someString)356 NLM_EXTERN Boolean s_RemovePeriodFromEnd (CharPtr someString)
357 {
358   Int4  len;
359 
360   if (StringHasNoText (someString)) return FALSE;
361   len = StringLen (someString);
362   if (len < 1) return FALSE;
363   if (someString[len-1] == '.') {
364     someString[len-1] = '\0';
365     return TRUE;
366   }
367   return FALSE;
368 }
369 
370 /**/
371 /*   isEllipsis () - Determines if a string ends in an ellipses */
372 /**/
373 
IsEllipsis(CharPtr str)374 NLM_EXTERN Boolean IsEllipsis (
375   CharPtr str
376 )
377 
378 {
379   size_t   len;
380   CharPtr  ptr;
381 
382   if (StringHasNoText (str)) return FALSE;
383   len = StringLen (str);
384   if (len < 3) return FALSE;
385   ptr = str + len - 3;
386   return (Boolean) (ptr [0] == '.' && ptr [1] == '.' && ptr [2] == '.');
387 }
388 
A2GBSeqLocReplaceID(SeqLocPtr newloc,SeqLocPtr ajpslp)389 NLM_EXTERN void A2GBSeqLocReplaceID (
390   SeqLocPtr newloc,
391   SeqLocPtr ajpslp
392 )
393 
394 {
395   BioseqPtr  bsp;
396   SeqIdPtr   sip;
397 
398   bsp = BioseqFindFromSeqLoc (ajpslp);
399   if (bsp == NULL) return;
400   sip = SeqIdFindBest (bsp->id, 0);
401   SeqLocReplaceID (newloc, sip);
402 }
403 
asn2gb_PrintDate(DatePtr dp)404 NLM_EXTERN CharPtr asn2gb_PrintDate (
405   DatePtr dp
406 )
407 
408 {
409   Char    buf [30];
410   size_t  len;
411 
412   if (dp == NULL) return NULL;
413 
414   if (DatePrint (dp, buf)) {
415     if (StringICmp (buf, "Not given") != 0) {
416       len = StringLen (buf);
417       if (len > 0) {
418         if (buf [len - 1] == '\n') {
419           if (buf [len - 2] == '.') {
420             buf [len - 2] = '\0';
421           } else {
422             buf [len - 1] = '\0';
423           }
424         }
425       }
426       return StringSave (buf);
427     }
428   }
429 
430   return NULL;
431 }
432 
433 static CharPtr month_names [] = {
434   "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
435   "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
436   "??"
437 };
438 
DateToFF(CharPtr buf,DatePtr dp,Boolean citSub)439 NLM_EXTERN CharPtr DateToFF (
440   CharPtr buf,
441   DatePtr dp,
442   Boolean citSub
443 )
444 
445 {
446   Int2  day;
447   Int2  month;
448   Int2  year;
449 
450   if (buf != NULL) {
451     *buf = '\0';
452   }
453   if (dp == NULL) return NULL;
454 
455   if (dp->data [0] == 0) {
456 
457     StringCpy (buf, dp->str);
458 
459   } else if (dp->data [0] == 1) {
460 
461     year = 1900 + (Int2) dp->data [1];
462     month = (Int2) dp->data [2];
463     day = (Int2) dp->data [3];
464 
465     if (citSub) {
466       if (month < 1 || month > 12) {
467         month = 13;
468       }
469       if (day < 1 || day > 31) {
470         day = 0;
471       }
472     } else {
473       if (month < 1 || month > 12) {
474         month = 1;
475       }
476       if (day < 1 || day > 31) {
477         day = 1;
478       }
479     }
480 
481     if (day < 1) {
482       sprintf (buf, "\?\?-%s-%ld",
483                month_names [month-1], (long) year);
484     } else if (day < 10) {
485       sprintf (buf, "0%ld-%s-%ld",
486                (long) day, month_names [month-1], (long) year);
487     } else {
488       sprintf(buf, "%ld-%s-%ld",
489                (long) day, month_names [month-1], (long) year);
490     }
491   }
492 
493   return buf;
494 }
495 
496 
FFGetString(IntAsn2gbJobPtr ajp)497 NLM_EXTERN StringItemPtr FFGetString (IntAsn2gbJobPtr ajp)
498 
499 {
500   StringItemPtr  sip;
501 
502   if (ajp == NULL) return NULL;
503   if (ajp->pool != NULL) {
504     sip = ajp->pool;
505     ajp->pool = sip->next;
506     sip->next = NULL;
507     MemSet ((Pointer) sip, 0, sizeof (StringItem));
508   } else {
509     sip = (StringItemPtr) MemNew (sizeof (StringItem));
510     if (sip == NULL) return NULL;
511   }
512   sip->curr = sip;
513   sip->iajp = ajp;
514   sip->pos = 0;
515   return sip;
516 }
517 
FFRecycleString(IntAsn2gbJobPtr ajp,StringItemPtr ffstring)518 NLM_EXTERN void FFRecycleString (IntAsn2gbJobPtr ajp, StringItemPtr ffstring)
519 
520 {
521   StringItemPtr  nxt;
522 
523   if (ajp == NULL || ffstring == NULL) return;
524   if ( ffstring->pos == -1 ) return;
525 
526   nxt = ffstring;
527   nxt->pos = -1;
528   while (nxt->next != NULL) {
529     nxt->pos = -1;
530     nxt = nxt->next;
531   }
532   nxt->next = ajp->pool;
533   ajp->pool = ffstring;
534 
535   ffstring->curr = NULL;
536 }
537 
FFAddOneChar(StringItemPtr sip,Char ch,Boolean convertQuotes)538 NLM_EXTERN void FFAddOneChar (
539   StringItemPtr sip,
540   Char ch,
541   Boolean convertQuotes
542 )
543 {
544   StringItemPtr current = sip->curr;
545 
546   if ( current->pos == STRING_BUF_LEN ) {
547     current->next = FFGetString(sip->iajp);
548     current = current->next;
549     current->pos = 0;
550     sip->curr = current;
551   }
552 
553   if ( convertQuotes && ch == '\"' ) {
554     ch = '\'';
555   }
556   current->buf[current->pos] = ch;
557   current->pos++;
558 }
559 
FFAddNewLine(StringItemPtr ffstring)560 NLM_EXTERN void FFAddNewLine(StringItemPtr ffstring) {
561   FFAddOneChar(ffstring, '\n', FALSE);
562 }
563 
FFAddNChar(StringItemPtr sip,Char ch,Int4 n,Boolean convertQuotes)564 NLM_EXTERN void FFAddNChar (
565   StringItemPtr sip,
566   Char ch,
567   Int4 n,
568   Boolean convertQuotes
569 )
570 {
571   Int4 i;
572 
573   for ( i = 0; i < n; ++i ) {
574     FFAddOneChar(sip, ch, convertQuotes);
575   }
576 }
577 
578 
FFExpandTildes(StringItemPtr sip,CharPtr PNTR cpp)579 NLM_EXTERN void FFExpandTildes (StringItemPtr sip, CharPtr PNTR cpp) {
580   Char replace = **cpp;
581 
582   if ( **cpp == '~' ) {
583     if ( *((*cpp) + 1) == '~' ) {     /* "~~" -> '~' */
584       replace = '~';
585       (*cpp)++;
586     } else {
587       replace = '\n';
588     }
589   }
590 
591   FFAddOneChar(sip, replace, FALSE);
592 }
593 
594 
FFSemicolonSeparateTildes(StringItemPtr sip,CharPtr PNTR cpp)595 NLM_EXTERN void FFSemicolonSeparateTildes (StringItemPtr sip, CharPtr PNTR cpp)
596 
597 {
598   Char replace = **cpp;
599 
600   if ( **cpp == '~' ) {
601     if ( *((*cpp) + 1) == '~' ) {     /* "~~" -> '~' */
602       replace = '~';
603       (*cpp)++;
604     } else {
605       FFAddOneChar(sip, ';', FALSE);
606       replace = '\n';
607     }
608   }
609 
610   FFAddOneChar(sip, replace, FALSE);
611 }
612 
613 
FFReplaceTildesWithSpaces(StringItemPtr ffstring,CharPtr PNTR cpp)614 NLM_EXTERN void FFReplaceTildesWithSpaces (StringItemPtr ffstring, CharPtr PNTR cpp) {
615   Char replace = **cpp, lookahead;
616   CharPtr cptr = *cpp;
617 
618   if ( *cptr == '`' ) {
619     FFAddOneChar(ffstring, replace, FALSE);
620     return;
621   }
622 
623   replace = ' ';
624   lookahead = *(cptr + 1);
625 
626   if ( IS_DIGIT(lookahead) ) {
627     replace = '~';
628   }
629   else {
630     if ( (lookahead == ' ') || (lookahead == '(') ) {
631       if ( IS_DIGIT(*(cptr + 2)) ) {
632         replace = '~';
633       }
634     }
635   }
636 
637   FFAddOneChar(ffstring, replace, FALSE);
638 }
639 
FFOldExpand(StringItemPtr sip,CharPtr PNTR cpp)640 NLM_EXTERN void FFOldExpand (StringItemPtr sip, CharPtr PNTR cpp) {
641   /* "~" -> "\n", "~~" or "~~ ~~" -> "\n\n" */
642   CharPtr cp = *cpp;
643   Char current = *cp;
644   Char next = *(cp + 1);
645 
646   /* handle "'~" */
647   if ( current == '`' ) {
648     if ( next != '~' ) {
649         FFAddOneChar(sip, current, FALSE);
650     } else {
651         FFAddOneChar(sip, '~', FALSE);
652         (*cpp)++;
653     }
654     return;
655   }
656 
657   /* handle "~", "~~" or "~~ ~~" */
658   FFAddOneChar(sip, '\n', FALSE);
659   if ( next == '~' ) {
660     FFAddOneChar(sip, '\n', FALSE);
661     cp++;
662     *cpp = cp;
663     cp++;
664     if ( *cp == ' ' ) {
665       cp++;
666       if ( *cp == '~' ) {
667         cp++;
668         if ( *cp == '~' ) { /* saw "~~ ~~" */
669           *cpp = cp;
670         }
671       }
672     }
673   }
674 }
675 
AddCommentStringWithTildes(StringItemPtr ffstring,CharPtr string)676 NLM_EXTERN void AddCommentStringWithTildes (StringItemPtr ffstring, CharPtr string)
677 {
678 /* One "~" is a  new line, "~~" or "~~ ~~" means 2 returns */
679 
680     /* Int2  i; */
681 
682     while (*string != '\0') {
683         if (*string == '`' && *(string+1) == '~') {
684             FFAddOneChar(ffstring, '~', FALSE);
685             string += 2;
686         } else if (*string == '~') {
687             FFAddOneChar(ffstring, '\n', FALSE);
688             string++;
689             if (*string == '~') {
690                 /*
691                 for (i = 0; i < 12; i++) {
692                     FFAddOneChar(ffstring, ' ', FALSE);
693                 }
694                 */
695                 FFAddOneChar(ffstring, '\n', FALSE);
696                 string++;
697             if (*string == ' ' && *(string+1) == '~' && *(string+2) == '~') {
698                     string += 3;
699             }
700           }
701         } else if (*string == '\"') {
702             *string = '\'';
703             FFAddOneChar(ffstring, *string, FALSE);
704             string++;
705         } else {
706             FFAddOneChar(ffstring, *string, FALSE);
707             string++;
708         }
709     }
710 }    /* AddCommentStringWithTildes */
711 
712 
AddStringWithTildes(StringItemPtr ffstring,CharPtr string)713 NLM_EXTERN void AddStringWithTildes (StringItemPtr ffstring, CharPtr string)
714 {
715 /* One "~" is a  new line, "~~" or "~~ ~~" means 2 returns */
716 
717     while (*string != '\0') {
718         if (*string == '`' && *(string+1) == '~') {
719             FFAddOneChar(ffstring, '~', FALSE);
720             string += 2;
721         } else if (*string == '~') {
722             FFAddOneChar(ffstring, '\n', FALSE);
723             string++;
724             if (*string == '~') {
725                 FFAddOneChar(ffstring, '\n', FALSE);
726                 string++;
727         if (*string == ' ' && *(string+1) == '~' && *(string+2) == '~') {
728                     string += 3;
729         }
730       }
731         } else if (*string == '\"') {
732             *string = '\'';
733             FFAddOneChar(ffstring, *string, FALSE);
734             string++;
735         } else {
736             FFAddOneChar(ffstring, *string, FALSE);
737             string++;
738         }
739     }
740 }    /* AddStringWithTildes */
741 
742 
FFProcessTildes(StringItemPtr sip,CharPtr PNTR cpp,Int2 tildeAction)743 NLM_EXTERN void FFProcessTildes (StringItemPtr sip, CharPtr PNTR cpp, Int2 tildeAction) {
744 
745   switch (tildeAction) {
746 
747   case TILDE_EXPAND :
748       FFExpandTildes(sip, cpp);
749       break;
750 
751   case TILDE_SEMICOLON :
752       FFSemicolonSeparateTildes(sip, cpp);
753       break;
754 
755   case TILDE_OLD_EXPAND :
756       FFOldExpand(sip, cpp);
757       break;
758 
759   case TILDE_TO_SPACES :
760       FFReplaceTildesWithSpaces (sip, cpp);
761       break;
762 
763   case TILDE_IGNORE:
764   default:
765       FFAddOneChar(sip, **cpp, FALSE);
766       break;
767   }
768 }
769 
FFAddPeriod(StringItemPtr sip)770 NLM_EXTERN void FFAddPeriod (StringItemPtr sip) {
771   Int4 i;
772   Char ch  = '\0';
773   StringItemPtr riter = sip->curr, prev;
774   IntAsn2gbJobPtr ajp;
775 
776   if ( sip == NULL ) return;
777   ajp = (IntAsn2gbJobPtr)sip->iajp;
778   if ( ajp == NULL ) return;
779 
780   for ( i = riter->pos - 1; i >= 0; --i ) {
781     ch = riter->buf[i];
782 
783     if ( (ch == ' ') || (ch == '\t')  || (ch == '~')  || (ch == '.') || (ch == '\n') || (ch == '\r')) {
784       riter->pos--;
785 
786       if ( i < 0 && riter != sip ) {
787         for ( prev = sip; prev->next != NULL; prev = prev->next ) {
788           if ( prev->next == riter ) {
789             i = prev->pos - 1;
790             FFRecycleString(ajp, riter);
791             riter = prev;
792             riter->next = NULL;
793               sip->curr = riter;
794             break;
795           }
796         }
797       }
798 
799     } else {
800       break;
801     }
802   }
803 
804   if (ch != '.') {
805     FFAddOneChar(sip, '.', FALSE);
806   }
807 }
808 
FFAddOneString(StringItemPtr sip,CharPtr string,Boolean addPeriod,Boolean convertQuotes,Int2 tildeAction)809 NLM_EXTERN void FFAddOneString (
810   StringItemPtr sip,
811   CharPtr string,
812   Boolean addPeriod,
813   Boolean convertQuotes,
814   Int2 tildeAction
815 )
816 {
817   CharPtr strp = string;
818   Char ch;
819   Char prevchar = '\0';
820 
821   if ( string == NULL ) return;
822 
823   ch = *strp;
824   while ( ch != '\0' ) {
825     if ( (ch == '`') || (ch == '~') ) {
826       if (tildeAction == TILDE_SEMICOLON && prevchar == ';') {
827         FFProcessTildes(sip, &strp, TILDE_EXPAND);
828       } else if (tildeAction == TILDE_SEMICOLON && prevchar == ' ') {
829         FFProcessTildes(sip, &strp, TILDE_EXPAND);
830       } else {
831         FFProcessTildes(sip, &strp, tildeAction);
832       }
833     } else {
834       FFAddOneChar(sip, ch, convertQuotes);
835     }
836     prevchar = ch;
837     strp++;
838     ch = *strp;
839   }
840 
841   if ( addPeriod ) {
842     FFAddPeriod(sip);
843   }
844 }
845 
FFCatenateSubString(StringItemPtr dest,StringItemPtr start_sip,Int4 start_pos,StringItemPtr end_sip,Int4 end_pos,Uint4 line_max)846 NLM_EXTERN void FFCatenateSubString (
847   StringItemPtr dest,
848   StringItemPtr start_sip, Int4 start_pos,
849   StringItemPtr end_sip, Int4 end_pos,
850   Uint4 line_max
851 )
852 {
853   Int4 max_i, min_i, i, len = 0;
854   StringItemPtr current;
855   Boolean in_url = FALSE, found_start = FALSE;
856   Boolean in_html_ampersand_escape = FALSE;
857   IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)dest->iajp;
858   Uint4 char_count = 0;
859 
860   for ( current = start_sip, i = start_pos;
861   current != NULL;
862   current = current->next ) {
863     if ( current == start_sip ) {
864       min_i = start_pos;
865     } else {
866       min_i = 0;
867     }
868 
869     if ( current == end_sip ) {
870       max_i = end_pos;
871     } else {
872       max_i = current->pos;
873     }
874 
875     for ( i = min_i; i < max_i; ++i ) {
876 
877       /* -----------------------------------------------------------------------
878        * HTML specific processing:
879        * ---------------------------------------------------------------------*/
880       if ( GetWWW(ajp) ) {
881         if ( ! in_url && ! in_html_ampersand_escape ) {
882           if ( current->buf[i] == '<' ) {
883             /* Watch out! */
884             if (FFIsStartOfLinkEx (current, i, &len)) {
885               FFAddOneChar(dest, '<', FALSE);
886               in_url = TRUE;
887               found_start = TRUE;
888               continue;
889             } else {
890               FFAddOneString(dest, "&lt;", FALSE, FALSE, TILDE_IGNORE);
891               ++char_count;
892               continue;
893             }
894           }
895           if( current->buf[i] == '&' )
896           {
897             FFAddOneChar(dest, '&', FALSE);
898             if( FFIsStartOfHTMLAmpersandEscape(current, i) ) {
899               in_html_ampersand_escape = TRUE;
900             }
901             ++char_count;
902             continue;
903           }
904           if (char_count == line_max) {
905             break;
906           }
907 
908           if ( current->buf[i] == '>' ) {
909             /* Obviously *not* a tag terminator */
910             FFAddOneString(dest, "&gt;", FALSE, FALSE, TILDE_IGNORE);
911             ++char_count;
912             continue;
913           }
914 
915           /* Common garden variety of character */
916           FFAddOneChar(dest, current->buf[i], FALSE);
917           ++char_count;
918 
919           if (found_start && len > 0) {
920             len--;
921             if (len == 0) {
922               FFAddOneChar(dest, '"', FALSE);
923               found_start = FALSE;
924             }
925           }
926         }
927 
928         else if( in_html_ampersand_escape ) {
929           FFAddOneChar(dest, current->buf[i], FALSE);
930           if( current->buf[i] == ';' ) {
931             in_html_ampersand_escape = FALSE;
932           }
933           continue;
934         }
935 
936         else /* in_url */ {
937           if ( current->buf[i] == '&' &&
938                ! FFStartsWith(current, i, "&amp;", TRUE) )
939           {
940             /* encode ampersand for XHMLT */
941             FFAddOneString(dest, "&amp;", FALSE, FALSE, TILDE_IGNORE);
942             continue;
943           }
944           if ( current->buf[i] == '>' ) {
945             FFAddOneChar(dest, '>', FALSE);
946             in_url = FALSE;
947             found_start = FALSE;
948             continue;
949           }
950 
951           /* nothing inside a link needs any cooking. And neither does it
952              count against the page width limit. */
953           FFAddOneChar(dest, current->buf[i], FALSE);
954         }
955       }
956 
957       /*  ---------------------------------------------------------------------
958        *  TEXT mode processing:
959        *  --------------------------------------------------------------------*/
960       else {
961         FFAddOneChar(dest, current->buf[i], FALSE);
962         if (++char_count == line_max) {
963           break;
964         }
965       }
966 
967     }
968     if ( current == end_sip || char_count == line_max ) break;
969   }
970 }
971 
FFToCharPtrEx(StringItemPtr sip,CharPtr pfx,CharPtr sfx)972 NLM_EXTERN CharPtr FFToCharPtrEx (StringItemPtr sip, CharPtr pfx, CharPtr sfx)
973 
974 {
975   Int4 size = 0;
976   StringItemPtr iter;
977   CharPtr result, temp;
978   size_t pfx_len, sfx_len;
979 
980   pfx_len = StringLen (pfx);
981   sfx_len = StringLen (sfx);
982 
983   for ( iter = sip; iter != NULL; iter = iter->next ) {
984     size += iter->pos;
985   }
986 
987   result = (CharPtr)MemNew(size + pfx_len + sfx_len + 2);
988   temp = result;
989 
990   if (pfx_len > 0) {
991     MemCpy( temp, pfx, pfx_len );
992     temp += pfx_len;
993   }
994   for ( iter = sip; iter != NULL; iter = iter->next ) {
995     MemCpy( temp, iter->buf, iter->pos );
996     temp += iter->pos;
997   }
998   if (sfx_len > 0) {
999     MemCpy( temp, sfx, sfx_len );
1000     temp += sfx_len;
1001   }
1002 
1003   *temp = '\0';
1004 
1005   return result;
1006 }
1007 
FFToCharPtr(StringItemPtr sip)1008 NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip)
1009 
1010 {
1011   return FFToCharPtrEx (sip, NULL, NULL);
1012 }
1013 
1014 /* word wrap functions */
1015 
1016 static CharPtr url_anchor_strings [] = {
1017   "</A>",
1018  "</ACRONYM>",
1019   "<A HREF=/",
1020   "<A HREF=\"/",
1021   "<A HREF=FTP://",
1022   "<A HREF=MAILTO:",
1023   "<A HREF=HTTP://",
1024   "<A HREF=HTTPS://",
1025   "<A HREF=\"HTTP://",
1026   "<A HREF=\"HTTPS://",
1027   "<ACRONYM TITLE=\"",
1028   "<DIV ",
1029   "</DIV>",
1030   NULL
1031 };
1032 
GetUrlAnchorFSA(void)1033 static TextFsaPtr GetUrlAnchorFSA (void)
1034 
1035 {
1036   return (TextFsaPtr) GetAppProperty ("Asn2gbUrlAnchorFSA");
1037 }
1038 
InitUrlAnchorFSA(void)1039 static TextFsaPtr InitUrlAnchorFSA (void)
1040 
1041 {
1042   TextFsaPtr  fsa;
1043   Int2        q;
1044 
1045   fsa = GetUrlAnchorFSA ();
1046   if (fsa != NULL) return fsa;
1047 
1048   fsa = TextFsaNew ();
1049   if (fsa == NULL) return NULL;
1050 
1051   for (q = 0; url_anchor_strings [q] != NULL; q++) {
1052     TextFsaAdd (fsa, url_anchor_strings [q]);
1053   }
1054 
1055   SetAppProperty ("Asn2gbUrlAnchorFSA", (Pointer) fsa);
1056 
1057   return fsa;
1058 }
1059 
FreeUrlAnchorFSA(void)1060 static void FreeUrlAnchorFSA (void)
1061 
1062 {
1063   TextFsaPtr  fsa;
1064 
1065   fsa = GetUrlAnchorFSA ();
1066   if (fsa == NULL) return;
1067 
1068   SetAppProperty ("Asn2gbUrlAnchorFSA", NULL);
1069   TextFsaFree (fsa);
1070 }
1071 
FFSkipLink(StringItemPtr PNTR iterp,Int4Ptr ip)1072 NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip) {
1073   StringItemPtr iter = *iterp;
1074   Int4 i = *ip;
1075 
1076   while ( (iter != NULL) && (iter->buf[i] != '>') ) {
1077     ++i;
1078 
1079     if ( i == iter->pos ) {
1080       iter = iter->next;
1081       i = 0;
1082     }
1083   }
1084   ++i;
1085   if ( iter != NULL && i == iter->pos && iter->next != NULL ) {
1086     iter = iter->next;
1087     i = 0;
1088   }
1089 
1090   *iterp = iter;
1091   *ip = i;
1092 }
1093 
FFSkipHTMLAmpersandEscape(StringItemPtr PNTR iterp,Int4Ptr ip)1094 NLM_EXTERN void FFSkipHTMLAmpersandEscape (StringItemPtr PNTR iterp, Int4Ptr ip)
1095 {
1096   StringItemPtr iter = *iterp;
1097   Int4 i = *ip;
1098 
1099   while ( (iter != NULL) && (iter->buf[i] != ';') ) {
1100     ++i;
1101 
1102     if ( i == iter->pos ) {
1103       iter = iter->next;
1104       i = 0;
1105     }
1106   }
1107 
1108   *iterp = iter;
1109   *ip = i;
1110 }
1111 
FFIsStartOfLinkEx(StringItemPtr iter,Int4 pos,Int4Ptr lenP)1112 static Boolean FFIsStartOfLinkEx (StringItemPtr iter, Int4 pos, Int4Ptr lenP)
1113 
1114 {
1115   Char        ch;
1116   TextFsaPtr  fsa;
1117   Int4        i;
1118   ValNodePtr  matches;
1119   Int4        max_url_len;
1120   Int4        state = 0;
1121 
1122   if ( iter == NULL || pos >= iter->pos ) return FALSE;
1123   if ( iter->buf [pos] != '<' ) return FALSE;
1124 
1125   fsa = GetUrlAnchorFSA ();
1126   if (fsa == NULL) return FALSE;
1127 
1128   if (! TextFsaGetStats (fsa, NULL, NULL, &max_url_len)) return FALSE;
1129 
1130   for (i = 0; i < max_url_len; i++) {
1131     ch = iter->buf [pos];
1132     ch = TO_UPPER (ch);
1133     state = TextFsaNext (fsa, state, ch, &matches);
1134     if (matches != NULL) {
1135       if (lenP != NULL) {
1136         *lenP = i + 1;
1137       }
1138       return TRUE;
1139     }
1140 
1141     pos++;
1142     if (pos >= iter->pos) {
1143       iter = iter->next;
1144       pos = 0;
1145       if (iter == NULL) return FALSE;
1146     }
1147   }
1148 
1149   return FALSE;
1150 }
1151 
FFIsStartOfLink(StringItemPtr iter,Int4 pos)1152 NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos)
1153 
1154 {
1155   return FFIsStartOfLinkEx (iter, pos, NULL);
1156 }
1157 
FFIsStartOfHTMLAmpersandEscape(StringItemPtr iter,Int4 pos)1158 NLM_EXTERN Boolean FFIsStartOfHTMLAmpersandEscape (
1159     StringItemPtr iter,
1160     Int4 pos )
1161 {
1162   Char        ch;
1163   Int4        i;
1164   Int4        max_len = 20;
1165 
1166   if ( iter == NULL || pos >= iter->pos ) return FALSE;
1167   if ( iter->buf [pos] != '&' ) return FALSE;
1168 
1169   /* skip the initial '&' */
1170   pos++;
1171   if (pos >= iter->pos) {
1172     iter = iter->next;
1173     pos = 0;
1174     if (iter == NULL) return FALSE;
1175   }
1176 
1177   for (i = 0; i < max_len; i++) {
1178     ch = iter->buf [pos];
1179     if( isalnum(ch) || ch == '#' ) {
1180       /* fine; these are chars expected in HTML ampersand char */
1181     } else if( ch == ';' ) {
1182       /* found end of HTML ampersand char */
1183       return TRUE;
1184     } else {
1185       /* illegal char in HTML ampersand char */
1186       return FALSE;
1187     }
1188 
1189     pos++;
1190     if (pos >= iter->pos) {
1191       iter = iter->next;
1192       pos = 0;
1193       if (iter == NULL) return FALSE;
1194     }
1195   }
1196 
1197   return FALSE;
1198 }
1199 
1200 /*
1201 NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos)  {
1202   static CharPtr start_link = "<A HREF=";
1203   static CharPtr end_link = "</A>";
1204   Int4 start_len = StringLen(start_link);
1205   Int4 end_len = StringLen(end_link);
1206   Char temp[10];
1207   Int4 i;
1208 
1209   if ( iter == NULL || pos >= iter->pos ) return FALSE;
1210   if ( iter->buf[pos] != '<' ) return FALSE;
1211 
1212   MemSet(temp, 0, sizeof(temp));
1213   for ( i = 0; i < start_len && iter != NULL; ++i ) {
1214     if ( pos + i < iter->pos ) {
1215       temp[i] = iter->buf[pos+i];
1216       if ( i == end_len - 1 ) {
1217         if ( StringNICmp(temp, end_link, end_len) == 0 ) {
1218           return TRUE;
1219         }
1220       }
1221     } else {
1222       iter = iter->next;
1223       pos = -i;
1224       --i;
1225     }
1226   }
1227 
1228   if ( i == start_len ) {
1229     if ( StringNICmp(temp, start_link, start_len) == 0 ) {
1230         return TRUE;
1231     }
1232   }
1233 
1234   return FALSE;
1235 }
1236 */
1237 
1238 
FFSavePosition(StringItemPtr ffstring,StringItemPtr PNTR bufptr,Int4 PNTR posptr)1239 NLM_EXTERN void FFSavePosition(StringItemPtr ffstring, StringItemPtr PNTR bufptr, Int4 PNTR posptr) {
1240   *bufptr = ffstring->curr;
1241   *posptr = ffstring->curr->pos;
1242 }
1243 
1244 
FFTrim(StringItemPtr ffstring,StringItemPtr line_start,Int4 line_pos,Int4 line_prefix_len)1245 NLM_EXTERN void FFTrim (
1246     StringItemPtr ffstring,
1247     StringItemPtr line_start,
1248     Int4 line_pos,
1249     Int4 line_prefix_len
1250 )
1251 {
1252   StringItemPtr riter, iter;
1253   Int4 i;
1254   IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)ffstring->iajp;
1255 
1256   for ( i = 0; i < line_prefix_len; ++i ) {
1257     ++line_pos;
1258     if ( line_pos == STRING_BUF_LEN ) {
1259       line_pos = 0;
1260       line_start= line_start->next;
1261     }
1262   }
1263 
1264   riter = ffstring->curr;
1265   while ( riter != NULL ) {
1266     for ( i = riter->pos - 1;
1267           /* (i >= 0) && !(riter == line_start && i <= line_pos); */
1268           (i >= 0) && ((riter != line_start) || (i >= line_pos));
1269           --i ) {
1270       if ( !IS_WHITESP(riter->buf[i]) || (riter->buf[i] == '\n') ) {
1271         break;
1272       }
1273     }
1274     if ( i < 0 ) {
1275       i = STRING_BUF_LEN - 1;
1276       for ( iter = ffstring; iter != NULL; iter = iter->next ) {
1277         if ( iter->next == riter ) {
1278           break;
1279         }
1280       }
1281       if ( iter == NULL ){
1282         ffstring->pos = 0;
1283         break;
1284       } else {
1285 
1286         riter = iter;
1287         ffstring->curr = riter;
1288       }
1289     } else {
1290       riter->pos = i + 1;
1291       FFRecycleString(ajp, riter->next);
1292       riter->next = NULL;
1293       break;
1294     }
1295   }
1296 }
1297 
FFNextChar(StringItemPtr start_sip,Int4 start_pos)1298 NLM_EXTERN int FFNextChar(
1299   StringItemPtr start_sip,
1300   Int4 start_pos
1301 )
1302 {
1303   if (start_pos < start_sip->pos-1) {
1304     return start_sip->buf[start_pos+1];
1305   }
1306   else if (start_sip->next != NULL) {
1307     return (start_sip->next->buf)[0];
1308   }
1309   else {
1310     return 0;
1311   }
1312 }
1313 
FFAdvanceChar(StringItemPtr * start_sip,Int4 * start_pos)1314 NLM_EXTERN void FFAdvanceChar(
1315   StringItemPtr* start_sip,
1316   Int4* start_pos
1317 )
1318 {
1319   if (*start_pos < (*start_sip)->pos-1) {
1320     ++(*start_pos);
1321   }
1322   else {
1323     (*start_sip) = (*start_sip)->next;
1324     *start_pos = 0;
1325   }
1326 }
1327 
/* A line is wrapped when the visible text in the line exceeds the line size. */
/* Visible text is text that is not an HTML hyper-link.                       */
/* A line may be broken at one of the following characters:                   */
/* space, comma and dash.                                                     */
/* The order of search is first spaces, then commas and then dashes.          */
/* We need to take into account the possibility that a 'new-line' character   */
/* already exists in the line; in such a case we break at the 'new-line'.     */
/* Spaces and new-lines are broken at that character, whereas for commas and  */
/* dashes we break at the character following the comma or dash.              */
1337 
FFCalculateLineBreak(StringItemPtr PNTR break_sip,Int4 PNTR break_pos,Int4 init_indent,Int4 visible,Boolean is_html)1338 NLM_EXTERN void FFCalculateLineBreak (
1339   StringItemPtr PNTR break_sip, Int4 PNTR break_pos,
1340   Int4 init_indent, Int4 visible,
1341   Boolean is_html
1342 )
1343 {
1344   StringItemPtr iter, prev;
1345   Int4 i,
1346        done = FALSE,
1347        copied = 0,
1348        start = *break_pos,
1349        pos = 0;
1350   Char ch;
1351   Boolean found_comma = FALSE, found_dash = FALSE, found_lb = FALSE;
1352   /* each candidate is a pair of buffer and position withingh this buffer */
1353   StringItemPtr candidate_sip_space = NULL,
1354                 candidate_sip_comma = NULL,
1355                 candidate_sip_dash  = NULL;
1356   Int4          candidate_int_space = -1,
1357                 candidate_int_comma = -1,
1358                 candidate_int_dash  = -1;
1359   /* This is set when the line consists entirely of one huge word that
1360      we actually ended up breaking in the middle */
1361   /* This variable was introduced to cover problems with the corner case
1362      of having a really long word such that it would be broken at exactly the
1363      point where only its last letter ends up on the next line. */
1364   /* e.g. AA000002 */
1365   Boolean breaking_long_word = FALSE;
1366 
1367 
1368   iter = *break_sip;
1369   prev = iter;
1370 
1371   i = start;
1372 
1373   /* skip the first 'init_indent' characters of the line */
1374   while ( iter != NULL && !done ) {
1375     for ( i = start; i < iter->pos && init_indent > 0; ++i ) {
1376       if ( iter->buf[i] == '\n' ) {
1377         candidate_sip_space = iter;
1378         candidate_int_space = i;
1379         done = TRUE;
1380         break;
1381       }
1382       if( is_html ) {
1383         if ( FFIsStartOfLink(iter, i) ) {
1384           FFSkipLink(&iter, &i);
1385           --i;
1386           continue;
1387         }
1388         if( FFIsStartOfHTMLAmpersandEscape(iter, i) ) {
1389           FFSkipHTMLAmpersandEscape(&iter, &i);
1390         }
1391       }
1392 
1393       --init_indent;
1394       ++copied;
1395     }
1396     if ( init_indent > 0 ) {
1397       start = 0;
1398       iter = iter->next;
1399     } else {
1400       break;
1401     }
1402   }
1403   start = i;
1404 
1405   while ( iter != NULL && !done ) {
1406     for ( i = start; iter != NULL && i < iter->pos; ++i ) {
1407       if ( found_comma ) {
1408         candidate_sip_comma = iter;
1409         candidate_int_comma = i;
1410         found_comma = FALSE;
1411       }
1412       if ( found_dash ) {
1413         candidate_sip_dash = iter;
1414         candidate_int_dash = i;
1415         found_dash= FALSE;
1416       }
1417 
1418       ch = iter->buf[i];
1419       if ( ch == '\n' ) {
1420         candidate_sip_space = iter;
1421         candidate_int_space = i;
1422         found_lb = TRUE;
1423         done = TRUE;
1424         break;
1425       } else if ( ch == ' ' ) {
1426         candidate_sip_space = iter;
1427         candidate_int_space = i;
1428       } else if ( ch == ',' ) {
1429         found_comma = TRUE;
1430       } else if ( ch == '-' ) {
1431         found_dash = TRUE;
1432         /*candidate_sip_dash = iter;
1433         candidate_int_dash = i;*/
1434       }
1435 
1436       if( is_html ) {
1437         if ( FFIsStartOfLink(iter, i) ) {
1438           FFSkipLink(&iter, &i);
1439           --i;
1440           continue;
1441         }
1442         if( FFIsStartOfHTMLAmpersandEscape(iter, i) ) {
1443           FFSkipHTMLAmpersandEscape(&iter, &i);
1444         }
1445       }
1446 
1447       ++copied;
1448       if ( copied >= visible ) {
1449         if ( (candidate_sip_space == NULL) && (candidate_int_space == -1) &&
1450              (candidate_sip_comma == NULL) && (candidate_int_comma == -1) &&
1451              (candidate_sip_dash == NULL)  && (candidate_int_dash == -1)  ) {
1452 	  breaking_long_word = TRUE;
1453           candidate_sip_space = iter;
1454           candidate_int_space = i;
1455         }
1456         done = TRUE;
1457         break;
1458       }
1459     }
1460     start = 0;
1461     if ( iter != NULL && !done ) {
1462       prev = iter;
1463       pos = prev->pos;
1464       iter = iter->next;
1465     }
1466   }
1467 
1468   /* the order in which we examine the various candidate breaks is important */
1469   if ( iter == NULL && !done) { /* reached the end */
1470     *break_sip = prev;
1471     *break_pos = pos;
1472   } else {
1473     if( candidate_sip_space != NULL ) {
1474         *break_sip = candidate_sip_space;
1475         *break_pos = candidate_int_space;
1476     } else if( candidate_sip_comma != NULL ) {
1477         *break_sip = candidate_sip_comma;
1478       *break_pos = candidate_int_comma;
1479     } else if( candidate_sip_dash != NULL ) {
1480       *break_sip = candidate_sip_dash;
1481       *break_pos = candidate_int_dash;
1482     }
1483     if ( ! found_lb && ! breaking_long_word ) {
1484       while (FFNextChar(*break_sip, *break_pos) == ' ') {
1485 	FFAdvanceChar(break_sip, break_pos);
1486       }
1487       if (FFNextChar(*break_sip, *break_pos) == '\n') {
1488         FFAdvanceChar(break_sip, break_pos);
1489       }
1490     }
1491   }
1492 }
1493 
1494 /*
 * Scans the given buffer from a given scan position, for the next occurrence of
1496  * the indicated character. The search breaks when the character is found, or the
1497  * supplied break position is reached.
1498  * On exit, the scan position will either be on the character found, or at the
1499  * given break position.
1500  *
1501  *  *p_line_sip:    in: points to the buffer where scan should start
1502  *                  out: points to the buffer where the scan ended
1503  *  *p_line_pos:    in: points to the position in *p_line_sip where the scan should
1504  *                    start
1505  *                  out: points to the position in *p_line_sip where the scan ended.
1506  *  break_sip:      points to buffer where the scan should stop
1507  *  break_pos:      position in *break_sip where the scan should stop
1508  *  c:              the character we are looking for
1509  */
FFFindSingleChar(StringItemPtr * p_line_sip,Int4 * p_line_pos,StringItemPtr break_sip,Int4 break_pos,char c)1510 NLM_EXTERN Boolean FFFindSingleChar(
1511   StringItemPtr* p_line_sip,
1512   Int4* p_line_pos,
1513   StringItemPtr break_sip,
1514   Int4 break_pos,
1515   char c )
1516 {
1517   while( *p_line_pos >= (*p_line_sip)->pos) {
1518     *p_line_pos -= (*p_line_sip)->pos;
1519     (*p_line_sip) = (*p_line_sip)->next;
1520     if ( *p_line_sip == NULL ) {
1521       return FALSE;
1522     }
1523   }
1524   while (*p_line_sip != break_sip){
1525     while (*p_line_pos <(*p_line_sip)->pos) {
1526       if ((*p_line_sip)->buf[ *p_line_pos ] == c)
1527         return TRUE;
1528       else
1529         ++(*p_line_pos);
1530     }
1531     *p_line_pos = 0;
1532     *p_line_sip = (*p_line_sip)->next;
1533   }
1534   while (*p_line_pos < break_pos){
1535     if ( (*p_line_sip)->buf[ *p_line_pos ] == c )
1536       return TRUE;
1537     else
1538       ++(*p_line_pos);
1539   }
1540   return FALSE;
1541 }
1542 
1543 /*
1544  * Returns the number of bytes remaining in the buffer chain, starting from the
1545  * given buffer and a read mark inside it.
1546  *
1547  *  sip:            points to the buffer where the string starts,
1548  *  cur_pos:        read mark in the buffer
1549  */
FFRemainingLength(StringItemPtr sip,Int4 cur_pos)1550 NLM_EXTERN Int4 FFRemainingLength(
1551   StringItemPtr sip,
1552   Int4 cur_pos )
1553 {
1554   return FFLength(sip)-cur_pos;
1555 }
1556 
1557 /*
 * Scans the given line for the next opening tag of an HTML hyperlink. Adjusts
 * the line position to immediately after the opening tag (if such a tag is
 * found) or to the end of the line (if no such tag is found).
 * If a character buffer is supplied, this function will copy any opening tag
 * it finds into that buffer.
 *
 *  *p_line_sip:    in: points to the string buffer where the scan should start
 *                  out: points to the string buffer where the scan ended
 *  *p_line_pos:    in: position in *p_line_sip where the scan should start
 *                  out: position in *p_line_sip where the scan ended
1568  *  break_sip:      buffer that contain the line break
1569  *  break_pos:      position in break_sip that represents the line break
1570  *  buf_open_link:  character buffer to hold a copy of the opening link found
1571  *                  (or =0 if this information is not required).
1572  */
FFExtractNextOpenLink(StringItemPtr * p_line_sip,Int4 * p_line_pos,StringItemPtr break_sip,Int4 break_pos,char * buf_open_link)1573 NLM_EXTERN Boolean FFExtractNextOpenLink(
1574   StringItemPtr* p_line_sip,
1575   Int4* p_line_pos,
1576   StringItemPtr break_sip,
1577   Int4 break_pos,
1578   char* buf_open_link )
1579 {
1580   int i;
1581 
1582   const char* buf_markup_open = "<A HREF";
1583   const int markup_size = strlen(buf_markup_open);
1584 
1585    while ((*p_line_sip != break_sip) || (*p_line_pos < break_pos)) {
1586 
1587     if (FFFindSingleChar(p_line_sip, p_line_pos, break_sip, break_pos, '<' )) {
1588 
1589       if (FFRemainingLength(*p_line_sip, *p_line_pos) < markup_size) {
1590         *p_line_sip = break_sip;
1591         *p_line_pos = break_pos;
1592         return FALSE;
1593       }
1594       for ( i=0; i < markup_size; ++i ) {
1595         if (buf_markup_open[i] != toupper( FFCharAt( *p_line_sip, (*p_line_pos)+i )))
1596           break;
1597       }
1598       if ( i == markup_size ) {
1599         if (buf_open_link != 0) {
1600 
1601           char next;
1602 
1603           for (i=0; '>' != (next = FFCharAt( *p_line_sip, *p_line_pos )); ++(*p_line_pos)) {
1604 
1605             if (next == '&' && ! FFStartsWith(*p_line_sip, *p_line_pos, "&amp;", TRUE) ) {
1606 
1607               MemCopy( buf_open_link+i, "&amp;", strlen( "&amp;" ) );
1608 
1609               i += strlen("&amp;");
1610 
1611             }
1612 
1613             else {
1614 
1615               buf_open_link[i++] = next;
1616 
1617             }
1618 
1619           }
1620 
1621           buf_open_link[i++] = '>';
1622 
1623           buf_open_link[i] = 0;
1624 
1625         } else {
1626           *p_line_pos += markup_size;
1627         }
1628 
1629         return TRUE;
1630       } else {
1631         ++(*p_line_pos);
1632       }
1633     }
1634   }
1635   return FALSE;
1636 }
1637 
1638 /*
 * Scans the given line for the next closing tag of an HTML hyperlink. Adjusts
 * the line position to immediately after the closing tag (if such a tag is
 * found) or to the end of the line (if no such tag is found).
 *
 *  *p_line_sip:    in: points to the string buffer where the scan should start
 *                  out: points to the string buffer where the scan ended
 *  *p_line_pos:    in: position in *p_line_sip where the scan should start
 *                  out: position in *p_line_sip where the scan ended
1647  *  break_sip:      buffer that contain the line break
1648  *  break_pos:      position in break_sip that represents the line break
1649  */
FFExtractNextCloseLink(StringItemPtr * p_line_sip,Int4 * p_line_pos,StringItemPtr break_sip,Int4 break_pos)1650 NLM_EXTERN Boolean FFExtractNextCloseLink(
1651   StringItemPtr* p_line_sip,
1652   Int4* p_line_pos,
1653   StringItemPtr break_sip,
1654   Int4 break_pos )
1655 {
1656   int i;
1657 
1658   const char* buf_close_link = "</A>";
1659   const int markup_close_size = strlen(buf_close_link);
1660 
1661    while ((*p_line_sip != break_sip) || (*p_line_pos < break_pos)) {
1662 
1663     if (FFFindSingleChar(p_line_sip, p_line_pos, break_sip, break_pos, '<' )) {
1664       if (FFRemainingLength(*p_line_sip, *p_line_pos) < markup_close_size) {
1665         *p_line_sip = break_sip;
1666         *p_line_pos = break_pos;
1667         return FALSE;
1668       }
1669       for ( i=0; i < markup_close_size; ++i ) {
1670         if (buf_close_link[i] != toupper(FFCharAt( *p_line_sip, (*p_line_pos)+i)))
1671           break;
1672       }
1673       if (i == markup_close_size) {
1674           (*p_line_pos) += markup_close_size;
1675         return TRUE;
1676       } else {
1677         ++(*p_line_pos);
1678       }
1679     }
1680   }
1681   return FALSE;
1682 }
1683 
1684 /*
1685  * Checks a given line whether its end falls between the opening and the closing
1686  * tag of an HTML link.
1687  *
1688  *  start_sip:      string buffer where the given line starts,
1689  *  start_pos:      position in start_sip where the given line starts,
1690  *  break_sip:      string buffer where the given line ends,
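 *  break_pos:      position in break_sip where the given line ends,
 *  buf_link_open:  optional buffer where the open tag of the split link will be
 *                  written to. Leave =0 if you don't need this.
 *  html_open_link_counter:
 *                  in/out: count of links currently open; incremented when an
 *                  opening tag is found, decremented when a closing tag is
 *                  found. The caller keeps it between consecutive calls.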
1694  */
FFLineBreakSplitsHtmlLink(StringItemPtr start_sip,Int4 start_pos,StringItemPtr break_sip,Int4 break_pos,char * buf_link_open,Int4 * html_open_link_counter)1695 NLM_EXTERN Boolean FFLineBreakSplitsHtmlLink(
1696   StringItemPtr start_sip,
1697   Int4 start_pos,
1698   StringItemPtr break_sip,
1699   Int4 break_pos,
1700   char* buf_link_open,
1701   Int4* html_open_link_counter )
1702 {
1703   StringItemPtr cur_iter=0;
1704   int cur_pos=0;
1705 
1706   if ( ! GetWWW((IntAsn2gbJobPtr)start_sip->iajp) )
1707       return FALSE;
1708 
1709   cur_iter = start_sip;
1710   cur_pos = start_pos;
1711 
1712   while ((cur_iter != break_sip) || (cur_pos < break_pos)) {
1713     switch(*html_open_link_counter) {
1714     case 0:
1715       if (FFExtractNextOpenLink(&cur_iter, &cur_pos, break_sip, break_pos, buf_link_open ))
1716         ++(*html_open_link_counter);
1717       break;
1718     case 1:
1719       if (FFExtractNextCloseLink(&cur_iter, &cur_pos, break_sip, break_pos ))
1720         --(*html_open_link_counter);
1721       break;
1722     default:
1723       break;
1724     }
1725   }
1726   return (*html_open_link_counter);
1727 } /*FFLineBreakSplitsHtmlLink*/
1728 
FFLineWrap(IntAsn2gbJobPtr ajp,StringItemPtr dest,StringItemPtr src,Int4 init_indent,Int4 cont_indent,Int4 line_max,CharPtr eb_line_prefix)1729 NLM_EXTERN void FFLineWrap (
1730   IntAsn2gbJobPtr ajp,
1731   StringItemPtr dest,
1732   StringItemPtr src,
1733   Int4 init_indent,
1734   Int4 cont_indent,
1735   Int4 line_max,
1736   CharPtr eb_line_prefix
1737 )
1738 {
1739   /* line break candidate is a pair <StringItemPtr, position> */
1740   StringItemPtr break_sip = src;
1741   Int4          break_pos = 0;
1742   StringItemPtr line_start = NULL;
1743   Int4          line_pos = 0;
1744   Int4          i, line_prefix_len = 0;
1745   StringItemPtr iter;
1746   Boolean       cont = FALSE;
1747   Boolean       is_html = GetWWW(ajp);
1748 
1749   /* Note:
1750      The value of the next two variables needs to persist between consecutive
1751      invocations of FFLineBreakSplitsHtmlLink().
1752   */
1753   Int4          html_open_link_counter = 0;
1754   char          buf_split_link_open[ 1024 ];
1755 
1756   Boolean       linebreak_splits_link = FALSE;
1757   const char*   buf_split_link_close = "</a>";
1758 
1759   MemSet( (void*)buf_split_link_open, 0, sizeof(buf_split_link_open) );
1760   FFSavePosition(dest, &line_start, &line_pos);
1761 
1762   for ( iter = src; iter != NULL; iter = iter->next ) {
1763     for ( i = 0; i < iter->pos; ) {
1764 
1765       break_pos = i;
1766       break_sip = iter;
1767 
1768       FFCalculateLineBreak(&break_sip, &break_pos, init_indent,
1769                            line_max - line_prefix_len + 1, is_html);
1770       linebreak_splits_link =
1771         FFLineBreakSplitsHtmlLink(iter, i, break_sip, break_pos,
1772           buf_split_link_open, &html_open_link_counter );
1773       FFCatenateSubString(dest, iter, i, break_sip, break_pos, line_max);
1774       if (0 && eb_line_prefix) {
1775         /* don't quit at the indent width but trim all the way down to the EMBL line code */
1776         FFTrim(dest, line_start, line_pos, strlen(eb_line_prefix));
1777       } else {
1778         FFTrim(dest, line_start, line_pos, cont_indent);
1779       }
1780       if ( linebreak_splits_link ) {
1781         FFAddOneString( dest,
1782           (char*)buf_split_link_close, FALSE, FALSE, TILDE_IGNORE );
1783       }
1784       FFAddOneChar(dest, '\n', FALSE);
1785 
1786       FFSavePosition(dest, &line_start, &line_pos);
1787 
1788       /* for EMBL 'XX' lines */
1789       if (eb_line_prefix != NULL) {
1790         cont = FALSE;
1791         if (break_pos > 1) {
1792           if (break_sip->buf[break_pos-1] == 'X' && break_sip->buf[break_pos-2] == 'X') {
1793             if ((break_pos == 2) || (break_sip->buf[break_pos-3] == '\n')) {
1794               ++break_pos;
1795               cont = TRUE;
1796             }
1797           }
1798         } else if (break_pos == 1) {
1799           if (break_sip->buf[0] == 'X' && iter->buf[iter->pos-1] == 'X') {
1800             if ((iter->pos > 1)  &&  iter->buf[iter->pos-2] == '\n') {
1801               ++break_pos;
1802               cont = TRUE;
1803             }
1804           }
1805         }
1806       }
1807 
1808       i = break_pos;
1809       iter = break_sip;
1810 
1811       if (cont) continue;
1812 
1813       if ( IS_WHITESP(iter->buf[i]) ) {
1814         i++;
1815       }
1816       if ( iter != src->curr || i < iter->pos ) {
1817         if ( eb_line_prefix != NULL ) {
1818           FFAddOneString(dest, eb_line_prefix, FALSE, FALSE, TILDE_IGNORE);
1819         }
1820         FFAddNChar(dest, ' ', cont_indent - StringLen(eb_line_prefix), FALSE);
1821         if ( linebreak_splits_link ) {
1822           FFAddOneString( dest, buf_split_link_open, FALSE, FALSE, TILDE_IGNORE );
1823         }
1824         init_indent = 0;
1825         line_prefix_len = cont_indent;
1826         /*FFSkipGarbage(&iter, &i);*/
1827       }
1828     }
1829   }
1830 }
1831 
1832 /* === */
1833 
FFStartPrint(StringItemPtr sip,FmtType format,Int4 gb_init_indent,Int4 gb_cont_indent,CharPtr gb_label,Int4 gb_tab_to,Int4 eb_init_indent,Int4 eb_cont_indent,CharPtr eb_line_prefix,Boolean eb_print_xx)1834 NLM_EXTERN void FFStartPrint (
1835   StringItemPtr sip,
1836   FmtType format,
1837   Int4 gb_init_indent,
1838   Int4 gb_cont_indent,
1839   CharPtr gb_label,
1840   Int4 gb_tab_to,
1841   Int4 eb_init_indent,
1842   Int4 eb_cont_indent,
1843   CharPtr eb_line_prefix,
1844   Boolean eb_print_xx
1845 )
1846 
1847 {
1848   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1849     FFAddNChar(sip, ' ', gb_init_indent, FALSE);
1850     FFAddOneString(sip, gb_label, FALSE, FALSE, TILDE_IGNORE);
1851     FFAddNChar(sip, ' ', gb_tab_to - gb_init_indent - StringLen(gb_label), FALSE);
1852   } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1853     if ( eb_print_xx ) {
1854       FFAddOneString(sip, "XX\n", FALSE, FALSE, TILDE_IGNORE);
1855     }
1856     FFAddOneString(sip, eb_line_prefix, FALSE, FALSE, TILDE_IGNORE);
1857     FFAddNChar(sip, ' ', eb_init_indent - StringLen(eb_line_prefix), FALSE);
1858   }
1859 }
1860 
FFAddTextToString(StringItemPtr ffstring,CharPtr prefix,CharPtr string,CharPtr suffix,Boolean addPeriod,Boolean convertQuotes,Int2 tildeAction)1861 NLM_EXTERN void FFAddTextToString (
1862   StringItemPtr ffstring,
1863   CharPtr prefix,
1864   CharPtr string,
1865   CharPtr suffix,
1866   Boolean addPeriod,
1867   Boolean convertQuotes,
1868   Int2 tildeAction
1869 )
1870 
1871 {
1872   FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1873   FFAddOneString (ffstring, string, FALSE, convertQuotes, tildeAction);
1874   FFAddOneString (ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1875 
1876   if ( addPeriod ) {
1877     FFAddPeriod(ffstring);
1878   }
1879 }
1880 
FFEndPrintEx(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,FmtType format,Int2 gb_init_indent,Int2 gb_cont_indent,Int2 eb_init_indent,Int2 eb_cont_indent,CharPtr eb_line_prefix,CharPtr pfx,CharPtr sfx)1881 NLM_EXTERN CharPtr FFEndPrintEx (
1882   IntAsn2gbJobPtr ajp,
1883   StringItemPtr ffstring,
1884   FmtType format,
1885   Int2 gb_init_indent,
1886   Int2 gb_cont_indent,
1887   Int2 eb_init_indent,
1888   Int2 eb_cont_indent,
1889   CharPtr eb_line_prefix,
1890   CharPtr pfx,
1891   CharPtr sfx
1892 )
1893 
1894 {
1895   StringItemPtr temp = FFGetString(ajp);
1896   CharPtr result;
1897 
1898   if ( (ffstring == NULL) || (ajp == NULL) ) return NULL;
1899 
1900   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1901     FFLineWrap(ajp, temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX, NULL);
1902   } else {
1903     FFLineWrap(ajp, temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX, eb_line_prefix);
1904   }
1905   result = FFToCharPtrEx(temp, pfx, sfx);
1906   FFRecycleString(ajp, temp);
1907   return result;
1908 }
1909 
FFEndPrint(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,FmtType format,Int2 gb_init_indent,Int2 gb_cont_indent,Int2 eb_init_indent,Int2 eb_cont_indent,CharPtr eb_line_prefix)1910 NLM_EXTERN CharPtr FFEndPrint (
1911   IntAsn2gbJobPtr ajp,
1912   StringItemPtr ffstring,
1913   FmtType format,
1914   Int2 gb_init_indent,
1915   Int2 gb_cont_indent,
1916   Int2 eb_init_indent,
1917   Int2 eb_cont_indent,
1918   CharPtr eb_line_prefix
1919 )
1920 
1921 {
1922   return FFEndPrintEx (ajp, ffstring, format, gb_init_indent, gb_cont_indent,
1923                        eb_init_indent, eb_cont_indent, eb_line_prefix, NULL, NULL);
1924 }
1925 
FFLength(StringItemPtr ffstring)1926 NLM_EXTERN Uint4 FFLength(StringItemPtr ffstring) {
1927   Uint4 len = 0;
1928   StringItemPtr current;
1929 
1930   for ( current = ffstring; current != NULL; current = current->next ) {
1931     len += current->pos;
1932   }
1933 
1934   return len;
1935 }
1936 
1937 
FFCharAt(StringItemPtr ffstring,Uint4 pos)1938 NLM_EXTERN Char FFCharAt(StringItemPtr ffstring, Uint4 pos) {
1939   Uint4 inbufpos = pos % STRING_BUF_LEN;
1940   Uint4 count = 0;
1941   StringItemPtr current = NULL;
1942 
1943   inbufpos = pos % STRING_BUF_LEN;
1944 
1945   for ( current = ffstring; current != NULL; current = current->next ) {
1946     count += current->pos;
1947     if ( count > pos ) break;
1948   }
1949 
1950   if ( current != NULL && inbufpos <= pos )  {
1951     return current->buf[inbufpos];
1952   }
1953 
1954   return '\0';
1955 }
1956 
1957 
FFFindChar(StringItemPtr ffstring,StringItemPtr start_buf,Uint4 start_pos,Uint4 old_pos,Uint4 new_pos)1958 NLM_EXTERN Char FFFindChar (
1959   StringItemPtr ffstring,   /* StringItem to search in */
1960   StringItemPtr start_buf,  /* the position of the last char searched for (buffer) */
1961   Uint4 start_pos,          /* the position of the last char searched for (pos) */
1962   Uint4 old_pos,         /* the global position searched for */
1963   Uint4 new_pos             /* new search position */
1964 )
1965 {
1966   Uint4 delta;
1967   Uint4 count;
1968   StringItemPtr current = NULL;
1969 
1970   Char result = '\0';
1971 
1972   if ( new_pos == old_pos ) {
1973     result = start_buf->buf[start_pos];
1974   }
1975 
1976   if ( new_pos > old_pos ) {
1977     delta = new_pos - old_pos;
1978     current = start_buf;
1979     count = current->pos - start_pos - 1;
1980     current = current->next;
1981 
1982     while ( delta > count && current != NULL ) {
1983       current = current->next;
1984       count += current->pos;
1985     }
1986 
1987     if ( current != NULL  )  {
1988       result = current->buf[new_pos % STRING_BUF_LEN];
1989     }
1990 
1991   } else /* new_pos < old_pos */ {
1992     delta = old_pos - new_pos;
1993     if ( old_pos % STRING_BUF_LEN >= delta ) {
1994       result = start_buf->buf[new_pos % STRING_BUF_LEN];
1995     } else {
1996       result = FFCharAt(ffstring, new_pos);
1997     }
1998   }
1999 
2000   return result;
2001 }
2002 
FFEmpty(StringItemPtr ffstring)2003 NLM_EXTERN Boolean FFEmpty(StringItemPtr ffstring) {
2004   if ( ffstring != NULL && ffstring->pos != 0 ) {
2005     return FALSE;
2006   }
2007   return TRUE;
2008 }
2009 
2010 /*
2011  * Compute the right-most position in the pattern at which character a occurs,
2012  * for each character a in the alphabet (assumed ASCII-ISO 8859-1)
2013  *
2014  * The result is returned in the supplied vector.
2015  */
/*
 * Fills last_occurrence[0..255] with, for every byte value, the highest
 * index at which that byte occurs in pattern, or -1 if it does not occur.
 */
static void ComputeLastOccurrence(const CharPtr pattern, Int4 last_occurrence[])
{
    Int4 idx;
    Int4 length = StringLen(pattern);
    Uint1 symbol;

    /* start from "not present" for every byte value */
    for ( idx = 255; idx >= 0; --idx ) {
        last_occurrence[idx] = -1;
    }

    /* later occurrences overwrite earlier ones, leaving the right-most */
    for ( idx = 0; idx < length; ++idx ) {
        symbol = (Uint1)pattern[idx];
        last_occurrence[symbol] = idx;
    }
}
2032 
/*
 * Computes the prefix function of pattern: longest_prefix[q] is the length
 * of the longest proper prefix of pattern[0..q] that is also a suffix of
 * it. longest_prefix must have room for one entry per pattern character
 * (entry 0 is written unconditionally).
 */
static void ComputePrefix(const CharPtr pattern, Int4 longest_prefix[])
{
    Int4 length = StringLen(pattern);
    Int4 matched = 0;
    Int4 idx;

    longest_prefix[0] = 0;

    idx = 1;
    while ( idx < length ) {
        /* fall back to shorter borders until the next character fits */
        while ( matched > 0 && pattern[matched] != pattern[idx] ) {
            matched = longest_prefix[matched - 1];
        }
        if ( pattern[matched] == pattern[idx] ) {
            matched++;
        }
        longest_prefix[idx] = matched;
        idx++;
    }
}
2051 
2052 
/*
 * Computes the good-suffix shift table consulted by FFStringSearch.
 *
 * pattern      - NUL-terminated search pattern
 * good_suffix  - caller-supplied vector with (pattern length + 1)
 *                entries; entry j is the shift applied after a mismatch
 *                at pattern index j - 1
 *
 * The table is always filled in.  If the pattern is empty or the
 * scratch buffers cannot be allocated, every entry is set to 1: a shift
 * of one position can never skip a match, whereas leaving the entries
 * untouched (zero, when the caller allocated with MemNew) would let the
 * search loop forever without advancing.
 */
static void ComputeGoodSuffix(const CharPtr pattern, Int4 good_suffix[])
{
    Int4 pat_len = StringLen(pattern);
    Int4Ptr longest_prefix, reverse_longest_prefix;
    CharPtr reverse_pattern;
    Int4 i, j;
    Int4 candidate;

    /* empty pattern: longest_prefix[pat_len-1] below would be out of range */
    if ( pat_len <= 0 ) {
      good_suffix[0] = 1;
      return;
    }

    /* allocate memory */
    longest_prefix = MemNew(pat_len * sizeof(Int4));
    reverse_longest_prefix = MemNew(pat_len * sizeof(Int4));
    reverse_pattern = MemNew((pat_len + 1) * sizeof(Char));

    if ( longest_prefix == NULL  ||
         reverse_longest_prefix == NULL  ||
         reverse_pattern == NULL ) {
      MemFree(longest_prefix);
      MemFree(reverse_longest_prefix);
      MemFree(reverse_pattern);
      /* fall back to the always-safe shift of one position */
      for ( j = 0; j <= pat_len; ++j ) {
        good_suffix[j] = 1;
      }
      return;
    }

    /* compute reverse pattern */
    for ( i = 0; i < pat_len; ++i ) {
      reverse_pattern[pat_len - i - 1] = pattern[i];
    }
    /* ComputePrefix takes the length with StringLen, so terminate explicitly */
    reverse_pattern[pat_len] = '\0';

    ComputePrefix(pattern, longest_prefix);
    ComputePrefix(reverse_pattern, reverse_longest_prefix);

    /* default shift: slide the pattern onto its longest border */
    for ( j = 0; j <= pat_len; ++j) {
        good_suffix[j] = pat_len - longest_prefix[pat_len-1];
    }

    /* tighten the shift wherever the reversed pattern yields a smaller one */
    for ( i = 0; i < pat_len; ++i ) {
        j = pat_len - reverse_longest_prefix[i];
        candidate = i - reverse_longest_prefix[i] + 1;
        if ( good_suffix[j] > candidate ) {
            good_suffix[j] = candidate;
        }
    }

    MemFree(longest_prefix);
    MemFree(reverse_longest_prefix);
    MemFree(reverse_pattern);
}
2097 
2098 
2099 /*
2100  * searches for a pattern in a StringItem.
2101  * Using the Boyer-Moore algorithm for the search.
2102  */
FFStringSearch(StringItemPtr text,const CharPtr pattern,Uint4 position)2103 NLM_EXTERN Int4 FFStringSearch (
2104   StringItemPtr text,
2105   const CharPtr pattern,
2106   Uint4 position )
2107 {
2108   Int4 text_len = FFLength(text);
2109   Int4 pat_len = StringLen(pattern);
2110   Int4 last_occurrence[256];
2111   Int4Ptr good_suffix;
2112   Int4 shift;
2113   Int4 j;
2114 
2115   if ( pat_len == 0 ) return 0;
2116   if ( text_len == 0 || pat_len > text_len - position ) return -1;
2117 
2118   good_suffix = (Int4Ptr)MemNew((pat_len+1) * sizeof(Int4));
2119   if ( good_suffix == NULL ) return -1;
2120 
2121   ComputeLastOccurrence(pattern, last_occurrence);
2122   ComputeGoodSuffix(pattern, good_suffix);
2123 
2124   shift = position;
2125   while ( shift <= text_len - pat_len ) {
2126     j = pat_len - 1;
2127     while( j >= 0 && pattern[j] == FFCharAt(text,shift + j) ) {
2128       --j;
2129     }
2130     if ( j == -1 ) {
2131       MemFree (good_suffix);
2132       return shift;
2133     } else {
2134       if( last_occurrence[(int) FFCharAt(text,shift + j)] <= j ) {
2135         shift += MAX( (Int4)good_suffix[(int) j+1],
2136               (Int4)(j - last_occurrence[(int) FFCharAt(text,shift + j)]));
2137       } else {
2138         shift += (Int4)good_suffix[(int) j+1];
2139       }
2140     }
2141   }
2142   MemFree (good_suffix);
2143 
2144   return -1;
2145 }
2146 
2147 /* Returns true if the given text starts with "pattern".
2148    You can also control whether this is done case insensitively */
FFStartsWith(StringItemPtr text,Int4 text_pos,const CharPtr pattern,Boolean case_insens)2149 NLM_EXTERN Boolean FFStartsWith(
2150   StringItemPtr text,
2151   Int4 text_pos,
2152   const CharPtr pattern,
2153   Boolean case_insens
2154 )
2155 {
2156   Int4 pattern_pos = 0;
2157 
2158   if( NULL == text || NULL == pattern ) {
2159     return FALSE;
2160   }
2161 
2162   /* every string starts with the empty string */
2163   if( pattern[0] == '\0' ) {
2164     return TRUE;
2165   }
2166 
2167   while( ( case_insens ?
2168            toupper(pattern[pattern_pos]) == toupper(text->buf[text_pos]) :
2169            pattern[pattern_pos] == text->buf[text_pos] ) )
2170   {
2171     /* advance pattern; if we reach the end,
2172      * text starts with pattern */
2173     ++pattern_pos;
2174     if( pattern[pattern_pos] == '\0' ) {
2175       return TRUE;
2176     }
2177 
2178     /* advance text, if we reach the end, text does NOT start
2179      * with pattern */
2180     FFAdvanceChar( &text, &text_pos );
2181     if( NULL == text ) {
2182       return FALSE;
2183     }
2184   }
2185 
2186   return FALSE;
2187 }
2188 
2189 /*                                                                   */
2190 /* IsWholeWordSubstr () -- Determines if a substring that is         */
2191 /*                         contained in another string is a whole    */
2192 /*                         word or phrase -- i.e. is it both         */
2193 /*                         preceded and followed by white space.     */
2194 /*                                                                   */
2195 
IsWholeWordSubstr(StringItemPtr searchStr,Uint4 foundPos,CharPtr subStr)2196 NLM_EXTERN Boolean IsWholeWordSubstr (
2197   StringItemPtr searchStr,
2198   Uint4 foundPos,
2199   CharPtr subStr
2200 )
2201 {
2202     Boolean left, right;
2203     Char ch;
2204 
2205 
2206     /* check on the left only if there is a character there */
2207     if (foundPos > 0) {
2208         ch = FFCharAt(searchStr, foundPos - 1);
2209         left = IS_WHITESP(ch) || ispunct(ch);
2210     } else {
2211         left = TRUE;
2212     }
2213 
2214     foundPos += StringLen(subStr);
2215   if ( foundPos == FFLength(searchStr) ) {
2216     right = TRUE;
2217   } else {
2218     ch = FFCharAt(searchStr, foundPos);
2219       right = IS_WHITESP(ch) || ispunct(ch);
2220   }
2221 
2222     return left; /* see comment above */
2223   /* return left && right;  this is how it should be!*/
2224 }
2225 
2226 
2227 /* functions to record sections or blocks in linked lists */
2228 
Asn2gbAddBlock(Asn2gbWorkPtr awp,BlockType blocktype,size_t size)2229 NLM_EXTERN BaseBlockPtr Asn2gbAddBlock (
2230   Asn2gbWorkPtr awp,
2231   BlockType blocktype,
2232   size_t size
2233 )
2234 
2235 {
2236   BaseBlockPtr  bbp;
2237   ValNodePtr    vnp;
2238 
2239   if (awp == NULL || size < 1) return NULL;
2240 
2241   bbp = (BaseBlockPtr) MemNew (size);
2242   if (bbp == NULL) return NULL;
2243   bbp->blocktype = blocktype;
2244   bbp->section = awp->currsection;
2245 
2246   vnp = ValNodeAddPointer (&(awp->lastblock), 0, bbp);
2247   if (vnp == NULL) return bbp;
2248 
2249   awp->lastblock = vnp;
2250   if (awp->blockList == NULL) {
2251     awp->blockList = vnp;
2252   }
2253 
2254   return bbp;
2255 }
2256 
2257 
2258 /*--------------------------------------------------------*/
2259 /*                                                        */
2260 /*  s_LocusGetBaseName() -                                */
2261 /*                                                        */
2262 /*--------------------------------------------------------*/
2263 
s_LocusGetBaseName(BioseqPtr parent,BioseqPtr segment,CharPtr baseName)2264 static Boolean s_LocusGetBaseName (BioseqPtr parent, BioseqPtr segment, CharPtr baseName)
2265 {
2266   Char          parentName[SEQID_MAX_LEN];
2267   Char          segName[SEQID_MAX_LEN];
2268   SeqIdPtr      sip;
2269   TextSeqIdPtr  tsip;
2270   Char          prefix[5];
2271   Char          bufTmp[SEQID_MAX_LEN];
2272   Int2          deleteChars;
2273   Int2          newLength;
2274   Int2          i;
2275   Uint2         segNameLen;
2276 
2277   /* Get the parent Sequence ID */
2278 
2279   parentName [0] = '\0';
2280   sip = NULL;
2281   for (sip = parent->id; sip != NULL; sip = sip->next) {
2282     if (sip->choice == SEQID_GENBANK ||
2283         sip->choice == SEQID_EMBL ||
2284         sip->choice == SEQID_DDBJ) break;
2285     if (sip->choice == SEQID_TPG ||
2286         sip->choice == SEQID_TPE ||
2287         sip->choice == SEQID_TPD) break;
2288   }
2289 
2290   if (sip != NULL) {
2291     tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2292     if (tsip != NULL && (! StringHasNoText (tsip->name))) {
2293       StringNCpy_0 (parentName, tsip->name, sizeof (parentName));
2294     }
2295   }
2296 
2297   if (StringHasNoText (parentName)) {
2298     StringNCpy_0 (parentName, baseName, sizeof (parentName));
2299   }
2300 
2301   /* Get segment id */
2302 
2303   segName [0] = '\0';
2304   segNameLen = 0;
2305   sip = NULL;
2306   for (sip = segment->id; sip != NULL; sip = sip->next) {
2307     if (sip->choice == SEQID_GENBANK ||
2308         sip->choice == SEQID_EMBL ||
2309         sip->choice == SEQID_DDBJ) break;
2310     if (sip->choice == SEQID_TPG ||
2311         sip->choice == SEQID_TPE ||
2312         sip->choice == SEQID_TPD) break;
2313     }
2314 
2315   if (sip != NULL) {
2316     tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2317     if (tsip != NULL && (! StringHasNoText (tsip->name))) {
2318       StringNCpy_0 (segName, tsip->name, sizeof (segName));
2319       segNameLen = StringLen(segName);
2320     }
2321   }
2322 
2323   /* If there's no "SEG_" prefix, then */
2324   /* just use the parent ID.           */
2325 
2326   StringNCpy_0 (prefix,parentName,sizeof (prefix));
2327   prefix[4] = '\0';
2328   if (StringCmp(prefix,"SEG_") != 0)
2329     {
2330       StringCpy(baseName,parentName);
2331       return FALSE;
2332     }
2333 
2334   /* Otherwise, eliminate the "SEG_" ... */
2335 
2336   StringCpy(bufTmp, &parentName[4]);
2337   StringCpy(parentName,bufTmp);
2338 
2339   /* ... And calculate a base name */
2340 
2341   if (segNameLen > 0 &&
2342       (segName[segNameLen-1] == '1') &&
2343       (StringLen(parentName) == segNameLen) &&
2344       (parentName[segNameLen-1] == segName[segNameLen-1]))
2345     {
2346       deleteChars = 1;
2347       for (i = segNameLen-2; i >= 0; i--)
2348     if (parentName[i] == '0')
2349       deleteChars++;
2350     else
2351       break;
2352       newLength = segNameLen - deleteChars;
2353       StringNCpy (parentName,segName,newLength); /* not StringNCpy_0 */
2354       parentName[newLength] = '\0';
2355     }
2356 
2357   /* Return the base name in the basename parameter */
2358 
2359   StringCpy(baseName,parentName);
2360   return TRUE;
2361 }
2362 
2363 /* ********************************************************************** */
2364 
  /* Ranking table indexed by Seq-id choice value (the index is given in
     the comment on each entry).  DoOneSection passes it to SeqIdSelect
     when local IDs are to be suppressed.  Only the first 21 entries are
     initialized explicitly; any further entries up to NUM_SEQID are
     zero.
     NOTE(review): presumably a lower value means a more preferred id
     type - confirm against SeqIdSelect. */
  static Uint1 fasta_order [NUM_SEQID] = {
    33, /* 0 = not set */
    20, /* 1 = local Object-id */
    15, /* 2 = gibbsq */
    16, /* 3 = gibbmt */
    30, /* 4 = giim Giimport-id */
    10, /* 5 = genbank */
    10, /* 6 = embl */
    10, /* 7 = pir */
    10, /* 8 = swissprot */
    15, /* 9 = patent */
    10, /* 10 = other = refseq */
    20, /* 11 = general Dbtag */
    255, /* 12 = gi */
    10, /* 13 = ddbj */
    10, /* 14 = prf */
    12, /* 15 = pdb */
    10, /* 16 = tpg */
    10, /* 17 = tpe */
    10, /* 18 = tpd */
    15, /* 19 = gpp */
    15  /* 20 = nat */
  };
2388 
/* Asn2gbAddSection allocates the section record that DoOneSection (which builds a single report for one bioseq or segment) fills in */
2390 
Asn2gbAddSection(Asn2gbWorkPtr awp)2391 static Asn2gbSectPtr Asn2gbAddSection (
2392   Asn2gbWorkPtr awp
2393 )
2394 
2395 {
2396   Asn2gbSectPtr  asp;
2397   ValNodePtr     vnp;
2398 
2399   if (awp == NULL) return NULL;
2400 
2401   asp = (Asn2gbSectPtr) MemNew (sizeof (IntAsn2gbSect));
2402   if (asp == NULL) return NULL;
2403 
2404   vnp = ValNodeAddPointer (&(awp->lastsection), 0, asp);
2405   if (vnp == NULL) return asp;
2406 
2407   awp->lastsection = vnp;
2408   if (awp->sectionList == NULL) {
2409     awp->sectionList = vnp;
2410   }
2411 
2412   return asp;
2413 }
2414 
SegHasParts(BioseqPtr bsp)2415 NLM_EXTERN Boolean SegHasParts (
2416   BioseqPtr bsp
2417 )
2418 
2419 {
2420   BioseqSetPtr  bssp;
2421   SeqEntryPtr   sep;
2422 
2423   if (bsp == NULL || bsp->repr != Seq_repr_seg) return FALSE;
2424   sep = bsp->seqentry;
2425   if (sep == NULL) return FALSE;
2426   sep = sep->next;
2427   if (sep == NULL || (! IS_Bioseq_set (sep))) return FALSE;
2428   bssp = (BioseqSetPtr) sep->data.ptrvalue;
2429   if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) return TRUE;
2430   return FALSE;
2431 }
2432 
LocInBioseq(SeqLocPtr slp,BioseqPtr bsp)2433 static Boolean LocInBioseq (
2434   SeqLocPtr slp,
2435   BioseqPtr bsp
2436 )
2437 
2438 {
2439   SeqIdPtr  sip;
2440 
2441   if (slp == NULL || bsp == NULL) return FALSE;
2442   sip = SeqLocId (slp);
2443   if (sip == NULL) return FALSE;
2444   return SeqIdIn (sip, bsp->id);
2445 }
2446 
/* Emits a feature block for each pending far-gap feature, starting at
   awp->currfargap, for as long as the gap's location lies on the
   bioseq of the current section.  awp->currfargap is advanced past
   every gap handled.  Nothing happens unless awp->afp is set. */
static void AddRemainingGaps (
  Asn2gbWorkPtr awp
)

{
  Asn2gbSectPtr    section;
  BioseqPtr        bsp;
  FeatBlockPtr     fbp;
  IntFeatBlockPtr  ifp;
  SeqFeatPtr       gap;

  if (awp == NULL) return;
  section = awp->asp;
  if (section == NULL) return;
  bsp = section->bsp;
  if (bsp == NULL) return;
  if (awp->afp == NULL) return;

  for (gap = awp->currfargap;
       gap != NULL && LocInBioseq (gap->location, bsp);
       gap = awp->currfargap) {

    fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntFeatBlock));
    if (fbp != NULL) {
      /* the block was allocated as an IntFeatBlock, so both views are valid */
      ifp = (IntFeatBlockPtr) fbp;

      fbp->entityID = 0;
      fbp->itemID = 0;
      fbp->itemtype = OBJ_SEQFEAT;
      fbp->featdeftype = FEATDEF_gap;

      ifp->mapToNuc = FALSE;
      ifp->mapToProt = FALSE;
      ifp->mapToGen = FALSE;
      ifp->mapToMrna = FALSE;
      ifp->mapToPep = FALSE;
      ifp->left = 0;
      ifp->right = 0;

      /* the first feature of a section is flagged exactly once */
      ifp->firstfeat = awp->firstfeat;
      awp->firstfeat = FALSE;

      if (awp->afp != NULL) {
        DoImmediateRemoteFeatureFormat (awp->afp, (BaseBlockPtr) fbp, gap);
      }
    }

    awp->currfargap = gap->next;
  }
}
2493 
DoOneSection(BioseqPtr target,BioseqPtr parent,BioseqPtr bsp,BioseqPtr refs,SeqLocPtr slp,Uint2 seg,Int4 from,Int4 to,Boolean contig,Boolean onePartOfSeg,Asn2gbWorkPtr awp)2494 NLM_EXTERN void DoOneSection (
2495   BioseqPtr target,
2496   BioseqPtr parent,
2497   BioseqPtr bsp,
2498   BioseqPtr refs,
2499   SeqLocPtr slp,
2500   Uint2 seg,
2501   Int4 from,
2502   Int4 to,
2503   Boolean contig,
2504   Boolean onePartOfSeg,
2505   Asn2gbWorkPtr awp
2506 )
2507 
2508 {
2509   size_t               acclen;
2510   Asn2gbFormatPtr      afp;
2511   IntAsn2gbJobPtr      ajp;
2512   Asn2gbSectPtr        asp;
2513   SeqMgrBioseqContext  bcontext;
2514   BlockMask            bkmask;
2515   BaseBlockPtr         PNTR blockArray;
2516   Boolean              cagemaster = FALSE;
2517   SeqMgrDescContext    dcontext;
2518   BioseqPtr            gbsp;
2519   SeqAnnotPtr          gsap;
2520   Boolean              hasRefs;
2521   Int4                 i;
2522   IntAsn2gbSectPtr     iasp;
2523   Boolean              isGpipe = FALSE;
2524   Boolean              isRefSeq = FALSE;
2525   MolInfoPtr           mip;
2526   Boolean              nsgenome = FALSE;
2527   Int4                 numBlocks;
2528   Int4                 numsegs = 0;
2529   SeqDescrPtr          sdp;
2530   SeqIdPtr             sip;
2531   Boolean              tlsmaster = FALSE;
2532   Boolean              tsamaster = FALSE;
2533   TextSeqIdPtr         tsip;
2534   ValNodePtr           vnp;
2535   Boolean              wgsmaster = FALSE;
2536   Boolean              wgstech = FALSE;
2537   Boolean              willshowcage = FALSE;
2538   Boolean              willshowcontig = FALSE;
2539   Boolean              willshowgenome = FALSE;
2540   Boolean              willshowsequence = FALSE;
2541   Boolean              willshowtls = FALSE;
2542   Boolean              willshowtsa = FALSE;
2543   Boolean              willshowwgs = FALSE;
2544 
2545   if (target == NULL || parent == NULL || bsp == NULL || awp == NULL) return;
2546   ajp = awp->ajp;
2547   if (ajp == NULL) return;
2548   bkmask = ajp->bkmask;
2549 
2550   if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
2551     if (bsp->repr == Seq_repr_seg) {
2552     } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2553     } else if (bsp->repr == Seq_repr_ref) {
2554     } else if (bsp->repr == Seq_repr_map) {
2555     } else return;
2556   }
2557 
2558   if (ajp->flags.suppressLocalID) {
2559     sip = SeqIdSelect (bsp->id, fasta_order, NUM_SEQID);
2560     if (sip == NULL || sip->choice == SEQID_LOCAL) return;
2561   }
2562 
2563   if (seg == 0) {
2564     awp->basename[0] = '\0';
2565   } else if (seg == 1) {
2566     s_LocusGetBaseName (parent, bsp, awp->basename);
2567   }
2568 
2569   asp = Asn2gbAddSection (awp);
2570   if (asp == NULL) return;
2571 
2572   afp = awp->afp;
2573   if (afp != NULL) {
2574     afp->asp = asp;
2575   }
2576 
2577   numsegs = awp->partcount;
2578   if (numsegs == 0 && SeqMgrGetBioseqContext (parent, &bcontext)) {
2579     numsegs = bcontext.numsegs;
2580   }
2581 
2582   /* set working data fields */
2583 
2584   awp->asp = asp;
2585 
2586   awp->target = target;
2587   awp->parent = parent;
2588   awp->bsp = bsp;
2589   awp->refs = refs;
2590   awp->slp = slp;
2591   (awp->sectionCount)++;
2592   awp->currGi = 0;
2593   awp->currAccVer [0] = '\0';
2594   awp->seg = seg;
2595   awp->numsegs = numsegs;
2596   awp->from = from;
2597   awp->to = to;
2598   awp->contig = contig;
2599 
2600   awp->firstfeat = TRUE;
2601   awp->featseen = FALSE;
2602   awp->featjustseen = FALSE;
2603   awp->wgsaccnlist = NULL;
2604 
2605   if (ajp->manygaps != NULL) {
2606     gbsp = (BioseqPtr) ajp->manygaps->data.ptrvalue;
2607     if (gbsp != NULL) {
2608       gsap = gbsp->annot;
2609       if (gsap != NULL && gsap->type == 1) {
2610         awp->currfargap = (SeqFeatPtr) gsap->data;
2611       }
2612     }
2613   }
2614 
2615   /* initialize empty blockList for this section */
2616 
2617   awp->blockList = NULL;
2618   awp->lastblock = NULL;
2619 
2620   /* and store section data into section fields */
2621 
2622   asp->target = target;
2623   asp->bsp = bsp;
2624   asp->slp = slp;
2625   asp->seg = seg;
2626   asp->numsegs = numsegs;
2627   asp->from = from;
2628   asp->to = to;
2629 
2630   iasp = (IntAsn2gbSectPtr) asp;
2631 
2632   asp->blockArray = NULL;
2633   asp->numBlocks = 0;
2634 
2635   /* WGS master and NS_ virtual records treated differently */
2636 
2637   if (bsp->repr == Seq_repr_virtual) {
2638 
2639     /* check for certain ID types */
2640 
2641     for (sip = bsp->id; sip != NULL; sip = sip->next) {
2642       if (sip->choice == SEQID_GENBANK ||
2643           sip->choice == SEQID_EMBL ||
2644           sip->choice == SEQID_DDBJ ||
2645           sip->choice == SEQID_TPG ||
2646           sip->choice == SEQID_TPE ||
2647           sip->choice == SEQID_TPD) {
2648         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2649         if (tsip != NULL && tsip->accession != NULL) {
2650           acclen = StringLen (tsip->accession);
2651           if (acclen == 12) {
2652             if (StringCmp (tsip->accession + 6, "000000") == 0) {
2653               wgsmaster = TRUE;
2654             }
2655           } else if (acclen == 13) {
2656             if (StringCmp (tsip->accession + 6, "0000000") == 0) {
2657               wgsmaster = TRUE;
2658             }
2659           } else if (acclen == 14) {
2660             if (StringCmp (tsip->accession + 6, "00000000") == 0) {
2661               wgsmaster = TRUE;
2662             }
2663           }
2664         }
2665      } else if (sip->choice == SEQID_OTHER) {
2666         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2667         if (tsip != NULL && tsip->accession != NULL) {
2668           if (StringNICmp (tsip->accession, "NC_", 3) == 0) {
2669             wgsmaster = TRUE;
2670           } else if (StringNICmp (tsip->accession, "NS_", 3) == 0) {
2671             nsgenome = TRUE;
2672           } else if (StringNICmp (tsip->accession, "NZ_", 3) == 0) {
2673             if (StringLen (tsip->accession) == 15) {
2674               if (StringCmp (tsip->accession + 9, "000000") == 0) {
2675                 wgsmaster = TRUE;
2676               }
2677             } else if (StringLen (tsip->accession) == 16) {
2678               if (StringCmp (tsip->accession + 9, "0000000") == 0) {
2679                 wgsmaster = TRUE;
2680               }
2681             }
2682           }
2683         }
2684       }
2685     }
2686 
2687     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
2688     if (sdp != NULL) {
2689       mip = (MolInfoPtr) sdp->data.ptrvalue;
2690       if (mip != NULL) {
2691         if (mip->tech == MI_TECH_wgs) {
2692           wgstech = TRUE;
2693         } else if (mip->tech == MI_TECH_tsa && bsp->repr == Seq_repr_virtual) {
2694           tsamaster = TRUE;
2695         } else if (mip->tech == MI_TECH_targeted && bsp->repr == Seq_repr_virtual) {
2696           tlsmaster = TRUE;
2697         } else if (mip->tech == MI_TECH_other && StringCmp (mip->techexp, "cage") == 0) {
2698           cagemaster = TRUE;
2699         }
2700       }
2701     }
2702   }
2703 
2704   for (sip = bsp->id; sip != NULL; sip = sip->next) {
2705     if (sip->choice == SEQID_OTHER) {
2706       isRefSeq = TRUE;
2707       SeqIdWrite (sip, awp->currAccVer, PRINTID_TEXTID_ACC_VER, sizeof (awp->currAccVer) - 1);
2708     } else if (sip->choice == SEQID_GI) {
2709       awp->currGi = (BIG_ID) sip->data.intvalue;
2710     } else if (sip->choice == SEQID_GPIPE) {
2711       isGpipe = TRUE;
2712       SeqIdWrite (sip, awp->currAccVer, PRINTID_TEXTID_ACC_VER, sizeof (awp->currAccVer) - 1);
2713     } else if (sip->choice == SEQID_GENBANK ||
2714                sip->choice == SEQID_EMBL ||
2715                sip->choice == SEQID_DDBJ ||
2716                sip->choice == SEQID_TPG ||
2717                sip->choice == SEQID_TPE ||
2718                sip->choice == SEQID_TPD ||
2719                sip->choice == SEQID_PIR ||
2720                sip->choice == SEQID_SWISSPROT ||
2721                sip->choice == SEQID_PRF ||
2722                sip->choice == SEQID_PDB) {
2723       SeqIdWrite (sip, awp->currAccVer, PRINTID_TEXTID_ACC_VER, sizeof (awp->currAccVer) - 1);
2724     }
2725   }
2726 
2727   GetAccVerForBioseq (bsp, awp->currAccVerLabel, sizeof (awp->currAccVerLabel), ajp->hideGI, TRUE);
2728 
2729   /* start exploring and populating paragraphs */
2730 
2731   if (awp->format == FTABLE_FMT) {
2732     AddFeatHeaderBlock (awp);
2733     if (awp->showFtableRefs) {
2734       AddReferenceBlock (awp, isRefSeq);
2735     }
2736     if (! awp->hideSources) {
2737       AddSourceFeatBlock (awp);
2738     }
2739     if (! awp->hideFeatures) {
2740       AddFeatureBlock (awp);
2741       AddRemainingGaps (awp);
2742     }
2743 
2744   } else {
2745 
2746     if (wgsmaster && wgstech) {
2747       willshowwgs = TRUE;
2748     } else if (tsamaster) {
2749       willshowtsa = TRUE;
2750     } else if (tlsmaster) {
2751       willshowtls = TRUE;
2752     } else if (cagemaster) {
2753       willshowcage = TRUE;
2754     } else if (nsgenome) {
2755       willshowgenome = TRUE;
2756     } else if (contig) {
2757       willshowcontig = TRUE;
2758       if (awp->showContigAndSeq) {
2759         if (! awp->hideSequence) {
2760           willshowsequence = TRUE;
2761         }
2762       }
2763     } else {
2764       if (awp->showContigAndSeq) {
2765         if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
2766           willshowcontig = TRUE;
2767         } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2768           willshowcontig = TRUE;
2769         } else if (bsp->repr == Seq_repr_ref) {
2770           willshowcontig = TRUE;
2771         }
2772       }
2773       if (! awp->hideSequence) {
2774         willshowsequence = TRUE;
2775       }
2776     }
2777 
2778     AddLocusBlock (awp, willshowwgs, willshowtsa, willshowtls, willshowcage, willshowgenome, willshowcontig, willshowsequence);
2779 
2780     if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
2781 
2782       AddDeflineBlock (awp);
2783       AddAccessionBlock (awp);
2784 
2785       if (ISA_aa (bsp->mol)) {
2786         /*
2787         AddPidBlock (awp);
2788         */
2789       }
2790 
2791       AddVersionBlock (awp);
2792 
2793       /* if (ISA_na (bsp->mol)) { */
2794         AddDblinkBlock (awp);
2795       /* } */
2796 
2797       if (ISA_aa (bsp->mol)) {
2798         AddDbsourceBlock (awp);
2799       }
2800 
2801     } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2802 
2803       AddAccessionBlock (awp);
2804 
2805       if (ISA_na (bsp->mol)) {
2806         AddVersionBlock (awp);
2807       }
2808 
2809       if (ISA_aa (bsp->mol)) {
2810         /* AddPidBlock (awp); */
2811         /* AddDbsourceBlock (awp); */
2812       }
2813 
2814       AddDateBlock (awp);
2815 
2816       AddDeflineBlock (awp);
2817     }
2818 
2819     AddKeywordsBlock (awp);
2820 
2821     if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
2822       AddSegmentBlock (awp, onePartOfSeg, (Boolean) ISA_na (bsp->mol));
2823     }
2824 
2825     AddSourceOrganismBlock (awp);
2826 
2827     /*
2828     if (awp->showRefStats) {
2829       AddRefStatsBlock (awp);
2830     }
2831     */
2832 
2833     if (! awp->hidePubs) {
2834 
2835       /* !!! RELEASE_MODE should check return value of AddReferenceBlock !!! */
2836 
2837       hasRefs = AddReferenceBlock (awp, isRefSeq);
2838       if (! hasRefs) {
2839         if (ajp->flags.needAtLeastOneRef) {
2840           /* RefSeq and Gpipe do not require a publication */
2841           if ((! isRefSeq) && (! isGpipe)) {
2842             awp->failed = TRUE;
2843           }
2844         }
2845       }
2846     }
2847 
2848     AddCommentBlock (awp);
2849     AddPrimaryBlock (awp);
2850 
2851     /*
2852     if (awp->showFeatStats) {
2853       AddFeatStatsBlock (awp);
2854     }
2855     */
2856 
2857     AddFeatHeaderBlock (awp);
2858     if (! awp->hideSources) {
2859       AddSourceFeatBlock (awp);
2860     }
2861 
2862     if (wgsmaster && wgstech) {
2863 
2864       AddWGSBlock (awp);
2865 
2866     } else if (tsamaster) {
2867 
2868       AddTSABlock (awp);
2869 
2870     } else if (tlsmaster) {
2871 
2872       AddTLSBlock (awp);
2873 
2874     } else if (cagemaster) {
2875 
2876       AddCAGEBlock (awp);
2877 
2878     } else if (nsgenome) {
2879 
2880       AddGenomeBlock (awp);
2881 
2882     } else if (contig) {
2883 
2884       if (awp->showconfeats) {
2885         if (! awp->hideFeatures) {
2886           AddFeatureBlock (awp);
2887           AddRemainingGaps (awp);
2888         }
2889       } else if (awp->smartconfeats && bsp->length <= 1000000) {
2890         if (! awp->hideFeatures) {
2891           AddFeatureBlock (awp);
2892           AddRemainingGaps (awp);
2893         }
2894       }
2895       AddContigBlock (awp);
2896 
2897       if (awp->showContigAndSeq) {
2898         if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
2899           if (awp->showBaseCount && bsp->repr != Seq_repr_map) {
2900             AddBasecountBlock (awp);
2901           }
2902         }
2903         if (bsp->repr != Seq_repr_map) {
2904           AddOriginBlock (awp);
2905         }
2906 
2907         if (! awp->hideSequence) {
2908           if (bsp->repr != Seq_repr_map) {
2909             AddSequenceBlock (awp);
2910           }
2911         }
2912       }
2913 
2914     } else {
2915 
2916       if (! awp->hideFeatures) {
2917         AddFeatureBlock (awp);
2918         AddRemainingGaps (awp);
2919       }
2920 
2921       if (awp->showContigAndSeq) {
2922         if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
2923           AddContigBlock (awp);
2924         } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2925           AddContigBlock (awp);
2926         } else if (bsp->repr == Seq_repr_ref) {
2927           AddContigBlock (awp);
2928         }
2929       }
2930 
2931       if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
2932         if (awp->showBaseCount && bsp->repr != Seq_repr_map) {
2933           AddBasecountBlock (awp );
2934         }
2935       }
2936       if (bsp->repr != Seq_repr_map) {
2937         AddOriginBlock (awp);
2938       }
2939 
2940       if (! awp->hideSequence) {
2941         if (bsp->repr != Seq_repr_map) {
2942           AddSequenceBlock (awp);
2943         }
2944       }
2945     }
2946 
2947     AddSlashBlock (awp);
2948   }
2949 
2950   /* allocate block array for this section */
2951 
2952   numBlocks = ValNodeLen (awp->blockList);
2953   asp->numBlocks = numBlocks;
2954 
2955   if (numBlocks > 0) {
2956     blockArray = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numBlocks + 1));
2957     asp->blockArray = blockArray;
2958 
2959     if (blockArray != NULL) {
2960       for (vnp = awp->blockList, i = 0; vnp != NULL; vnp = vnp->next, i++) {
2961         blockArray [i] = (BaseBlockPtr) vnp->data.ptrvalue;
2962       }
2963     }
2964   }
2965 
2966   /* free blockList, but leave data, now pointed to by blockArray elements */
2967 
2968   awp->blockList = ValNodeFree (awp->blockList);
2969   awp->lastblock = NULL;
2970 
2971   (awp->currsection)++;
2972 }
2973 
2974 /* ********************************************************************** */
2975 
2976 /*
2977 the following functions handle various kinds of input, all calling
2978 DoOneSection once for each component that gets its own report
2979 */
2980 
Asn2Seg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)2981 static Boolean LIBCALLBACK Asn2Seg (
2982   SeqLocPtr slp,
2983   SeqMgrSegmentContextPtr context
2984 )
2985 
2986 {
2987   Asn2gbWorkPtr  awp;
2988   BioseqPtr      bsp = NULL;
2989   Uint2          entityID;
2990   Int4           from;
2991   SeqLocPtr      loc;
2992   BioseqPtr      parent;
2993   SeqIdPtr       sip;
2994   Int4           to;
2995 
2996   if (slp == NULL || context == NULL) return FALSE;
2997   awp = (Asn2gbWorkPtr) context->userdata;
2998 
2999   parent = context->parent;
3000 
3001   from = context->cumOffset;
3002   to = from + context->to - context->from;
3003 
3004   sip = SeqLocId (slp);
3005   if (sip == NULL) {
3006     loc = SeqLocFindNext (slp, NULL);
3007     if (loc != NULL) {
3008       sip = SeqLocId (loc);
3009     }
3010   }
3011   if (sip == NULL) return TRUE;
3012 
3013   /* may remote fetch genome component if not already in memory */
3014 
3015   bsp = BioseqLockById (sip);
3016 
3017   if (bsp == NULL) return TRUE;
3018 
3019   entityID = ObjMgrGetEntityIDForPointer (bsp);
3020 
3021   if (entityID != awp->entityID) {
3022 
3023     /* if segment not packaged in record, may need to feature index it */
3024 
3025     if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
3026       SeqMgrIndexFeatures (entityID, NULL);
3027     }
3028 
3029     /* collect features indexed on the remote bioseq */
3030 
3031     parent = bsp;
3032     from = 0;
3033     to = bsp->length - 1;
3034   }
3035 
3036   if (bsp->repr != Seq_repr_virtual) {
3037     (awp->seg)++;
3038     DoOneSection (bsp, parent, bsp, bsp, /* slp */ NULL, awp->seg, from, to, FALSE, FALSE, awp);
3039   }
3040 
3041   BioseqUnlock (bsp);
3042 
3043   return TRUE;
3044 }
3045 
CountRealParts(SeqLocPtr slp_head)3046 static Int4 CountRealParts (
3047   SeqLocPtr slp_head
3048 )
3049 
3050 {
3051   SeqIdPtr   id;
3052   Int4       numparts;
3053   BioseqPtr  part;
3054   SeqIdPtr   sip;
3055   SeqLocPtr  slp;
3056 
3057   numparts = 0;
3058   for (slp = (SeqLocPtr) slp_head; slp != NULL; slp = slp->next) {
3059     sip = SeqLocId (slp);
3060     if (sip == NULL) continue;
3061     if (sip->choice == SEQID_GI) {
3062       part = BioseqFind (sip);
3063       if (part == NULL) continue;
3064       for (id = part->id; id != NULL; id = id->next) {
3065         if (id->choice == SEQID_GIBBSQ ||
3066             id->choice == SEQID_GIBBMT ||
3067             id->choice == SEQID_GIIM) break;
3068       }
3069       if (id != NULL && part->repr == Seq_repr_virtual) continue;
3070     }
3071     numparts++;
3072   }
3073   return numparts;
3074 }
3075 
/* userdata handed to FindSegForPart through SeqMgrExploreSegments */

typedef struct findseg {
  BioseqPtr  bsp;  /* Bioseq being looked for; compared against each resolved segment */
  Uint2      seg;  /* incremented for each resolved, non-virtual segment visited */
} FindSeg, PNTR FindSegPtr;
3080 
FindSegForPart(SeqLocPtr slp,SeqMgrSegmentContextPtr context)3081 static Boolean LIBCALLBACK FindSegForPart (
3082   SeqLocPtr slp,
3083   SeqMgrSegmentContextPtr context
3084 )
3085 
3086 {
3087   FindSegPtr  fsp;
3088   BioseqPtr   bsp = NULL;
3089   SeqLocPtr   loc;
3090   SeqIdPtr    sip;
3091 
3092   if (slp == NULL || context == NULL) return TRUE;
3093   fsp = (FindSegPtr) context->userdata;
3094 
3095   sip = SeqLocId (slp);
3096   if (sip == NULL) {
3097     loc = SeqLocFindNext (slp, NULL);
3098     if (loc != NULL) {
3099       sip = SeqLocId (loc);
3100     }
3101   }
3102   if (sip == NULL) return TRUE;
3103 
3104   bsp = BioseqFind (sip);
3105   if (bsp == NULL) return TRUE;
3106 
3107   if (bsp->repr != Seq_repr_virtual) {
3108     (fsp->seg)++;
3109   }
3110 
3111   if (bsp != fsp->bsp) return TRUE;
3112 
3113   return FALSE;
3114 }
3115 
DoOneBioseq(BioseqPtr bsp,Pointer userdata)3116 NLM_EXTERN void DoOneBioseq (
3117   BioseqPtr bsp,
3118   Pointer userdata
3119 )
3120 
3121 {
3122   IntAsn2gbJobPtr       ajp;
3123   Asn2gbWorkPtr         awp;
3124   BioseqSetPtr          bssp;
3125   SeqMgrSegmentContext  context;
3126   Boolean               contig = FALSE;
3127   Int4                  from;
3128   FindSeg               fs;
3129   SeqEntryPtr           oldscope;
3130   BioseqPtr             parent;
3131   Boolean               segmented = FALSE;
3132   SeqEntryPtr           sep;
3133   Int4                  to;
3134 
3135   if (bsp == NULL) return;
3136   awp = (Asn2gbWorkPtr) userdata;
3137   if (awp == NULL) return;
3138   ajp = awp->ajp;
3139   if (ajp == NULL) return;
3140 
3141   /* return if molecule not right for format */
3142 
3143   if (ISA_na (bsp->mol)) {
3144     if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return;
3145 
3146     /* only do mRNA feature tables in GPS if targeted to a specific mRNA */
3147 
3148     if (ajp->format == FTABLE_FMT && ajp->skipMrnas) {
3149       if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
3150         bssp = (BioseqSetPtr) bsp->idx.parentptr;
3151         if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {
3152           if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
3153             bssp = (BioseqSetPtr) bssp->idx.parentptr;
3154             if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
3155               return;
3156             }
3157           }
3158         }
3159       }
3160     }
3161 
3162   } else if (ISA_aa (bsp->mol)) {
3163     if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return;
3164 
3165     /* only do protein feature tables if targeted to a specific protein */
3166 
3167     if (ajp->format == FTABLE_FMT && ajp->skipProts) return;
3168   }
3169 
3170   if (awp->style == SEGMENT_STYLE) {
3171     segmented = TRUE;
3172   }
3173   if (awp->style == CONTIG_STYLE) {
3174     contig = TRUE;
3175   }
3176   /* Never do segmented style in FTABLE format */
3177   if (awp->format == FTABLE_FMT) {
3178       segmented = FALSE;
3179       contig = FALSE;
3180   }
3181 
3182   awp->partcount = 0;
3183 
3184   if (bsp->repr == Seq_repr_seg && awp->style == NORMAL_STYLE) {
3185 
3186     /* if bsp followed by parts set, then do not default to contig style */
3187 
3188     if (SegHasParts (bsp)) {
3189       segmented = TRUE;
3190       contig = FALSE;
3191 
3192       if (bsp->seq_ext_type == 1) {
3193 
3194         /* count only non-virtual parts */
3195 
3196         sep = GetTopSeqEntryForEntityID (awp->entityID);
3197         oldscope = SeqEntrySetScope (sep);
3198         awp->partcount = CountRealParts ((SeqLocPtr) bsp->seq_ext);
3199         SeqEntrySetScope (oldscope);
3200       }
3201     } else {
3202       segmented = FALSE;
3203       contig = TRUE;
3204     }
3205   }
3206   if (bsp->repr == Seq_repr_delta && awp->style == NORMAL_STYLE) {
3207     if (! DeltaLitOnly (bsp)) {
3208       contig = TRUE;
3209       if (awp->isRefSeq) {
3210         ajp->masterStyle = TRUE;
3211       }
3212     }
3213   }
3214 
3215   if (bsp->repr == Seq_repr_seg) {
3216 
3217     /* this is a segmented bioseq */
3218 
3219     if (segmented) {
3220 
3221       /* show all segments individually */
3222 
3223       awp->seg = 0;
3224       SeqMgrExploreSegments (bsp, (Pointer) awp, Asn2Seg);
3225 
3226     } else {
3227 
3228       /* show as single bioseq */
3229 
3230       parent = bsp;
3231       from = 0;
3232       to = bsp->length - 1;
3233 
3234       DoOneSection (parent, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
3235     }
3236 
3237   } else if (bsp->repr == Seq_repr_raw ||
3238              bsp->repr == Seq_repr_const ||
3239              bsp->repr == Seq_repr_delta ||
3240              bsp->repr == Seq_repr_ref ||
3241              bsp->repr == Seq_repr_map ||
3242              bsp->repr == Seq_repr_virtual) {
3243 
3244     parent = SeqMgrGetParentOfPart (bsp, &context);
3245     if (parent != NULL) {
3246 
3247       /* this is a part of an indexed segmented bioseq */
3248 
3249       from = context.cumOffset;
3250       to = from + context.to - context.from;
3251 
3252       s_LocusGetBaseName (parent, bsp, awp->basename);
3253 
3254       fs.bsp = bsp;
3255       fs.seg = 0;
3256       SeqMgrExploreSegments (parent, (Pointer) &fs, FindSegForPart);
3257       awp->showAllFeats = TRUE;
3258 
3259       DoOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, fs.seg, from, to, contig, TRUE, awp);
3260 
3261     } else {
3262 
3263       /* this is a regular non-segmented bioseq */
3264 
3265       parent = bsp;
3266       from = 0;
3267       to = bsp->length - 1;
3268 
3269       DoOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
3270     }
3271   }
3272 }
3273 
/*
 * DoBioseqSetList
 *
 * Processes each SeqEntry in the chain that starts at seq_set.  An entry
 * that is a Bioseq-set of class genbank, mut-set, pop-set, phy-set,
 * eco-set, wgs-set, gen-prod-set or small-genome-set is descended into
 * recursively.  A Bioseq-set entry with no data is skipped.  Every other
 * entry has DoOneBioseq applied through VisitSequencesInSep (VISIT_MAINS).
 */
static void DoBioseqSetList (
  SeqEntryPtr seq_set,
  Asn2gbWorkPtr awp
)

{
  SeqEntryPtr   entry;
  BioseqSetPtr  set;

  if (seq_set == NULL || awp == NULL) return;

  /* iterate rather than recurse unless multiple nested sets > nuc-prot */

  for (entry = seq_set; entry != NULL; entry = entry->next) {

    set = NULL;
    if (IS_Bioseq_set (entry)) {
      set = (BioseqSetPtr) entry->data.ptrvalue;
      if (set == NULL) continue;
    }

    if (set != NULL &&
        (set->_class == BioseqseqSet_class_genbank ||
         set->_class == BioseqseqSet_class_mut_set ||
         set->_class == BioseqseqSet_class_pop_set ||
         set->_class == BioseqseqSet_class_phy_set ||
         set->_class == BioseqseqSet_class_eco_set ||
         set->_class == BioseqseqSet_class_wgs_set ||
         set->_class == BioseqseqSet_class_gen_prod_set ||
         set->_class == BioseqseqSet_class_small_genome_set)) {

      /* if popset within genbank set, for example, recurse */

      DoBioseqSetList (set->seq_set, awp);

    } else {

      /* at most nuc-prot set, so do main bioseqs that fit the format */

      VisitSequencesInSep (entry, (Pointer) awp, VISIT_MAINS, DoOneBioseq);
    }
  }
}
3315 
/*
 * DoOneBioseqSet
 *
 * Entry point for a single SeqEntry.  When sep is a Bioseq-set of class
 * genbank, mut-set, pop-set, phy-set, eco-set, wgs-set, gen-prod-set or
 * small-genome-set, its members are handed to DoBioseqSetList.  A
 * Bioseq-set entry with no data does nothing.  Anything else has
 * DoOneBioseq applied through VisitSequencesInSep (VISIT_MAINS).
 */
static void DoOneBioseqSet (
  SeqEntryPtr sep,
  Asn2gbWorkPtr awp
)

{
  BioseqSetPtr  set = NULL;

  if (sep == NULL || awp == NULL) return;

  if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set == NULL) return;
  }

  if (set != NULL &&
      (set->_class == BioseqseqSet_class_genbank ||
       set->_class == BioseqseqSet_class_mut_set ||
       set->_class == BioseqseqSet_class_pop_set ||
       set->_class == BioseqseqSet_class_phy_set ||
       set->_class == BioseqseqSet_class_eco_set ||
       set->_class == BioseqseqSet_class_wgs_set ||
       set->_class == BioseqseqSet_class_gen_prod_set ||
       set->_class == BioseqseqSet_class_small_genome_set)) {

    /* this is a pop/phy/mut/eco set, catenate separate reports */

    DoBioseqSetList (set->seq_set, awp);

  } else {

    /* at most nuc-prot set, so do main bioseqs that fit the format */

    VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, DoOneBioseq);
  }
}
3351 
3352 /* ********************************************************************** */
3353 
/*
 * RecordOneSection
 *
 * Counts one section in awp->sectionMax and, when a gi is available,
 * appends it to the job's gi list (ajp->gihead / ajp->gitail).
 *
 * When bsp is non-NULL the sip argument is ignored and the first
 * SEQID_GI id of bsp is used instead; if bsp has no gi, nothing is
 * recorded and nothing is counted.  When bsp is NULL the supplied sip is
 * used: a NULL sip returns without counting, a non-gi sip is counted but
 * adds no gi to the list.
 */
static void RecordOneSection (
  Asn2gbWorkPtr awp,
  BioseqPtr bsp,
  SeqIdPtr sip
)

{
  IntAsn2gbJobPtr  ajp;
  SeqIdPtr         id;
  ValNodePtr       node;

  if (awp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  id = sip;
  if (bsp != NULL) {
    id = bsp->id;
    while (id != NULL && id->choice != SEQID_GI) {
      id = id->next;
    }
  }

  if (id == NULL) return;

  if (id->choice == SEQID_GI) {
    /* append after the current tail, then advance the tail */
    node = ValNodeAddBigInt (&(ajp->gitail), 0, (BIG_ID) id->data.intvalue);
    if (ajp->gihead == NULL) {
      ajp->gihead = node;
    }
    ajp->gitail = node;
  }

  (awp->sectionMax)++;
}
3386 
/*
 * CountOneSection
 *
 * Counting pass for one section; the parameter list matches the
 * DoOneSection calls in DoOneBioseq.  target and parent are only checked
 * for NULL, and slp, seg, from, to, contig and onePartOfSeg are not read
 * in this function.
 *
 * The section is recorded through RecordOneSection unless:
 *   - mode is RELEASE_MODE and style is CONTIG_STYLE, and bsp is neither
 *     segmented, a delta that is not literal-only, ref, nor map; or
 *   - ajp->flags.suppressLocalID is set and the id chosen by SeqIdSelect
 *     is missing or is a local id.
 */
static void CountOneSection (
  BioseqPtr target,
  BioseqPtr parent,
  BioseqPtr bsp,
  BioseqPtr refs,
  SeqLocPtr slp,
  Uint2 seg,
  Int4 from,
  Int4 to,
  Boolean contig,
  Boolean onePartOfSeg,
  Asn2gbWorkPtr awp
)

{
  IntAsn2gbJobPtr  ajp;
  SeqIdPtr         best;

  if (target == NULL || parent == NULL || bsp == NULL || awp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
    if (bsp->repr != Seq_repr_seg &&
        (! (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp)))) &&
        bsp->repr != Seq_repr_ref &&
        bsp->repr != Seq_repr_map) return;
  }

  if (ajp->flags.suppressLocalID) {
    best = SeqIdSelect (bsp->id, fasta_order, NUM_SEQID);
    if (best == NULL || best->choice == SEQID_LOCAL) return;
  }

  RecordOneSection (awp, bsp, NULL);
}
3424 
3425 
Count2Seg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)3426 static Boolean LIBCALLBACK Count2Seg (
3427   SeqLocPtr slp,
3428   SeqMgrSegmentContextPtr context
3429 )
3430 
3431 {
3432   Asn2gbWorkPtr  awp;
3433   BioseqPtr      bsp = NULL;
3434   Int4           from;
3435   SeqLocPtr      loc;
3436   BioseqPtr      parent;
3437   SeqIdPtr       sip;
3438   Int4           to;
3439 
3440   if (slp == NULL || context == NULL) return FALSE;
3441   awp = (Asn2gbWorkPtr) context->userdata;
3442 
3443   parent = context->parent;
3444 
3445   from = context->cumOffset;
3446   to = from + context->to - context->from;
3447 
3448   sip = SeqLocId (slp);
3449   if (sip == NULL) {
3450     loc = SeqLocFindNext (slp, NULL);
3451     if (loc != NULL) {
3452       sip = SeqLocId (loc);
3453     }
3454   }
3455   if (sip == NULL) return TRUE;
3456 
3457   bsp = BioseqFindCore (sip);
3458   if (bsp != NULL && bsp->repr == Seq_repr_virtual) return TRUE;
3459 
3460   RecordOneSection (awp, NULL, sip);
3461 
3462   return TRUE;
3463 }
3464 
/*
 * CountOneBioseq
 *
 * Counting counterpart of DoOneBioseq; userdata must be an
 * Asn2gbWorkPtr.  Applies the same molecule / format filters and the same
 * segmented-versus-contig decision, then calls CountOneSection for the
 * Bioseq or Count2Seg for each segment via SeqMgrExploreSegments.
 *
 * Fix: the gen-prod-set test now climbs from the nuc-prot set to its own
 * parent (bssp->idx), as DoOneBioseq does.  The previous code re-read
 * bsp->idx, so it tested the nuc-prot set a second time, could never
 * match gen-prod-set, and counted mRNAs that DoOneBioseq skips.
 */
static void CountOneBioseq (
  BioseqPtr bsp,
  Pointer userdata
)

{
  IntAsn2gbJobPtr       ajp;
  Asn2gbWorkPtr         awp;
  BioseqSetPtr          bssp;
  SeqMgrSegmentContext  context;
  Boolean               contig = FALSE;
  Int4                  from;
  BioseqPtr             parent;
  Boolean               segmented = FALSE;
  Int4                  to;

  if (bsp == NULL) return;
  awp = (Asn2gbWorkPtr) userdata;
  if (awp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  /* return if molecule not right for format */

  if (ISA_na (bsp->mol)) {
    if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return;

    /* only count mRNA feature tables in GPS if targeted to a specific mRNA */

    if (ajp->format == FTABLE_FMT && ajp->skipMrnas) {
      if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
        bssp = (BioseqSetPtr) bsp->idx.parentptr;
        if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {

          /* step up from the nuc-prot set to the set that contains it */

          if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
            bssp = (BioseqSetPtr) bssp->idx.parentptr;
            if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
              return;
            }
          }
        }
      }
    }

  } else if (ISA_aa (bsp->mol)) {
    if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return;

    /* only count protein feature tables if targeted to a specific protein */

    if (ajp->format == FTABLE_FMT && ajp->skipProts) return;
  }

  if (awp->style == SEGMENT_STYLE) {
    segmented = TRUE;
  }
  if (awp->style == CONTIG_STYLE) {
    contig = TRUE;
  }

  /* never do segmented style in FTABLE format */

  if (awp->format == FTABLE_FMT) {
      segmented = FALSE;
      contig = FALSE;
  }

  if (bsp->repr == Seq_repr_seg && awp->style == NORMAL_STYLE) {

    /* if bsp followed by parts set, then do not default to contig style */

    if (SegHasParts (bsp)) {
      segmented = TRUE;
      contig = FALSE;
    } else {
      segmented = FALSE;
      contig = TRUE;
    }
  }
  if (bsp->repr == Seq_repr_delta && awp->style == NORMAL_STYLE) {
    if (! DeltaLitOnly (bsp)) {
      contig = TRUE;
    }
  }

  if (bsp->repr == Seq_repr_seg) {

    if (segmented) {

      /* count all segments individually */

      SeqMgrExploreSegments (bsp, (Pointer) awp, Count2Seg);

    } else {

      /* count as single bioseq */

      parent = bsp;
      from = 0;
      to = bsp->length - 1;

      CountOneSection (parent, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
    }

  } else if (bsp->repr == Seq_repr_raw ||
             bsp->repr == Seq_repr_const ||
             bsp->repr == Seq_repr_delta ||
             bsp->repr == Seq_repr_ref ||
             bsp->repr == Seq_repr_map ||
             bsp->repr == Seq_repr_virtual) {

    parent = SeqMgrGetParentOfPart (bsp, &context);
    if (parent != NULL) {

      /* this is a part of an indexed segmented bioseq */

      from = context.cumOffset;
      to = from + context.to - context.from;

      CountOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, TRUE, awp);

    } else {

      /* this is a regular non-segmented bioseq */

      parent = bsp;
      from = 0;
      to = bsp->length - 1;

      CountOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
    }
  }
}
3577 
3578 
3579 
/*
 * CountBioseqSetList
 *
 * Counting counterpart of DoBioseqSetList.  Processes each SeqEntry in
 * the chain that starts at seq_set: a Bioseq-set of class genbank,
 * mut-set, pop-set, phy-set, eco-set, wgs-set, gen-prod-set or
 * small-genome-set is descended into recursively, a Bioseq-set entry with
 * no data is skipped, and every other entry has CountOneBioseq applied
 * through VisitSequencesInSep (VISIT_MAINS).
 */
static void CountBioseqSetList (
  SeqEntryPtr seq_set,
  Asn2gbWorkPtr awp
)

{
  SeqEntryPtr   entry;
  BioseqSetPtr  set;

  if (seq_set == NULL || awp == NULL) return;

  for (entry = seq_set; entry != NULL; entry = entry->next) {

    set = NULL;
    if (IS_Bioseq_set (entry)) {
      set = (BioseqSetPtr) entry->data.ptrvalue;
      if (set == NULL) continue;
    }

    if (set != NULL &&
        (set->_class == BioseqseqSet_class_genbank ||
         set->_class == BioseqseqSet_class_mut_set ||
         set->_class == BioseqseqSet_class_pop_set ||
         set->_class == BioseqseqSet_class_phy_set ||
         set->_class == BioseqseqSet_class_eco_set ||
         set->_class == BioseqseqSet_class_wgs_set ||
         set->_class == BioseqseqSet_class_gen_prod_set ||
         set->_class == BioseqseqSet_class_small_genome_set)) {

      /* container set: count its members */

      CountBioseqSetList (set->seq_set, awp);

    } else {

      /* at most nuc-prot set, so count main bioseqs that fit the format */

      VisitSequencesInSep (entry, (Pointer) awp, VISIT_MAINS, CountOneBioseq);
    }
  }
}
3615 
/*
 * CountOneBioseqSet
 *
 * Counting counterpart of DoOneBioseqSet.  When sep is a Bioseq-set of
 * class genbank, mut-set, pop-set, phy-set, eco-set, wgs-set,
 * gen-prod-set or small-genome-set, its members go to CountBioseqSetList.
 * A Bioseq-set entry with no data does nothing.  Anything else has
 * CountOneBioseq applied through VisitSequencesInSep (VISIT_MAINS).
 */
static void CountOneBioseqSet (
  SeqEntryPtr sep,
  Asn2gbWorkPtr awp
)

{
  BioseqSetPtr  set = NULL;

  if (sep == NULL || awp == NULL) return;

  if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set == NULL) return;
  }

  if (set != NULL &&
      (set->_class == BioseqseqSet_class_genbank ||
       set->_class == BioseqseqSet_class_mut_set ||
       set->_class == BioseqseqSet_class_pop_set ||
       set->_class == BioseqseqSet_class_phy_set ||
       set->_class == BioseqseqSet_class_eco_set ||
       set->_class == BioseqseqSet_class_wgs_set ||
       set->_class == BioseqseqSet_class_gen_prod_set ||
       set->_class == BioseqseqSet_class_small_genome_set)) {

    /* container set: count its members */

    CountBioseqSetList (set->seq_set, awp);

  } else {

    /* at most nuc-prot set, so count main bioseqs that fit the format */

    VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, CountOneBioseq);
  }
}
3647 
3648 /* ********************************************************************** */
3649 
3650 /* public functions */
3651 
SortParagraphByIDProc(VoidPtr vp1,VoidPtr vp2)3652 static int LIBCALLBACK SortParagraphByIDProc (
3653   VoidPtr vp1,
3654   VoidPtr vp2
3655 )
3656 
3657 {
3658   BaseBlockPtr  bbp1, bbp2;
3659 
3660   if (vp1 == NULL || vp2 == NULL) return 0;
3661   bbp1 = *((BaseBlockPtr PNTR) vp1);
3662   bbp2 = *((BaseBlockPtr PNTR) vp2);
3663   if (bbp1 == NULL || bbp2 == NULL) return 0;
3664 
3665   if (bbp1->entityID > bbp2->entityID) return 1;
3666   if (bbp1->entityID < bbp2->entityID) return -1;
3667 
3668   if (bbp1->itemtype > bbp2->itemtype) return 1;
3669   if (bbp1->itemtype < bbp2->itemtype) return -1;
3670 
3671   if (bbp1->itemID > bbp2->itemID) return 1;
3672   if (bbp1->itemID < bbp2->itemID) return -1;
3673 
3674   if (bbp1->paragraph > bbp2->paragraph) return 1;
3675   if (bbp1->paragraph < bbp2->paragraph) return -1;
3676 
3677   return 0;
3678 }
3679 
/*
 * IsBspRefseq
 *
 * Bioseq callback; userdata points at a Boolean that is set to TRUE when
 * bsp carries a SEQID_OTHER id.  The Boolean is never cleared here, so it
 * accumulates across calls.
 */
static void IsBspRefseq (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BoolPtr   found;
  SeqIdPtr  id;

  if (bsp == NULL || userdata == NULL) return;
  found = (BoolPtr) userdata;

  id = bsp->id;
  while (id != NULL) {
    if (id->choice == SEQID_OTHER) {
      *found = TRUE;
    }
    id = id->next;
  }
}
3697 
IsSepRefseq(SeqEntryPtr sep)3698 static Boolean IsSepRefseq (
3699   SeqEntryPtr sep
3700 )
3701 
3702 {
3703   Boolean  is_refseq = FALSE;
3704 
3705   if (sep == NULL) return FALSE;
3706   VisitBioseqsInSep (sep, (Pointer) &is_refseq, IsBspRefseq);
3707   return is_refseq;
3708 }
3709 
/* one row of flagTable: the 30 Boolean settings that SetFlagsFromMode copies into ajp->flags */

typedef struct modeflags {
  Boolean  flags [30];
} ModeFlags, PNTR ModeFlagsPtr;
3713 
/*
 * Per-mode flag settings.  SetFlagsFromMode selects row (mode - 1) and
 * reads the 30 entries in order into ajp->flags, five per line here to
 * match the five-assignment groups there: the first entry is
 * suppressLocalID and the last is forGbRelease.
 */
static ModeFlags flagTable [] = {

  /* RELEASE_MODE */
  {TRUE,  TRUE,  TRUE,  TRUE,  TRUE,
   TRUE,  TRUE,  TRUE,  TRUE,  TRUE,
   TRUE,  TRUE,  TRUE,  TRUE,  TRUE,
   TRUE,  TRUE,  TRUE,  TRUE,  TRUE,
   TRUE,  TRUE,  TRUE, TRUE,  TRUE,
   TRUE,  TRUE,  TRUE,  TRUE,  TRUE},

  /* ENTREZ_MODE */
  {FALSE, TRUE,  TRUE,  TRUE,  TRUE,
   FALSE, TRUE,  TRUE,  TRUE,  TRUE,
   TRUE,  TRUE,  FALSE, TRUE,  TRUE,
   TRUE,  TRUE,  FALSE, FALSE, TRUE,
   TRUE,  TRUE,  TRUE, TRUE,  TRUE,
   TRUE,  TRUE,  TRUE,  TRUE,  FALSE},

  /* SEQUIN_MODE */
  {FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, TRUE,  FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, TRUE, FALSE, FALSE,
   FALSE, TRUE,  FALSE, FALSE, FALSE},

  /* DUMP_MODE */
  {FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE}
};
3748 
/*
 * SetFlagsFromMode
 *
 * Fills ajp->flags from the flagTable row for the given mode, then
 * applies RefSeq-specific overrides.
 *
 *   - A mode outside RELEASE_MODE..DUMP_MODE is treated as DUMP_MODE.
 *   - When the top SeqEntry of ajp->ajp.entityID contains a SEQID_OTHER
 *     id (IsSepRefseq), the source-qual, gene-synonym, codon-recognized,
 *     selenocysteine and pyrrolysine "to note" flags are cleared.  The GO
 *     and RefSeq-qual flags are cleared as well, but only when
 *     ajp->gbseq is NULL.
 *   - When ajp->refseqConventions is set, a further group of flags is
 *     cleared regardless of the record contents.
 */
static void SetFlagsFromMode (
  IntAsn2gbJobPtr ajp,
  ModType mode
)

{
  Boolean      refseq;
  BoolPtr      row;
  SeqEntryPtr  top;

  if (ajp == NULL) return;
  if (mode < RELEASE_MODE || mode > DUMP_MODE) {
    mode = DUMP_MODE;
  }
  row = &(flagTable [(int) (mode - 1)].flags [0]);

  /* table columns, in the order they appear in each flagTable row */

  ajp->flags.suppressLocalID = row [0];
  ajp->flags.validateFeats = row [1];
  ajp->flags.ignorePatPubs = row [2];
  ajp->flags.dropShortAA = row [3];
  ajp->flags.avoidLocusColl = row [4];

  ajp->flags.iupacaaOnly = row [5];
  ajp->flags.dropBadCitGens = row [6];
  ajp->flags.noAffilOnUnpub = row [7];
  ajp->flags.dropIllegalQuals = row [8];
  ajp->flags.checkQualSyntax = row [9];

  ajp->flags.needRequiredQuals = row [10];
  ajp->flags.needOrganismQual = row [11];
  ajp->flags.needAtLeastOneRef = row [12];
  ajp->flags.citArtIsoJta = row [13];
  ajp->flags.dropBadDbxref = row [14];

  ajp->flags.useEmblMolType = row [15];
  ajp->flags.hideBankItComment = row [16];
  ajp->flags.checkCDSproductID = row [17];
  ajp->flags.suppressSegLoc = row [18];
  ajp->flags.srcQualsToNote = row [19];

  ajp->flags.hideEmptySource = row [20];
  ajp->flags.goQualsToNote = row [21];
  ajp->flags.separateGeneSyns = row [22];
  ajp->flags.refSeqQualsToNote = row [23];
  ajp->flags.selenocysteineToNote = row [24];

  ajp->flags.pyrrolysineToNote = row [25];
  ajp->flags.extraProductsToNote = row [26];
  ajp->flags.codonRecognizedToNote = row [27];
  ajp->flags.hideSpecificGeneMaps = row [28];
  ajp->flags.forGbRelease = row [29];

  /* the RefSeq test is needed for both flatfile and GBSeq XML output */

  top = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
  refseq = IsSepRefseq (top);

  if (refseq) {

    ajp->flags.srcQualsToNote = FALSE;
    ajp->flags.separateGeneSyns = FALSE;
    ajp->flags.codonRecognizedToNote = FALSE;

    /* selenocysteine always a separate qualifier for RefSeq */

    ajp->flags.selenocysteineToNote = FALSE;
    ajp->flags.pyrrolysineToNote = FALSE;

    /* unapproved qualifiers suppressed for flatfile, okay for GBSeq XML,
       so these two are only touched when no GBSeq is being built */

    if (ajp->gbseq == NULL) {
      ajp->flags.goQualsToNote = FALSE;
      ajp->flags.refSeqQualsToNote = FALSE;
    }
  }

  if (ajp->refseqConventions) {
    ajp->flags.srcQualsToNote = FALSE;
    ajp->flags.separateGeneSyns = FALSE;
    ajp->flags.codonRecognizedToNote = FALSE;
    ajp->flags.goQualsToNote = FALSE;
    ajp->flags.refSeqQualsToNote = FALSE;
    ajp->flags.hideSpecificGeneMaps = FALSE;
  }
}
3863 
/*
 * CheckVersionWithGi
 *
 * Bioseq callback; userdata points at a Boolean that is set to TRUE when
 * bsp has a gi and also a GenBank, EMBL, DDBJ, TPG, TPE or TPD text id
 * whose version is 0.  The Boolean is never cleared here.
 *
 * NULL bsp or userdata is now ignored instead of being dereferenced,
 * matching the guard in IsBspRefseq.
 */
static void CheckVersionWithGi (BioseqPtr bsp, Pointer userdata)

{
  Boolean       hasGi = FALSE;
  BoolPtr       missingVersion;
  SeqIdPtr      sip;
  TextSeqIdPtr  tsip;
  Boolean       zeroVersion = FALSE;

  if (bsp == NULL || userdata == NULL) return;
  missingVersion = (BoolPtr) userdata;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_TPG:
      case SEQID_TPE:
      case SEQID_TPD:
      case SEQID_GENBANK:
      case SEQID_EMBL:
      case SEQID_DDBJ:
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL && tsip->version == 0) {
          zeroVersion = TRUE;
        }
        break;
      case SEQID_GI :
        hasGi = TRUE;
        break;
      default :
        break;
    }
  }

  if (hasGi && zeroVersion) {
    *missingVersion = TRUE;
  }
}
3898 
3899 
/*
 * Accumulator filled by LookForSeqIDs over every Bioseq it visits; each
 * field is only ever set to TRUE there.
 */
typedef struct lookforids {
  Boolean  isG;            /* a SEQID_GENBANK id was seen */
  Boolean  isGED;          /* a GenBank, EMBL or DDBJ id was seen */
  Boolean  isED;           /* an EMBL or DDBJ id was seen */
  Boolean  isNTorNWorNG;   /* a SEQID_OTHER accession starts with NT_, NW_ or NG_ */
  Boolean  isNC;           /* a SEQID_OTHER accession starts with NC_ */
  Boolean  isNZ;           /* a SEQID_OTHER accession starts with NZ_ */
  Boolean  isRefSeq;       /* a SEQID_OTHER id was seen */
  Boolean  isGeneral;      /* a SEQID_GENERAL id not rejected by IsSkippableDbtag was seen */
  Boolean  isNCBIGenomes;  /* such a general id has db "NCBI_GENOMES" */
  Boolean  isTPA;          /* a TPG, TPE or TPD id was seen */
  Boolean  isTPG;          /* a TPG id was seen */
  Boolean  isSP;           /* a SEQID_SWISSPROT id was seen */
  Boolean  isNuc;          /* a nucleotide Bioseq was visited */
  Boolean  isProt;         /* a protein Bioseq was visited */
  Boolean  isFarProt;      /* a protein was ref, or delta and not DeltaLitOnly */
  Boolean  isLocal;        /* a SEQID_LOCAL id was seen */
  Boolean  isNonLocal;     /* an id other than local, SwissProt or a skipped general id was seen */
  Boolean  sourcePubFuse;  /* an id type selected by the second switch in LookForSeqIDs was seen */
} LookForIDs, PNTR LookForIDsPtr;
3920 
/*
 * LookForSeqIDs
 *
 * Bioseq callback; userdata is a LookForIDsPtr.  Records the molecule
 * type of bsp and, for each of its SeqIds, which id families are present
 * and whether the id type sets sourcePubFuse.  Fields are only ever set
 * to TRUE, so results accumulate across all Bioseqs visited.
 *
 * NULL bsp or userdata is now ignored instead of being dereferenced,
 * matching the guard in IsBspRefseq.
 */
static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)

{
  DbtagPtr       dbt;
  LookForIDsPtr  lfip;
  SeqIdPtr       sip;
  TextSeqIdPtr   tsip;

  if (bsp == NULL || userdata == NULL) return;
  lfip = (LookForIDsPtr) userdata;

  if (ISA_na (bsp->mol)) {
    lfip->isNuc = TRUE;
  }
  if (ISA_aa (bsp->mol)) {
    lfip->isProt = TRUE;

    /* a protein that is ref, or delta with non-literal components, is "far" */

    if (bsp->repr == Seq_repr_delta) {
      if (! DeltaLitOnly (bsp)) {
        lfip->isFarProt = TRUE;
      }
    } else if (bsp->repr == Seq_repr_ref) {
      lfip->isFarProt = TRUE;
    }
  }

  for (sip = bsp->id; sip != NULL; sip = sip->next) {

    /* classify the id family */

    switch (sip->choice) {
      case SEQID_GENBANK :
        lfip->isG = TRUE;
        lfip->isGED = TRUE;
        lfip->isNonLocal = TRUE;
        break;
      case SEQID_EMBL :
      case SEQID_DDBJ :
        lfip->isED = TRUE;
        lfip->isGED = TRUE;
        lfip->isNonLocal = TRUE;
        break;
      case SEQID_SWISSPROT :
        /* SwissProt sets neither isLocal nor isNonLocal */
        lfip->isSP = TRUE;
        break;
      case SEQID_TPG :
        lfip->isTPG = TRUE;
        /* and fall through to TPE and TPD */
      case SEQID_TPE :
      case SEQID_TPD :
        lfip->isTPA = TRUE;
        lfip->isNonLocal = TRUE;
        break;
      case SEQID_OTHER :
        lfip->isRefSeq = TRUE;
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL) {
          if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
            lfip->isNC = TRUE;
          } else if (StringNCmp (tsip->accession, "NT_", 3) == 0) {
            lfip->isNTorNWorNG = TRUE;
          } else if (StringNCmp (tsip->accession, "NW_", 3) == 0) {
            lfip->isNTorNWorNG = TRUE;
          } else if (StringNCmp (tsip->accession, "NG_", 3) == 0) {
            lfip->isNTorNWorNG = TRUE;
          } else if (StringNCmp (tsip->accession, "NZ_", 3) == 0) {
            lfip->isNZ = TRUE;
          }
        }
        lfip->isNonLocal = TRUE;
        break;
      case SEQID_GENERAL :
        /* general ids rejected by IsSkippableDbtag leave every flag untouched */
        dbt = (DbtagPtr) sip->data.ptrvalue;
        if (dbt != NULL && !IsSkippableDbtag(dbt)) {
          lfip->isGeneral = TRUE;
          lfip->isNonLocal = TRUE;
          if (StringCmp (dbt->db, "NCBI_GENOMES") == 0) {
            lfip->isNCBIGenomes = TRUE;
          }
        }
        break;
      case SEQID_LOCAL :
        lfip->isLocal = TRUE;
        break;
      default :
        lfip->isNonLocal = TRUE;
        break;
    }

    /* also set policy on sourcePubFuse */

    switch (sip->choice) {
      case SEQID_GIBBSQ :
      case SEQID_GIBBMT :
        lfip->sourcePubFuse = TRUE;
        break;
      case SEQID_EMBL :
      case SEQID_PIR :
      case SEQID_SWISSPROT :
      case SEQID_PATENT :
      case SEQID_DDBJ :
      case SEQID_PRF :
      case SEQID_PDB :
      case SEQID_TPE:
      case SEQID_TPD:
      case SEQID_GPIPE:
        lfip->sourcePubFuse = TRUE;
        break;
      case SEQID_GENBANK :
      case SEQID_TPG:
        /* GenBank / TPG ids fuse only when the accession is 6 characters long */
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL) {
          if (StringLen (tsip->accession) == 6) {
            lfip->sourcePubFuse = TRUE;
          }
        }
        break;
      case SEQID_NOT_SET :
      case SEQID_LOCAL :
      case SEQID_OTHER :
      case SEQID_GENERAL :
        break;
      default :
        break;
    }
  }
}
4040 
/*
 * LookForGEDetc
 *
 * Runs LookForSeqIDs over every Bioseq in topsep (VisitBioseqsInSep) with
 * a zeroed LookForIDs accumulator and copies the results to the caller's
 * Booleans.  *isOnlyLocal is TRUE only when a local id was seen and no
 * non-local id was.
 *
 * Every output pointer is written unconditionally, so none may be NULL.
 */
static void LookForGEDetc (
  SeqEntryPtr topsep,
  BoolPtr isG,
  BoolPtr isGED,
  BoolPtr isED,
  BoolPtr isNTorNWorNG,
  BoolPtr isNC,
  BoolPtr isNZ,
  BoolPtr isRefSeq,
  BoolPtr isGeneral,
  BoolPtr isNCBIGenomes,
  BoolPtr isTPA,
  BoolPtr isTPG,
  BoolPtr isSP,
  BoolPtr isNuc,
  BoolPtr isProt,
  BoolPtr isFarProt,
  BoolPtr isOnlyLocal,
  BoolPtr sourcePubFuse
)

{
  LookForIDs  seen;

  MemSet ((Pointer) &seen, 0, sizeof (LookForIDs));
  VisitBioseqsInSep (topsep, (Pointer) &seen, LookForSeqIDs);

  /* database of origin */

  *isG = seen.isG;
  *isGED = seen.isGED;
  *isED = seen.isED;
  *isTPA = seen.isTPA;
  *isTPG = seen.isTPG;
  *isSP = seen.isSP;
  *isGeneral = seen.isGeneral;
  *isNCBIGenomes = seen.isNCBIGenomes;

  /* RefSeq and its accession prefixes */

  *isRefSeq = seen.isRefSeq;
  *isNC = seen.isNC;
  *isNZ = seen.isNZ;
  *isNTorNWorNG = seen.isNTorNWorNG;

  /* molecule types */

  *isNuc = seen.isNuc;
  *isProt = seen.isProt;
  *isFarProt = seen.isFarProt;

  /* derived and policy values */

  *isOnlyLocal = (Boolean) (seen.isLocal && (! seen.isNonLocal));
  *sourcePubFuse = seen.sourcePubFuse;
}
4089 
MakeGapFeatsBase(BioseqPtr bsp,Pointer userdata,Boolean isSP,Boolean rev_comp)4090 static void MakeGapFeatsBase (
4091   BioseqPtr bsp,
4092   Pointer userdata,
4093   Boolean isSP,
4094   Boolean rev_comp
4095 )
4096 
4097 {
4098   Char             buf [128];
4099   Int4             currpos = 0;
4100   BioseqPtr        fakebsp = NULL;
4101   IntFuzzPtr       fuzz;
4102   ValNodePtr PNTR  gapvnp;
4103   ImpFeatPtr       ifp;
4104   SeqFeatPtr       last = NULL;
4105   SeqLitPtr        litp;
4106   SeqAnnotPtr      sap = NULL;
4107   SeqFeatPtr       sfp;
4108   SeqIdPtr         sip;
4109   SeqLocPtr        slp;
4110   ValNodePtr       vnp;
4111   SeqGapPtr        seq_gap = NULL;
4112   Boolean          gap_is_linked = FALSE;
4113   ValNodePtr       evidvnp = NULL;
4114   Int4             linktype = 0;
4115   Boolean          needs_evidence = FALSE;
4116 
4117   if (bsp == NULL || bsp->repr != Seq_repr_delta) return;
4118   gapvnp = (ValNodePtr PNTR) userdata;
4119   if (gapvnp == NULL) return;
4120   sip = SeqIdFindBest (bsp->id, 0);
4121   if (sip == NULL) return;
4122   /* no longer suppress on far delta contigs */
4123   /* if (! DeltaLitOnly (bsp)) return; */
4124 
4125   for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
4126     if (vnp->choice == 1) {
4127       slp = (SeqLocPtr) vnp->data.ptrvalue;
4128       if (slp == NULL) continue;
4129       currpos += SeqLocLen (slp);
4130     }
4131     if (vnp->choice == 2) {
4132       litp = (SeqLitPtr) vnp->data.ptrvalue;
4133       if (litp == NULL) continue;
4134       if (litp->seq_data == NULL || litp->seq_data_type == Seq_code_gap) {
4135           if (litp->length > 0 || (isSP && litp->length == 0) )  {
4136           seq_gap = (SeqGapPtr)litp->seq_data; /* might be NULL */
4137           if (fakebsp == NULL) {
4138             /* to be freed with MemFree, not BioseqFree */
4139             fakebsp = MemNew (sizeof (Bioseq));
4140             if (fakebsp == NULL) return;
4141             sap = SeqAnnotNew ();
4142             if (sap == NULL) return;
4143             sap->type = 1;
4144             fakebsp->annot = sap;
4145             ValNodeAddPointer (gapvnp, 0, (Pointer) fakebsp);
4146           }
4147           ifp = ImpFeatNew ();
4148           if (ifp == NULL) continue;
4149           ifp->key = StringSave ( "gap" );
4150           sfp = SeqFeatNew ();
4151           if (sfp == NULL) continue;
4152           sfp->data.choice = SEQFEAT_IMP;
4153           sfp->data.value.ptrvalue = (Pointer) ifp;
4154           sfp->idx.subtype = FEATDEF_gap;
4155           if (last != NULL) {
4156             last->next = sfp;
4157           } else {
4158             sap->data = (Pointer) sfp;
4159           }
4160           last = sfp;
4161           fuzz = litp->fuzz;
4162           if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
4163             AddQualifierToFeature (sfp, "estimated_length", "unknown");
4164           } else {
4165             sprintf (buf, "%ld", (long) litp->length);
4166             AddQualifierToFeature (sfp, "estimated_length", buf);
4167           }
4168           if (rev_comp) {
4169             sfp->location = AddIntervalToLocation (
4170                 NULL, sip,
4171                 ( litp->length == 0 ? currpos : currpos + litp->length - 1 ),
4172                 ( litp->length == 0 ? currpos - 1 : currpos ),
4173                 FALSE, FALSE);
4174           } else {
4175             sfp->location = AddIntervalToLocation (
4176                 NULL, sip,
4177                 ( litp->length == 0 ? currpos - 1 : currpos ),
4178                 ( litp->length == 0 ? currpos : currpos + litp->length - 1 ),
4179                 FALSE, FALSE);
4180           }
4181           if( isSP && litp->length == 0 ) {
4182               sfp->comment = StringSave ("Non-consecutive residues");
4183           }
4184           if( seq_gap != NULL ) {
4185               needs_evidence = FALSE;
4186               /* I can't seem to find pound-defines for
4187                  some of these magic numbers below */
4188               gap_is_linked = ( seq_gap->linkage == 1 ); /* linked */
4189 
4190               if (seq_gap->linkage_evidence != NULL) {
4191                 gap_is_linked = TRUE; /* do not rely solely on seq_gap->linkage, which is not always set correctly */
4192               }
4193 
4194               switch( seq_gap->type ) {
4195               case 0:               /* unknown */
4196                   /* no /gap_type label - policy changed at SQD-1801 */
4197                   AddQualifierToFeature(sfp, "gap_type", "unknown" );
4198                   needs_evidence = gap_is_linked;
4199                   break;
4200               case 1:               /* fragment */
4201                   AddQualifierToFeature(sfp, "gap_type", "within scaffold" );
4202                   needs_evidence = TRUE;
4203                   break;
4204               case 2:               /* clone */
4205                   AddQualifierToFeature(sfp, "gap_type",
4206                                         ( gap_is_linked ?
4207                                           "within scaffold" :
4208                                           "between scaffolds" ) );
4209                   needs_evidence = gap_is_linked;
4210                   break;
4211               case 3:               /* short-arm */
4212                   AddQualifierToFeature(sfp, "gap_type", "short_arm" );
4213                   break;
4214               case 4:               /* heterochromatin */
4215                   AddQualifierToFeature(sfp, "gap_type", "heterochromatin" );
4216                   break;
4217               case 5:               /* centromere */
4218                   AddQualifierToFeature(sfp, "gap_type", "centromere" );
4219                   break;
4220               case 6:               /* telomere */
4221                   AddQualifierToFeature(sfp, "gap_type", "telomere");
4222                   break;
4223               case 7:               /* repeat */
4224                   AddQualifierToFeature(sfp, "gap_type",
4225                                         ( gap_is_linked ?
4226                                           "repeat within scaffold" :
4227                                           "repeat between scaffolds" ) );
4228                   needs_evidence = gap_is_linked;
4229                   break;
4230               case 8:               /* contig */
4231                   AddQualifierToFeature(sfp, "gap_type", "between scaffolds" );
4232                   break;
4233               case 9:               /* scaffold */
4234                   AddQualifierToFeature(sfp, "gap_type", "within scaffold" );
4235                   needs_evidence = gap_is_linked;
4236                   break;
4237               case 255:             /* other */
4238                   AddQualifierToFeature(sfp, "gap_type", "other" );
4239                   break;
4240               default:
4241                   sprintf (buf, "(ERROR: UNRECOGNIZED_GAP_TYPE:%ld)", (long)seq_gap->type );
4242                   AddQualifierToFeature(sfp, "gap_type", buf );
4243                   break;
4244               }
4245 
4246               /* Create the /linkage_evidence quals */
4247               if( needs_evidence ) {
4248                   for( evidvnp = seq_gap->linkage_evidence; evidvnp; evidvnp = evidvnp->next ) {
4249                       linktype = ((LinkageEvidencePtr)evidvnp->data.ptrvalue)->type;
4250                       switch( linktype ) {
4251                       case 0: /* paired-ends */
4252                           AddQualifierToFeature(sfp, "linkage_evidence",
4253                                                 "paired-ends" );
4254                           break;
4255                       case 1:         /* align-genus */
4256                           AddQualifierToFeature(sfp, "linkage_evidence",
4257                                                 "align genus" );
4258                           break;
4259                       case 2:       /* align-xgenus */
4260                           AddQualifierToFeature(sfp, "linkage_evidence",
4261                                                 "align xgenus" );
4262                           break;
4263                       case 3:     /* align-trnscpt */
4264                           AddQualifierToFeature(sfp, "linkage_evidence",
4265                                                 "align trnscpt" );
4266                           break;
4267                       case 4:   /* within-clone */
4268                           AddQualifierToFeature(sfp, "linkage_evidence",
4269                                                 "within clone" );
4270                           break;
4271                       case 5: /* clone-contig */
4272                           AddQualifierToFeature(sfp, "linkage_evidence",
4273                                                 "clone contig" );
4274                           break;
4275                       case 6:         /* map */
4276                           AddQualifierToFeature(sfp, "linkage_evidence",
4277                                                 "map" );
4278                           break;
4279                       case 7:       /* strobe */
4280                           AddQualifierToFeature(sfp, "linkage_evidence",
4281                                                 "strobe" );
4282                           break;
4283                       case 8:     /* unspecified */
4284                           AddQualifierToFeature(sfp, "linkage_evidence",
4285                                                 "unspecified" );
4286                           break;
4287                       case 9:     /* pcr */
4288                           AddQualifierToFeature(sfp, "linkage_evidence",
4289                                                 "pcr" );
4290                           break;
4291                       case 255: /* other */
4292                           AddQualifierToFeature(sfp, "linkage_evidence",
4293                                                 "other" );
4294                           break;
4295                       default:
4296                           sprintf( buf, "(UNRECOGNIZED LINKAGE EVIDENCE:%ld)",
4297                                    (long)linktype );
4298                           AddQualifierToFeature(
4299                               sfp, "linkage_evidence",
4300                               buf );
4301                           break;
4302                       }
4303                   }
4304                   /* if no linkage-evidence and needs some, add "unspecified" */
4305                   if( NULL == seq_gap->linkage_evidence ) {
4306                       AddQualifierToFeature( sfp, "linkage_evidence",
4307                                              "unspecified" );
4308                   }
4309               }
4310           }
4311         }
4312       }
4313       currpos += litp->length;
4314     }
4315   }
4316 }
4317 
/*
  MakeSPGapFeats is a Bioseq visitor callback that forwards to
  MakeGapFeatsBase with isSP set and rev_comp clear.
*/

static void MakeSPGapFeats (
  BioseqPtr bsp,
  Pointer userdata
)

{
  MakeGapFeatsBase (bsp, userdata,
                    TRUE,     /* isSP */
                    FALSE);   /* rev_comp */
}
4326 
/*
  MakeRCGapFeats is a Bioseq visitor callback that forwards to
  MakeGapFeatsBase with isSP clear and rev_comp set.
*/

static void MakeRCGapFeats (
  BioseqPtr bsp,
  Pointer userdata
)

{
  MakeGapFeatsBase (bsp, userdata,
                    FALSE,    /* isSP */
                    TRUE);    /* rev_comp */
}
4335 
/*
  MakeGapFeats is a Bioseq visitor callback that forwards to
  MakeGapFeatsBase with both isSP and rev_comp clear.
*/

static void MakeGapFeats (
  BioseqPtr bsp,
  Pointer userdata
)

{
  MakeGapFeatsBase (bsp, userdata,
                    FALSE,    /* isSP */
                    FALSE);   /* rev_comp */
}
4344 
/*
  FeatPolicy records which feature fetch policy, if any, was requested by a
  "FeatureFetchPolicy" user object descriptor.  LookFarFeatFetchPolicy only
  ever sets these flags to TRUE; the caller clears them before scanning.
*/

typedef struct featpolicy {
  Boolean  forceOnlyNearFeats;  /* a Policy field of "OnlyNearFeatures" was seen */
  Boolean  forceAllowFarFeats;  /* a Policy field of "AllowFarFeatures" was seen */
} FeatPolicy, PNTR FeatPolicyPtr;
4349 
/*
  LookFarFeatFetchPolicy is a descriptor visitor callback.  For a user
  descriptor whose user object type is "FeatureFetchPolicy", it reads each
  string-valued field labeled "Policy" and records the request in the
  FeatPolicy structure passed through userdata:

    "OnlyNearFeatures"  sets forceOnlyNearFeats
    "AllowFarFeatures"  sets forceAllowFarFeats

  The policy text is compared without regard to case; the type and label
  strings are compared exactly.  Flags are only ever set, never cleared.
  Any other descriptor, a NULL userdata, or an unrecognized policy value
  leaves the structure untouched.
*/

static void LookFarFeatFetchPolicy (
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  FeatPolicyPtr  fpP;
  ObjectIdPtr    oip;
  CharPtr        str;
  UserFieldPtr   ufp;
  UserObjectPtr  uop;

  if (sdp == NULL || sdp->choice != Seq_descr_user) return;
  fpP = (FeatPolicyPtr) userdata;
  if (fpP == NULL) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop == NULL) return;
  oip = uop->type;
  if (oip == NULL) return;
  if (StringCmp (oip->str, "FeatureFetchPolicy") != 0) return;

  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
    oip = ufp->label;
    if (oip == NULL) continue;
    if (StringCmp (oip->str, "Policy") != 0) continue;

    /* data is a union: only a string field (choice 1) holds a CharPtr, so
       do not read ptrvalue as text for any other kind of field */
    if (ufp->choice != 1) continue;
    str = (CharPtr) ufp->data.ptrvalue;
    if (str == NULL) continue;

    if (StringICmp (str, "OnlyNearFeatures") == 0) {
      fpP->forceOnlyNearFeats = TRUE;
    } else if (StringICmp (str, "AllowFarFeatures") == 0) {
      fpP->forceAllowFarFeats = TRUE;
    }
  }
}
4385 
/*
  FindMultiIntervalGenes is a feature visitor callback.  userdata points to a
  Boolean that is set to TRUE when a gene feature has a location whose choice
  is packed-int, packed-pnt, mix or equiv.  The Boolean is never cleared here.
*/

static void FindMultiIntervalGenes (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  BoolPtr    foundP = (BoolPtr) userdata;
  SeqLocPtr  loc;

  /* only gene features are of interest */
  if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
  if (foundP == NULL) return;

  loc = sfp->location;
  if (loc == NULL) return;

  if (loc->choice == SEQLOC_PACKED_INT ||
      loc->choice == SEQLOC_PACKED_PNT ||
      loc->choice == SEQLOC_MIX ||
      loc->choice == SEQLOC_EQUIV) {
    *foundP = TRUE;
  }
}
4412 
/*
  FindSegmentedBioseqs is a Bioseq visitor callback.  userdata points to a
  Boolean that is set to TRUE when bsp has a segmented representation.
  The Boolean is never cleared here.
*/

static void FindSegmentedBioseqs (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BoolPtr  foundP = (BoolPtr) userdata;

  if (foundP == NULL) return;

  if (bsp != NULL && bsp->repr == Seq_repr_seg) {
    *foundP = TRUE;
  }
}
4426 
/*
  FindSmallGenomeSets is a BioseqSet visitor callback.  userdata points to a
  Boolean that is set to TRUE when bssp has the small genome set class.
  The Boolean is never cleared here.
*/

static void FindSmallGenomeSets (
  BioseqSetPtr bssp,
  Pointer userdata
)

{
  BoolPtr  foundP = (BoolPtr) userdata;

  if (foundP == NULL) return;

  if (bssp != NULL && bssp->_class == BioseqseqSet_class_small_genome_set) {
    *foundP = TRUE;
  }
}
4440 
/*
  NULL-terminated list of markup and script-scheme fragments.
  NOTE(review): presumably used to screen text for unsafe HTML before it is
  written to HTML output - the use site is outside this section, confirm.
*/

static CharPtr bad_html_strings [] = {
  "<script", "<object", "<applet", "<embed", "<form", "javascript:", "vbscript:", NULL
};
4444 
/*
  XHTML 1.0 Transitional page prologue: DOCTYPE, html and head elements with
  the title "GenBank entry", the opening body tag and a horizontal rule.
  The meta line has no trailing \n, so the meta and title elements are
  emitted on the same output line.
  NOTE(review): presumably the default header for HTML output when the caller
  supplies none - the use site is outside this section, confirm.
*/

static CharPtr defHead = "\
<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\
    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n\
<html lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n\
<head>\n\
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=us-ascii\" />\
<title>GenBank entry</title>\n\
</head>\n\
<body>\n\
<hr />";
4455 
/*
  Page epilogue matching defHead: a horizontal rule followed by the closing
  body and html tags.
  NOTE(review): presumably the default trailer for HTML output when the caller
  supplies none - the use site is outside this section, confirm.
*/

static CharPtr defTail = "\
<hr />\n\
</body>\n\
</html>\n";
4460 
/*
  Masks that group related option bits.  asn2gnbk_setup_ex ANDs a flags word
  with a mask and compares the result for equality with one member, so an
  option only takes effect when it is the sole bit of its group that is set.
*/

/* far translation choices: show far, translate if no product, always translate */
#define FAR_TRANS_MASK (SHOW_FAR_TRANSLATION | TRANSLATE_IF_NO_PRODUCT | ALWAYS_TRANSLATE_CDS)
/* feature fetch policy choices */
#define FEAT_FETCH_MASK (ONLY_NEAR_FEATURES | FAR_FEATURES_SUPPRESS | NEAR_FEATURES_SUPPRESS | FORCE_ALLOW_FAR_FEATS)
/* output flavor choices: HTML flatfile, XML GBSeq, ASN.1 GBSeq */
#define HTML_XML_ASN_MASK (CREATE_HTML_FLATFILE | CREATE_XML_GBSEQ_FILE | CREATE_ASN_GBSEQ_FILE)
/* publication filtering choices; not referenced within this section */
#define PUBLICATION_MASK (HIDE_GENE_RIFS | ONLY_GENE_RIFS | ONLY_REVIEW_PUBS | NEWEST_PUBS | OLDEST_PUBS | HIDE_ALL_PUBS)
4465 
asn2gnbk_setup_ex(BioseqPtr bsp,BioseqSetPtr bssp,SeqLocPtr slp,FmtType format,ModType mode,StlType style,FlgType flags,LckType locks,CstType custom,XtraPtr extra,Boolean stream,FILE * fp,AsnIoPtr aip,AsnTypePtr atp)4466 static Asn2gbJobPtr asn2gnbk_setup_ex (
4467   BioseqPtr bsp,
4468   BioseqSetPtr bssp,
4469   SeqLocPtr slp,
4470   FmtType format,
4471   ModType mode,
4472   StlType style,
4473   FlgType flags,
4474   LckType locks,
4475   CstType custom,
4476   XtraPtr extra,
4477   Boolean stream,
4478   FILE *fp,
4479   AsnIoPtr aip,
4480   AsnTypePtr atp
4481 )
4482 
4483 {
4484   Asn2gbFormat     af;
4485   IntAsn2gbJobPtr  ajp = NULL;
4486   Asn2gbSectPtr    asp;
4487   Asn2gbWork       aw;
4488   BaseBlockPtr     bbp;
4489   BlockMask        bkmask = (BlockMask) 0;
4490   BaseBlockPtr     PNTR blockArray;
4491   Uint2            eID = 0;
4492   Uint2            entityID = 0;
4493   Uint2            item_type = 0;
4494   Uint4            item_id = 0;
4495   CharPtr          ffhead = NULL;
4496   CharPtr          fftail = NULL;
4497   Asn2gbWriteFunc  ffwrite = NULL;
4498   FeatPolicy       featpolicy;
4499   ValNodePtr       gapvnp = NULL;
4500   GBSeqPtr         gbseq = NULL;
4501   BioseqPtr        gbsp;
4502   SeqAnnotPtr      gsap;
4503   SeqFeatPtr       gsfp;
4504   Int4             i;
4505   IndxPtr          index = NULL;
4506   Boolean          isFarProt;
4507   Boolean          isG;
4508   Boolean          isGED;
4509   Boolean          isED;
4510   Boolean          isGeneral;
4511   Boolean          isNCBIGenomes;
4512   Boolean          isNTorNWorNG;
4513   Boolean          isNC;
4514   Boolean          isNuc;
4515   Boolean          isNZ;
4516   Boolean          isOnlyLocal;
4517   Boolean          isProt;
4518   Boolean          isRefSeq;
4519   Boolean          isSP;
4520   Boolean          isTPA;
4521   Boolean          isTPG;
4522   Int4             j;
4523   Int4             k;
4524   SeqLocPtr        loc = NULL;
4525   Boolean          lockFarComp;
4526   Boolean          lockFarLocs;
4527   Boolean          lockFarProd;
4528   Boolean          lookupFarComp;
4529   Boolean          lookupFarHist;
4530   Boolean          lookupFarInf;
4531   Boolean          lookupFarLocs;
4532   Boolean          lookupFarOthers;
4533   Boolean          lookupFarProd;
4534   ValNodePtr       manygaps = NULL;
4535   Boolean          missingVersion;
4536   Boolean          multiIntervalGenes = FALSE;
4537   BIG_ID           nextGi = 0;
4538   Boolean          noLeft;
4539   Boolean          noRight;
4540   Int4             numBlocks;
4541   Int4             numGaps;
4542   Int4             numSections;
4543   SeqEntryPtr      oldscope;
4544   ObjMgrDataPtr    omdp;
4545   Int4             numParagraphs;
4546   BaseBlockPtr     PNTR paragraphArray;
4547   BaseBlockPtr     PNTR paragraphByIDs;
4548   BioseqPtr        parent = NULL;
4549   BIG_ID           prevGi = 0;
4550   Int2             q;
4551   Boolean          reindex = TRUE;
4552   Pointer          remotedata = NULL;
4553   Asn2gbFreeFunc   remotefree = NULL;
4554   Asn2gbLockFunc   remotelock = NULL;
4555   ValNodePtr       remotevnp = NULL;
4556   Int2             sat = 0;
4557   Int4             sat_key = 0;
4558   SubmitBlockPtr   sbp;
4559   Asn2gbSectPtr    PNTR sectionArray;
4560   Boolean          segmentedBioseqs = FALSE;
4561   SeqEntryPtr      sep;
4562   Boolean          seqspans = FALSE;
4563   Boolean          smallGenomeSet = FALSE;
4564   SeqIntPtr        sintp;
4565   SeqIdPtr         sip;
4566   Boolean          skipMrnas = FALSE;
4567   Boolean          skipProts = FALSE;
4568   Boolean          sourcePubFuse;
4569   SeqSubmitPtr     ssp;
4570   BioseqSetPtr     topbssp;
4571   Pointer          userdata = NULL;
4572   ValNodePtr       vnp;
4573   Boolean          was_slp = FALSE;
4574   Boolean          rev_comp = FALSE;
4575   Boolean          is_html = FALSE;
4576 
4577   if (format == 0) {
4578     format = GENBANK_FMT;
4579   }
4580   if (mode == 0) {
4581     mode = SEQUIN_MODE;
4582   }
4583   if (style == 0) {
4584     style = NORMAL_STYLE;
4585   }
4586 
4587   if (extra != NULL) {
4588     ffwrite = extra->ffwrite;
4589     ffhead = extra->ffhead;
4590     fftail = extra->fftail;
4591     index = extra->index;
4592     gbseq = extra->gbseq;
4593     userdata = extra->userdata;
4594     remotelock = extra->remotelock;
4595     remotefree = extra->remotefree;
4596     remotedata = extra->remotedata;
4597     prevGi = extra->prevGi;
4598     nextGi = extra->nextGi;
4599     bkmask = extra->bkmask;
4600     reindex = extra->reindex;
4601     seqspans = extra->seqspans;
4602     sat = extra->sat;
4603     sat_key = extra->sat_key;
4604   }
4605 
4606   if ((custom & FORCE_SEQ_SPANS) != 0) {
4607     seqspans = TRUE;
4608   }
4609 
4610   if (slp != NULL) {
4611     sip = SeqLocId (slp);
4612     bsp = BioseqFind (sip);
4613     if (bsp == NULL) {
4614       bsp = BioseqFindFromSeqLoc (slp);
4615     }
4616     if (bsp == NULL) return NULL;
4617 
4618     /* if location is on part of segmented set, need to map to segmented bioseq */
4619 
4620     if (slp->choice == SEQLOC_WHOLE) {
4621       /* Entrez server may pass in whole location on part instead of part bioseq */
4622       slp = NULL;
4623     } else if (sip == NULL) {
4624       parent = bsp;
4625     } else {
4626 
4627       /* SeqMgrGetParentOfPart depends upon feature indexing */
4628 
4629       eID = ObjMgrGetEntityIDForPointer (bsp);
4630       if (SeqMgrFeaturesAreIndexed (eID) == 0) {
4631         SeqMgrIndexFeatures (eID, NULL);
4632       }
4633 
4634       parent = SeqMgrGetParentOfPart (bsp, NULL);
4635     }
4636     if (parent != NULL) {
4637       CheckSeqLocForPartial (slp, &noLeft, &noRight);
4638       loc = SeqLocMergeEx (parent, slp, NULL, FALSE, TRUE, FALSE, FALSE);
4639       slp = loc;
4640       FreeAllFuzz (slp);
4641       SetSeqLocPartial (slp, noLeft, noRight);
4642     }
4643 
4644     /* if location is whole, generate normal bioseq report */
4645 
4646     if (slp == NULL) {
4647       /* reality check in case SeqLocMergeEx fails and sets slp to NULL, or if was cleared above */
4648     } else if (slp->choice == SEQLOC_WHOLE) {
4649       slp = NULL;
4650       SeqLocFree (loc);
4651       loc = NULL;
4652     } else if (slp->choice == SEQLOC_INT) {
4653       sintp = (SeqIntPtr) slp->data.ptrvalue;
4654       if (sintp != NULL &&
4655           sintp->from == 0 &&
4656           sintp->to == bsp->length - 1 &&
4657           sintp->strand != Seq_strand_minus) {
4658         slp = NULL;
4659         SeqLocFree (loc);
4660         loc = NULL;
4661       } else if (sintp != NULL &&
4662           sintp->from == 0 &&
4663           sintp->to == bsp->length - 1 &&
4664           sintp->strand == Seq_strand_minus) {
4665         rev_comp = TRUE;
4666       }
4667     }
4668   }
4669 
4670   if (slp != NULL && (! rev_comp)) {
4671     /* suppress gaps if using sub-location, but show gaps if location was whole or interval 0..length-1 on either strand */
4672     was_slp = TRUE;
4673   }
4674 
4675   if (bsp != NULL) {
4676     bssp = NULL;
4677     entityID = ObjMgrGetEntityIDForPointer (bsp);
4678     item_type = OBJ_BIOSEQ;
4679     item_id = bsp->idx.itemID;
4680   } else if (bssp != NULL) {
4681     entityID = ObjMgrGetEntityIDForPointer (bssp);
4682     item_type = OBJ_BIOSEQSET;
4683     item_id = bssp->idx.itemID;
4684 
4685     if (format == FTABLE_FMT) {
4686       skipProts = TRUE;
4687       skipMrnas = TRUE;
4688     }
4689   }
4690   if ((Boolean) ((custom & SHOW_PROT_FTABLE) != 0)) {
4691     skipProts = FALSE;
4692     skipMrnas = FALSE;
4693   }
4694 
4695   if (entityID == 0) return NULL;
4696 
4697   sep = GetTopSeqEntryForEntityID (entityID);
4698 
4699   LookForGEDetc (sep, &isG, &isGED, &isED, &isNTorNWorNG, &isNC, &isNZ, &isRefSeq,
4700                  &isGeneral, &isNCBIGenomes, &isTPA, &isTPG, &isSP, &isNuc,
4701                  &isProt, &isFarProt, &isOnlyLocal, &sourcePubFuse);
4702 
4703   if (mode == RELEASE_MODE) {
4704     missingVersion = FALSE;
4705     VisitBioseqsInSep (sep, (Pointer) &missingVersion, CheckVersionWithGi);
4706     if (missingVersion) return NULL;
4707   }
4708 
4709   ajp = (IntAsn2gbJobPtr) MemNew (sizeof (IntAsn2gbJob));
4710   if (ajp == NULL) return NULL;
4711 
4712   featpolicy.forceOnlyNearFeats = FALSE;
4713   featpolicy.forceAllowFarFeats = FALSE;
4714   VisitDescriptorsInSep (sep, (Pointer) &featpolicy, LookFarFeatFetchPolicy);
4715 
4716   gapvnp = NULL;
4717   manygaps = NULL;
4718   remotevnp = NULL;
4719 
4720   if (format != FTABLE_FMT && (! was_slp)) {
4721     if (isGED /* was isG */ || isTPG || isOnlyLocal || isRefSeq || isSP || (isGeneral && (! isGED))) {
4722       if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) {
4723         if (isSP) {
4724           VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeSPGapFeats);
4725         } else if (rev_comp) {
4726           VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeRCGapFeats);
4727         } else {
4728           VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeGapFeats);
4729         }
4730       }
4731     }
4732   }
4733 
4734   numGaps = 0;
4735   if (gapvnp != NULL) {
4736     gbsp = (BioseqPtr) gapvnp->data.ptrvalue;
4737     if (gbsp != NULL) {
4738       gsap = gbsp->annot;
4739       if (gsap != NULL && gsap->type == 1) {
4740         for (gsfp = (SeqFeatPtr) gsap->data; gsfp != NULL; gsfp = gsfp->next) {
4741           numGaps++;
4742         }
4743       }
4744     }
4745   }
4746   if (isED) {
4747     if (numGaps > 0) {
4748       manygaps = gapvnp;
4749       gapvnp = NULL;
4750     }
4751   } else {
4752     if (numGaps > 1000) {
4753       manygaps = gapvnp;
4754       gapvnp = NULL;
4755     }
4756   }
4757 
4758   ajp->gapvnp = gapvnp;
4759   ajp->manygaps = manygaps;
4760 
4761   ajp->remotelock = remotelock;
4762   ajp->remotefree = remotefree;
4763   ajp->remotedata = remotedata;
4764   if (remotelock != NULL && bsp != NULL) {
4765     sip = SeqIdFindBest (bsp->id, SEQID_GI);
4766     if (sip != NULL) {
4767       remotevnp = remotelock (sip, remotedata);
4768     }
4769   }
4770 
4771   ajp->remotevnp = remotevnp;
4772 
4773   if (gapvnp != NULL || remotevnp != NULL) {
4774     /* if both gapvnp and remotevnp, link together so everything is indexed */
4775     if (gapvnp != NULL) {
4776       ValNodeLink(&gapvnp, remotevnp);
4777     } else {
4778       gapvnp = remotevnp;
4779     }
4780     SeqMgrClearFeatureIndexes (entityID, NULL);
4781     SeqMgrIndexFeaturesExEx (entityID, NULL, FALSE, FALSE, gapvnp);
4782     gapvnp->next = NULL;
4783   }
4784 
4785   if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
4786     SeqMgrIndexFeatures (entityID, NULL);
4787   }
4788 
4789   is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
4790   if (is_html) {
4791     InitWWW(ajp);
4792   }
4793 
4794   ajp->ajp.entityID = entityID;
4795   ajp->ajp.bsp = bsp;
4796   ajp->ajp.bssp = bssp;
4797 
4798   if (loc != NULL) {
4799     ajp->ajp.slp = loc;
4800   } else if (slp != NULL) {
4801     ajp->ajp.slp = AsnIoMemCopy ((Pointer) slp,
4802                                  (AsnReadFunc) SeqLocAsnRead,
4803                                  (AsnWriteFunc) SeqLocAsnWrite);
4804   } else {
4805     ajp->ajp.slp = NULL;
4806   }
4807 
4808   /* reality check on interval sublocation */
4809 
4810   slp = ajp->ajp.slp;
4811   if (slp != NULL && slp->choice == SEQLOC_INT) {
4812     sintp = (SeqIntPtr) slp->data.ptrvalue;
4813     if (sintp != NULL) {
4814       bsp = BioseqFind (sintp->id);
4815       if (bsp != NULL) {
4816         if (sintp->from < 0) {
4817           sintp->from = 0;
4818         } else if (sintp->from > bsp->length - 1) {
4819           sintp->from = bsp->length - 1;
4820         }
4821         if (sintp->to < 0) {
4822           sintp->to = 0;
4823         } else if (sintp->to > bsp->length - 1) {
4824           sintp->to = bsp->length - 1;
4825         }
4826       }
4827     }
4828   }
4829 
4830   /* if location specified, other than full reverse complement, normal defaults to master style */
4831 
4832   if (ajp->ajp.slp != NULL && style == NORMAL_STYLE && (! rev_comp)) {
4833     style = MASTER_STYLE;
4834   }
4835 
4836   ajp->format = format;
4837   ajp->mode = mode; /* for showing new qualifiers before quarantine ends */
4838 
4839   ajp->index = index;
4840   ajp->gbseq = gbseq; /* gbseq output can relax srcQualsToNote or goQualsToNote strictness */
4841   if (bkmask == 0) {
4842     bkmask = (BlockMask) (0xFFFFFFFF - FEAT_STATS_MASK - REF_STATS_MASK);
4843   }
4844   ajp->bkmask = bkmask;
4845   ajp->reindex = reindex;
4846   ajp->seqspans = seqspans;
4847   ajp->sat = sat;
4848   ajp->sat_key = sat_key;
4849   ajp->aip = aip;
4850   ajp->atp = atp;
4851 
4852   ajp->refseqConventions = (Boolean) ((flags & REFSEQ_CONVENTIONS) != 0);
4853 
4854   SetFlagsFromMode (ajp, mode);
4855 
4856   lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
4857   lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
4858   lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
4859 
4860   if (lockFarComp || lockFarLocs || lockFarProd) {
4861 
4862     /* lock all bioseqs in advance, including remote genome components */
4863 
4864     if (ajp->ajp.slp != NULL && lockFarComp) {
4865       ajp->lockedBspList = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, ajp->ajp.slp);
4866     } else {
4867       ajp->lockedBspList = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
4868     }
4869   }
4870 
4871   lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
4872   lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
4873   lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
4874   lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
4875   lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0);
4876   lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0);
4877 
4878   if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) {
4879 
    /* look up all far SeqIDs in advance */
4881 
4882     LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers);
4883   }
4884 
4885   ajp->showFarTransl = (Boolean) ((flags & FAR_TRANS_MASK) == SHOW_FAR_TRANSLATION);
4886   ajp->transIfNoProd = (Boolean) ((flags & FAR_TRANS_MASK) == TRANSLATE_IF_NO_PRODUCT);
4887   ajp->alwaysTranslCds = (Boolean) ((flags & FAR_TRANS_MASK) == ALWAYS_TRANSLATE_CDS);
4888   if (ajp->transIfNoProd || ajp->alwaysTranslCds) {
4889     ajp->showFarTransl = TRUE;
4890   }
4891 
4892   ajp->masterStyle = (Boolean) (style == MASTER_STYLE);
4893 
4894   ajp->showTranscript = (Boolean) ((flags & SHOW_TRANCRIPTION) != 0);
4895   ajp->showPeptide = (Boolean) ((flags & SHOW_PEPTIDE) != 0);
4896 
4897   if (stream && (format == GENBANK_FMT || format == GENPEPT_FMT)) {
4898     ajp->specialGapFormat = (Boolean) ((flags & SPECIAL_GAP_DISPLAY) != 0);
4899     if (is_html && mode == ENTREZ_MODE) {
4900       ajp->specialGapFormat = TRUE;
4901     }
4902     if ((custom & EXPANDED_GAP_DISPLAY) != 0) {
4903       ajp->specialGapFormat = FALSE;
4904     }
4905   } else {
4906     ajp->specialGapFormat = FALSE;
4907   }
4908   ajp->seqGapCurrLen = 0;
4909 
4910   ajp->relaxedMapping = (Boolean) ((flags & RELAXED_MAPPING) != 0);
4911   ajp->gpipdDeflines = (Boolean) ((flags & GPIPE_DEFLINES) != 0);
4912   ajp->hideProteinID = (Boolean) ((flags & HIDE_PROTEIN_ID) != 0);
4913 
4914   ajp->produceInsdSeq = (Boolean) (((flags & PRODUCE_OLD_GBSEQ) == 0) && ((custom & OLD_GBSEQ_XML) == 0));
4915   ajp->oldXmlPolicy = (Boolean) ((custom & NEW_XML_POLICY) == 0);
4916 
4917   ajp->gihead = NULL;
4918   ajp->gitail = NULL;
4919 
4920   ajp->hideGoTerms = (Boolean) ((custom & HIDE_GO_TERMS) != 0);
4921   ajp->hideTranslation = (Boolean) ((custom & HIDE_TRANSLATION) != 0);
4922 
4923   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
4924     ajp->newSourceOrg = TRUE;
4925   }
4926 
4927   VisitFeaturesInSep (sep, (Pointer) &multiIntervalGenes, FindMultiIntervalGenes);
4928   ajp->multiIntervalGenes = multiIntervalGenes;
4929   VisitBioseqsInSep (sep, (Pointer) &segmentedBioseqs, FindSegmentedBioseqs);
4930   ajp->segmentedBioseqs = segmentedBioseqs;
4931   VisitSetsInSep (sep, (Pointer) &smallGenomeSet, FindSmallGenomeSets);
4932   ajp->smallGenomeSet = smallGenomeSet;
4933 
4934   ajp->relModeError = FALSE;
4935   ajp->skipProts = skipProts;
4936   ajp->skipMrnas = skipMrnas;
4937 
4938   MemSet ((Pointer) (&aw), 0, sizeof (Asn2gbWork));
4939   aw.ajp = ajp;
4940   aw.entityID = entityID;
4941 
4942   aw.sectionList = NULL;
4943   aw.lastsection = NULL;
4944 
4945   aw.currsection = 0;
4946   aw.showAllFeats = FALSE;
4947 
4948   aw.showconfeats = (Boolean) ((flags & SHOW_CONTIG_FEATURES) != 0);
4949   aw.showconsource = (Boolean) ((flags & SHOW_CONTIG_SOURCES) != 0);
4950 
4951   aw.format = format;
4952   aw.mode = mode;
4953   aw.style = style;
4954 
4955   /* sectionCount used for hyperlinks */
4956 
4957   aw.sectionCount = 0;
4958   aw.sectionMax = 0;
4959   aw.gilistpos = NULL;
4960 
4961   aw.currGi = 0;
4962   aw.prevGi = prevGi;
4963   aw.nextGi = nextGi;
4964   aw.currAccVer [0] = '\0';
4965 
4966   /* internal format pointer if writing at time of creation */
4967 
4968   if (stream) {
4969     MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
4970     af.ajp = ajp;
4971     af.asp = NULL;
4972     af.qvp = NULL;
4973     af.format = format;
4974     af.ffwrite = ffwrite;
4975     af.userdata = userdata;
4976     af.fp = fp;
4977     af.aip = aip;
4978     af.atp = atp;
4979 
4980     aw.afp = &af;
4981   }
4982 
4983   /* special types of records override feature fetching and contig display parameters */
4984 
4985   if (mode == ENTREZ_MODE) {
4986     if (! aw.showconfeats) {
4987       aw.smartconfeats = TRUE;  /* features suppressed if CONTIG style and length > 1 MB */
4988       aw.showconfeats = FALSE;
4989       aw.showconsource = FALSE;
4990     }
4991   }
4992 
4993   aw.onlyNearFeats = FALSE;
4994   aw.farFeatsSuppress = FALSE;
4995   aw.nearFeatsSuppress = FALSE;
4996 
4997   if (featpolicy.forceAllowFarFeats) {
4998 
4999     /* do not set other flags */
5000 
5001   } else if ((Boolean) ((flags & FEAT_FETCH_MASK) == FORCE_ALLOW_FAR_FEATS)) {
5002 
5003     /* do not set other flags */
5004 
5005   } else if (featpolicy.forceOnlyNearFeats) {
5006 
5007     aw.onlyNearFeats = TRUE;
5008 
5009   } else if (isNC) {
5010 
5011     if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
5012       aw.onlyNearFeats = TRUE;
5013     } else if (featpolicy.forceOnlyNearFeats) {
5014       aw.onlyNearFeats = TRUE;
5015     } else {
5016       aw.nearFeatsSuppress = TRUE;
5017     }
5018 
5019   } else if (isNTorNWorNG || isTPA) {
5020 
5021     aw.onlyNearFeats = TRUE;
5022 
5023   } else if (isNZ) {
5024 
5025     aw.onlyNearFeats = TRUE;
5026 
5027   } else if (format == GENPEPT_FMT && isFarProt ) {
5028 
5029     aw.onlyNearFeats = TRUE;
5030 
5031   } else if (isGED) {
5032 
5033     if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
5034       aw.onlyNearFeats = TRUE;
5035     } else if (featpolicy.forceOnlyNearFeats) {
5036       aw.onlyNearFeats = TRUE;
5037     } else {
5038       aw.nearFeatsSuppress = TRUE;
5039     }
5040 
5041   } else {
5042 
5043     aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
5044     aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
5045     aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
5046   }
5047 
5048   if (isGED || isRefSeq) {
5049     ajp->showFarTransl = TRUE;
5050   }
5051 
5052   /* continue setting flags */
5053 
5054   aw.showFeatStats = (Boolean) ((custom & SHOW_FEATURE_STATS) != 0);
5055   aw.showRefStats = (Boolean) ((custom & SHOW_REFERENCE_STATS) != 0);
5056   aw.hideFeatures = (Boolean) ((custom & HIDE_FEATURES) != 0);
5057 
5058   aw.hideImpFeats = (Boolean) ((custom & HIDE_IMP_FEATS) != 0);
5059   aw.hideVariations = (Boolean) ((custom & HIDE_VARS_AND_REPT_REGNS) != 0);
5060   aw.hideRepeatRegions = (Boolean) ((custom & HIDE_VARS_AND_REPT_REGNS) != 0);
5061   aw.hideSitesBondsRegions = (Boolean) ((custom & HIDE_SITES_BONDS_REGIONS) != 0);
5062   aw.hideCddFeats = (Boolean) ((custom & HIDE_CDD_FEATS) != 0);
5063   aw.hideCdsProdFeats = (Boolean) ((custom & HIDE_CDS_PROD_FEATS) != 0);
5064 
5065   ajp->hideEvidence = (Boolean) ((custom & HIDE_EVIDENCE_QUALS) != 0);
5066 
5067   aw.hideGeneRIFs = (Boolean) ((custom & PUBLICATION_MASK) == HIDE_GENE_RIFS);
5068   aw.onlyGeneRIFs = (Boolean) ((custom & PUBLICATION_MASK) == ONLY_GENE_RIFS);
5069   aw.onlyReviewPubs = (Boolean) ((custom & PUBLICATION_MASK) == ONLY_REVIEW_PUBS);
5070   aw.newestPubs = (Boolean) ((custom & PUBLICATION_MASK) == NEWEST_PUBS);
5071   aw.oldestPubs = (Boolean) ((custom & PUBLICATION_MASK) == OLDEST_PUBS);
5072   aw.hidePubs = (Boolean) ((custom & PUBLICATION_MASK) == HIDE_ALL_PUBS);
5073 
5074   aw.showFtableRefs = (Boolean) ((custom & SHOW_FTABLE_REFS) != 0);
5075   aw.hideSources = (Boolean) ((custom & HIDE_SOURCE_FEATS) != 0);
5076   aw.hideGaps = (Boolean) ((custom & HIDE_GAP_FEATS) != 0);
5077   aw.hideSequence = (Boolean) ((custom & HIDE_SEQUENCE) != 0);
5078 
5079   aw.isGPS = FALSE;
5080   if (sep != NULL && IS_Bioseq_set (sep)) {
5081     topbssp = (BioseqSetPtr) sep->data.ptrvalue;
5082     if (topbssp != NULL && topbssp->_class == BioseqseqSet_class_gen_prod_set) {
5083       aw.isGPS = TRUE;
5084       aw.copyGpsCdsUp = (Boolean) ((flags & COPY_GPS_CDS_UP) != 0);
5085       aw.copyGpsGeneDown = (Boolean) ((flags & COPY_GPS_GENE_DOWN) != 0);
5086     }
5087   }
5088   aw.isNCBIGenomes = isNCBIGenomes;
5089   aw.isRefSeq = isRefSeq;
5090 
5091   aw.showContigAndSeq = (Boolean) ((flags & SHOW_CONTIG_AND_SEQ) != 0);
5092   /*
5093   if (style != MASTER_STYLE && style != SEGMENT_STYLE) {
5094     aw.showContigAndSeq = FALSE;
5095   }
5096   */
5097 
5098   aw.newLocusLine = TRUE;
5099   aw.showBaseCount = FALSE;
5100 
5101   if ((Boolean) ((flags & DDBJ_VARIANT_FORMAT) != 0)) {
5102     aw.citSubsFirst = TRUE;
5103     aw.hideGeneFeats = TRUE;
5104     aw.newLocusLine = FALSE;
5105     aw.showBaseCount = TRUE;
5106     ajp->newSourceOrg = FALSE;
5107   }
5108   if (mode == SEQUIN_MODE || mode == DUMP_MODE) {
5109     aw.showBaseCount = TRUE;
5110   }
5111   aw.forcePrimaryBlock = (Boolean) ((flags & FORCE_PRIMARY_BLOCK) != 0);
5112 
5113   aw.localFeatCount = VisitFeaturesInSep (sep, NULL, NULL);
5114 
5115   aw.sourcePubFuse = sourcePubFuse;
5116 
5117   aw.hup = FALSE;
5118   aw.ssp = NULL;
5119 
5120   aw.failed = FALSE;
5121 
5122   omdp = ObjMgrGetData (entityID);
5123   if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
5124     ssp = (SeqSubmitPtr) omdp->dataptr;
5125     if (ssp != NULL && ssp->datatype == 1) {
5126       aw.ssp = ssp;
5127       sbp = ssp->sub;
5128       if (sbp != NULL) {
5129         aw.hup = sbp->hup;
5130       }
5131     }
5132   }
5133 
5134   ajp->hideGI = (Boolean) ((flags & HIDE_GI_NUMBERS) != 0);
5135   ajp->bad_html_fsa = TextFsaNew ();
5136 
5137   for (q = 0; bad_html_strings [q] != NULL; q++) {
5138     TextFsaAdd (ajp->bad_html_fsa, bad_html_strings [q]);
5139   }
5140 
5141   InitUrlAnchorFSA ();
5142 
5143   oldscope = SeqEntrySetScope (sep);
5144 
5145   if (stream) {
5146     /* send optional head string */
5147 
5148     is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
5149     if (ffhead == NULL && is_html) {
5150       ffhead = defHead;
5151     }
5152     if (ffhead != NULL) {
5153       if (fp != NULL) {
5154         fprintf (fp, "%s", ffhead);
5155       }
5156     }
5157     if (ffwrite != NULL) {
5158       ffwrite (ffhead, userdata, HEAD_BLOCK, entityID, item_type, item_id, 0, 0);
5159     }
5160     if (is_html) {
5161       DoQuickLinkFormat (aw.afp, "<div class=\"sequence\">\n");
5162     }
5163   }
5164 
5165   /* if Web Entrez, set awp->sectionMax to decide when Next hyperlink is needed */
5166 
5167   if (is_html && mode == ENTREZ_MODE && stream &&
5168       (format == GENBANK_FMT || format == GENPEPT_FMT)) {
5169      /* add dummy node as prev id for first section */
5170      ajp->gihead = ValNodeAddInt (&(ajp->gitail), 0, (Int4) 0);
5171      ajp->gitail = ajp->gihead;
5172     if (bssp != NULL) {
5173       CountOneBioseqSet (SeqMgrGetSeqEntryForData (bssp), &aw);
5174     } else {
5175       CountOneBioseq (bsp, &aw);
5176     }
5177   }
5178 
5179   if (bssp != NULL) {
5180 
5181     /* handle all components of a pop/phy/mut/eco set */
5182 
5183     sep = SeqMgrGetSeqEntryForData (bssp);
5184     DoOneBioseqSet (sep, &aw);
5185 
5186   } else {
5187 
5188     /* handle single bioseq, which may be segmented or a local part */
5189 
5190     DoOneBioseq (bsp, &aw);
5191   }
5192 
5193   if (stream) {
5194     if (is_html) {
5195       DoQuickLinkFormat (aw.afp, "</div>");
5196     }
5197 
5198     /* send optional tail string */
5199 
5200     if (fftail == NULL && is_html) {
5201       fftail = defTail;
5202     }
5203     if (fftail != NULL) {
5204       if (fp != NULL) {
5205         fprintf (fp, "%s", fftail);
5206       }
5207     }
5208     if (ffwrite != NULL) {
5209       ffwrite (fftail, userdata, TAIL_BLOCK, entityID, item_type, item_id, 0, 0);
5210     }
5211   }
5212 
5213   SeqEntrySetScope (oldscope);
5214 
5215   /* check for failure to populate anything */
5216 
5217   numSections = ValNodeLen (aw.sectionList);
5218   ajp->ajp.numSections = numSections;
5219 
5220   if (numSections == 0) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5221 
5222   /* allocate section array for this job - needed for memory cleanup even if streamed */
5223 
5224   sectionArray = (Asn2gbSectPtr PNTR) MemNew (sizeof (Asn2gbSectPtr) * (numSections + 1));
5225   ajp->ajp.sectionArray = sectionArray;
5226 
5227   if (sectionArray == NULL) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5228 
5229   /* fill in section and paragraph arrays */
5230 
5231   numParagraphs = 0;
5232   for (vnp = aw.sectionList, i = 0; vnp != NULL && i < numSections; vnp = vnp->next, i++) {
5233     asp = (Asn2gbSectPtr) vnp->data.ptrvalue;
5234     sectionArray [i] = asp;
5235     if (asp != NULL) {
5236       numParagraphs += asp->numBlocks;
5237     }
5238   }
5239 
5240   /* allocate paragraph array pointing to all blocks in all sections */
5241 
5242   ajp->ajp.numParagraphs = numParagraphs;
5243   if (numParagraphs == 0) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5244 
5245   paragraphArray = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numParagraphs + 1));
5246   ajp->ajp.paragraphArray = paragraphArray;
5247 
5248   paragraphByIDs = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numParagraphs + 1));
5249   ajp->ajp.paragraphByIDs = paragraphByIDs;
5250 
5251   if (paragraphArray == NULL || paragraphByIDs == NULL) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5252 
5253   k = 0;
5254   for (i = 0; i < numSections; i++) {
5255     asp = sectionArray [i];
5256     if (asp != NULL) {
5257 
5258       numBlocks = asp->numBlocks;
5259       blockArray = asp->blockArray;
5260       if (blockArray != NULL) {
5261 
5262         for (j = 0; j < numBlocks; j++) {
5263           bbp = blockArray [j];
5264 
5265           paragraphArray [k] = bbp;
5266           paragraphByIDs [k] = bbp;
5267           bbp->paragraph = k;
5268           k++;
5269         }
5270       }
5271     }
5272   }
5273 
5274   /* sort paragraphByIDs array by entityID/itemtype/itemID/paragraph */
5275 
5276   StableMergeSort (paragraphByIDs, (size_t) numParagraphs, sizeof (BaseBlockPtr), SortParagraphByIDProc);
5277 
5278   /* free sectionList, but leave data, now pointed to by sectionArray elements */
5279 
5280   ValNodeFree (aw.sectionList);
5281 
5282   /* check for failure to to make legal flatfile */
5283 
5284   if (ajp->flags.needAtLeastOneRef && aw.failed) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5285 
5286   return (Asn2gbJobPtr) ajp;
5287 }
5288 
asn2gnbk_setup(BioseqPtr bsp,BioseqSetPtr bssp,SeqLocPtr slp,FmtType format,ModType mode,StlType style,FlgType flags,LckType locks,CstType custom,XtraPtr extra)5289 NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
5290   BioseqPtr bsp,
5291   BioseqSetPtr bssp,
5292   SeqLocPtr slp,
5293   FmtType format,
5294   ModType mode,
5295   StlType style,
5296   FlgType flags,
5297   LckType locks,
5298   CstType custom,
5299   XtraPtr extra
5300 )
5301 
5302 {
5303   return asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
5304                             flags, locks, custom, extra,
5305                             FALSE, NULL, NULL, NULL);
5306 }
5307 
5308 /* ********************************************************************** */
5309 
5310 /* format functions allocate printable string for given paragraph */
5311 
DefaultFormatBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)5312 NLM_EXTERN CharPtr DefaultFormatBlock (
5313   Asn2gbFormatPtr afp,
5314   BaseBlockPtr bbp
5315 )
5316 
5317 {
5318   if (afp == NULL || bbp == NULL) return NULL;
5319 
5320   /* default format function assumes string pre-allocated by add block function */
5321 
5322   return StringSaveNoNull (bbp->string);
5323 }
5324 
5325 typedef CharPtr (*FormatProc) (Asn2gbFormatPtr afp, BaseBlockPtr bbp);
5326 
5327 static FormatProc asn2gnbk_fmt_functions [30] = {
5328   NULL,
5329   NULL,
5330   DefaultFormatBlock,
5331   DefaultFormatBlock,
5332   DefaultFormatBlock,
5333   DefaultFormatBlock,
5334   DefaultFormatBlock,
5335   DefaultFormatBlock,
5336   DefaultFormatBlock,
5337   DefaultFormatBlock,
5338   DefaultFormatBlock,
5339   DefaultFormatBlock,
5340   FormatSourceBlock,
5341   FormatOrganismBlock,
5342   DefaultFormatBlock,
5343   FormatReferenceBlock,
5344   DefaultFormatBlock,
5345   FormatCommentBlock,
5346   DefaultFormatBlock,
5347   FormatFeatHeaderBlock,
5348   FormatSourceFeatBlock,
5349   FormatFeatureBlock,
5350   FormatBasecountBlock,
5351   DefaultFormatBlock,
5352   FormatSequenceBlock,
5353   FormatContigBlock,
5354   DefaultFormatBlock,
5355   DefaultFormatBlock,
5356   FormatSlashBlock,
5357   NULL
5358 };
5359 
5360 static CharPtr asn2gnbk_fmt_labels [30] = {
5361   NULL,
5362   NULL,
5363   "locus",
5364   "defline",
5365   "accession",
5366   "version",
5367   "project",
5368   "pid",
5369   "dbsource",
5370   "date",
5371   "keywords",
5372   "segment",
5373   "source",
5374   "organism",
5375   "refstats",
5376   "reference",
5377   "primary",
5378   "comment",
5379   "featstats",
5380   "featheader",
5381   "sourcefeat",
5382   "feature",
5383   "basecount",
5384   "origin",
5385   "sequence",
5386   "contig",
5387   "wgs",
5388   "genome",
5389   "slash",
5390   NULL
5391 };
5392 
asn2gnbk_block_label(BlockType blocktype)5393 NLM_EXTERN CharPtr asn2gnbk_block_label (
5394   BlockType blocktype
5395 )
5396 
5397 {
5398   if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return NULL;
5399   return asn2gnbk_fmt_labels [(int) blocktype];
5400 }
5401 
PrintFtableIntervals(ValNodePtr PNTR head,BioseqPtr target,SeqLocPtr location,CharPtr label,Boolean relaxed)5402 NLM_EXTERN void PrintFtableIntervals (
5403   ValNodePtr PNTR head,
5404   BioseqPtr target,
5405   SeqLocPtr location,
5406   CharPtr label,
5407   Boolean relaxed
5408 )
5409 
5410 {
5411   IntFuzzPtr  ifp;
5412   Boolean     is_circular;
5413   Boolean     partial5;
5414   Boolean     partial3;
5415   SeqLocPtr   slp;
5416   SeqPntPtr   spp;
5417   Int4        start;
5418   Int4        stop;
5419   Char        str [64];
5420   Char        str1 [32];
5421   Char        str2 [32];
5422   SeqLocPtr   tmp;
5423 
5424   if (head == NULL || target == NULL || location == NULL || label == NULL) return;
5425 
5426   if (location->choice == SEQLOC_PNT) {
5427     spp = (SeqPntPtr) location->data.ptrvalue;
5428     if (spp != NULL) {
5429       ifp = spp->fuzz;
5430       if (ifp != NULL && ifp->choice == 4 && ifp->a == 3) {
5431         sprintf (str, "%ld^\t%ld\t%s\n", (long) (spp->point + 1),
5432                  (long) (spp->point + 2), label);
5433         ValNodeCopyStr (head, 0, str);
5434         return;
5435       }
5436     }
5437   }
5438 
5439   slp = SeqLocFindNext (location, NULL);
5440   if (slp == NULL) return;
5441 
5442   is_circular = (Boolean) (target->topology == TOPOLOGY_CIRCULAR);
5443 
5444   start = GetOffsetInBioseqEx (slp, target, SEQLOC_START, is_circular, relaxed) + 1;
5445   stop = GetOffsetInBioseqEx (slp, target, SEQLOC_STOP, is_circular, relaxed) + 1;
5446   CheckSeqLocForPartial (slp, &partial5, &partial3);
5447   if (start == 0 || stop == 0) {
5448     tmp = TrimLocInSegment (target, slp, &partial5, &partial3);
5449     start = GetOffsetInBioseqEx (tmp, target, SEQLOC_START, is_circular, relaxed) + 1;
5450     stop = GetOffsetInBioseqEx (tmp, target, SEQLOC_STOP, is_circular, relaxed) + 1;
5451     SeqLocFree (tmp);
5452   }
5453   if (partial5) {
5454     sprintf (str1, "<%ld", (long) start);
5455   } else {
5456     sprintf (str1, "%ld", (long) start);
5457   }
5458   if (partial3) {
5459     sprintf (str2, ">%ld", (long) stop);
5460   } else {
5461     sprintf (str2, "%ld", (long) stop);
5462   }
5463   sprintf (str, "%s\t%s\t%s\n", str1, str2, label);
5464   ValNodeCopyStr (head, 0, str);
5465 
5466   while ((slp = SeqLocFindNext (location, slp)) != NULL) {
5467     start = GetOffsetInBioseqEx (slp, target, SEQLOC_START, is_circular, relaxed) + 1;
5468     stop = GetOffsetInBioseqEx (slp, target, SEQLOC_STOP, is_circular, relaxed) + 1;
5469     CheckSeqLocForPartial (slp, &partial5, &partial3);
5470     if (start == 0 || stop == 0) {
5471       tmp = TrimLocInSegment (target, slp, &partial5, &partial3);
5472       start = GetOffsetInBioseqEx (tmp, target, SEQLOC_START, is_circular, relaxed) + 1;
5473       stop = GetOffsetInBioseqEx (tmp, target, SEQLOC_STOP, is_circular, relaxed) + 1;
5474       SeqLocFree (tmp);
5475     }
5476     if (partial5) {
5477       sprintf (str1, "<%ld", (long) start);
5478     } else {
5479       sprintf (str1, "%ld", (long) start);
5480     }
5481     if (partial3) {
5482       sprintf (str2, ">%ld", (long) stop);
5483     } else {
5484       sprintf (str2, "%ld", (long) stop);
5485     }
5486     if (start != 0 && stop != 0) {
5487       sprintf (str, "%s\t%s\n", str1, str2);
5488       ValNodeCopyStr (head, 0, str);
5489     }
5490   }
5491 
5492   if (LocationHasNullsBetween (location)) {
5493     ValNodeCopyStr (head, 0, "\t\t\torder\n");
5494   }
5495 }
5496 
5497 static CharPtr goQualList [] = {
5498   "", "go_process", "go_component", "go_function", NULL
5499 };
5500 
PrintGeneOntologyUserFld(UserFieldPtr ufp,Pointer userdata)5501 static void PrintGeneOntologyUserFld (
5502   UserFieldPtr ufp,
5503   Pointer userdata
5504 )
5505 
5506 {
5507   UserFieldPtr     entry;
5508   CharPtr          evidence;
5509   Char             gid [32];
5510   CharPtr          goid;
5511   CharPtr          goref;
5512   ValNodePtr PNTR  head;
5513   Int2             i;
5514   Int2             j;
5515   size_t           len;
5516   ObjectIdPtr      oip;
5517   Int4             pmid;
5518   CharPtr          str;
5519   CharPtr          textstr;
5520   Char             tmp [16];
5521 
5522   if (ufp == NULL || ufp->choice != 11) return;
5523   oip = ufp->label;
5524   if (oip == NULL) return;
5525   for (i = 0; goQualType [i] != NULL; i++) {
5526     if (StringICmp (oip->str, goQualType [i]) == 0) break;
5527   }
5528   if (goQualType [i] == NULL) return;
5529 
5530   /* loop to allow multiple entries for each type of GO term */
5531   for (entry = ufp->data.ptrvalue; entry != NULL; entry = entry->next) {
5532     if (entry == NULL || entry->choice != 11) break;
5533 
5534     pmid = 0;
5535     goid = NULL;
5536     goref = NULL;
5537     evidence = NULL;
5538     textstr = NULL;
5539 
5540     for (ufp = (UserFieldPtr) entry->data.ptrvalue; ufp != NULL; ufp = ufp->next) {
5541       oip = ufp->label;
5542       if (oip == NULL) continue;
5543       for (j = 0; goFieldType [j] != NULL; j++) {
5544         if (StringICmp (oip->str, goFieldType [j]) == 0) break;
5545       }
5546       if (goFieldType [j] == NULL) continue;
5547       switch (j) {
5548         case 1 :
5549           if (ufp->choice == 1) {
5550             textstr = (CharPtr) ufp->data.ptrvalue;
5551           }
5552           break;
5553         case 2 :
5554           if (ufp->choice == 1) {
5555             goid = (CharPtr) ufp->data.ptrvalue;
5556           } else if (ufp->choice == 2) {
5557             sprintf (gid, "%ld", (long) (Int4) ufp->data.intvalue);
5558             goid = (CharPtr) gid;
5559           }
5560           break;
5561         case 3 :
5562           if (ufp->choice == 2) {
5563             pmid = (Int4) ufp->data.intvalue;
5564           }
5565           break;
5566         case 4 :
5567           if (ufp->choice == 1) {
5568             goref = (CharPtr) ufp->data.ptrvalue;
5569           }
5570           break;
5571         case 5 :
5572           if (ufp->choice == 1) {
5573             evidence = (CharPtr) ufp->data.ptrvalue;
5574           }
5575           break;
5576         default :
5577           break;
5578       }
5579     }
5580     /* if (StringHasNoText (textstr)) break; */
5581 
5582     len = StringLen (textstr) + StringLen (goid) + StringLen (goref) + StringLen (evidence) + 40;
5583     str = (CharPtr) MemNew (len);
5584     if (str == NULL) return;
5585     StringCpy (str, "\t\t\t");
5586     StringCat (str, goQualList [i]);
5587     StringCat (str, "\t");
5588     StringCat (str, textstr);
5589     if (StringDoesHaveText (goid)) {
5590       StringCat (str, "|");
5591       StringCat (str, goid);
5592     } else {
5593       StringCat (str, "|");
5594     }
5595     if (pmid != 0) {
5596       sprintf (tmp, "|%ld", (long) pmid);
5597       StringCat (str, tmp);
5598     } else if (StringDoesHaveText (goref)) {
5599       StringCat (str, "|");
5600       StringCat (str, goref);
5601     } else {
5602       StringCat (str, "|");
5603     }
5604     if (StringDoesHaveText (evidence)) {
5605       StringCat (str, "|");
5606       StringCat (str, evidence);
5607     }
5608     len = StringLen (str);
5609     while (len > 0 && str [len - 1] == '|') {
5610       str [len - 1] = '\0';
5611       len--;
5612     }
5613     StringCat (str, "\n");
5614 
5615     head = (ValNodePtr PNTR) userdata;
5616     ValNodeCopyStr (head, 0, str);
5617     MemFree (str);
5618   }
5619 }
5620 
/*
 * PrintNomenclatureUserObject
 *
 * For a user object whose type string is exactly "OfficialNomenclature",
 * reads the string-valued (choice 1) fields labelled Symbol, Name,
 * DataSource and Status (labels compared case-insensitively; a later
 * non-blank value replaces an earlier one) and appends one line
 *
 *   <tab><tab><tab>nomenclature<tab>status|symbol|name|datasource
 *
 * to the string list that userdata (a ValNodePtr PNTR) points to.  Status
 * defaults to "Unclassified"; name and datasource may be empty.  Nothing
 * is written when there is no non-blank symbol or when the line buffer
 * cannot be allocated.
 */
static void PrintNomenclatureUserObject (
  UserObjectPtr uop,
  Pointer userdata
)

{
  CharPtr          dataSource = NULL;
  CharPtr PNTR     dest;
  UserFieldPtr     fld;
  ValNodePtr PNTR  head;
  ObjectIdPtr      label;
  CharPtr          line;
  CharPtr          name = NULL;
  size_t           need;
  CharPtr          status = NULL;
  CharPtr          symbol = NULL;
  CharPtr          value;

  if (uop == NULL) return;
  label = uop->type;
  if (label == NULL) return;
  if (StringCmp (label->str, "OfficialNomenclature") != 0) return;

  for (fld = uop->data; fld != NULL; fld = fld->next) {
    label = fld->label;
    if (label == NULL || label->str == NULL) continue;

    /* pick the variable this field feeds, if it is one of the four of interest */

    if (StringICmp (label->str, "Symbol") == 0) {
      dest = &symbol;
    } else if (StringICmp (label->str, "Name") == 0) {
      dest = &name;
    } else if (StringICmp (label->str, "DataSource") == 0) {
      dest = &dataSource;
    } else if (StringICmp (label->str, "Status") == 0) {
      dest = &status;
    } else {
      continue;
    }

    /* only string fields with real text are accepted */

    if (fld->choice != 1) continue;
    value = (CharPtr) fld->data.ptrvalue;
    if (StringDoesHaveText (value)) {
      *dest = value;
    }
  }

  if (StringHasNoText (symbol)) return;

  if (status == NULL) {
    status = "Unclassified";
  }

  need = StringLen (dataSource) + StringLen (status) + StringLen (name) + StringLen (symbol) + 80;
  line = (CharPtr) MemNew (need);
  if (line == NULL) return;

  StringCpy (line, "\t\t\tnomenclature\t");
  StringCat (line, status);
  StringCat (line, "|");
  StringCat (line, symbol);
  StringCat (line, "|");
  if (StringDoesHaveText (name)) {
    StringCat (line, name);
  }
  StringCat (line, "|");
  if (StringDoesHaveText (dataSource)) {
    StringCat (line, dataSource);
  }
  StringCat (line, "\n");

  head = (ValNodePtr PNTR) userdata;
  ValNodeCopyStr (head, 0, line);
  MemFree (line);
}
5700 
/*
 * PrintFTUserObj
 *
 * Dispatches on the user object's type string (case-insensitive):
 * "GeneOntology" objects have every user field visited with
 * PrintGeneOntologyUserFld, "OfficialNomenclature" objects are passed to
 * PrintNomenclatureUserObject.  Any other object, a NULL object, or an
 * object without a type is ignored.  userdata is forwarded untouched.
 */
static void PrintFTUserObj (
  UserObjectPtr uop,
  Pointer userdata
)

{
  CharPtr  kind;

  if (uop == NULL || uop->type == NULL) return;

  kind = uop->type->str;

  if (StringICmp (kind, "GeneOntology") == 0) {
    VisitUserFieldsInUop (uop, userdata, PrintGeneOntologyUserFld);
    return;
  }

  if (StringICmp (kind, "OfficialNomenclature") == 0) {
    PrintNomenclatureUserObject (uop, userdata);
  }
}
5718 
PrintFTCodeBreakEx(ValNodePtr PNTR head,CodeBreakPtr cbp,BioseqPtr target,Boolean masterStyle,Boolean relaxed,SeqLocPtr subloc)5719 NLM_EXTERN void PrintFTCodeBreakEx (
5720   ValNodePtr PNTR head,
5721   CodeBreakPtr cbp,
5722   BioseqPtr target,
5723   Boolean masterStyle,
5724   Boolean relaxed,
5725   SeqLocPtr subloc
5726 )
5727 
5728 {
5729   Char             buf [128];
5730   Choice           cbaa;
5731   IntAsn2gbJob     iaj;
5732   SeqLocPtr        newloc;
5733   CharPtr          ptr;
5734   Uint1            residue;
5735   SeqCodeTablePtr  sctp;
5736   Uint1            seqcode;
5737   SeqIdPtr         sip;
5738   SeqLocPtr        slp;
5739   CharPtr          str;
5740 
5741   seqcode = 0;
5742   sctp = NULL;
5743   cbaa = cbp->aa;
5744   switch (cbaa.choice) {
5745     case 1 :
5746       seqcode = Seq_code_ncbieaa;
5747       break;
5748     case 2 :
5749       seqcode = Seq_code_ncbi8aa;
5750       break;
5751     case 3 :
5752       seqcode = Seq_code_ncbistdaa;
5753       break;
5754     default :
5755       break;
5756   }
5757   if (seqcode == 0) return;
5758   sctp = SeqCodeTableFind (seqcode);
5759   if (sctp == NULL) return;
5760 
5761   MemSet ((Pointer) &iaj, 0, sizeof (IntAsn2gbJob));
5762   iaj.flags.iupacaaOnly = FALSE;
5763   iaj.relModeError = FALSE;
5764 
5765   slp = cbp->loc;
5766   if (slp != NULL) {
5767 	str = NULL;
5768 	if (subloc != NULL) {
5769 	  sip = SeqIdParse ("lcl|dummy");
5770 	  newloc = SeqLocReMapEx (sip, subloc, slp, 0, FALSE, masterStyle, relaxed);
5771 
5772 	  SeqIdFree (sip);
5773 	  if (newloc != NULL) {
5774 		A2GBSeqLocReplaceID (newloc, subloc);
5775 		str = FFFlatLoc (&iaj, target, newloc, masterStyle, FALSE);
5776 		SeqLocFree (newloc);
5777 	  }
5778 	} else {
5779 	  str = FFFlatLoc (&iaj, target, slp, masterStyle, FALSE);
5780 	}
5781     if (str != NULL) {
5782       residue = cbaa.value.intvalue;
5783       ptr = Get3LetterSymbol (&iaj, seqcode, sctp, residue);
5784       if (ptr == NULL) {
5785         ptr = "OTHER";
5786       }
5787       sprintf (buf, "\t\t\ttransl_except\t(pos:%s,aa:%s)\n", str, ptr);
5788       ValNodeCopyStr (head, 0, buf);
5789       MemFree (str);
5790     }
5791   }
5792 }
5793 
PrintFTCodeBreak(ValNodePtr PNTR head,CodeBreakPtr cbp,BioseqPtr target)5794 NLM_EXTERN void PrintFTCodeBreak (
5795   ValNodePtr PNTR head,
5796   CodeBreakPtr cbp,
5797   BioseqPtr target
5798 )
5799 
5800 {
5801   PrintFTCodeBreakEx (head, cbp, target, FALSE, FALSE, NULL);
5802 }
5803 
/*
 * AppendIdForTable
 *
 * Writes sip in PRINTID_FASTA_SHORT form and appends it to str, preceded
 * by a '|' separator when needsBar is TRUE.  Returns TRUE when the id was
 * appended, FALSE when sip is NULL or SeqIdWrite returned NULL.
 */
static Boolean AppendIdForTable (SeqIdPtr sip, CharPtr str, Boolean needsBar)

{
  Char  id [128];

  if (sip == NULL || str == NULL) return FALSE;
  if (SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) == NULL) return FALSE;

  if (needsBar) {
    StringCat (str, "|");
  }
  StringCat (str, id);

  return TRUE;
}

/*
 * SeqIdWriteForTable
 *
 * Builds the identifier text for a chain of Seq-ids and copies it into buf
 * (at most buflen, via StringNCpy_0).  The last id of each kind found in
 * the chain is used, in this order of preference:
 *
 *   1. accession-type id (GenBank, EMBL, PIR, SwissProt, DDBJ, PRF, TPG,
 *      TPE, TPD, other, gpipe)
 *   2. general id whose Dbtag is not skippable, joined to the accession
 *      with '|' when both are present
 *   3. local id, only if nothing was written so far and
 *      ajp->flags.suppressLocalID is not set
 *   4. gi, only if nothing was written so far and giOK is TRUE
 *
 * Other id types are ignored.
 *
 * Returns TRUE when buf ends up with text, FALSE when sip, buf or ajp is
 * NULL or when no usable id was found.
 */
static Boolean SeqIdWriteForTable (SeqIdPtr sip, CharPtr buf, size_t buflen, IntAsn2gbJobPtr ajp, Boolean giOK)

{
  SeqIdPtr  accn = NULL, local = NULL, general = NULL, gi = NULL;
  DbtagPtr  dbt;
  Int2      numids;
  Char      str [260];  /* room for two 127-character ids, a separator and the NUL */

  if (sip == NULL || buf == NULL || ajp == NULL) return FALSE;

  while (sip != NULL) {
    switch (sip->choice) {
      case SEQID_LOCAL :
        local = sip;
        break;
      case SEQID_GENBANK :
      case SEQID_EMBL :
      case SEQID_PIR :
      case SEQID_SWISSPROT :
      case SEQID_DDBJ :
      case SEQID_PRF :
      case SEQID_TPG :
      case SEQID_TPE :
      case SEQID_TPD :
      case SEQID_OTHER :
      case SEQID_GPIPE :
        accn = sip;
        break;
      case SEQID_GENERAL :
        dbt = (DbtagPtr) sip->data.ptrvalue;
        if (dbt != NULL && ! IsSkippableDbtag(dbt)) {
          general = sip;
        }
        break;
      case SEQID_GI :
        gi = sip;
        break;
      default :
        break;
    }
    sip = sip->next;
  }

  str [0] = '\0';
  numids = 0;

  if (accn != NULL && AppendIdForTable (accn, str, FALSE)) {
    numids++;
  }

  if (general != NULL && AppendIdForTable (general, str, (Boolean) (numids > 0))) {
    numids++;
  }

  if (local != NULL && (! ajp->flags.suppressLocalID) && numids == 0) {
    if (AppendIdForTable (local, str, FALSE)) {
      numids++;
    }
  }

  /* last resort: the gi itself (this branch used to write accn by mistake) */

  if (gi != NULL && giOK && numids == 0) {
    if (AppendIdForTable (gi, str, FALSE)) {
      numids++;
    }
  }

  StringNCpy_0 (buf, str, buflen);
  if (StringHasNoText (buf)) return FALSE;

  return TRUE;
}
5899 
PrintBioSourceFtableEntry(ValNodePtr PNTR head,BioSourcePtr biop)5900 static void PrintBioSourceFtableEntry (
5901   ValNodePtr PNTR head,
5902   BioSourcePtr    biop
5903 )
5904 {
5905   OrgModPtr    mod;
5906   SubSourcePtr ssp;
5907   Char         str [256];
5908 
5909   if (head == NULL || biop == NULL) return;
5910 
5911   if (biop->org != NULL && ! StringHasNoText (biop->org->taxname))
5912   {
5913     sprintf (str, "\t\t\torganism\t%s\n", biop->org->taxname);
5914     ValNodeCopyStr (head, 0, str);
5915   }
5916 
5917   /* add OrgMods */
5918   if (biop->org != NULL && biop->org->orgname != NULL)
5919   {
5920     for (mod = biop->org->orgname->mod;
5921          mod != NULL;
5922          mod = mod->next)
5923     {
5924       switch (mod->subtype)
5925       {
5926         case ORGMOD_strain :
5927           sprintf (str, "\t\t\tstrain\t");
5928           break;
5929         case ORGMOD_substrain :
5930           sprintf (str, "\t\t\tsubstrain\t");
5931           break;
5932         case ORGMOD_type :
5933           sprintf (str, "\t\t\ttype\t");
5934           break;
5935         case ORGMOD_subtype :
5936           sprintf (str, "\t\t\tsubtype\t");
5937           break;
5938         case ORGMOD_variety :
5939           sprintf (str, "\t\t\tvariety\t");
5940           break;
5941         case ORGMOD_serotype :
5942           sprintf (str, "\t\t\tserotype\t");
5943           break;
5944         case ORGMOD_serogroup :
5945           sprintf (str, "\t\t\tserogroup\t");
5946           break;
5947         case ORGMOD_serovar :
5948           sprintf (str, "\t\t\tserovar\t");
5949           break;
5950         case ORGMOD_cultivar :
5951           sprintf (str, "\t\t\tcultivar\t");
5952           break;
5953         case ORGMOD_pathovar :
5954           sprintf (str, "\t\t\tpathovar\t");
5955           break;
5956         case ORGMOD_chemovar :
5957           sprintf (str, "\t\t\tchemovar\t");
5958           break;
5959         case ORGMOD_biovar :
5960           sprintf (str, "\t\t\tbiovar\t");
5961           break;
5962         case ORGMOD_biotype :
5963           sprintf (str, "\t\t\tbiotype\t");
5964           break;
5965         case ORGMOD_group :
5966           sprintf (str, "\t\t\tgroup\t");
5967           break;
5968         case ORGMOD_subgroup :
5969           sprintf (str, "\t\t\tsubgroup\t");
5970           break;
5971         case ORGMOD_isolate :
5972           sprintf (str, "\t\t\tisolate\t");
5973           break;
5974         case ORGMOD_common :
5975           sprintf (str, "\t\t\tcommon\t");
5976           break;
5977         case ORGMOD_acronym :
5978           sprintf (str, "\t\t\tacronym\t");
5979           break;
5980         case ORGMOD_dosage :
5981           sprintf (str, "\t\t\tdosage\t");
5982           break;
5983         case ORGMOD_nat_host :
5984           sprintf (str, "\t\t\tnat_host\t");
5985           break;
5986         case ORGMOD_sub_species :
5987           sprintf (str, "\t\t\tsub_species\t");
5988           break;
5989         case ORGMOD_specimen_voucher :
5990           sprintf (str, "\t\t\tspecimen_voucher\t");
5991           break;
5992         case ORGMOD_authority :
5993           sprintf (str, "\t\t\tauthority\t");
5994           break;
5995         case ORGMOD_forma :
5996           sprintf (str, "\t\t\tforma\t");
5997           break;
5998         case ORGMOD_forma_specialis :
5999           sprintf (str, "\t\t\tforma_specialis\t");
6000           break;
6001         case ORGMOD_ecotype :
6002           sprintf (str, "\t\t\tecotype\t");
6003           break;
6004         case ORGMOD_synonym :
6005           sprintf (str, "\t\t\tsynonym\t");
6006           break;
6007         case ORGMOD_anamorph :
6008           sprintf (str, "\t\t\tanamorph\t");
6009           break;
6010         case ORGMOD_teleomorph :
6011           sprintf (str, "\t\t\tteleomorph\t");
6012           break;
6013         case ORGMOD_breed :
6014           sprintf (str, "\t\t\tbreed\t");
6015           break;
6016         case ORGMOD_gb_acronym :
6017           sprintf (str, "\t\t\tgb_acronym\t");
6018           break;
6019         case ORGMOD_gb_anamorph :
6020           sprintf (str, "\t\t\tgb_anamorph\t");
6021           break;
6022         case ORGMOD_culture_collection :
6023           sprintf (str, "\t\t\tculture_collection\t");
6024           break;
6025         case ORGMOD_bio_material :
6026           sprintf (str, "\t\t\tbio_material\t");
6027           break;
6028         case ORGMOD_metagenome_source :
6029           sprintf (str, "\t\t\tmetagenome_source\t");
6030           break;
6031         case ORGMOD_type_material :
6032           sprintf (str, "\t\t\ttype_material\t");
6033           break;
6034         case ORGMOD_old_lineage :
6035           sprintf (str, "\t\t\told_lineage\t");
6036           break;
6037         case ORGMOD_old_name :
6038           sprintf (str, "\t\t\told_name\t");
6039           break;
6040         case ORGMOD_other :
6041           sprintf (str, "\t\t\tnote\t");
6042           break;
6043         default :
6044           str [0] = 0;
6045       }
6046       if ( str [0] == 0) continue;
6047       if (! StringHasNoText (mod->subname))
6048       {
6049         StringNCat (str, mod->subname, sizeof (str) - StringLen (str) - 2);
6050         str [sizeof (str) - 2] = 0;
6051       }
6052       StringCat (str, "\n");
6053       ValNodeCopyStr (head, 0, str);
6054     }
6055   }
6056 
6057   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
6058   {
6059     switch (ssp->subtype)
6060     {
6061       case SUBSRC_chromosome :
6062         sprintf (str, "\t\t\tchromosome\t");
6063         break;
6064       case SUBSRC_map :
6065         sprintf (str, "\t\t\tmap\t");
6066         break;
6067       case SUBSRC_clone :
6068         sprintf (str, "\t\t\tclone\t");
6069         break;
6070       case SUBSRC_haplotype :
6071         sprintf (str, "\t\t\thaplotype\t");
6072         break;
6073       case SUBSRC_genotype :
6074         sprintf (str, "\t\t\tgenotype\t");
6075         break;
6076       case SUBSRC_sex :
6077         sprintf (str, "\t\t\tsex\t");
6078         break;
6079       case SUBSRC_cell_line :
6080         sprintf (str, "\t\t\tcell_line\t");
6081         break;
6082       case SUBSRC_cell_type :
6083         sprintf (str, "\t\t\tcell_type\t");
6084         break;
6085       case SUBSRC_tissue_type :
6086         sprintf (str, "\t\t\ttissue_type\t");
6087         break;
6088       case SUBSRC_clone_lib :
6089         sprintf (str, "\t\t\tclone_lib\t");
6090         break;
6091       case SUBSRC_dev_stage :
6092         sprintf (str, "\t\t\tdev_stage\t");
6093         break;
6094       case SUBSRC_frequency :
6095         sprintf (str, "\t\t\tfrequency\t");
6096         break;
6097       case SUBSRC_germline :
6098         sprintf (str, "\t\t\tgermline\t");
6099         break;
6100       case SUBSRC_rearranged :
6101         sprintf (str, "\t\t\trearranged\t");
6102         break;
6103       case SUBSRC_lab_host :
6104         sprintf (str, "\t\t\tlab_host\t");
6105         break;
6106       case SUBSRC_pop_variant :
6107         sprintf (str, "\t\t\tpop_variant\t");
6108         break;
6109       case SUBSRC_tissue_lib :
6110         sprintf (str, "\t\t\ttissue_lib\t");
6111         break;
6112       case SUBSRC_plasmid_name :
6113         sprintf (str, "\t\t\tplasmid_name\t");
6114         break;
6115       case SUBSRC_transposon_name :
6116         sprintf (str, "\t\t\ttransposon_name\t");
6117         break;
6118       case SUBSRC_insertion_seq_name :
6119         sprintf (str, "\t\t\tinsertion_seq_name\t");
6120         break;
6121       case SUBSRC_plastid_name :
6122         sprintf (str, "\t\t\tplastid_name\t");
6123         break;
6124       case SUBSRC_country :
6125         sprintf (str, "\t\t\tcountry\t");
6126         break;
6127       case SUBSRC_segment :
6128         sprintf (str, "\t\t\tsegment\t");
6129         break;
6130       case SUBSRC_endogenous_virus_name :
6131         sprintf (str, "\t\t\tendogenous_virus_name\t");
6132         break;
6133       case SUBSRC_transgenic :
6134         sprintf (str, "\t\t\ttransgenic\t");
6135         break;
6136       case SUBSRC_environmental_sample :
6137         sprintf (str, "\t\t\tenvironmental_sample\t");
6138         break;
6139       case SUBSRC_isolation_source :
6140         sprintf (str, "\t\t\tisolation_source\t");
6141         break;
6142       case SUBSRC_lat_lon :
6143         sprintf (str, "\t\t\tlat_lon\t");
6144         break;
6145       case SUBSRC_collection_date :
6146         sprintf (str, "\t\t\tcollection_date\t");
6147         break;
6148       case SUBSRC_collected_by :
6149         sprintf (str, "\t\t\tcollected_by\t");
6150         break;
6151       case SUBSRC_identified_by :
6152         sprintf (str, "\t\t\tidentified_by\t");
6153         break;
6154       case SUBSRC_fwd_primer_seq :
6155         sprintf (str, "\t\t\tfwd_pcr_primer_seq\t");
6156         break;
6157       case SUBSRC_rev_primer_seq :
6158         sprintf (str, "\t\t\trev_pcr_primer_seq\t");
6159         break;
6160       case SUBSRC_fwd_primer_name :
6161         sprintf (str, "\t\t\tfwd_pcr_primer_name\t");
6162         break;
6163       case SUBSRC_rev_primer_name :
6164         sprintf (str, "\t\t\trev_pcr_primer_name\t");
6165         break;
6166       case SUBSRC_metagenomic :
6167         sprintf (str, "\t\t\tmetagenomic\t");
6168         break;
6169       case SUBSRC_mating_type :
6170         sprintf (str, "\t\t\tmating_type\t");
6171         break;
6172       case SUBSRC_linkage_group :
6173         sprintf (str, "\t\t\tlinkage_group\t");
6174         break;
6175       case SUBSRC_haplogroup :
6176         sprintf (str, "\t\t\thaplogroup\t");
6177         break;
6178       case SUBSRC_phenotype :
6179         sprintf (str, "\t\t\tphenotype\t");
6180         break;
6181       case SUBSRC_altitude :
6182         sprintf (str, "\t\t\taltitude\t");
6183         break;
6184       case SUBSRC_other :
6185         sprintf (str, "\t\t\tnote\t");
6186         break;
6187       default :
6188         str [0] = 0;
6189     }
6190     if ( str [0] == 0) continue;
6191     if (! StringHasNoText (ssp->name))
6192     {
6193       StringNCat (str, ssp->name, sizeof (str) - StringLen (str) - 2);
6194       str [sizeof (str) - 2] = 0;
6195     }
6196     StringCat (str, "\n");
6197     ValNodeCopyStr (head, 0, str);
6198   }
6199 }
6200 
/* Appends one "\t\t\t<qual>\t<val>\n" line to the feature table text in head.
   Nothing is added when head is NULL or when either qual or val has no text.
   The qualifier names orig_protein_id and orig_transcript_id are written as
   protein_id and transcript_id.  The line is built in a buffer sized from
   the two strings, and that buffer is handed to the list with
   ValNodeAddStr. */
static void AddOneFtableQual (
  ValNodePtr PNTR head,
  CharPtr qual,
  CharPtr val
)

{
  CharPtr  line;
  size_t   needed;

  if (head == NULL || StringHasNoText (qual) || StringHasNoText (val)) return;

  /* the orig_ forms are printed under the plain qualifier name */
  if (StringCmp (qual, "orig_protein_id") == 0) {
    qual = "protein_id";
  } else if (StringCmp (qual, "orig_transcript_id") == 0) {
    qual = "transcript_id";
  }

  /* room for both strings plus the tabs, newline and terminator */
  needed = StringLen (qual) + StringLen (val) + 10;
  line = (CharPtr) MemNew (sizeof (Char) * needed);
  if (line == NULL) return;

  sprintf (line, "\t\t\t%s\t%s\n", qual, val);

  ValNodeAddStr (head, 0, line);
}
6233 
6234 
GetGeneticCodeNumber(ValNodePtr gcp)6235 static Int4 GetGeneticCodeNumber (ValNodePtr gcp)
6236 {
6237   Int4 gcode = 0;
6238   ValNodePtr vnp, tmp;
6239 
6240   if (gcp == NULL) {
6241     return 0;
6242   }
6243   for (vnp = (ValNodePtr) gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
6244     if (vnp->choice == 2) {
6245       gcode = vnp->data.intvalue;
6246     }
6247   }
6248   if (gcode == 0) {
6249     for (vnp = (ValNodePtr) gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
6250       if (vnp->choice == 1) {
6251         tmp = GeneticCodeFind (0, vnp->data.ptrvalue);
6252         gcode = GetGeneticCodeNumber(tmp);
6253       }
6254     }
6255   }
6256   return gcode;
6257 }
6258 
6259 
6260 
PrintFtableLocAndQuals(IntAsn2gbJobPtr ajp,ValNodePtr PNTR head,BioseqPtr target,SeqFeatPtr sfp,SeqMgrFeatContextPtr context)6261 NLM_EXTERN void PrintFtableLocAndQuals (
6262   IntAsn2gbJobPtr ajp,
6263   ValNodePtr PNTR head,
6264   BioseqPtr target,
6265   SeqFeatPtr sfp,
6266   SeqMgrFeatContextPtr context
6267 )
6268 
6269 {
6270   CharPtr            aa;
6271   Char               anticodon [8];
6272   Int2               bondidx;
6273   BioseqSetPtr       bssp;
6274   CodeBreakPtr       cbp;
6275   BioseqPtr          cdna;
6276   SeqFeatPtr         cds;
6277   Char               ch;
6278   CdRegionPtr        crp;
6279   SeqMgrDescContext  dcontext;
6280   DbtagPtr           dbt;
6281   SeqMgrFeatContext  fcontext;
6282   GBQualPtr          gbq;
6283   ValNodePtr         geneorprotdb;
6284   GeneRefPtr         grp;
6285   Boolean            is_gps_genomic = FALSE;
6286   CharPtr            label;
6287   MolInfoPtr         mip;
6288   SeqLocPtr          newloc;
6289   Char               numbuf [32];
6290   Int2               numcodons;
6291   ObjectIdPtr        oip;
6292   BioseqPtr          prod;
6293   SeqFeatPtr         prot;
6294   ProtRefPtr         prp = NULL;
6295   Boolean            pseudo;
6296   CharPtr            pseudogene = NULL;
6297   CharPtr            ptr;
6298   RNAGenPtr          rgp;
6299   RNAQualPtr         rqp;
6300   RnaRefPtr          rrp;
6301   SeqDescrPtr        sdp;
6302   Int4               sec_str;
6303   SeqIdPtr           sip;
6304   SeqIdPtr           sip2;
6305   Int2               siteidx;
6306   SeqLocPtr          slp;
6307   Char               str [256];
6308   Char               tmp [512];
6309   CharPtr            tmpx;
6310   CharPtr            tmpy;
6311   tRNAPtr            trp;
6312   ValNodePtr         vnp;
6313   Int4               gcode;
6314 
6315   if (head == NULL || target == NULL || sfp == NULL || context == NULL) return;
6316   /* label = (CharPtr) FeatDefTypeLabel (sfp); */
6317   label = FindKeyFromFeatDefType (sfp->idx.subtype, FALSE);
6318   if (StringCmp (label, "Gene") == 0) {
6319     label = "gene";
6320   }
6321   else if (StringCmp (label, "Src") == 0) {
6322     label = "source";
6323   }
6324   if (StringHasNoText (label)) {
6325     label = "???";
6326   }
6327 
6328   /* check if genomic sequence in genomic product set */
6329 
6330   if (target->idx.parenttype == OBJ_BIOSEQSET) {
6331     bssp = (BioseqSetPtr) target->idx.parentptr;
6332     if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
6333       sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_molinfo, &dcontext);
6334       if (sdp != NULL) {
6335         mip = (MolInfoPtr) sdp->data.ptrvalue;
6336         if (mip != NULL && mip->biomol == MOLECULE_TYPE_GENOMIC) {
6337           is_gps_genomic = TRUE;
6338         }
6339       }
6340     }
6341   }
6342 
6343   PrintFtableIntervals (head, target, sfp->location, label, ajp->relaxedMapping);
6344 
6345   geneorprotdb = NULL;
6346   pseudo = sfp->pseudo;
6347 
6348   switch (context->seqfeattype) {
6349     case SEQFEAT_GENE :
6350       grp = (GeneRefPtr) sfp->data.value.ptrvalue;
6351       if (grp != NULL) {
6352         geneorprotdb = grp->db;
6353         pseudo |= grp->pseudo;
6354 
6355         StringNCpy_0 (str, (CharPtr) grp->locus, sizeof (str));
6356         if (! StringHasNoText (str)) {
6357           sprintf (tmp, "\t\t\tgene\t%s\n", str);
6358           ValNodeCopyStr (head, 0, tmp);
6359         }
6360         if (! StringHasNoText (grp->allele)) {
6361           sprintf (tmp, "\t\t\tallele\t%s\n", grp->allele);
6362           ValNodeCopyStr (head, 0, tmp);
6363         }
6364         for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
6365           StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6366           if (! StringHasNoText (str)) {
6367             sprintf (tmp, "\t\t\tgene_syn\t%s\n", str);
6368             ValNodeCopyStr (head, 0, tmp);
6369           }
6370         }
6371         if (! StringHasNoText (grp->desc)) {
6372           sprintf (tmp, "\t\t\tgene_desc\t%s\n", grp->desc);
6373           ValNodeCopyStr (head, 0, tmp);
6374         }
6375         if (! StringHasNoText (grp->maploc)) {
6376           sprintf (tmp, "\t\t\tmap\t%s\n", grp->maploc);
6377           ValNodeCopyStr (head, 0, tmp);
6378         }
6379         if (! StringHasNoText (grp->locus_tag)) {
6380           sprintf (tmp, "\t\t\tlocus_tag\t%s\n", grp->locus_tag);
6381           ValNodeCopyStr (head, 0, tmp);
6382         }
6383       }
6384       break;
6385     case SEQFEAT_CDREGION :
6386       prod = BioseqFind (SeqLocId (sfp->product));
6387       prot = SeqMgrGetBestProteinFeature (prod, NULL);
6388       if (prot != NULL) {
6389         prp = (ProtRefPtr) prot->data.value.ptrvalue;
6390       }
6391       if (prp == NULL) {
6392         prp = SeqMgrGetProtXref (sfp);
6393       }
6394       if (prp != NULL) {
6395         geneorprotdb = prp->db;
6396         if (prp->name != NULL) {
6397           for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
6398             StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6399             if (! StringHasNoText (str)) {
6400               sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6401               ValNodeCopyStr (head, 0, tmp);
6402             }
6403           }
6404         }
6405         if (prp->desc != NULL) {
6406           StringNCpy_0 (str, prp->desc, sizeof (str));
6407           if (! StringHasNoText (str)) {
6408             sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
6409             ValNodeCopyStr (head, 0, tmp);
6410           }
6411         }
6412         for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
6413           StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6414           if (! StringHasNoText (str)) {
6415             sprintf (tmp, "\t\t\tfunction\t%s\n", str);
6416             ValNodeCopyStr (head, 0, tmp);
6417           }
6418         }
6419         for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
6420           StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6421           if (! StringHasNoText (str)) {
6422             sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
6423             ValNodeCopyStr (head, 0, tmp);
6424           }
6425         }
6426       }
6427       if (prot != NULL) {
6428         AddOneFtableQual (head, "prot_note", prot->comment);
6429         /*
6430         StringNCpy_0 (str, prot->comment, sizeof (str));
6431         if (! StringHasNoText (str)) {
6432           sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
6433           ValNodeCopyStr (head, 0, tmp);
6434         }
6435         */
6436       }
6437       crp = (CdRegionPtr) sfp->data.value.ptrvalue;
6438       if (crp != NULL) {
6439         if (crp->frame > 1 && crp->frame <= 3) {
6440           sprintf (tmp, "\t\t\tcodon_start\t%d\n", (int) crp->frame);
6441           ValNodeCopyStr (head, 0, tmp);
6442         }
6443         for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
6444           PrintFTCodeBreakEx (head, cbp, target, ajp->masterStyle, ajp->relaxedMapping, ajp->ajp.slp);
6445         }
6446         gcode = GetGeneticCodeNumber(crp->genetic_code);
6447         if (gcode > 0) {
6448           sprintf (tmp, "\t\t\ttransl_table\t%d\n", gcode);
6449           ValNodeCopyStr (head, 0, tmp);
6450         }
6451       }
6452       if (prod != NULL && ! ajp->hideProteinID) {
6453         if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
6454           sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6455           ValNodeCopyStr (head, 0, tmp);
6456         }
6457         if (is_gps_genomic) {
6458           cds = SeqMgrGetCDSgivenProduct (prod, NULL);
6459           if (cds != NULL) {
6460             cdna = BioseqFindFromSeqLoc (cds->location);
6461             if (cdna != NULL) {
6462               if (SeqIdWriteForTable (cdna->id, str, sizeof (str), ajp, FALSE)) {
6463                 sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
6464                 ValNodeCopyStr (head, 0, tmp);
6465               }
6466             }
6467           }
6468         }
6469       } else if (sfp->product != NULL && ! ajp->hideProteinID) {
6470         sip = SeqLocId (sfp->product);
6471         if (sip != NULL) {
6472           if (sip->choice == SEQID_GI) {
6473             sip2 = GetSeqIdForGI (sip->data.intvalue);
6474             if (sip2 != NULL) {
6475               sip = sip2;
6476             }
6477           }
6478           if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
6479             sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6480             ValNodeCopyStr (head, 0, tmp);
6481           }
6482         }
6483       }
6484       break;
6485     case SEQFEAT_RNA :
6486       prod = BioseqFind (SeqLocId (sfp->product));
6487       rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
6488       if (rrp != NULL) {
6489         switch (rrp->ext.choice) {
6490           case 1 :
6491             StringNCpy_0 (str, (CharPtr) rrp->ext.value.ptrvalue, sizeof (str));
6492             if (! StringHasNoText (str)) {
6493               if (rrp->type == 255 &&
6494                   (StringICmp (str, "misc_RNA") == 0 ||
6495                    StringICmp (str, "ncRNA") == 0 ||
6496                    StringICmp (str, "tmRNA") == 0)) {
6497                 /* type other now uses name for type, product gbqual for product name */
6498               } else {
6499                 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6500                 ValNodeCopyStr (head, 0, tmp);
6501               }
6502             }
6503             break;
6504           case 2 :
6505             trp = (tRNAPtr) rrp->ext.value.ptrvalue;
6506             if (trp != NULL) {
6507               FeatDefLabel (sfp, str, sizeof (str) - 1, OM_LABEL_CONTENT);
6508               if (! StringHasNoText (str)) {
6509                 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6510                 ValNodeCopyStr (head, 0, tmp);
6511               }
6512               numcodons = ComposeCodonsRecognizedString (trp, numbuf, sizeof (numbuf));
6513               if (numcodons > 0 && StringDoesHaveText (numbuf)) {
6514                 sprintf (tmp, "\t\t\tcodon_recognized\t%s\n", numbuf);
6515                 ValNodeCopyStr (head, 0, tmp);
6516               }
6517               slp = trp->anticodon;
6518               newloc = NULL;
6519               if (slp != NULL && ajp->ajp.slp != NULL) {
6520                 sip = SeqIdParse ("lcl|dummy");
6521                 newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle, ajp->relaxedMapping);
6522                 SeqIdFree (sip);
6523                 slp = newloc;
6524                 if (newloc != NULL) {
6525                   A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
6526                 }
6527               }
6528               aa = str;
6529               if (StringNICmp (aa, "tRNA-", 5) == 0) {
6530                 aa += 5;
6531               }
6532               if (slp != NULL && StringDoesHaveText (aa)) {
6533                 anticodon [0] = '\0';
6534                 if (SeqLocLen (slp) == 3) {
6535                   tmpy = GetSequenceByLocation (slp);
6536                   if (tmpy != NULL) {
6537                     ptr = tmpy;
6538                     ch = *ptr;
6539                     while (ch != '\0') {
6540                       ch = TO_LOWER(ch);
6541                       *ptr = ch;
6542                       ptr++;
6543                       ch = *ptr;
6544                     }
6545                     if (! StringHasNoText (tmpy)) {
6546                       StringNCpy_0 (anticodon, tmpy, sizeof (anticodon));
6547                     }
6548                     MemFree (tmpy);
6549                   }
6550                 }
6551 
6552                 tmpx = FFFlatLoc (ajp, target, slp, ajp->masterStyle, FALSE);
6553                 if (tmpx != NULL) {
6554                   if (StringDoesHaveText (anticodon)) {
6555                     sprintf (tmp, "\t\t\tanticodon\t(pos:%s,aa:%s,seq:%s)\n", tmpx, aa, anticodon);
6556                   } else {
6557                     sprintf (tmp, "\t\t\tanticodon\t(pos:%s,aa:%s)\n", tmpx, aa);
6558                   }
6559                   ValNodeCopyStr (head, 0, tmp);
6560                 }
6561                 MemFree (tmpx);
6562               }
6563               if (newloc != NULL) {
6564                 SeqLocFree (newloc);
6565               }
6566             }
6567             break;
6568           case 3 :
6569             rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
6570             if (rgp != NULL) {
6571               StringNCpy_0 (str, rgp->_class, sizeof (str));
6572               if (StringDoesHaveText (str)) {
6573                 sprintf (tmp, "\t\t\tncRNA_class\t%s\n", str);
6574                 ValNodeCopyStr (head, 0, tmp);
6575               }
6576               StringNCpy_0 (str, rgp->product, sizeof (str));
6577               if (StringDoesHaveText (str)) {
6578                 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6579                 ValNodeCopyStr (head, 0, tmp);
6580               }
6581               for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
6582                 if (StringDoesHaveText (rqp->qual) && StringDoesHaveText (rqp->val)) {
6583                   AddOneFtableQual (head, rqp->qual, rqp->val);
6584                 }
6585               }
6586             }
6587           default :
6588             break;
6589         }
6590       }
6591       if (prod != NULL && ! ajp->hideProteinID) {
6592         if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
6593           sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
6594           ValNodeCopyStr (head, 0, tmp);
6595         }
6596         if (is_gps_genomic) {
6597           cds = SeqMgrGetNextFeature (prod, NULL, SEQFEAT_CDREGION, 0, &fcontext);
6598           if (cds != NULL && SeqMgrGetNextFeature (prod, cds, SEQFEAT_CDREGION, 0, &fcontext) == NULL) {
6599             prod = BioseqFindFromSeqLoc (cds->product);
6600             if (prod != NULL) {
6601               if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
6602                 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6603                 ValNodeCopyStr (head, 0, tmp);
6604               }
6605             }
6606           }
6607         }
6608       } else if (sfp->product != NULL && ! ajp->hideProteinID) {
6609         sip = SeqLocId (sfp->product);
6610         if (sip != NULL) {
6611           if (sip->choice == SEQID_GI) {
6612             sip2 = GetSeqIdForGI (sip->data.intvalue);
6613             if (sip2 != NULL) {
6614               sip = sip2;
6615             }
6616           }
6617           if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
6618             sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
6619             ValNodeCopyStr (head, 0, tmp);
6620           }
6621         }
6622       }
6623       break;
6624     case SEQFEAT_PROT :
6625       prod = BioseqFind (SeqLocId (sfp->product));
6626       prp = (ProtRefPtr) sfp->data.value.ptrvalue;
6627       if (prp != NULL) {
6628         if (prp->name != NULL) {
6629           for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
6630             StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6631             if (! StringHasNoText (str)) {
6632               sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6633               ValNodeCopyStr (head, 0, tmp);
6634             }
6635           }
6636         }
6637         if (prp->desc != NULL) {
6638           StringNCpy_0 (str, prp->desc, sizeof (str));
6639           if (! StringHasNoText (str)) {
6640             sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
6641             ValNodeCopyStr (head, 0, tmp);
6642           }
6643         }
6644         for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
6645           StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6646           if (! StringHasNoText (str)) {
6647             sprintf (tmp, "\t\t\tfunction\t%s\n", str);
6648             ValNodeCopyStr (head, 0, tmp);
6649           }
6650         }
6651         for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
6652           StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6653           if (! StringHasNoText (str)) {
6654             sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
6655             ValNodeCopyStr (head, 0, tmp);
6656           }
6657         }
6658       }
6659       AddOneFtableQual (head, "prot_note", sfp->comment);
6660       /*
6661       StringNCpy_0 (str, sfp->comment, sizeof (str));
6662       if (! StringHasNoText (str)) {
6663         sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
6664         ValNodeCopyStr (head, 0, tmp);
6665       }
6666       */
6667       if (prod != NULL && ! ajp->hideProteinID) {
6668         if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
6669           sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6670           ValNodeCopyStr (head, 0, tmp);
6671         }
6672       } else if (sfp->product != NULL && ! ajp->hideProteinID) {
6673         sip = SeqLocId (sfp->product);
6674         if (sip != NULL) {
6675           if (sip->choice == SEQID_GI) {
6676             sip2 = GetSeqIdForGI (sip->data.intvalue);
6677             if (sip2 != NULL) {
6678               sip = sip2;
6679             }
6680           }
6681           if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
6682             sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6683             ValNodeCopyStr (head, 0, tmp);
6684           }
6685         }
6686       }
6687       break;
6688     case SEQFEAT_REGION :
6689       StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
6690       if (! StringHasNoText (str)) {
6691         sprintf (tmp, "\t\t\tregion_name\t%s\n", str);
6692         ValNodeCopyStr (head, 0, tmp);
6693       }
6694       break;
6695     case SEQFEAT_BOND :
6696       bondidx = (Int2) sfp->data.value.intvalue;
6697       if (bondidx == 255) {
6698         bondidx = 5;
6699       }
6700       if (bondidx > 0 && bondidx < 6) {
6701         sprintf (tmp, "\t\t\tbond_type\t%s\n", bondList [bondidx]);
6702         ValNodeCopyStr (head, 0, tmp);
6703       }
6704       break;
6705     case SEQFEAT_SITE :
6706       siteidx = (Int2) sfp->data.value.intvalue;
6707       if (siteidx == 255) {
6708         siteidx = 26;
6709       }
6710       if (siteidx > 0 && siteidx < 27) {
6711         sprintf (tmp, "\t\t\tsite_type\t%s\n", siteList [siteidx]);
6712         ValNodeCopyStr (head, 0, tmp);
6713       }
6714       break;
6715     case SEQFEAT_PSEC_STR :
6716       sec_str = (Int2) sfp->data.value.intvalue;
6717       if (sec_str > 0 && sec_str <= 3) {
6718         sprintf (tmp, "\t\t\tsec_str_type\t%s\n", secStrText [sec_str]);
6719         ValNodeCopyStr (head, 0, tmp);
6720       }
6721       break;
6722     case SEQFEAT_HET :
6723       StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
6724       if (! StringHasNoText (str)) {
6725         sprintf (tmp, "\t\t\theterogen\t%s\n", str);
6726         ValNodeCopyStr (head, 0, tmp);
6727       }
6728       break;
6729     case SEQFEAT_BIOSRC :
6730       PrintBioSourceFtableEntry (head, sfp->data.value.ptrvalue);
6731       break;
6732     default :
6733       break;
6734   }
6735   for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
6736     if (StringICmp (gbq->qual, "pseudogene") != 0) continue;
6737     pseudogene = gbq->val;
6738     if (StringICmp (pseudogene, "processed") == 0 ||
6739         StringICmp (pseudogene, "unprocessed") == 0 ||
6740         StringICmp (pseudogene, "unitary") == 0 ||
6741         StringICmp (pseudogene, "allelic") == 0 ||
6742         StringICmp (pseudogene, "unknown") == 0) {
6743       pseudo = FALSE;
6744     }
6745   }
6746   if (pseudo) {
6747     ValNodeCopyStr (head, 0, "\t\t\tpseudo\n");
6748   }
6749   grp = SeqMgrGetGeneXref (sfp);
6750   if (grp != NULL) {
6751     if (SeqMgrGeneIsSuppressed (grp)) {
6752       ValNodeCopyStr (head, 0, "\t\t\tgene\t-\n");
6753     } else {
6754       if (StringDoesHaveText (grp->locus)) {
6755         sprintf (tmp, "\t\t\tgene\t%s\n", grp->locus);
6756         ValNodeCopyStr (head, 0, tmp);
6757       }
6758       if (StringDoesHaveText (grp->locus_tag)) {
6759         sprintf (tmp, "\t\t\tlocus_tag\t%s\n", grp->locus_tag);
6760         ValNodeCopyStr (head, 0, tmp);
6761       }
6762     }
6763   }
6764   if (! StringHasNoText (sfp->comment)) {
6765     ValNodeCopyStr (head, 0, "\t\t\tnote\t");
6766     ValNodeCopyStr (head, 0, sfp->comment);
6767     ValNodeCopyStr (head, 0, "\n");
6768   }
6769   switch (sfp->exp_ev) {
6770     case 1 :
6771       ValNodeCopyStr (head, 0, "\t\t\tevidence\texperimental\n");
6772       break;
6773     case 2 :
6774       ValNodeCopyStr (head, 0, "\t\t\tevidence\tnot_experimental\n");
6775       break;
6776     default :
6777       break;
6778   }
6779   if (! StringHasNoText (sfp->except_text)) {
6780     ValNodeCopyStr (head, 0, "\t\t\texception\t");
6781     ValNodeCopyStr (head, 0, sfp->except_text);
6782     ValNodeCopyStr (head, 0, "\n");
6783   } else if (sfp->excpt) {
6784     ValNodeCopyStr (head, 0, "\t\t\texception\n");
6785   }
6786   for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
6787     AddOneFtableQual (head, gbq->qual, gbq->val);
6788     /*
6789     if (! StringHasNoText (gbq->qual)) {
6790       if (! StringHasNoText (gbq->val)) {
6791         sprintf (tmp, "\t\t\t%s\t%s\n", gbq->qual, gbq->val);
6792         ValNodeCopyStr (head, 0, tmp);
6793       }
6794     }
6795     */
6796   }
6797   VisitUserObjectsInUop (sfp->ext, (Pointer) head, PrintFTUserObj);
6798   for (vnp = geneorprotdb; vnp != NULL; vnp = vnp->next) {
6799     dbt = (DbtagPtr) vnp->data.ptrvalue;
6800     if (dbt != NULL) {
6801       if (! StringHasNoText (dbt->db)) {
6802         oip = dbt->tag;
6803         if (oip->str != NULL && (! StringHasNoText (oip->str))) {
6804           sprintf (tmp, "\t\t\tdb_xref\t%s:%s\n", dbt->db, oip->str);
6805           ValNodeCopyStr (head, 0, tmp);
6806         } else {
6807           sprintf (tmp, "\t\t\tdb_xref\t%s:%ld\n", dbt->db, (long) oip->id);
6808           ValNodeCopyStr (head, 0, tmp);
6809         }
6810       }
6811     }
6812   }
6813   for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
6814     dbt = (DbtagPtr) vnp->data.ptrvalue;
6815     if (dbt != NULL) {
6816       if (! StringHasNoText (dbt->db)) {
6817         oip = dbt->tag;
6818         if (oip->str != NULL && (! StringHasNoText (oip->str))) {
6819           sprintf (tmp, "\t\t\tdb_xref\t%s:%s\n", dbt->db, oip->str);
6820           ValNodeCopyStr (head, 0, tmp);
6821         } else {
6822           sprintf (tmp, "\t\t\tdb_xref\t%s:%ld\n", dbt->db, (long) oip->id);
6823           ValNodeCopyStr (head, 0, tmp);
6824         }
6825       }
6826     }
6827   }
6828 }
6829 
FindFirstBioseq(SeqEntryPtr sep)6830 static BioseqPtr FindFirstBioseq (SeqEntryPtr sep)
6831 
6832 {
6833   BioseqPtr     bsp;
6834   BioseqSetPtr  bssp;
6835 
6836   if (sep == NULL || sep->data.ptrvalue == NULL ||
6837       /* sep->choice < 0 || */ sep->choice > 2) return NULL;
6838   if (IS_Bioseq (sep)) {
6839     bsp = (BioseqPtr) sep->data.ptrvalue;
6840     return bsp;
6841   }
6842   bssp = (BioseqSetPtr) sep->data.ptrvalue;
6843   for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
6844     bsp = FindFirstBioseq (sep);
6845     if (bsp != NULL) return bsp;
6846   }
6847   return NULL;
6848 }
6849 
BioseqLockAndIndexByEntity(Uint2 entityID)6850 static BioseqPtr BioseqLockAndIndexByEntity (Uint2 entityID)
6851 
6852 {
6853   BioseqPtr    bsp;
6854   SeqEntryPtr  sep;
6855   SeqIdPtr     sip;
6856 
6857   if (entityID < 1) return NULL;
6858 
6859   sep = SeqMgrGetSeqEntryForEntityID (entityID);
6860   if (sep == NULL) return NULL;
6861 
6862   bsp = FindFirstBioseq (sep);
6863   if (bsp == NULL) return NULL;
6864 
6865   sip = SeqIdFindBest (bsp->id, 0);
6866   if (sip == NULL) return NULL;
6867 
6868   bsp = BioseqLockById (sip);
6869   if (bsp == NULL) return NULL;
6870 
6871   if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
6872     SeqMgrIndexFeatures (entityID, NULL);
6873   }
6874 
6875   return bsp;
6876 }
6877 
FormatFtableSourceFeatBlock(BaseBlockPtr bbp,BioseqPtr target)6878 NLM_EXTERN CharPtr FormatFtableSourceFeatBlock (
6879   BaseBlockPtr bbp,
6880   BioseqPtr target
6881 )
6882 
6883 {
6884   SeqFeatPtr        sfp;
6885   SeqDescPtr        sdp;
6886   SeqMgrDescContext dcontext;
6887   SeqMgrFeatContext fcontext;
6888   BioSourcePtr      biop;
6889   ValNodePtr        head;
6890   IntSrcBlockPtr    isp;
6891   CharPtr           str;
6892 
6893   if (bbp == NULL) return NULL;
6894 
6895   isp = (IntSrcBlockPtr) bbp;
6896   head = NULL;
6897   biop = NULL;
6898 
6899   if (bbp->itemtype == OBJ_SEQDESC) {
6900     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID,
6901                                       0, NULL, &dcontext);
6902     if (sdp == NULL) return NULL;
6903     biop = sdp->data.ptrvalue;
6904   } else if (bbp->itemtype == OBJ_SEQFEAT) {
6905     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
6906     if (sfp == NULL) return NULL;
6907     biop = sfp->data.value.ptrvalue;
6908   }
6909   if (biop == NULL) return NULL;
6910   PrintFtableIntervals (&head, target, isp->loc, "source", FALSE);
6911   PrintBioSourceFtableEntry (&head, biop);
6912 
6913   str = MergeFFValNodeStrs (head);
6914   ValNodeFreeData (head);
6915 
6916   return str;
6917 }
6918 
DoImmediateFormat(Asn2gbFormatPtr afp,BaseBlockPtr bbp)6919 NLM_EXTERN void DoImmediateFormat (
6920   Asn2gbFormatPtr afp,
6921   BaseBlockPtr bbp
6922 )
6923 
6924 {
6925   IntAsn2gbJobPtr  ajp;
6926   BlockType        blocktype;
6927   BioseqPtr        bsp;
6928   FormatProc       fmt;
6929   IntFeatBlockPtr  ifp;
6930   Boolean          is_www;
6931   Int4             left = 0;
6932   size_t           max;
6933   SeqEntryPtr      oldscope;
6934   QualValPtr       qv = NULL;
6935   Int4             right = 0;
6936   SeqEntryPtr      sep;
6937   CharPtr          str = NULL;
6938   Uint2            itemtype;
6939   Uint2            itemID;
6940 
6941   if (afp == NULL || bbp == NULL) return;
6942   ajp = afp->ajp;
6943   if (ajp == NULL) return;
6944   is_www = GetWWW (ajp);
6945 
6946   blocktype = bbp->blocktype;
6947   if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return;
6948   fmt = asn2gnbk_fmt_functions [(int) blocktype];
6949   if (fmt == NULL) return;
6950 
6951   max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
6952   qv = MemNew (sizeof (QualVal) * (max + 5));
6953   if (qv == NULL) return;
6954 
6955   sep = GetTopSeqEntryForEntityID (bbp->entityID);
6956 
6957   bsp = BioseqLockAndIndexByEntity (bbp->entityID);
6958   oldscope = SeqEntrySetScope (sep);
6959 
6960   afp->qvp = qv;
6961   str = fmt (afp, bbp);
6962   afp->qvp = NULL;
6963 
6964   if (bbp->itemtype == 0 && ajp->ajp.bsp != NULL) {
6965     itemtype = ajp->ajp.bsp->idx.itemtype;
6966     itemID = ajp->ajp.bsp->idx.itemID;
6967   } else {
6968     itemtype = bbp->itemtype;
6969     itemID = bbp->itemID;
6970   }
6971 
6972   SeqEntrySetScope (oldscope);
6973   BioseqUnlock (bsp);
6974 
6975   if (blocktype == FEATURE_BLOCK && afp->ffwrite != NULL) {
6976     ifp = (IntFeatBlockPtr) bbp;
6977     left = ifp->left + 1;
6978     right = ifp->right + 1;
6979   }
6980 
6981   if (str != NULL) {
6982     if (afp->fp != NULL) {
6983       fprintf (afp->fp, "%s", str);
6984     }
6985     if (afp->ffwrite != NULL) {
6986       afp->ffwrite (str, afp->userdata, blocktype, bbp->entityID, itemtype, itemID, left, right);
6987     }
6988   } else {
6989     if (afp->fp != NULL) {
6990       fprintf (afp->fp, "?\n");
6991     }
6992     if (afp->ffwrite != NULL) {
6993       afp->ffwrite ("?\n", afp->userdata, blocktype, bbp->entityID, itemtype, itemID, left, right);
6994     }
6995   }
6996 
6997   MemFree (str);
6998   MemFree (qv);
6999 }
7000 
DoQuickLinkFormat(Asn2gbFormatPtr afp,CharPtr str)7001 NLM_EXTERN void DoQuickLinkFormat (
7002   Asn2gbFormatPtr afp,
7003   CharPtr str
7004 )
7005 
7006 {
7007   Uint2 entityID = 0, item_type = 0;
7008   Uint4 itemID = 0;
7009 
7010   if (afp == NULL || StringHasNoText (str)) return;
7011 
7012   if (afp->ajp != NULL) {
7013     if (afp->ajp->ajp.bsp != NULL) {
7014       entityID = afp->ajp->ajp.bsp->idx.entityID;
7015       item_type = OBJ_BIOSEQ;
7016       itemID = afp->ajp->ajp.bsp->idx.itemID;
7017     } else if (afp->ajp->ajp.bssp != NULL) {
7018       entityID = afp->ajp->ajp.bssp->idx.entityID;
7019       item_type = OBJ_BIOSEQSET;
7020       itemID = afp->ajp->ajp.bssp->idx.itemID;
7021     }
7022   }
7023 
7024   if (str != NULL) {
7025     if (afp->fp != NULL) {
7026       fprintf (afp->fp, "%s", str);
7027     }
7028     if (afp->ffwrite != NULL) {
7029       afp->ffwrite (str, afp->userdata, (BlockType) 0, entityID, item_type, itemID, 0, 0);
7030     }
7031   }
7032 }
7033 
asn2gnbk_format(Asn2gbJobPtr ajp,Int4 paragraph)7034 NLM_EXTERN CharPtr asn2gnbk_format (
7035   Asn2gbJobPtr ajp,
7036   Int4 paragraph
7037 )
7038 
7039 {
7040   Asn2gbFormat     af;
7041   Asn2gbSectPtr    asp;
7042   BaseBlockPtr     bbp;
7043   BlockType        blocktype;
7044   BioseqPtr        bsp;
7045   FormatProc       fmt;
7046   IntAsn2gbJobPtr  iajp;
7047   size_t           max;
7048   SeqEntryPtr      oldscope;
7049   QualValPtr       qv;
7050   Int4             section;
7051   SeqEntryPtr      sep;
7052   CharPtr          str = NULL;
7053 
7054   /* qv must hold MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) */
7055 
7056   iajp = (IntAsn2gbJobPtr) ajp;
7057   if (iajp == NULL || ajp->sectionArray == NULL || ajp->paragraphArray == NULL) return NULL;
7058   if (paragraph < 0 || paragraph >= ajp->numParagraphs) return NULL;
7059 
7060   bbp = ajp->paragraphArray [paragraph];
7061   if (bbp == NULL) return NULL;
7062 
7063   section = bbp->section;
7064   if (section < 0 || section >= ajp->numSections) return NULL;
7065 
7066   asp = ajp->sectionArray [section];
7067   if (asp == NULL) return NULL;
7068 
7069   blocktype = bbp->blocktype;
7070   if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return NULL;
7071 
7072   max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
7073   qv = MemNew (sizeof (QualVal) * (max + 5));
7074   if (qv == NULL) return NULL;
7075 
7076   MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
7077   af.ajp = (IntAsn2gbJobPtr) ajp;
7078   af.asp = asp;
7079   af.qvp = qv;
7080   af.format = iajp->format;
7081   af.aip = iajp->aip;
7082   af.atp = iajp->atp;
7083 
7084   sep = GetTopSeqEntryForEntityID (bbp->entityID);
7085 
7086   fmt = asn2gnbk_fmt_functions [(int) blocktype];
7087   if (fmt == NULL) return NULL;
7088 
7089   bsp = BioseqLockAndIndexByEntity (bbp->entityID);
7090   oldscope = SeqEntrySetScope (sep);
7091 
7092   str = fmt (&af, bbp);
7093 
7094   SeqEntrySetScope (oldscope);
7095   BioseqUnlock (bsp);
7096 
7097   if (str == NULL) {
7098     str = StringSave ("???\n");
7099   }
7100 
7101   MemFree (qv);
7102 
7103   return str;
7104 }
7105 
asn2gnbk_cleanup(Asn2gbJobPtr ajp)7106 NLM_EXTERN Asn2gbJobPtr asn2gnbk_cleanup (
7107   Asn2gbJobPtr ajp
7108 )
7109 
7110 {
7111   Asn2gbSectPtr     asp;
7112   BaseBlockPtr      bbp;
7113   BaseBlockPtr      PNTR blockArray;
7114   BioseqPtr         bsp;
7115   ValNodePtr        gapvnp;
7116   Int4              i;
7117   IntAsn2gbJobPtr   iajp;
7118   IntAsn2gbSectPtr  iasp;
7119   IntCdsBlockPtr    icp;
7120   IntFeatBlockPtr   ifp;
7121   IntRefBlockPtr    irp;
7122   IntSrcBlockPtr    isp;
7123   Int4              j;
7124   Int4              numBlocks;
7125   Int4              numSections;
7126   RefBlockPtr       rbp;
7127   Asn2gbFreeFunc    remotefree;
7128   ValNodePtr        remotevnp;
7129   SeqAnnotPtr       sap;
7130   SeqAnnotPtr       sapnext;
7131   Asn2gbSectPtr     PNTR sectionArray;
7132   StringItemPtr     sip, nxt;
7133   SeqBlockPtr       sbp;
7134   ValNodePtr        vnp;
7135 
7136   iajp = (IntAsn2gbJobPtr) ajp;
7137   if (iajp == NULL) return NULL;
7138 
7139   SeqLocFree (iajp->ajp.slp);
7140 
7141   numSections = ajp->numSections;
7142   sectionArray = ajp->sectionArray;
7143 
7144   if (sectionArray != NULL) {
7145 
7146     for (i = 0; i < numSections; i++) {
7147       asp = sectionArray [i];
7148       if (asp != NULL) {
7149         iasp = (IntAsn2gbSectPtr) asp;
7150 
7151         numBlocks = asp->numBlocks;
7152         blockArray = asp->blockArray;
7153         if (blockArray != NULL) {
7154 
7155           for (j = 0; j < numBlocks; j++) {
7156             bbp = blockArray [j];
7157             if (bbp != NULL) {
7158 
7159               MemFree (bbp->string);
7160 
7161               if (bbp->blocktype == REFERENCE_BLOCK) {
7162                 rbp = (RefBlockPtr) bbp;
7163                 MemFree (rbp->uniquestr);
7164                 irp = (IntRefBlockPtr) rbp;
7165                 DateFree (irp->date);
7166                 SeqLocFree (irp->loc);
7167                 MemFree (irp->authstr);
7168                 MemFree (irp->fig);
7169                 MemFree (irp->maploc);
7170 
7171               } else if (bbp->blocktype == SOURCEFEAT_BLOCK) {
7172 
7173                 isp = (IntSrcBlockPtr) bbp;
7174                 SeqLocFree (isp->loc);
7175 
7176               } else if (bbp->blocktype == FEATURE_BLOCK) {
7177 
7178                 ifp = (IntFeatBlockPtr) bbp;
7179                 if (ifp->isCDS) {
7180                   icp = (IntCdsBlockPtr) ifp;
7181                   MemFree (icp->fig);
7182                   MemFree (icp->maploc);
7183                 }
7184 
7185               } else if (bbp->blocktype == SEQUENCE_BLOCK) {
7186 
7187                 sbp = (SeqBlockPtr) bbp;
7188                 MemFree (sbp->bases);
7189               }
7190 
7191               MemFree (bbp);
7192             }
7193           }
7194         }
7195         MemFree (asp->blockArray);
7196         MemFree (asp->referenceArray);
7197         MemFree (asp);
7198       }
7199     }
7200   }
7201 
7202   MemFree (ajp->sectionArray);
7203   MemFree (ajp->paragraphArray);
7204   MemFree (ajp->paragraphByIDs);
7205 
7206   sip = iajp->pool;
7207   while (sip != NULL) {
7208     nxt = sip->next;
7209     MemFree (sip);
7210     sip = nxt;
7211   }
7212 
7213   if (iajp->lockedBspList != NULL) {
7214     UnlockFarComponents (iajp->lockedBspList);
7215   }
7216 
7217   if (iajp->manygaps != NULL) {
7218     ValNodeFreeData (iajp->manygaps);
7219   }
7220 
7221   if (iajp->gapvnp != NULL || iajp->remotevnp != NULL) {
7222     SeqMgrClearFeatureIndexes (ajp->entityID, NULL);
7223     if (iajp->reindex) {
7224       SeqMgrIndexFeaturesExEx (ajp->entityID, NULL, FALSE, FALSE, NULL);
7225     }
7226   }
7227 
7228   if (iajp->gapvnp != NULL) {
7229     gapvnp = iajp->gapvnp;
7230     gapvnp->next = NULL;  /* unlink in case remotevnp still linked after gapvnp */
7231     bsp = (BioseqPtr) gapvnp->data.ptrvalue;
7232     if (bsp != NULL) {
7233       sap = bsp->annot;
7234       while (sap != NULL) {
7235         sapnext = sap->next;
7236         SeqAnnotFree (sap);
7237         sap = sapnext;
7238       }
7239     }
7240     /* frees fake Bioseq that was created by MemNew, not BioseqNew */
7241     ValNodeFreeData (gapvnp);
7242   }
7243 
7244   if (iajp->remotevnp != NULL) {
7245     remotevnp = iajp->remotevnp;
7246     remotefree = iajp->remotefree;
7247     if (remotefree != NULL) {
7248       /* if remotefree exists, it is responsible for all freeing */
7249       remotefree (remotevnp, iajp->remotedata);
7250     } else {
7251       /* otherwise free Bioseqs and ValNode chain ourselves */
7252       for (vnp = remotevnp; vnp != NULL; vnp = vnp->next) {
7253         bsp = (BioseqPtr) vnp->data.ptrvalue;
7254         if (bsp != NULL) {
7255           BioseqFree (bsp);
7256         }
7257       }
7258       ValNodeFree (remotevnp);
7259     }
7260   }
7261 
7262   TextFsaFree (iajp->bad_html_fsa);
7263 
7264   FreeUrlAnchorFSA ();
7265 
7266   ValNodeFree (iajp->gihead);
7267 
7268   free_buff ();
7269   FiniWWW (iajp);
7270 
7271   MemFree (iajp);
7272 
7273   return NULL;
7274 }
7275 
SeqEntryToGnbk(SeqEntryPtr sep,SeqLocPtr slp,FmtType format,ModType mode,StlType style,FlgType flags,LckType locks,CstType custom,XtraPtr extra,FILE * fp)7276 NLM_EXTERN Boolean SeqEntryToGnbk (
7277   SeqEntryPtr sep,
7278   SeqLocPtr slp,
7279   FmtType format,
7280   ModType mode,
7281   StlType style,
7282   FlgType flags,
7283   LckType locks,
7284   CstType custom,
7285   XtraPtr extra,
7286   FILE *fp
7287 )
7288 
7289 {
7290   AsnIoPtr           aip = NULL;
7291   AsnIoPtr           aipfree = NULL;
7292   Asn2gbJobPtr       ajp;
7293   AsnTypePtr         atp = NULL;
7294   BioseqPtr          bsp = NULL;
7295   BioseqSetPtr       bssp = NULL;
7296   Boolean            do_gbseq_asn = FALSE;
7297   Boolean            do_gbseq_xml = FALSE;
7298   Asn2gbWriteFunc    ffwrite = NULL;
7299   GBSeqPtr           gbseq = NULL;
7300   GBSeq              gbsq;
7301   IntAsn2gbJobPtr    iajp;
7302   Boolean            rsult = FALSE;
7303   Int1               type = ASNIO_TEXT_OUT;
7304   Pointer            userdata = NULL;
7305   XtraBlock          xtra;
7306   /*
7307   BaseBlockPtr       bbp;
7308   BlockType          block;
7309   CharPtr            ffhead = NULL;
7310   CharPtr            fftail = NULL;
7311   Int4               i;
7312   Boolean            is_html;
7313   Int4               numParagraphs;
7314   BaseBlockPtr PNTR  paragraphArray;
7315   CharPtr            str;
7316   */
7317 #ifdef WIN_MAC
7318 #if __profile__
7319   ValNodePtr         bsplist = NULL;
7320   Uint2              entityID;
7321   Boolean            lockFarComp;
7322   Boolean            lockFarLocs;
7323   Boolean            lockFarProd;
7324   Boolean            lookupFarComp;
7325   Boolean            lookupFarHist;
7326   Boolean            lookupFarInf;
7327   Boolean            lookupFarLocs;
7328   Boolean            lookupFarOthers;
7329   Boolean            lookupFarProd;
7330 #endif
7331 #endif
7332 
7333   if (extra != NULL) {
7334     ffwrite = extra->ffwrite;
7335     /*
7336     ffhead = extra->ffhead;
7337     fftail = extra->fftail;
7338     */
7339     gbseq = extra->gbseq;
7340     aip = extra->aip;
7341     atp = extra->atp;
7342     userdata = extra->userdata;
7343   }
7344   if (fp == NULL && ffwrite == NULL && aip == NULL) return FALSE;
7345   if (sep == NULL && slp == NULL) return FALSE;
7346   if (sep != NULL) {
7347     if (IS_Bioseq (sep)) {
7348       bsp = (BioseqPtr) sep->data.ptrvalue;
7349     } else if (IS_Bioseq_set (sep)) {
7350       bssp = (BioseqSetPtr) sep->data.ptrvalue;
7351     }
7352   }
7353 
7354 #ifdef WIN_MAC
7355 #if __profile__
7356   /* this allows profiling of just the formatter, without feature indexing, on the Mac */
7357 
7358   if (sep != NULL) {
7359     entityID = ObjMgrGetEntityIDForPointer (sep->data.ptrvalue);
7360     if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
7361       SeqMgrIndexFeatures (entityID, NULL);
7362     }
7363   }
7364 
7365   lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
7366   lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
7367   lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
7368 
7369   if (lockFarComp || lockFarLocs || lockFarProd) {
7370     locks = locks ^ (LOCK_FAR_COMPONENTS | LOCK_FAR_LOCATIONS | LOCK_FAR_PRODUCTS);
7371     if (slp != NULL && lockFarComp) {
7372       bsplist = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, slp);
7373     } else {
7374       bsplist = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
7375     }
7376   }
7377 
7378   lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
7379   lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
7380   lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
7381   lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
7382   lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0);
7383   lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0);
7384 
7385   if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) {
7386     locks = locks ^ (LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS | LOOKUP_FAR_HISTORY | LOOKUP_FAR_INFERENCE | LOOKUP_FAR_OTHERS);
7387     LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers);
7388   }
7389 
7390   ProfilerSetStatus (TRUE);
7391 #endif
7392 #endif
7393 
7394   do_gbseq_xml = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_XML_GBSEQ_FILE);
7395   do_gbseq_asn = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_ASN_GBSEQ_FILE);
7396 
7397   if (do_gbseq_xml || do_gbseq_asn) {
7398     if (fp != NULL && aip == NULL) {
7399       if (do_gbseq_xml) {
7400         type |= ASNIO_XML;
7401       }
7402       aip = AsnIoNew (type, fp, NULL, NULL, NULL);
7403       aipfree = aip;
7404       fp = NULL;
7405     }
7406     if (extra == NULL) {
7407       MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
7408       extra = &xtra;
7409     }
7410     if (extra->gbseq == NULL) {
7411       MemSet ((Pointer) &gbsq, 0, sizeof (GBSeq));
7412       extra->gbseq = &gbsq;
7413       gbseq = extra->gbseq;
7414     }
7415   }
7416 
7417   /* pass TRUE for stream to do immediate write at time of creation for speed */
7418 
7419   ajp = asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
7420                            flags, locks, custom, extra,
7421                            TRUE, fp, aip, atp);
7422 
7423   if (ajp != NULL) {
7424     rsult = TRUE;
7425     iajp = (IntAsn2gbJobPtr) ajp;
7426 
7427 #if 0
7428     /* if streaming, all output was written in setup function, otherwise output here */
7429 
7430     if (! stream) {
7431 
7432       /* send optional head string */
7433 
7434       is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
7435       if (ffhead == NULL && is_html) {
7436         ffhead = defHead;
7437       }
7438       if (ffhead != NULL) {
7439         if (fp != NULL) {
7440           fprintf (fp, ffhead);
7441         }
7442       }
7443       if (ffwrite != NULL) {
7444         ffwrite (ffhead, userdata, HEAD_BLOCK, 0, 0, 0, 0, 0, 0, 0);
7445       }
7446 
7447       /* send each paragraph */
7448 
7449       numParagraphs = ajp->numParagraphs;
7450       paragraphArray = ajp->paragraphArray;
7451 
7452       for (i = 0; i < numParagraphs; i++) {
7453         str = asn2gnbk_format (ajp, i);
7454         block = (BlockType) 0;
7455         if (paragraphArray != NULL) {
7456           bbp = paragraphArray [i];
7457           if (bbp != NULL) {
7458             block = bbp->blocktype;
7459           }
7460         }
7461         if (str != NULL) {
7462           if (fp != NULL) {
7463             fprintf (fp, "%s", str);
7464           }
7465           if (ffwrite != NULL) {
7466             ffwrite (str, userdata, block, 0, 0, 0, 0, 0);
7467           }
7468         } else {
7469           if (fp != NULL) {
7470             fprintf (fp, "?\n");
7471           }
7472           if (ffwrite != NULL) {
7473             ffwrite ("?\n", userdata, block, 0, 0, 0, 0, 0);
7474           }
7475         }
7476 
7477         MemFree (str);
7478       }
7479 
7480       /* send optional tail string */
7481 
7482       if (fftail == NULL && is_html) {
7483         fftail = defTail;
7484       }
7485       if (fftail != NULL) {
7486         if (fp != NULL) {
7487           fprintf (fp, fftail);
7488         }
7489       }
7490       if (ffwrite != NULL) {
7491         ffwrite (fftail, userdata, TAIL_BLOCK, 0, 0, 0, 0, 0);
7492       }
7493     }
7494 #endif
7495 
7496     /* if RELEASE_MODE, warn if unresolved gi numbers, missing translation, etc. */
7497 
7498     if (iajp->relModeError && mode == RELEASE_MODE) {
7499       rsult = FALSE;
7500     }
7501 
7502     asn2gnbk_cleanup (ajp);
7503   }
7504 
7505   if (aipfree != NULL) {
7506     AsnIoFree (aipfree, FALSE);
7507   }
7508 
7509 #ifdef WIN_MAC
7510 #if __profile__
7511   ProfilerSetStatus (FALSE);
7512 
7513   UnlockFarComponents (bsplist);
7514 #endif
7515 #endif
7516 
7517   return rsult;
7518 }
7519 
BioseqToGnbk(BioseqPtr bsp,SeqLocPtr slp,FmtType format,ModType mode,StlType style,FlgType flags,LckType locks,CstType custom,XtraPtr extra,FILE * fp)7520 NLM_EXTERN Boolean BioseqToGnbk (
7521   BioseqPtr bsp,
7522   SeqLocPtr slp,
7523   FmtType format,
7524   ModType mode,
7525   StlType style,
7526   FlgType flags,
7527   LckType locks,
7528   CstType custom,
7529   XtraPtr extra,
7530   FILE *fp
7531 )
7532 
7533 {
7534   SeqEntryPtr  sep = NULL;
7535 
7536   if (bsp == NULL && slp == NULL) return FALSE;
7537   if (bsp != NULL) {
7538     sep = SeqMgrGetSeqEntryForData (bsp);
7539   }
7540   return SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, fp);
7541 }
7542 
7543 
7544