1 /* asn2gnb1.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gnb1.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 * Mati Shomrat
30 *
31 *
32 * Version Creation Date: 10/21/98
33 *
34 * $Revision: 1.285 $
35 *
36 * File Description: New GenBank flatfile generator - work in progress
37 *
38 * Modifications:
39 * --------------------------------------------------------------------------
40 * ==========================================================================
41 */
42
43 #include <ncbi.h>
44 #include <objall.h>
45 #include <objsset.h>
46 #include <objsub.h>
47 #include <objfdef.h>
48 #include <objpubme.h>
49 #include <seqport.h>
50 #include <sequtil.h>
51 #include <sqnutils.h>
52 #include <subutil.h>
53 #include <tofasta.h>
54 #include <explore.h>
55 #include <gbfeat.h>
56 #include <gbftdef.h>
57 #include <edutil.h>
58 #include <alignmgr2.h>
59 #include <asn2gnbi.h>
60
61 #ifdef WIN_MAC
62 #if __profile__
63 #include <Profiler.h>
64 #endif
65 #endif
66
GetAccVerForBioseq(BioseqPtr bsp,CharPtr buf,size_t buflen,Boolean hideGi,Boolean isSpan)67 NLM_EXTERN Boolean GetAccVerForBioseq (BioseqPtr bsp, CharPtr buf, size_t buflen, Boolean hideGi, Boolean isSpan) {
68
69 Char ch;
70 BIG_ID gi;
71 SeqIdPtr sip;
72 CharPtr tmp;
73 Boolean acc_found = FALSE;
74
75 if (buf == NULL || buflen < 1) {
76 return FALSE;
77 }
78 *buf = '\0';
79 if (bsp == NULL) {
80 return FALSE;
81 }
82
83 if (! hideGi) {
84 for (sip = bsp->id; sip != NULL; sip = sip->next) {
85 if (sip->choice == SEQID_GI) {
86 gi = (BIG_ID) sip->data.intvalue;
87 if (gi > 0) {
88 sprintf (buf, "%lld", (long long) gi);
89 return TRUE;
90 }
91 }
92 }
93 }
94 for (sip = bsp->id; sip != NULL; sip = sip->next) {
95 if (sip->choice == SEQID_GENBANK ||
96 sip->choice == SEQID_EMBL ||
97 sip->choice == SEQID_DDBJ ||
98 sip->choice == SEQID_GPIPE ||
99 sip->choice == SEQID_TPG ||
100 sip->choice == SEQID_TPE ||
101 sip->choice == SEQID_TPD ||
102 sip->choice == SEQID_PIR ||
103 sip->choice == SEQID_SWISSPROT ||
104 sip->choice == SEQID_PRF ||
105 sip->choice == SEQID_PDB ||
106 sip->choice == SEQID_OTHER) {
107 SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, buflen - 1);
108 acc_found = TRUE;
109 }
110 /* RefSeq, GPipe and PDB accessions can contain an underscore character '_',
111 * which is used as delimiter in the Javascript functions processing the
112 * HTML markup, hence change '_' to '-'.
113 */
114 if (sip->choice == SEQID_OTHER ||
115 sip->choice == SEQID_PDB ||
116 sip->choice == SEQID_GPIPE) {
117 if (isSpan) {
118 tmp = buf;
119 ch = *tmp;
120 while (ch != '\0') {
121 if (ch == '_') {
122 *tmp = '-';
123 }
124 tmp++;
125 ch = *tmp;
126 }
127 }
128 }
129 if (acc_found)
130 return TRUE;
131 }
132
133 return FALSE;
134 }
135
136 static Boolean FFIsStartOfLinkEx (StringItemPtr iter, Int4 pos, Int4Ptr lenP);
137
138 /* utility functions */
139
ValNodeCopyStrToHead(ValNodePtr PNTR head,Int2 choice,CharPtr str)140 NLM_EXTERN ValNodePtr ValNodeCopyStrToHead (ValNodePtr PNTR head, Int2 choice, CharPtr str)
141
142 {
143 ValNodePtr newnode;
144
145 if (head == NULL || str == NULL) return NULL;
146
147 newnode = ValNodeNew (NULL);
148 if (newnode == NULL) return NULL;
149
150 newnode->choice = (Uint1) choice;
151 newnode->data.ptrvalue = StringSave (str);
152
153 newnode->next = *head;
154 *head = newnode;
155
156 return newnode;
157 }
158
159 /* the val node strings mechanism will be replaced by a more efficient method later */
160
MergeFFValNodeStrs(ValNodePtr list)161 NLM_EXTERN CharPtr MergeFFValNodeStrs (
162 ValNodePtr list
163 )
164
165 {
166 size_t len;
167 CharPtr ptr;
168 CharPtr str;
169 CharPtr tmp;
170 ValNodePtr vnp;
171
172
173 if (list == NULL) return NULL;
174
175 for (vnp = list, len = 0; vnp != NULL; vnp = vnp->next) {
176 str = (CharPtr) vnp->data.ptrvalue;
177 len += StringLen (str);
178 }
179 if (len == 0) return NULL;
180
181 ptr = MemNew (sizeof (Char) * (len + 2));
182 if (ptr == NULL) return NULL;
183
184 for (vnp = list, tmp = ptr; vnp != NULL; vnp = vnp->next) {
185 str = (CharPtr) vnp->data.ptrvalue;
186 tmp = StringMove (tmp, str);
187 }
188
189 return ptr;
190 }
191
192
AddValNodeString(ValNodePtr PNTR head,CharPtr prefix,CharPtr string,CharPtr suffix)193 NLM_EXTERN void AddValNodeString (
194 ValNodePtr PNTR head,
195 CharPtr prefix,
196 CharPtr string,
197 CharPtr suffix
198 )
199
200 {
201 Char buf [256];
202 CharPtr freeme = NULL;
203 size_t len;
204 CharPtr newstr;
205 CharPtr strptr;
206
207 len = StringLen (prefix) + StringLen (string) + StringLen (suffix);
208 if (len == 0) return;
209
210 if (len < sizeof (buf)) {
211
212 /* if new string fits in stack buffer, no need to allocate */
213
214 MemSet ((Pointer) buf, 0, sizeof (buf));
215 newstr = buf;
216
217 } else {
218
219 /* new string bigger than stack buffer, so allocate sufficient string */
220
221 newstr = (CharPtr) MemNew (sizeof (Char) * (len + 2));
222 if (newstr == NULL) return;
223
224 /* allocated string will be freed at end of function */
225
226 freeme = newstr;
227 }
228
229 strptr = newstr;
230
231 if (prefix != NULL) {
232 strptr = StringMove (strptr, prefix);
233 }
234
235 if (string != NULL) {
236 strptr = StringMove (strptr, string);
237 }
238
239 if (suffix != NULL) {
240 strptr = StringMove (strptr, suffix);
241 }
242
243 /* currently just makes a valnode list, to be enhanced later */
244
245 ValNodeCopyStr (head, 0, newstr);
246
247 /* if large string was allocated, free it now */
248
249 if (freeme != NULL) {
250 MemFree (freeme);
251 }
252 }
253
254
FFAddString_NoRedund(StringItemPtr unique,CharPtr prefix,CharPtr string,CharPtr suffix,Boolean convertQuotes)255 NLM_EXTERN void FFAddString_NoRedund (
256 StringItemPtr unique,
257 CharPtr prefix,
258 CharPtr string,
259 CharPtr suffix,
260 Boolean convertQuotes
261 )
262 {
263 CharPtr str = string;
264 Int4 foundPos = 0;
265 Boolean wholeWord = FALSE;
266
267 if ( StringHasNoText(prefix) &&
268 StringHasNoText(string) &&
269 StringHasNoText(suffix) ) return;
270
271 if (StringNICmp (string, "tRNA-", 5) == 0) {
272 str = string+5;
273 }
274
275 while ( foundPos >= 0 && !wholeWord ) {
276 foundPos = FFStringSearch(unique, str, foundPos);
277 if ( foundPos >= 0 ) {
278 wholeWord = IsWholeWordSubstr(unique, foundPos, str);
279 foundPos += StringLen(str);
280 }
281 }
282
283 if ( foundPos < 0 || !wholeWord ) {
284 FFAddTextToString(unique, prefix, string, suffix, FALSE, convertQuotes, TILDE_IGNORE);
285 }
286 }
287
288 NLM_EXTERN void FFAddString_NoRedundEx (
289 StringItemPtr unique,
290 CharPtr prefix,
291 CharPtr string,
292 CharPtr suffix,
293 Boolean convertQuotes
294 );
295
FFAddString_NoRedundEx(StringItemPtr unique,CharPtr prefix,CharPtr string,CharPtr suffix,Boolean convertQuotes)296 NLM_EXTERN void FFAddString_NoRedundEx (
297 StringItemPtr unique,
298 CharPtr prefix,
299 CharPtr string,
300 CharPtr suffix,
301 Boolean convertQuotes
302 )
303 {
304 CharPtr str = string;
305 Int4 foundPos = 0;
306 Boolean wholeWord = FALSE;
307
308 if ( StringHasNoText(prefix) &&
309 StringHasNoText(string) &&
310 StringHasNoText(suffix) ) return;
311
312 if (StringNICmp (string, "tRNA-", 5) == 0) {
313 str = string+5;
314 }
315
316 while ( foundPos >= 0 && !wholeWord ) {
317 foundPos = FFStringSearch(unique, str, foundPos);
318 if ( foundPos >= 0 ) {
319 wholeWord = IsWholeWordSubstr(unique, foundPos, str);
320 foundPos += StringLen(str);
321 }
322 }
323
324 if ( foundPos < 0 || !wholeWord ) {
325 FFAddTextToString(unique, prefix, string, suffix, FALSE, convertQuotes, TILDE_EXPAND);
326 }
327 }
328
329
330
331 /* s_AddPeriodToEnd () -- Adds a '.' to the end of a given string if */
332 /* there is not already one there. */
333 /* */
334 /* Note that this adds one character to the */
335 /* length of the string, leading to a */
336 /* memory overrun if space was not previously */
337 /* allocated for this. */
338
s_AddPeriodToEnd(CharPtr someString)339 NLM_EXTERN void s_AddPeriodToEnd (CharPtr someString)
340 {
341 Int4 len;
342
343 if (StringHasNoText (someString)) return;
344 len = StringLen (someString);
345 if (len < 1) return;
346 if (someString[len-1] != '.')
347 {
348 someString[len] = '.';
349 someString[len+1] = '\0';
350 }
351 }
352
353 /* s_RemovePeriodFromEnd () -- If the last character in a given */
354 /* string is a '.', removes it. */
355
s_RemovePeriodFromEnd(CharPtr someString)356 NLM_EXTERN Boolean s_RemovePeriodFromEnd (CharPtr someString)
357 {
358 Int4 len;
359
360 if (StringHasNoText (someString)) return FALSE;
361 len = StringLen (someString);
362 if (len < 1) return FALSE;
363 if (someString[len-1] == '.') {
364 someString[len-1] = '\0';
365 return TRUE;
366 }
367 return FALSE;
368 }
369
370 /**/
371 /* isEllipsis () - Determines if a string ends in an ellipses */
372 /**/
373
IsEllipsis(CharPtr str)374 NLM_EXTERN Boolean IsEllipsis (
375 CharPtr str
376 )
377
378 {
379 size_t len;
380 CharPtr ptr;
381
382 if (StringHasNoText (str)) return FALSE;
383 len = StringLen (str);
384 if (len < 3) return FALSE;
385 ptr = str + len - 3;
386 return (Boolean) (ptr [0] == '.' && ptr [1] == '.' && ptr [2] == '.');
387 }
388
A2GBSeqLocReplaceID(SeqLocPtr newloc,SeqLocPtr ajpslp)389 NLM_EXTERN void A2GBSeqLocReplaceID (
390 SeqLocPtr newloc,
391 SeqLocPtr ajpslp
392 )
393
394 {
395 BioseqPtr bsp;
396 SeqIdPtr sip;
397
398 bsp = BioseqFindFromSeqLoc (ajpslp);
399 if (bsp == NULL) return;
400 sip = SeqIdFindBest (bsp->id, 0);
401 SeqLocReplaceID (newloc, sip);
402 }
403
asn2gb_PrintDate(DatePtr dp)404 NLM_EXTERN CharPtr asn2gb_PrintDate (
405 DatePtr dp
406 )
407
408 {
409 Char buf [30];
410 size_t len;
411
412 if (dp == NULL) return NULL;
413
414 if (DatePrint (dp, buf)) {
415 if (StringICmp (buf, "Not given") != 0) {
416 len = StringLen (buf);
417 if (len > 0) {
418 if (buf [len - 1] == '\n') {
419 if (buf [len - 2] == '.') {
420 buf [len - 2] = '\0';
421 } else {
422 buf [len - 1] = '\0';
423 }
424 }
425 }
426 return StringSave (buf);
427 }
428 }
429
430 return NULL;
431 }
432
433 static CharPtr month_names [] = {
434 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
435 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
436 "??"
437 };
438
DateToFF(CharPtr buf,DatePtr dp,Boolean citSub)439 NLM_EXTERN CharPtr DateToFF (
440 CharPtr buf,
441 DatePtr dp,
442 Boolean citSub
443 )
444
445 {
446 Int2 day;
447 Int2 month;
448 Int2 year;
449
450 if (buf != NULL) {
451 *buf = '\0';
452 }
453 if (dp == NULL) return NULL;
454
455 if (dp->data [0] == 0) {
456
457 StringCpy (buf, dp->str);
458
459 } else if (dp->data [0] == 1) {
460
461 year = 1900 + (Int2) dp->data [1];
462 month = (Int2) dp->data [2];
463 day = (Int2) dp->data [3];
464
465 if (citSub) {
466 if (month < 1 || month > 12) {
467 month = 13;
468 }
469 if (day < 1 || day > 31) {
470 day = 0;
471 }
472 } else {
473 if (month < 1 || month > 12) {
474 month = 1;
475 }
476 if (day < 1 || day > 31) {
477 day = 1;
478 }
479 }
480
481 if (day < 1) {
482 sprintf (buf, "\?\?-%s-%ld",
483 month_names [month-1], (long) year);
484 } else if (day < 10) {
485 sprintf (buf, "0%ld-%s-%ld",
486 (long) day, month_names [month-1], (long) year);
487 } else {
488 sprintf(buf, "%ld-%s-%ld",
489 (long) day, month_names [month-1], (long) year);
490 }
491 }
492
493 return buf;
494 }
495
496
FFGetString(IntAsn2gbJobPtr ajp)497 NLM_EXTERN StringItemPtr FFGetString (IntAsn2gbJobPtr ajp)
498
499 {
500 StringItemPtr sip;
501
502 if (ajp == NULL) return NULL;
503 if (ajp->pool != NULL) {
504 sip = ajp->pool;
505 ajp->pool = sip->next;
506 sip->next = NULL;
507 MemSet ((Pointer) sip, 0, sizeof (StringItem));
508 } else {
509 sip = (StringItemPtr) MemNew (sizeof (StringItem));
510 if (sip == NULL) return NULL;
511 }
512 sip->curr = sip;
513 sip->iajp = ajp;
514 sip->pos = 0;
515 return sip;
516 }
517
FFRecycleString(IntAsn2gbJobPtr ajp,StringItemPtr ffstring)518 NLM_EXTERN void FFRecycleString (IntAsn2gbJobPtr ajp, StringItemPtr ffstring)
519
520 {
521 StringItemPtr nxt;
522
523 if (ajp == NULL || ffstring == NULL) return;
524 if ( ffstring->pos == -1 ) return;
525
526 nxt = ffstring;
527 nxt->pos = -1;
528 while (nxt->next != NULL) {
529 nxt->pos = -1;
530 nxt = nxt->next;
531 }
532 nxt->next = ajp->pool;
533 ajp->pool = ffstring;
534
535 ffstring->curr = NULL;
536 }
537
FFAddOneChar(StringItemPtr sip,Char ch,Boolean convertQuotes)538 NLM_EXTERN void FFAddOneChar (
539 StringItemPtr sip,
540 Char ch,
541 Boolean convertQuotes
542 )
543 {
544 StringItemPtr current = sip->curr;
545
546 if ( current->pos == STRING_BUF_LEN ) {
547 current->next = FFGetString(sip->iajp);
548 current = current->next;
549 current->pos = 0;
550 sip->curr = current;
551 }
552
553 if ( convertQuotes && ch == '\"' ) {
554 ch = '\'';
555 }
556 current->buf[current->pos] = ch;
557 current->pos++;
558 }
559
FFAddNewLine(StringItemPtr ffstring)560 NLM_EXTERN void FFAddNewLine(StringItemPtr ffstring) {
561 FFAddOneChar(ffstring, '\n', FALSE);
562 }
563
FFAddNChar(StringItemPtr sip,Char ch,Int4 n,Boolean convertQuotes)564 NLM_EXTERN void FFAddNChar (
565 StringItemPtr sip,
566 Char ch,
567 Int4 n,
568 Boolean convertQuotes
569 )
570 {
571 Int4 i;
572
573 for ( i = 0; i < n; ++i ) {
574 FFAddOneChar(sip, ch, convertQuotes);
575 }
576 }
577
578
FFExpandTildes(StringItemPtr sip,CharPtr PNTR cpp)579 NLM_EXTERN void FFExpandTildes (StringItemPtr sip, CharPtr PNTR cpp) {
580 Char replace = **cpp;
581
582 if ( **cpp == '~' ) {
583 if ( *((*cpp) + 1) == '~' ) { /* "~~" -> '~' */
584 replace = '~';
585 (*cpp)++;
586 } else {
587 replace = '\n';
588 }
589 }
590
591 FFAddOneChar(sip, replace, FALSE);
592 }
593
594
FFSemicolonSeparateTildes(StringItemPtr sip,CharPtr PNTR cpp)595 NLM_EXTERN void FFSemicolonSeparateTildes (StringItemPtr sip, CharPtr PNTR cpp)
596
597 {
598 Char replace = **cpp;
599
600 if ( **cpp == '~' ) {
601 if ( *((*cpp) + 1) == '~' ) { /* "~~" -> '~' */
602 replace = '~';
603 (*cpp)++;
604 } else {
605 FFAddOneChar(sip, ';', FALSE);
606 replace = '\n';
607 }
608 }
609
610 FFAddOneChar(sip, replace, FALSE);
611 }
612
613
FFReplaceTildesWithSpaces(StringItemPtr ffstring,CharPtr PNTR cpp)614 NLM_EXTERN void FFReplaceTildesWithSpaces (StringItemPtr ffstring, CharPtr PNTR cpp) {
615 Char replace = **cpp, lookahead;
616 CharPtr cptr = *cpp;
617
618 if ( *cptr == '`' ) {
619 FFAddOneChar(ffstring, replace, FALSE);
620 return;
621 }
622
623 replace = ' ';
624 lookahead = *(cptr + 1);
625
626 if ( IS_DIGIT(lookahead) ) {
627 replace = '~';
628 }
629 else {
630 if ( (lookahead == ' ') || (lookahead == '(') ) {
631 if ( IS_DIGIT(*(cptr + 2)) ) {
632 replace = '~';
633 }
634 }
635 }
636
637 FFAddOneChar(ffstring, replace, FALSE);
638 }
639
FFOldExpand(StringItemPtr sip,CharPtr PNTR cpp)640 NLM_EXTERN void FFOldExpand (StringItemPtr sip, CharPtr PNTR cpp) {
641 /* "~" -> "\n", "~~" or "~~ ~~" -> "\n\n" */
642 CharPtr cp = *cpp;
643 Char current = *cp;
644 Char next = *(cp + 1);
645
646 /* handle "'~" */
647 if ( current == '`' ) {
648 if ( next != '~' ) {
649 FFAddOneChar(sip, current, FALSE);
650 } else {
651 FFAddOneChar(sip, '~', FALSE);
652 (*cpp)++;
653 }
654 return;
655 }
656
657 /* handle "~", "~~" or "~~ ~~" */
658 FFAddOneChar(sip, '\n', FALSE);
659 if ( next == '~' ) {
660 FFAddOneChar(sip, '\n', FALSE);
661 cp++;
662 *cpp = cp;
663 cp++;
664 if ( *cp == ' ' ) {
665 cp++;
666 if ( *cp == '~' ) {
667 cp++;
668 if ( *cp == '~' ) { /* saw "~~ ~~" */
669 *cpp = cp;
670 }
671 }
672 }
673 }
674 }
675
AddCommentStringWithTildes(StringItemPtr ffstring,CharPtr string)676 NLM_EXTERN void AddCommentStringWithTildes (StringItemPtr ffstring, CharPtr string)
677 {
678 /* One "~" is a new line, "~~" or "~~ ~~" means 2 returns */
679
680 /* Int2 i; */
681
682 while (*string != '\0') {
683 if (*string == '`' && *(string+1) == '~') {
684 FFAddOneChar(ffstring, '~', FALSE);
685 string += 2;
686 } else if (*string == '~') {
687 FFAddOneChar(ffstring, '\n', FALSE);
688 string++;
689 if (*string == '~') {
690 /*
691 for (i = 0; i < 12; i++) {
692 FFAddOneChar(ffstring, ' ', FALSE);
693 }
694 */
695 FFAddOneChar(ffstring, '\n', FALSE);
696 string++;
697 if (*string == ' ' && *(string+1) == '~' && *(string+2) == '~') {
698 string += 3;
699 }
700 }
701 } else if (*string == '\"') {
702 *string = '\'';
703 FFAddOneChar(ffstring, *string, FALSE);
704 string++;
705 } else {
706 FFAddOneChar(ffstring, *string, FALSE);
707 string++;
708 }
709 }
710 } /* AddCommentStringWithTildes */
711
712
AddStringWithTildes(StringItemPtr ffstring,CharPtr string)713 NLM_EXTERN void AddStringWithTildes (StringItemPtr ffstring, CharPtr string)
714 {
715 /* One "~" is a new line, "~~" or "~~ ~~" means 2 returns */
716
717 while (*string != '\0') {
718 if (*string == '`' && *(string+1) == '~') {
719 FFAddOneChar(ffstring, '~', FALSE);
720 string += 2;
721 } else if (*string == '~') {
722 FFAddOneChar(ffstring, '\n', FALSE);
723 string++;
724 if (*string == '~') {
725 FFAddOneChar(ffstring, '\n', FALSE);
726 string++;
727 if (*string == ' ' && *(string+1) == '~' && *(string+2) == '~') {
728 string += 3;
729 }
730 }
731 } else if (*string == '\"') {
732 *string = '\'';
733 FFAddOneChar(ffstring, *string, FALSE);
734 string++;
735 } else {
736 FFAddOneChar(ffstring, *string, FALSE);
737 string++;
738 }
739 }
740 } /* AddStringWithTildes */
741
742
FFProcessTildes(StringItemPtr sip,CharPtr PNTR cpp,Int2 tildeAction)743 NLM_EXTERN void FFProcessTildes (StringItemPtr sip, CharPtr PNTR cpp, Int2 tildeAction) {
744
745 switch (tildeAction) {
746
747 case TILDE_EXPAND :
748 FFExpandTildes(sip, cpp);
749 break;
750
751 case TILDE_SEMICOLON :
752 FFSemicolonSeparateTildes(sip, cpp);
753 break;
754
755 case TILDE_OLD_EXPAND :
756 FFOldExpand(sip, cpp);
757 break;
758
759 case TILDE_TO_SPACES :
760 FFReplaceTildesWithSpaces (sip, cpp);
761 break;
762
763 case TILDE_IGNORE:
764 default:
765 FFAddOneChar(sip, **cpp, FALSE);
766 break;
767 }
768 }
769
FFAddPeriod(StringItemPtr sip)770 NLM_EXTERN void FFAddPeriod (StringItemPtr sip) {
771 Int4 i;
772 Char ch = '\0';
773 StringItemPtr riter = sip->curr, prev;
774 IntAsn2gbJobPtr ajp;
775
776 if ( sip == NULL ) return;
777 ajp = (IntAsn2gbJobPtr)sip->iajp;
778 if ( ajp == NULL ) return;
779
780 for ( i = riter->pos - 1; i >= 0; --i ) {
781 ch = riter->buf[i];
782
783 if ( (ch == ' ') || (ch == '\t') || (ch == '~') || (ch == '.') || (ch == '\n') || (ch == '\r')) {
784 riter->pos--;
785
786 if ( i < 0 && riter != sip ) {
787 for ( prev = sip; prev->next != NULL; prev = prev->next ) {
788 if ( prev->next == riter ) {
789 i = prev->pos - 1;
790 FFRecycleString(ajp, riter);
791 riter = prev;
792 riter->next = NULL;
793 sip->curr = riter;
794 break;
795 }
796 }
797 }
798
799 } else {
800 break;
801 }
802 }
803
804 if (ch != '.') {
805 FFAddOneChar(sip, '.', FALSE);
806 }
807 }
808
FFAddOneString(StringItemPtr sip,CharPtr string,Boolean addPeriod,Boolean convertQuotes,Int2 tildeAction)809 NLM_EXTERN void FFAddOneString (
810 StringItemPtr sip,
811 CharPtr string,
812 Boolean addPeriod,
813 Boolean convertQuotes,
814 Int2 tildeAction
815 )
816 {
817 CharPtr strp = string;
818 Char ch;
819 Char prevchar = '\0';
820
821 if ( string == NULL ) return;
822
823 ch = *strp;
824 while ( ch != '\0' ) {
825 if ( (ch == '`') || (ch == '~') ) {
826 if (tildeAction == TILDE_SEMICOLON && prevchar == ';') {
827 FFProcessTildes(sip, &strp, TILDE_EXPAND);
828 } else if (tildeAction == TILDE_SEMICOLON && prevchar == ' ') {
829 FFProcessTildes(sip, &strp, TILDE_EXPAND);
830 } else {
831 FFProcessTildes(sip, &strp, tildeAction);
832 }
833 } else {
834 FFAddOneChar(sip, ch, convertQuotes);
835 }
836 prevchar = ch;
837 strp++;
838 ch = *strp;
839 }
840
841 if ( addPeriod ) {
842 FFAddPeriod(sip);
843 }
844 }
845
FFCatenateSubString(StringItemPtr dest,StringItemPtr start_sip,Int4 start_pos,StringItemPtr end_sip,Int4 end_pos,Uint4 line_max)846 NLM_EXTERN void FFCatenateSubString (
847 StringItemPtr dest,
848 StringItemPtr start_sip, Int4 start_pos,
849 StringItemPtr end_sip, Int4 end_pos,
850 Uint4 line_max
851 )
852 {
853 Int4 max_i, min_i, i, len = 0;
854 StringItemPtr current;
855 Boolean in_url = FALSE, found_start = FALSE;
856 Boolean in_html_ampersand_escape = FALSE;
857 IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)dest->iajp;
858 Uint4 char_count = 0;
859
860 for ( current = start_sip, i = start_pos;
861 current != NULL;
862 current = current->next ) {
863 if ( current == start_sip ) {
864 min_i = start_pos;
865 } else {
866 min_i = 0;
867 }
868
869 if ( current == end_sip ) {
870 max_i = end_pos;
871 } else {
872 max_i = current->pos;
873 }
874
875 for ( i = min_i; i < max_i; ++i ) {
876
877 /* -----------------------------------------------------------------------
878 * HTML specific processing:
879 * ---------------------------------------------------------------------*/
880 if ( GetWWW(ajp) ) {
881 if ( ! in_url && ! in_html_ampersand_escape ) {
882 if ( current->buf[i] == '<' ) {
883 /* Watch out! */
884 if (FFIsStartOfLinkEx (current, i, &len)) {
885 FFAddOneChar(dest, '<', FALSE);
886 in_url = TRUE;
887 found_start = TRUE;
888 continue;
889 } else {
890 FFAddOneString(dest, "<", FALSE, FALSE, TILDE_IGNORE);
891 ++char_count;
892 continue;
893 }
894 }
895 if( current->buf[i] == '&' )
896 {
897 FFAddOneChar(dest, '&', FALSE);
898 if( FFIsStartOfHTMLAmpersandEscape(current, i) ) {
899 in_html_ampersand_escape = TRUE;
900 }
901 ++char_count;
902 continue;
903 }
904 if (char_count == line_max) {
905 break;
906 }
907
908 if ( current->buf[i] == '>' ) {
909 /* Obviously *not* a tag terminator */
910 FFAddOneString(dest, ">", FALSE, FALSE, TILDE_IGNORE);
911 ++char_count;
912 continue;
913 }
914
915 /* Common garden variety of character */
916 FFAddOneChar(dest, current->buf[i], FALSE);
917 ++char_count;
918
919 if (found_start && len > 0) {
920 len--;
921 if (len == 0) {
922 FFAddOneChar(dest, '"', FALSE);
923 found_start = FALSE;
924 }
925 }
926 }
927
928 else if( in_html_ampersand_escape ) {
929 FFAddOneChar(dest, current->buf[i], FALSE);
930 if( current->buf[i] == ';' ) {
931 in_html_ampersand_escape = FALSE;
932 }
933 continue;
934 }
935
936 else /* in_url */ {
937 if ( current->buf[i] == '&' &&
938 ! FFStartsWith(current, i, "&", TRUE) )
939 {
940 /* encode ampersand for XHMLT */
941 FFAddOneString(dest, "&", FALSE, FALSE, TILDE_IGNORE);
942 continue;
943 }
944 if ( current->buf[i] == '>' ) {
945 FFAddOneChar(dest, '>', FALSE);
946 in_url = FALSE;
947 found_start = FALSE;
948 continue;
949 }
950
951 /* nothing inside a link needs any cooking. And neither does it
952 count against the page width limit. */
953 FFAddOneChar(dest, current->buf[i], FALSE);
954 }
955 }
956
957 /* ---------------------------------------------------------------------
958 * TEXT mode processing:
959 * --------------------------------------------------------------------*/
960 else {
961 FFAddOneChar(dest, current->buf[i], FALSE);
962 if (++char_count == line_max) {
963 break;
964 }
965 }
966
967 }
968 if ( current == end_sip || char_count == line_max ) break;
969 }
970 }
971
FFToCharPtrEx(StringItemPtr sip,CharPtr pfx,CharPtr sfx)972 NLM_EXTERN CharPtr FFToCharPtrEx (StringItemPtr sip, CharPtr pfx, CharPtr sfx)
973
974 {
975 Int4 size = 0;
976 StringItemPtr iter;
977 CharPtr result, temp;
978 size_t pfx_len, sfx_len;
979
980 pfx_len = StringLen (pfx);
981 sfx_len = StringLen (sfx);
982
983 for ( iter = sip; iter != NULL; iter = iter->next ) {
984 size += iter->pos;
985 }
986
987 result = (CharPtr)MemNew(size + pfx_len + sfx_len + 2);
988 temp = result;
989
990 if (pfx_len > 0) {
991 MemCpy( temp, pfx, pfx_len );
992 temp += pfx_len;
993 }
994 for ( iter = sip; iter != NULL; iter = iter->next ) {
995 MemCpy( temp, iter->buf, iter->pos );
996 temp += iter->pos;
997 }
998 if (sfx_len > 0) {
999 MemCpy( temp, sfx, sfx_len );
1000 temp += sfx_len;
1001 }
1002
1003 *temp = '\0';
1004
1005 return result;
1006 }
1007
FFToCharPtr(StringItemPtr sip)1008 NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip)
1009
1010 {
1011 return FFToCharPtrEx (sip, NULL, NULL);
1012 }
1013
1014 /* word wrap functions */
1015
1016 static CharPtr url_anchor_strings [] = {
1017 "</A>",
1018 "</ACRONYM>",
1019 "<A HREF=/",
1020 "<A HREF=\"/",
1021 "<A HREF=FTP://",
1022 "<A HREF=MAILTO:",
1023 "<A HREF=HTTP://",
1024 "<A HREF=HTTPS://",
1025 "<A HREF=\"HTTP://",
1026 "<A HREF=\"HTTPS://",
1027 "<ACRONYM TITLE=\"",
1028 "<DIV ",
1029 "</DIV>",
1030 NULL
1031 };
1032
GetUrlAnchorFSA(void)1033 static TextFsaPtr GetUrlAnchorFSA (void)
1034
1035 {
1036 return (TextFsaPtr) GetAppProperty ("Asn2gbUrlAnchorFSA");
1037 }
1038
InitUrlAnchorFSA(void)1039 static TextFsaPtr InitUrlAnchorFSA (void)
1040
1041 {
1042 TextFsaPtr fsa;
1043 Int2 q;
1044
1045 fsa = GetUrlAnchorFSA ();
1046 if (fsa != NULL) return fsa;
1047
1048 fsa = TextFsaNew ();
1049 if (fsa == NULL) return NULL;
1050
1051 for (q = 0; url_anchor_strings [q] != NULL; q++) {
1052 TextFsaAdd (fsa, url_anchor_strings [q]);
1053 }
1054
1055 SetAppProperty ("Asn2gbUrlAnchorFSA", (Pointer) fsa);
1056
1057 return fsa;
1058 }
1059
FreeUrlAnchorFSA(void)1060 static void FreeUrlAnchorFSA (void)
1061
1062 {
1063 TextFsaPtr fsa;
1064
1065 fsa = GetUrlAnchorFSA ();
1066 if (fsa == NULL) return;
1067
1068 SetAppProperty ("Asn2gbUrlAnchorFSA", NULL);
1069 TextFsaFree (fsa);
1070 }
1071
FFSkipLink(StringItemPtr PNTR iterp,Int4Ptr ip)1072 NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip) {
1073 StringItemPtr iter = *iterp;
1074 Int4 i = *ip;
1075
1076 while ( (iter != NULL) && (iter->buf[i] != '>') ) {
1077 ++i;
1078
1079 if ( i == iter->pos ) {
1080 iter = iter->next;
1081 i = 0;
1082 }
1083 }
1084 ++i;
1085 if ( iter != NULL && i == iter->pos && iter->next != NULL ) {
1086 iter = iter->next;
1087 i = 0;
1088 }
1089
1090 *iterp = iter;
1091 *ip = i;
1092 }
1093
FFSkipHTMLAmpersandEscape(StringItemPtr PNTR iterp,Int4Ptr ip)1094 NLM_EXTERN void FFSkipHTMLAmpersandEscape (StringItemPtr PNTR iterp, Int4Ptr ip)
1095 {
1096 StringItemPtr iter = *iterp;
1097 Int4 i = *ip;
1098
1099 while ( (iter != NULL) && (iter->buf[i] != ';') ) {
1100 ++i;
1101
1102 if ( i == iter->pos ) {
1103 iter = iter->next;
1104 i = 0;
1105 }
1106 }
1107
1108 *iterp = iter;
1109 *ip = i;
1110 }
1111
FFIsStartOfLinkEx(StringItemPtr iter,Int4 pos,Int4Ptr lenP)1112 static Boolean FFIsStartOfLinkEx (StringItemPtr iter, Int4 pos, Int4Ptr lenP)
1113
1114 {
1115 Char ch;
1116 TextFsaPtr fsa;
1117 Int4 i;
1118 ValNodePtr matches;
1119 Int4 max_url_len;
1120 Int4 state = 0;
1121
1122 if ( iter == NULL || pos >= iter->pos ) return FALSE;
1123 if ( iter->buf [pos] != '<' ) return FALSE;
1124
1125 fsa = GetUrlAnchorFSA ();
1126 if (fsa == NULL) return FALSE;
1127
1128 if (! TextFsaGetStats (fsa, NULL, NULL, &max_url_len)) return FALSE;
1129
1130 for (i = 0; i < max_url_len; i++) {
1131 ch = iter->buf [pos];
1132 ch = TO_UPPER (ch);
1133 state = TextFsaNext (fsa, state, ch, &matches);
1134 if (matches != NULL) {
1135 if (lenP != NULL) {
1136 *lenP = i + 1;
1137 }
1138 return TRUE;
1139 }
1140
1141 pos++;
1142 if (pos >= iter->pos) {
1143 iter = iter->next;
1144 pos = 0;
1145 if (iter == NULL) return FALSE;
1146 }
1147 }
1148
1149 return FALSE;
1150 }
1151
FFIsStartOfLink(StringItemPtr iter,Int4 pos)1152 NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos)
1153
1154 {
1155 return FFIsStartOfLinkEx (iter, pos, NULL);
1156 }
1157
FFIsStartOfHTMLAmpersandEscape(StringItemPtr iter,Int4 pos)1158 NLM_EXTERN Boolean FFIsStartOfHTMLAmpersandEscape (
1159 StringItemPtr iter,
1160 Int4 pos )
1161 {
1162 Char ch;
1163 Int4 i;
1164 Int4 max_len = 20;
1165
1166 if ( iter == NULL || pos >= iter->pos ) return FALSE;
1167 if ( iter->buf [pos] != '&' ) return FALSE;
1168
1169 /* skip the initial '&' */
1170 pos++;
1171 if (pos >= iter->pos) {
1172 iter = iter->next;
1173 pos = 0;
1174 if (iter == NULL) return FALSE;
1175 }
1176
1177 for (i = 0; i < max_len; i++) {
1178 ch = iter->buf [pos];
1179 if( isalnum(ch) || ch == '#' ) {
1180 /* fine; these are chars expected in HTML ampersand char */
1181 } else if( ch == ';' ) {
1182 /* found end of HTML ampersand char */
1183 return TRUE;
1184 } else {
1185 /* illegal char in HTML ampersand char */
1186 return FALSE;
1187 }
1188
1189 pos++;
1190 if (pos >= iter->pos) {
1191 iter = iter->next;
1192 pos = 0;
1193 if (iter == NULL) return FALSE;
1194 }
1195 }
1196
1197 return FALSE;
1198 }
1199
1200 /*
1201 NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos) {
1202 static CharPtr start_link = "<A HREF=";
1203 static CharPtr end_link = "</A>";
1204 Int4 start_len = StringLen(start_link);
1205 Int4 end_len = StringLen(end_link);
1206 Char temp[10];
1207 Int4 i;
1208
1209 if ( iter == NULL || pos >= iter->pos ) return FALSE;
1210 if ( iter->buf[pos] != '<' ) return FALSE;
1211
1212 MemSet(temp, 0, sizeof(temp));
1213 for ( i = 0; i < start_len && iter != NULL; ++i ) {
1214 if ( pos + i < iter->pos ) {
1215 temp[i] = iter->buf[pos+i];
1216 if ( i == end_len - 1 ) {
1217 if ( StringNICmp(temp, end_link, end_len) == 0 ) {
1218 return TRUE;
1219 }
1220 }
1221 } else {
1222 iter = iter->next;
1223 pos = -i;
1224 --i;
1225 }
1226 }
1227
1228 if ( i == start_len ) {
1229 if ( StringNICmp(temp, start_link, start_len) == 0 ) {
1230 return TRUE;
1231 }
1232 }
1233
1234 return FALSE;
1235 }
1236 */
1237
1238
FFSavePosition(StringItemPtr ffstring,StringItemPtr PNTR bufptr,Int4 PNTR posptr)1239 NLM_EXTERN void FFSavePosition(StringItemPtr ffstring, StringItemPtr PNTR bufptr, Int4 PNTR posptr) {
1240 *bufptr = ffstring->curr;
1241 *posptr = ffstring->curr->pos;
1242 }
1243
1244
FFTrim(StringItemPtr ffstring,StringItemPtr line_start,Int4 line_pos,Int4 line_prefix_len)1245 NLM_EXTERN void FFTrim (
1246 StringItemPtr ffstring,
1247 StringItemPtr line_start,
1248 Int4 line_pos,
1249 Int4 line_prefix_len
1250 )
1251 {
1252 StringItemPtr riter, iter;
1253 Int4 i;
1254 IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)ffstring->iajp;
1255
1256 for ( i = 0; i < line_prefix_len; ++i ) {
1257 ++line_pos;
1258 if ( line_pos == STRING_BUF_LEN ) {
1259 line_pos = 0;
1260 line_start= line_start->next;
1261 }
1262 }
1263
1264 riter = ffstring->curr;
1265 while ( riter != NULL ) {
1266 for ( i = riter->pos - 1;
1267 /* (i >= 0) && !(riter == line_start && i <= line_pos); */
1268 (i >= 0) && ((riter != line_start) || (i >= line_pos));
1269 --i ) {
1270 if ( !IS_WHITESP(riter->buf[i]) || (riter->buf[i] == '\n') ) {
1271 break;
1272 }
1273 }
1274 if ( i < 0 ) {
1275 i = STRING_BUF_LEN - 1;
1276 for ( iter = ffstring; iter != NULL; iter = iter->next ) {
1277 if ( iter->next == riter ) {
1278 break;
1279 }
1280 }
1281 if ( iter == NULL ){
1282 ffstring->pos = 0;
1283 break;
1284 } else {
1285
1286 riter = iter;
1287 ffstring->curr = riter;
1288 }
1289 } else {
1290 riter->pos = i + 1;
1291 FFRecycleString(ajp, riter->next);
1292 riter->next = NULL;
1293 break;
1294 }
1295 }
1296 }
1297
FFNextChar(StringItemPtr start_sip,Int4 start_pos)1298 NLM_EXTERN int FFNextChar(
1299 StringItemPtr start_sip,
1300 Int4 start_pos
1301 )
1302 {
1303 if (start_pos < start_sip->pos-1) {
1304 return start_sip->buf[start_pos+1];
1305 }
1306 else if (start_sip->next != NULL) {
1307 return (start_sip->next->buf)[0];
1308 }
1309 else {
1310 return 0;
1311 }
1312 }
1313
FFAdvanceChar(StringItemPtr * start_sip,Int4 * start_pos)1314 NLM_EXTERN void FFAdvanceChar(
1315 StringItemPtr* start_sip,
1316 Int4* start_pos
1317 )
1318 {
1319 if (*start_pos < (*start_sip)->pos-1) {
1320 ++(*start_pos);
1321 }
1322 else {
1323 (*start_sip) = (*start_sip)->next;
1324 *start_pos = 0;
1325 }
1326 }
1327
/* A line is wrapped when the visible text in the line exceeds the line size. */
/* Visible text is text that is not an HTML hyper-link. */
/* A line may be broken at one of the following characters: */
/* space, comma and dash. */
/* The order of search is first spaces, then commas and then dashes. */
/* We need to take into account the possibility that a 'new-line' character */
/* already exists in the line; in such a case we break at the 'new-line'. */
/* Spaces, dashes and new-lines will be broken at that character, whereas for */
/* commas we break at the character following the comma. */
1337
/*
 * Determines where the line that starts at (*break_sip, *break_pos) should
 * be broken.
 *
 *   *break_sip:  in:  buffer where the line starts
 *                out: buffer that holds the chosen break position
 *   *break_pos:  in:  position in *break_sip where the line starts
 *                out: position of the break inside *break_sip
 *   init_indent: number of leading characters that are stepped over before
 *                break candidates are collected (they still count as copied)
 *   visible:     the scan stops once this many characters have been counted
 *   is_html:     when TRUE, hyper-links detected by FFIsStartOfLink are
 *                skipped with FFSkipLink and are not counted
 *
 * Candidates are remembered for the last space, for the character after the
 * last comma and for the character after the last dash; a new-line ends the
 * scan immediately.  A space candidate wins over a comma candidate, which
 * wins over a dash candidate.  If the end of the string is reached before
 * the scan is "done", the break is placed at the fill level of the last
 * buffer visited.
 */
NLM_EXTERN void FFCalculateLineBreak (
  StringItemPtr PNTR break_sip, Int4 PNTR break_pos,
  Int4 init_indent, Int4 visible,
  Boolean is_html
)
{
  StringItemPtr iter, prev;
  Int4 i,
       done = FALSE,
       copied = 0,
       start = *break_pos,
       pos = 0;
  Char ch;
  Boolean found_comma = FALSE, found_dash = FALSE, found_lb = FALSE;
  /* each candidate is a pair of buffer and position within this buffer */
  StringItemPtr candidate_sip_space = NULL,
                candidate_sip_comma = NULL,
                candidate_sip_dash = NULL;
  Int4 candidate_int_space = -1,
       candidate_int_comma = -1,
       candidate_int_dash = -1;
  /* This is set when the line consists entirely of one huge word that
     we actually ended up breaking in the middle */
  /* This variable was introduced to cover problems with the corner case
     of having a really long word such that it would be broken at exactly the
     point where only its last letter ends up on the next line. */
  /* e.g. AA000002 */
  Boolean breaking_long_word = FALSE;


  iter = *break_sip;
  prev = iter;

  i = start;

  /* skip the first 'init_indent' characters of the line */
  while ( iter != NULL && !done ) {
    for ( i = start; i < iter->pos && init_indent > 0; ++i ) {
      /* a new-line inside the indent region ends the line right here */
      if ( iter->buf[i] == '\n' ) {
        candidate_sip_space = iter;
        candidate_int_space = i;
        done = TRUE;
        break;
      }
      if( is_html ) {
        /* a link is stepped over without consuming any of the indent;
           the --i compensates for the ++i of the for statement */
        if ( FFIsStartOfLink(iter, i) ) {
          FFSkipLink(&iter, &i);
          --i;
          continue;
        }
        /* NOTE(review): presumably leaves i on the last character of the
           escape so the whole entity is counted once - confirm against
           FFSkipHTMLAmpersandEscape */
        if( FFIsStartOfHTMLAmpersandEscape(iter, i) ) {
          FFSkipHTMLAmpersandEscape(&iter, &i);
        }
      }

      --init_indent;
      ++copied;
    }
    if ( init_indent > 0 ) {
      /* indent not yet consumed: continue at the start of the next buffer */
      start = 0;
      iter = iter->next;
    } else {
      break;
    }
  }
  /* the candidate search resumes where the indent skipping stopped */
  start = i;

  while ( iter != NULL && !done ) {
    for ( i = start; iter != NULL && i < iter->pos; ++i ) {
      /* comma and dash candidates are recorded one character late, i.e.
         at the character that follows the comma or dash */
      if ( found_comma ) {
        candidate_sip_comma = iter;
        candidate_int_comma = i;
        found_comma = FALSE;
      }
      if ( found_dash ) {
        candidate_sip_dash = iter;
        candidate_int_dash = i;
        found_dash= FALSE;
      }

      ch = iter->buf[i];
      if ( ch == '\n' ) {
        /* an existing new-line always ends the line */
        candidate_sip_space = iter;
        candidate_int_space = i;
        found_lb = TRUE;
        done = TRUE;
        break;
      } else if ( ch == ' ' ) {
        candidate_sip_space = iter;
        candidate_int_space = i;
      } else if ( ch == ',' ) {
        found_comma = TRUE;
      } else if ( ch == '-' ) {
        found_dash = TRUE;
        /*candidate_sip_dash = iter;
        candidate_int_dash = i;*/
      }

      if( is_html ) {
        /* links are skipped and do not count towards the visible width */
        if ( FFIsStartOfLink(iter, i) ) {
          FFSkipLink(&iter, &i);
          --i;
          continue;
        }
        if( FFIsStartOfHTMLAmpersandEscape(iter, i) ) {
          FFSkipHTMLAmpersandEscape(&iter, &i);
        }
      }

      ++copied;
      if ( copied >= visible ) {
        /* width exhausted; with no candidate at all the word is broken
           at the current character */
        if ( (candidate_sip_space == NULL) && (candidate_int_space == -1) &&
             (candidate_sip_comma == NULL) && (candidate_int_comma == -1) &&
             (candidate_sip_dash == NULL) && (candidate_int_dash == -1) ) {
          breaking_long_word = TRUE;
          candidate_sip_space = iter;
          candidate_int_space = i;
        }
        done = TRUE;
        break;
      }
    }
    start = 0;
    if ( iter != NULL && !done ) {
      /* remember the buffer just finished and its fill level in case the
         string ends here */
      prev = iter;
      pos = prev->pos;
      iter = iter->next;
    }
  }

  /* the order in which we examine the various candidate breaks is important */
  if ( iter == NULL && !done) { /* reached the end */
    *break_sip = prev;
    *break_pos = pos;
  } else {
    if( candidate_sip_space != NULL ) {
      *break_sip = candidate_sip_space;
      *break_pos = candidate_int_space;
    } else if( candidate_sip_comma != NULL ) {
      *break_sip = candidate_sip_comma;
      *break_pos = candidate_int_comma;
    } else if( candidate_sip_dash != NULL ) {
      *break_sip = candidate_sip_dash;
      *break_pos = candidate_int_dash;
    }
    /* unless the break is an existing new-line or a forced break inside a
       long word, move the break over any spaces that follow and over one
       directly following new-line */
    if ( ! found_lb && ! breaking_long_word ) {
      while (FFNextChar(*break_sip, *break_pos) == ' ') {
        FFAdvanceChar(break_sip, break_pos);
      }
      if (FFNextChar(*break_sip, *break_pos) == '\n') {
        FFAdvanceChar(break_sip, break_pos);
      }
    }
  }
}
1493
1494 /*
1495 * Scans the given buffer froma given scan position, for the next occurrence of
1496 * the indicated character. The search breaks when the character is found, or the
1497 * supplied break position is reached.
1498 * On exit, the scan position will either be on the character found, or at the
1499 * given break position.
1500 *
1501 * *p_line_sip: in: points to the buffer where scan should start
1502 * out: points to the buffer where the scan ended
1503 * *p_line_pos: in: points to the position in *p_line_sip where the scan should
1504 * start
1505 * out: points to the position in *p_line_sip where the scan ended.
1506 * break_sip: points to buffer where the scan should stop
1507 * break_pos: position in *break_sip where the scan should stop
1508 * c: the character we are looking for
1509 */
FFFindSingleChar(StringItemPtr * p_line_sip,Int4 * p_line_pos,StringItemPtr break_sip,Int4 break_pos,char c)1510 NLM_EXTERN Boolean FFFindSingleChar(
1511 StringItemPtr* p_line_sip,
1512 Int4* p_line_pos,
1513 StringItemPtr break_sip,
1514 Int4 break_pos,
1515 char c )
1516 {
1517 while( *p_line_pos >= (*p_line_sip)->pos) {
1518 *p_line_pos -= (*p_line_sip)->pos;
1519 (*p_line_sip) = (*p_line_sip)->next;
1520 if ( *p_line_sip == NULL ) {
1521 return FALSE;
1522 }
1523 }
1524 while (*p_line_sip != break_sip){
1525 while (*p_line_pos <(*p_line_sip)->pos) {
1526 if ((*p_line_sip)->buf[ *p_line_pos ] == c)
1527 return TRUE;
1528 else
1529 ++(*p_line_pos);
1530 }
1531 *p_line_pos = 0;
1532 *p_line_sip = (*p_line_sip)->next;
1533 }
1534 while (*p_line_pos < break_pos){
1535 if ( (*p_line_sip)->buf[ *p_line_pos ] == c )
1536 return TRUE;
1537 else
1538 ++(*p_line_pos);
1539 }
1540 return FALSE;
1541 }
1542
1543 /*
1544 * Returns the number of bytes remaining in the buffer chain, starting from the
1545 * given buffer and a read mark inside it.
1546 *
1547 * sip: points to the buffer where the string starts,
1548 * cur_pos: read mark in the buffer
1549 */
FFRemainingLength(StringItemPtr sip,Int4 cur_pos)1550 NLM_EXTERN Int4 FFRemainingLength(
1551 StringItemPtr sip,
1552 Int4 cur_pos )
1553 {
1554 return FFLength(sip)-cur_pos;
1555 }
1556
1557 /*
1558 * Scans the given line for the next opening tag of an HTML hyperlink. Ajusts
1559 * the line position to immediately after the opening tag (if such a tag is
1560 * found) or the the end of the line (if no such tag is found).
1561 * If a character buffer is supplied, this function will copy any opening tag
1562 * it finds into that buffer.
1563 *
1564 * *p_line_sip: in: points to the string buffer where the scan should start
1565 * out: points to the string buffer where the scan ended
1566 * *p_line_pos: in: position in **p_start_sip where the scan should start
1567 * out: position in **p_start_sip where the scan ended
1568 * break_sip: buffer that contain the line break
1569 * break_pos: position in break_sip that represents the line break
1570 * buf_open_link: character buffer to hold a copy of the opening link found
1571 * (or =0 if this information is not required).
1572 */
FFExtractNextOpenLink(StringItemPtr * p_line_sip,Int4 * p_line_pos,StringItemPtr break_sip,Int4 break_pos,char * buf_open_link)1573 NLM_EXTERN Boolean FFExtractNextOpenLink(
1574 StringItemPtr* p_line_sip,
1575 Int4* p_line_pos,
1576 StringItemPtr break_sip,
1577 Int4 break_pos,
1578 char* buf_open_link )
1579 {
1580 int i;
1581
1582 const char* buf_markup_open = "<A HREF";
1583 const int markup_size = strlen(buf_markup_open);
1584
1585 while ((*p_line_sip != break_sip) || (*p_line_pos < break_pos)) {
1586
1587 if (FFFindSingleChar(p_line_sip, p_line_pos, break_sip, break_pos, '<' )) {
1588
1589 if (FFRemainingLength(*p_line_sip, *p_line_pos) < markup_size) {
1590 *p_line_sip = break_sip;
1591 *p_line_pos = break_pos;
1592 return FALSE;
1593 }
1594 for ( i=0; i < markup_size; ++i ) {
1595 if (buf_markup_open[i] != toupper( FFCharAt( *p_line_sip, (*p_line_pos)+i )))
1596 break;
1597 }
1598 if ( i == markup_size ) {
1599 if (buf_open_link != 0) {
1600
1601 char next;
1602
1603 for (i=0; '>' != (next = FFCharAt( *p_line_sip, *p_line_pos )); ++(*p_line_pos)) {
1604
1605 if (next == '&' && ! FFStartsWith(*p_line_sip, *p_line_pos, "&", TRUE) ) {
1606
1607 MemCopy( buf_open_link+i, "&", strlen( "&" ) );
1608
1609 i += strlen("&");
1610
1611 }
1612
1613 else {
1614
1615 buf_open_link[i++] = next;
1616
1617 }
1618
1619 }
1620
1621 buf_open_link[i++] = '>';
1622
1623 buf_open_link[i] = 0;
1624
1625 } else {
1626 *p_line_pos += markup_size;
1627 }
1628
1629 return TRUE;
1630 } else {
1631 ++(*p_line_pos);
1632 }
1633 }
1634 }
1635 return FALSE;
1636 }
1637
1638 /*
1639 * Scans the given line for the next closing tag of an HTML hyperlink. Ajusts
1640 * the line position to immediately after the closing tag (if such a tag is
1641 * found) or the the end of the line (if no such tag is found).
1642 *
1643 * *p_line_sip: in: points to the string buffer where the scan should start
1644 * out: points to the string buffer where the scan ended
1645 * *p_line_pos: in: position in **p_start_sip where the scan should start
1646 * out: position in **p_start_sip where the scan ended
1647 * break_sip: buffer that contain the line break
1648 * break_pos: position in break_sip that represents the line break
1649 */
FFExtractNextCloseLink(StringItemPtr * p_line_sip,Int4 * p_line_pos,StringItemPtr break_sip,Int4 break_pos)1650 NLM_EXTERN Boolean FFExtractNextCloseLink(
1651 StringItemPtr* p_line_sip,
1652 Int4* p_line_pos,
1653 StringItemPtr break_sip,
1654 Int4 break_pos )
1655 {
1656 int i;
1657
1658 const char* buf_close_link = "</A>";
1659 const int markup_close_size = strlen(buf_close_link);
1660
1661 while ((*p_line_sip != break_sip) || (*p_line_pos < break_pos)) {
1662
1663 if (FFFindSingleChar(p_line_sip, p_line_pos, break_sip, break_pos, '<' )) {
1664 if (FFRemainingLength(*p_line_sip, *p_line_pos) < markup_close_size) {
1665 *p_line_sip = break_sip;
1666 *p_line_pos = break_pos;
1667 return FALSE;
1668 }
1669 for ( i=0; i < markup_close_size; ++i ) {
1670 if (buf_close_link[i] != toupper(FFCharAt( *p_line_sip, (*p_line_pos)+i)))
1671 break;
1672 }
1673 if (i == markup_close_size) {
1674 (*p_line_pos) += markup_close_size;
1675 return TRUE;
1676 } else {
1677 ++(*p_line_pos);
1678 }
1679 }
1680 }
1681 return FALSE;
1682 }
1683
1684 /*
1685 * Checks a given line whether its end falls between the opening and the closing
1686 * tag of an HTML link.
1687 *
1688 * start_sip: string buffer where the given line starts,
1689 * start_pos: position in start_sip where the given line starts,
1690 * break_sip: string buffer where the given line ends,
1691 * break_pos: position in break_pos where the given line ends,
1692 * buf_link_open: optional buffer where the open tag of the split link will be
1693 * written to. Leave =0 if you don't need this.
1694 */
FFLineBreakSplitsHtmlLink(StringItemPtr start_sip,Int4 start_pos,StringItemPtr break_sip,Int4 break_pos,char * buf_link_open,Int4 * html_open_link_counter)1695 NLM_EXTERN Boolean FFLineBreakSplitsHtmlLink(
1696 StringItemPtr start_sip,
1697 Int4 start_pos,
1698 StringItemPtr break_sip,
1699 Int4 break_pos,
1700 char* buf_link_open,
1701 Int4* html_open_link_counter )
1702 {
1703 StringItemPtr cur_iter=0;
1704 int cur_pos=0;
1705
1706 if ( ! GetWWW((IntAsn2gbJobPtr)start_sip->iajp) )
1707 return FALSE;
1708
1709 cur_iter = start_sip;
1710 cur_pos = start_pos;
1711
1712 while ((cur_iter != break_sip) || (cur_pos < break_pos)) {
1713 switch(*html_open_link_counter) {
1714 case 0:
1715 if (FFExtractNextOpenLink(&cur_iter, &cur_pos, break_sip, break_pos, buf_link_open ))
1716 ++(*html_open_link_counter);
1717 break;
1718 case 1:
1719 if (FFExtractNextCloseLink(&cur_iter, &cur_pos, break_sip, break_pos ))
1720 --(*html_open_link_counter);
1721 break;
1722 default:
1723 break;
1724 }
1725 }
1726 return (*html_open_link_counter);
1727 } /*FFLineBreakSplitsHtmlLink*/
1728
/*
 * Copies the text of src into dest, one output line at a time, inserting a
 * new-line after every line and an indent in front of every continuation
 * line.
 *
 *   ajp:            job; GetWWW(ajp) decides whether HTML links are honoured
 *   dest:           string the wrapped text is appended to
 *   src:            string holding the text to wrap
 *   init_indent:    indent handed to FFCalculateLineBreak for the first line
 *                   (0 is used once a continuation indent has been written)
 *   cont_indent:    width of the indent written before continuation lines
 *   line_max:       maximum line width
 *   eb_line_prefix: EMBL line code written at the start of continuation
 *                   lines, or NULL
 *
 * When a line break falls inside an HTML link, "</a>" is appended to the
 * broken line and the saved opening tag is repeated on the next line.
 */
NLM_EXTERN void FFLineWrap (
  IntAsn2gbJobPtr ajp,
  StringItemPtr dest,
  StringItemPtr src,
  Int4 init_indent,
  Int4 cont_indent,
  Int4 line_max,
  CharPtr eb_line_prefix
)
{
  /* line break candidate is a pair <StringItemPtr, position> */
  StringItemPtr break_sip = src;
  Int4 break_pos = 0;
  /* position in dest where the line currently being written starts */
  StringItemPtr line_start = NULL;
  Int4 line_pos = 0;
  Int4 i, line_prefix_len = 0;
  StringItemPtr iter;
  Boolean cont = FALSE;
  Boolean is_html = GetWWW(ajp);

  /* Note:
     The value of the next two variables needs to persist between consecutive
     invocations of FFLineBreakSplitsHtmlLink().
  */
  Int4 html_open_link_counter = 0;
  char buf_split_link_open[ 1024 ];

  Boolean linebreak_splits_link = FALSE;
  const char* buf_split_link_close = "</a>";

  MemSet( (void*)buf_split_link_open, 0, sizeof(buf_split_link_open) );
  FFSavePosition(dest, &line_start, &line_pos);

  /* both loop variables are reassigned inside the body: after each line,
     (iter, i) is moved to the break position just processed */
  for ( iter = src; iter != NULL; iter = iter->next ) {
    for ( i = 0; i < iter->pos; ) {

      break_pos = i;
      break_sip = iter;

      /* find the end of the current line; continuation lines have
         line_prefix_len columns already taken by the indent */
      FFCalculateLineBreak(&break_sip, &break_pos, init_indent,
                           line_max - line_prefix_len + 1, is_html);
      linebreak_splits_link =
        FFLineBreakSplitsHtmlLink(iter, i, break_sip, break_pos,
                                  buf_split_link_open, &html_open_link_counter );
      FFCatenateSubString(dest, iter, i, break_sip, break_pos, line_max);
      /* the first branch is disabled by the constant 0, so the trim always
         protects cont_indent leading characters */
      if (0 && eb_line_prefix) {
        /* don't quit at the indent width but trim all the way down to the EMBL line code */
        FFTrim(dest, line_start, line_pos, strlen(eb_line_prefix));
      } else {
        FFTrim(dest, line_start, line_pos, cont_indent);
      }
      /* close a link that the line break cuts in two */
      if ( linebreak_splits_link ) {
        FFAddOneString( dest,
                        (char*)buf_split_link_close, FALSE, FALSE, TILDE_IGNORE );
      }
      FFAddOneChar(dest, '\n', FALSE);

      /* the next output line starts here */
      FFSavePosition(dest, &line_start, &line_pos);

      /* for EMBL 'XX' lines */
      /* if the text just before the break is an "XX" that starts a line,
         step over the break character and continue without writing a
         continuation indent */
      if (eb_line_prefix != NULL) {
        cont = FALSE;
        if (break_pos > 1) {
          if (break_sip->buf[break_pos-1] == 'X' && break_sip->buf[break_pos-2] == 'X') {
            if ((break_pos == 2) || (break_sip->buf[break_pos-3] == '\n')) {
              ++break_pos;
              cont = TRUE;
            }
          }
        } else if (break_pos == 1) {
          /* the "XX" may straddle two buffers: first 'X' at the end of
             iter, second 'X' at the start of break_sip */
          if (break_sip->buf[0] == 'X' && iter->buf[iter->pos-1] == 'X') {
            if ((iter->pos > 1) && iter->buf[iter->pos-2] == '\n') {
              ++break_pos;
              cont = TRUE;
            }
          }
        }
      }

      /* resume scanning at the break position */
      i = break_pos;
      iter = break_sip;

      if (cont) continue;

      /* the white-space character at the break is not copied */
      if ( IS_WHITESP(iter->buf[i]) ) {
        i++;
      }
      /* more text follows: start a continuation line */
      if ( iter != src->curr || i < iter->pos ) {
        if ( eb_line_prefix != NULL ) {
          FFAddOneString(dest, eb_line_prefix, FALSE, FALSE, TILDE_IGNORE);
        }
        FFAddNChar(dest, ' ', cont_indent - StringLen(eb_line_prefix), FALSE);
        /* re-open the link that was closed at the end of the previous line */
        if ( linebreak_splits_link ) {
          FFAddOneString( dest, buf_split_link_open, FALSE, FALSE, TILDE_IGNORE );
        }
        init_indent = 0;
        line_prefix_len = cont_indent;
        /*FFSkipGarbage(&iter, &i);*/
      }
    }
  }
}
1831
1832 /* === */
1833
FFStartPrint(StringItemPtr sip,FmtType format,Int4 gb_init_indent,Int4 gb_cont_indent,CharPtr gb_label,Int4 gb_tab_to,Int4 eb_init_indent,Int4 eb_cont_indent,CharPtr eb_line_prefix,Boolean eb_print_xx)1834 NLM_EXTERN void FFStartPrint (
1835 StringItemPtr sip,
1836 FmtType format,
1837 Int4 gb_init_indent,
1838 Int4 gb_cont_indent,
1839 CharPtr gb_label,
1840 Int4 gb_tab_to,
1841 Int4 eb_init_indent,
1842 Int4 eb_cont_indent,
1843 CharPtr eb_line_prefix,
1844 Boolean eb_print_xx
1845 )
1846
1847 {
1848 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1849 FFAddNChar(sip, ' ', gb_init_indent, FALSE);
1850 FFAddOneString(sip, gb_label, FALSE, FALSE, TILDE_IGNORE);
1851 FFAddNChar(sip, ' ', gb_tab_to - gb_init_indent - StringLen(gb_label), FALSE);
1852 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1853 if ( eb_print_xx ) {
1854 FFAddOneString(sip, "XX\n", FALSE, FALSE, TILDE_IGNORE);
1855 }
1856 FFAddOneString(sip, eb_line_prefix, FALSE, FALSE, TILDE_IGNORE);
1857 FFAddNChar(sip, ' ', eb_init_indent - StringLen(eb_line_prefix), FALSE);
1858 }
1859 }
1860
FFAddTextToString(StringItemPtr ffstring,CharPtr prefix,CharPtr string,CharPtr suffix,Boolean addPeriod,Boolean convertQuotes,Int2 tildeAction)1861 NLM_EXTERN void FFAddTextToString (
1862 StringItemPtr ffstring,
1863 CharPtr prefix,
1864 CharPtr string,
1865 CharPtr suffix,
1866 Boolean addPeriod,
1867 Boolean convertQuotes,
1868 Int2 tildeAction
1869 )
1870
1871 {
1872 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1873 FFAddOneString (ffstring, string, FALSE, convertQuotes, tildeAction);
1874 FFAddOneString (ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1875
1876 if ( addPeriod ) {
1877 FFAddPeriod(ffstring);
1878 }
1879 }
1880
FFEndPrintEx(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,FmtType format,Int2 gb_init_indent,Int2 gb_cont_indent,Int2 eb_init_indent,Int2 eb_cont_indent,CharPtr eb_line_prefix,CharPtr pfx,CharPtr sfx)1881 NLM_EXTERN CharPtr FFEndPrintEx (
1882 IntAsn2gbJobPtr ajp,
1883 StringItemPtr ffstring,
1884 FmtType format,
1885 Int2 gb_init_indent,
1886 Int2 gb_cont_indent,
1887 Int2 eb_init_indent,
1888 Int2 eb_cont_indent,
1889 CharPtr eb_line_prefix,
1890 CharPtr pfx,
1891 CharPtr sfx
1892 )
1893
1894 {
1895 StringItemPtr temp = FFGetString(ajp);
1896 CharPtr result;
1897
1898 if ( (ffstring == NULL) || (ajp == NULL) ) return NULL;
1899
1900 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1901 FFLineWrap(ajp, temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX, NULL);
1902 } else {
1903 FFLineWrap(ajp, temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX, eb_line_prefix);
1904 }
1905 result = FFToCharPtrEx(temp, pfx, sfx);
1906 FFRecycleString(ajp, temp);
1907 return result;
1908 }
1909
FFEndPrint(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,FmtType format,Int2 gb_init_indent,Int2 gb_cont_indent,Int2 eb_init_indent,Int2 eb_cont_indent,CharPtr eb_line_prefix)1910 NLM_EXTERN CharPtr FFEndPrint (
1911 IntAsn2gbJobPtr ajp,
1912 StringItemPtr ffstring,
1913 FmtType format,
1914 Int2 gb_init_indent,
1915 Int2 gb_cont_indent,
1916 Int2 eb_init_indent,
1917 Int2 eb_cont_indent,
1918 CharPtr eb_line_prefix
1919 )
1920
1921 {
1922 return FFEndPrintEx (ajp, ffstring, format, gb_init_indent, gb_cont_indent,
1923 eb_init_indent, eb_cont_indent, eb_line_prefix, NULL, NULL);
1924 }
1925
FFLength(StringItemPtr ffstring)1926 NLM_EXTERN Uint4 FFLength(StringItemPtr ffstring) {
1927 Uint4 len = 0;
1928 StringItemPtr current;
1929
1930 for ( current = ffstring; current != NULL; current = current->next ) {
1931 len += current->pos;
1932 }
1933
1934 return len;
1935 }
1936
1937
FFCharAt(StringItemPtr ffstring,Uint4 pos)1938 NLM_EXTERN Char FFCharAt(StringItemPtr ffstring, Uint4 pos) {
1939 Uint4 inbufpos = pos % STRING_BUF_LEN;
1940 Uint4 count = 0;
1941 StringItemPtr current = NULL;
1942
1943 inbufpos = pos % STRING_BUF_LEN;
1944
1945 for ( current = ffstring; current != NULL; current = current->next ) {
1946 count += current->pos;
1947 if ( count > pos ) break;
1948 }
1949
1950 if ( current != NULL && inbufpos <= pos ) {
1951 return current->buf[inbufpos];
1952 }
1953
1954 return '\0';
1955 }
1956
1957
FFFindChar(StringItemPtr ffstring,StringItemPtr start_buf,Uint4 start_pos,Uint4 old_pos,Uint4 new_pos)1958 NLM_EXTERN Char FFFindChar (
1959 StringItemPtr ffstring, /* StringItem to search in */
1960 StringItemPtr start_buf, /* the position of the last char searched for (buffer) */
1961 Uint4 start_pos, /* the position of the last char searched for (pos) */
1962 Uint4 old_pos, /* the global position searched for */
1963 Uint4 new_pos /* new search position */
1964 )
1965 {
1966 Uint4 delta;
1967 Uint4 count;
1968 StringItemPtr current = NULL;
1969
1970 Char result = '\0';
1971
1972 if ( new_pos == old_pos ) {
1973 result = start_buf->buf[start_pos];
1974 }
1975
1976 if ( new_pos > old_pos ) {
1977 delta = new_pos - old_pos;
1978 current = start_buf;
1979 count = current->pos - start_pos - 1;
1980 current = current->next;
1981
1982 while ( delta > count && current != NULL ) {
1983 current = current->next;
1984 count += current->pos;
1985 }
1986
1987 if ( current != NULL ) {
1988 result = current->buf[new_pos % STRING_BUF_LEN];
1989 }
1990
1991 } else /* new_pos < old_pos */ {
1992 delta = old_pos - new_pos;
1993 if ( old_pos % STRING_BUF_LEN >= delta ) {
1994 result = start_buf->buf[new_pos % STRING_BUF_LEN];
1995 } else {
1996 result = FFCharAt(ffstring, new_pos);
1997 }
1998 }
1999
2000 return result;
2001 }
2002
FFEmpty(StringItemPtr ffstring)2003 NLM_EXTERN Boolean FFEmpty(StringItemPtr ffstring) {
2004 if ( ffstring != NULL && ffstring->pos != 0 ) {
2005 return FALSE;
2006 }
2007 return TRUE;
2008 }
2009
2010 /*
2011 * Compute the right-most position in the pattern at which character a occurs,
2012 * for each character a in the alphabet (assumed ASCII-ISO 8859-1)
2013 *
2014 * The result is returned in the supplied vector.
2015 */
ComputeLastOccurrence(const CharPtr pattern,Int4 last_occurrence[])2016 static void ComputeLastOccurrence(const CharPtr pattern, Int4 last_occurrence[])
2017 {
2018 Int4 i;
2019 Int4 pat_len;
2020
2021 /* Initilalize vector */
2022 for ( i = 0; i < 256; ++i ) {
2023 last_occurrence[i] = -1;
2024 }
2025
2026 /* compute right-most occurrence */
2027 pat_len = StringLen(pattern);
2028 for ( i = 0; i < pat_len; ++i ) {
2029 last_occurrence[(Uint1)pattern[i]] = i;
2030 }
2031 }
2032
/*
 * Computes the prefix function of pattern: longest_prefix[q] is the length
 * of the longest proper prefix of pattern[0..q] that is also a suffix of
 * pattern[0..q].
 *
 * longest_prefix must provide room for one entry per pattern character;
 * entry 0 is written unconditionally.
 */
static void ComputePrefix(const CharPtr pattern, Int4 longest_prefix[])
{
  Int4 len = StringLen(pattern);
  Int4 matched = 0;
  Int4 idx = 1;

  longest_prefix[0] = 0;

  while ( idx < len ) {
    /* fall back to shorter borders until one can be extended */
    while ( matched > 0 && pattern[matched] != pattern[idx] ) {
      matched = longest_prefix[matched - 1];
    }
    if ( pattern[matched] == pattern[idx] ) {
      ++matched;
    }
    longest_prefix[idx] = matched;
    ++idx;
  }
}
2051
2052
/*
 * Computes the good-suffix shift table used by FFStringSearch.
 *
 *   pattern:     the search pattern
 *   good_suffix: out: table with room for StringLen(pattern) + 1 entries
 *
 * Every entry written is a shift of at least 1.  If the scratch memory
 * cannot be allocated, or the pattern is empty, every entry is set to 1:
 * a shift of one position can never skip a match, so the search then
 * degrades to a plain scan instead of seeing an unset table.
 */
static void ComputeGoodSuffix(const CharPtr pattern, Int4 good_suffix[])
{
  Int4 pat_len = StringLen(pattern);
  Int4Ptr longest_prefix, reverse_longest_prefix;
  CharPtr reverse_pattern;
  Int4 i, j;

  if ( pat_len < 1 ) {
    /* nothing to analyse; avoids indexing longest_prefix[-1] below */
    good_suffix[0] = 1;
    return;
  }

  /* allocate memory */
  longest_prefix = MemNew(pat_len * sizeof(Int4));
  reverse_longest_prefix = MemNew(pat_len * sizeof(Int4));
  reverse_pattern = MemNew((pat_len + 1) * sizeof(Char));

  if ( longest_prefix == NULL ||
       reverse_longest_prefix == NULL ||
       reverse_pattern == NULL ) {
    /* fall back to the always-safe shift of 1 */
    for ( j = 0; j <= pat_len; ++j ) {
      good_suffix[j] = 1;
    }
    MemFree(longest_prefix);
    MemFree(reverse_longest_prefix);
    MemFree(reverse_pattern);
    return;
  }

  /* compute reverse pattern */
  for ( i = 0; i < pat_len; ++i ) {
    reverse_pattern[pat_len - i - 1] = pattern[i];
  }

  ComputePrefix(pattern, longest_prefix);
  ComputePrefix(reverse_pattern, reverse_longest_prefix);

  /* default shift: align the longest border of the whole pattern */
  for ( j = 0; j <= pat_len; ++j) {
    good_suffix[j] = pat_len - longest_prefix[pat_len-1];
  }

  /* tighten the shift wherever a matched suffix re-occurs in the pattern */
  for ( i = 0; i < pat_len; ++i ) {
    j = pat_len - reverse_longest_prefix[i];
    if ( good_suffix[j] > i - reverse_longest_prefix[i] + 1) {
      good_suffix[j] = i - reverse_longest_prefix[i] + 1;
    }
  }

  MemFree(longest_prefix);
  MemFree(reverse_longest_prefix);
  MemFree(reverse_pattern);
}
2097
2098
2099 /*
2100 * searches for a pattern in a StringItem.
2101 * Using the Boyer-Moore algorithm for the search.
2102 */
FFStringSearch(StringItemPtr text,const CharPtr pattern,Uint4 position)2103 NLM_EXTERN Int4 FFStringSearch (
2104 StringItemPtr text,
2105 const CharPtr pattern,
2106 Uint4 position )
2107 {
2108 Int4 text_len = FFLength(text);
2109 Int4 pat_len = StringLen(pattern);
2110 Int4 last_occurrence[256];
2111 Int4Ptr good_suffix;
2112 Int4 shift;
2113 Int4 j;
2114
2115 if ( pat_len == 0 ) return 0;
2116 if ( text_len == 0 || pat_len > text_len - position ) return -1;
2117
2118 good_suffix = (Int4Ptr)MemNew((pat_len+1) * sizeof(Int4));
2119 if ( good_suffix == NULL ) return -1;
2120
2121 ComputeLastOccurrence(pattern, last_occurrence);
2122 ComputeGoodSuffix(pattern, good_suffix);
2123
2124 shift = position;
2125 while ( shift <= text_len - pat_len ) {
2126 j = pat_len - 1;
2127 while( j >= 0 && pattern[j] == FFCharAt(text,shift + j) ) {
2128 --j;
2129 }
2130 if ( j == -1 ) {
2131 MemFree (good_suffix);
2132 return shift;
2133 } else {
2134 if( last_occurrence[(int) FFCharAt(text,shift + j)] <= j ) {
2135 shift += MAX( (Int4)good_suffix[(int) j+1],
2136 (Int4)(j - last_occurrence[(int) FFCharAt(text,shift + j)]));
2137 } else {
2138 shift += (Int4)good_suffix[(int) j+1];
2139 }
2140 }
2141 }
2142 MemFree (good_suffix);
2143
2144 return -1;
2145 }
2146
2147 /* Returns true if the given text starts with "pattern".
2148 You can also control whether this is done case insensitively */
FFStartsWith(StringItemPtr text,Int4 text_pos,const CharPtr pattern,Boolean case_insens)2149 NLM_EXTERN Boolean FFStartsWith(
2150 StringItemPtr text,
2151 Int4 text_pos,
2152 const CharPtr pattern,
2153 Boolean case_insens
2154 )
2155 {
2156 Int4 pattern_pos = 0;
2157
2158 if( NULL == text || NULL == pattern ) {
2159 return FALSE;
2160 }
2161
2162 /* every string starts with the empty string */
2163 if( pattern[0] == '\0' ) {
2164 return TRUE;
2165 }
2166
2167 while( ( case_insens ?
2168 toupper(pattern[pattern_pos]) == toupper(text->buf[text_pos]) :
2169 pattern[pattern_pos] == text->buf[text_pos] ) )
2170 {
2171 /* advance pattern; if we reach the end,
2172 * text starts with pattern */
2173 ++pattern_pos;
2174 if( pattern[pattern_pos] == '\0' ) {
2175 return TRUE;
2176 }
2177
2178 /* advance text, if we reach the end, text does NOT start
2179 * with pattern */
2180 FFAdvanceChar( &text, &text_pos );
2181 if( NULL == text ) {
2182 return FALSE;
2183 }
2184 }
2185
2186 return FALSE;
2187 }
2188
2189 /* */
2190 /* IsWholeWordSubstr () -- Determines if a substring that is */
2191 /* contained in another string is a whole */
2192 /* word or phrase -- i.e. is it both */
2193 /* preceded and followed by white space. */
2194 /* */
2195
IsWholeWordSubstr(StringItemPtr searchStr,Uint4 foundPos,CharPtr subStr)2196 NLM_EXTERN Boolean IsWholeWordSubstr (
2197 StringItemPtr searchStr,
2198 Uint4 foundPos,
2199 CharPtr subStr
2200 )
2201 {
2202 Boolean left, right;
2203 Char ch;
2204
2205
2206 /* check on the left only if there is a character there */
2207 if (foundPos > 0) {
2208 ch = FFCharAt(searchStr, foundPos - 1);
2209 left = IS_WHITESP(ch) || ispunct(ch);
2210 } else {
2211 left = TRUE;
2212 }
2213
2214 foundPos += StringLen(subStr);
2215 if ( foundPos == FFLength(searchStr) ) {
2216 right = TRUE;
2217 } else {
2218 ch = FFCharAt(searchStr, foundPos);
2219 right = IS_WHITESP(ch) || ispunct(ch);
2220 }
2221
2222 return left; /* see comment above */
2223 /* return left && right; this is how it should be!*/
2224 }
2225
2226
2227 /* functions to record sections or blocks in linked lists */
2228
Asn2gbAddBlock(Asn2gbWorkPtr awp,BlockType blocktype,size_t size)2229 NLM_EXTERN BaseBlockPtr Asn2gbAddBlock (
2230 Asn2gbWorkPtr awp,
2231 BlockType blocktype,
2232 size_t size
2233 )
2234
2235 {
2236 BaseBlockPtr bbp;
2237 ValNodePtr vnp;
2238
2239 if (awp == NULL || size < 1) return NULL;
2240
2241 bbp = (BaseBlockPtr) MemNew (size);
2242 if (bbp == NULL) return NULL;
2243 bbp->blocktype = blocktype;
2244 bbp->section = awp->currsection;
2245
2246 vnp = ValNodeAddPointer (&(awp->lastblock), 0, bbp);
2247 if (vnp == NULL) return bbp;
2248
2249 awp->lastblock = vnp;
2250 if (awp->blockList == NULL) {
2251 awp->blockList = vnp;
2252 }
2253
2254 return bbp;
2255 }
2256
2257
2258 /*--------------------------------------------------------*/
2259 /* */
2260 /* s_LocusGetBaseName() - */
2261 /* */
2262 /*--------------------------------------------------------*/
2263
s_LocusGetBaseName(BioseqPtr parent,BioseqPtr segment,CharPtr baseName)2264 static Boolean s_LocusGetBaseName (BioseqPtr parent, BioseqPtr segment, CharPtr baseName)
2265 {
2266 Char parentName[SEQID_MAX_LEN];
2267 Char segName[SEQID_MAX_LEN];
2268 SeqIdPtr sip;
2269 TextSeqIdPtr tsip;
2270 Char prefix[5];
2271 Char bufTmp[SEQID_MAX_LEN];
2272 Int2 deleteChars;
2273 Int2 newLength;
2274 Int2 i;
2275 Uint2 segNameLen;
2276
2277 /* Get the parent Sequence ID */
2278
2279 parentName [0] = '\0';
2280 sip = NULL;
2281 for (sip = parent->id; sip != NULL; sip = sip->next) {
2282 if (sip->choice == SEQID_GENBANK ||
2283 sip->choice == SEQID_EMBL ||
2284 sip->choice == SEQID_DDBJ) break;
2285 if (sip->choice == SEQID_TPG ||
2286 sip->choice == SEQID_TPE ||
2287 sip->choice == SEQID_TPD) break;
2288 }
2289
2290 if (sip != NULL) {
2291 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2292 if (tsip != NULL && (! StringHasNoText (tsip->name))) {
2293 StringNCpy_0 (parentName, tsip->name, sizeof (parentName));
2294 }
2295 }
2296
2297 if (StringHasNoText (parentName)) {
2298 StringNCpy_0 (parentName, baseName, sizeof (parentName));
2299 }
2300
2301 /* Get segment id */
2302
2303 segName [0] = '\0';
2304 segNameLen = 0;
2305 sip = NULL;
2306 for (sip = segment->id; sip != NULL; sip = sip->next) {
2307 if (sip->choice == SEQID_GENBANK ||
2308 sip->choice == SEQID_EMBL ||
2309 sip->choice == SEQID_DDBJ) break;
2310 if (sip->choice == SEQID_TPG ||
2311 sip->choice == SEQID_TPE ||
2312 sip->choice == SEQID_TPD) break;
2313 }
2314
2315 if (sip != NULL) {
2316 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2317 if (tsip != NULL && (! StringHasNoText (tsip->name))) {
2318 StringNCpy_0 (segName, tsip->name, sizeof (segName));
2319 segNameLen = StringLen(segName);
2320 }
2321 }
2322
2323 /* If there's no "SEG_" prefix, then */
2324 /* just use the parent ID. */
2325
2326 StringNCpy_0 (prefix,parentName,sizeof (prefix));
2327 prefix[4] = '\0';
2328 if (StringCmp(prefix,"SEG_") != 0)
2329 {
2330 StringCpy(baseName,parentName);
2331 return FALSE;
2332 }
2333
2334 /* Otherwise, eliminate the "SEG_" ... */
2335
2336 StringCpy(bufTmp, &parentName[4]);
2337 StringCpy(parentName,bufTmp);
2338
2339 /* ... And calculate a base name */
2340
2341 if (segNameLen > 0 &&
2342 (segName[segNameLen-1] == '1') &&
2343 (StringLen(parentName) == segNameLen) &&
2344 (parentName[segNameLen-1] == segName[segNameLen-1]))
2345 {
2346 deleteChars = 1;
2347 for (i = segNameLen-2; i >= 0; i--)
2348 if (parentName[i] == '0')
2349 deleteChars++;
2350 else
2351 break;
2352 newLength = segNameLen - deleteChars;
2353 StringNCpy (parentName,segName,newLength); /* not StringNCpy_0 */
2354 parentName[newLength] = '\0';
2355 }
2356
2357 /* Return the base name in the basename parameter */
2358
2359 StringCpy(baseName,parentName);
2360 return TRUE;
2361 }
2362
2363 /* ********************************************************************** */
2364
2365 static Uint1 fasta_order [NUM_SEQID] = {
2366 33, /* 0 = not set */
2367 20, /* 1 = local Object-id */
2368 15, /* 2 = gibbsq */
2369 16, /* 3 = gibbmt */
2370 30, /* 4 = giim Giimport-id */
2371 10, /* 5 = genbank */
2372 10, /* 6 = embl */
2373 10, /* 7 = pir */
2374 10, /* 8 = swissprot */
2375 15, /* 9 = patent */
2376 10, /* 10 = other = refseq */
2377 20, /* 11 = general Dbtag */
2378 255, /* 12 = gi */
2379 10, /* 13 = ddbj */
2380 10, /* 14 = prf */
2381 12, /* 15 = pdb */
2382 10, /* 16 = tpg */
2383 10, /* 17 = tpe */
2384 10, /* 18 = tpd */
2385 15, /* 19 = gpp */
2386 15 /* 20 = nat */
2387 };
2388
2389 /* DoOneSection builds a single report for one bioseq or segment */
2390
Asn2gbAddSection(Asn2gbWorkPtr awp)2391 static Asn2gbSectPtr Asn2gbAddSection (
2392 Asn2gbWorkPtr awp
2393 )
2394
2395 {
2396 Asn2gbSectPtr asp;
2397 ValNodePtr vnp;
2398
2399 if (awp == NULL) return NULL;
2400
2401 asp = (Asn2gbSectPtr) MemNew (sizeof (IntAsn2gbSect));
2402 if (asp == NULL) return NULL;
2403
2404 vnp = ValNodeAddPointer (&(awp->lastsection), 0, asp);
2405 if (vnp == NULL) return asp;
2406
2407 awp->lastsection = vnp;
2408 if (awp->sectionList == NULL) {
2409 awp->sectionList = vnp;
2410 }
2411
2412 return asp;
2413 }
2414
SegHasParts(BioseqPtr bsp)2415 NLM_EXTERN Boolean SegHasParts (
2416 BioseqPtr bsp
2417 )
2418
2419 {
2420 BioseqSetPtr bssp;
2421 SeqEntryPtr sep;
2422
2423 if (bsp == NULL || bsp->repr != Seq_repr_seg) return FALSE;
2424 sep = bsp->seqentry;
2425 if (sep == NULL) return FALSE;
2426 sep = sep->next;
2427 if (sep == NULL || (! IS_Bioseq_set (sep))) return FALSE;
2428 bssp = (BioseqSetPtr) sep->data.ptrvalue;
2429 if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) return TRUE;
2430 return FALSE;
2431 }
2432
LocInBioseq(SeqLocPtr slp,BioseqPtr bsp)2433 static Boolean LocInBioseq (
2434 SeqLocPtr slp,
2435 BioseqPtr bsp
2436 )
2437
2438 {
2439 SeqIdPtr sip;
2440
2441 if (slp == NULL || bsp == NULL) return FALSE;
2442 sip = SeqLocId (slp);
2443 if (sip == NULL) return FALSE;
2444 return SeqIdIn (sip, bsp->id);
2445 }
2446
/*
  AddRemainingGaps walks the gap features starting at awp->currfargap and,
  for as long as the gap's location is on the current section's bioseq,
  adds a feature block for it and formats it immediately through
  DoImmediateRemoteFeatureFormat.  awp->currfargap is advanced past every
  gap that is consumed.

  Nothing is done unless awp, its current section, that section's bioseq,
  a pending gap, and awp->afp are all present.
*/

static void AddRemainingGaps (
  Asn2gbWorkPtr awp
)

{
  BioseqPtr        bsp;
  FeatBlockPtr     fbp;
  SeqFeatPtr       gap;
  IntFeatBlockPtr  ifp;
  Asn2gbSectPtr    section;

  if (awp == NULL) return;
  section = awp->asp;
  if (section == NULL) return;
  bsp = section->bsp;
  if (bsp == NULL) return;

  if (awp->currfargap == NULL || awp->afp == NULL) return;

  for (gap = awp->currfargap;
       gap != NULL && LocInBioseq (gap->location, bsp);
       gap = awp->currfargap) {

    fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntFeatBlock));
    if (fbp != NULL) {

      /* public feature block fields - the gap has no entity/item of its own */

      fbp->entityID = 0;
      fbp->itemID = 0;
      fbp->itemtype = OBJ_SEQFEAT;
      fbp->featdeftype = FEATDEF_gap;

      /* internal feature block fields */

      ifp = (IntFeatBlockPtr) fbp;
      ifp->mapToNuc = FALSE;
      ifp->mapToProt = FALSE;
      ifp->mapToGen = FALSE;
      ifp->mapToMrna = FALSE;
      ifp->mapToPep = FALSE;
      ifp->left = 0;
      ifp->right = 0;
      ifp->firstfeat = awp->firstfeat;
      awp->firstfeat = FALSE;

      if (awp->afp != NULL) {
        DoImmediateRemoteFeatureFormat (awp->afp, (BaseBlockPtr) fbp, gap);
      }
    }

    /* consume this gap whether or not a block could be added */

    awp->currfargap = gap->next;
  }
}
2493
DoOneSection(BioseqPtr target,BioseqPtr parent,BioseqPtr bsp,BioseqPtr refs,SeqLocPtr slp,Uint2 seg,Int4 from,Int4 to,Boolean contig,Boolean onePartOfSeg,Asn2gbWorkPtr awp)2494 NLM_EXTERN void DoOneSection (
2495 BioseqPtr target,
2496 BioseqPtr parent,
2497 BioseqPtr bsp,
2498 BioseqPtr refs,
2499 SeqLocPtr slp,
2500 Uint2 seg,
2501 Int4 from,
2502 Int4 to,
2503 Boolean contig,
2504 Boolean onePartOfSeg,
2505 Asn2gbWorkPtr awp
2506 )
2507
2508 {
2509 size_t acclen;
2510 Asn2gbFormatPtr afp;
2511 IntAsn2gbJobPtr ajp;
2512 Asn2gbSectPtr asp;
2513 SeqMgrBioseqContext bcontext;
2514 BlockMask bkmask;
2515 BaseBlockPtr PNTR blockArray;
2516 Boolean cagemaster = FALSE;
2517 SeqMgrDescContext dcontext;
2518 BioseqPtr gbsp;
2519 SeqAnnotPtr gsap;
2520 Boolean hasRefs;
2521 Int4 i;
2522 IntAsn2gbSectPtr iasp;
2523 Boolean isGpipe = FALSE;
2524 Boolean isRefSeq = FALSE;
2525 MolInfoPtr mip;
2526 Boolean nsgenome = FALSE;
2527 Int4 numBlocks;
2528 Int4 numsegs = 0;
2529 SeqDescrPtr sdp;
2530 SeqIdPtr sip;
2531 Boolean tlsmaster = FALSE;
2532 Boolean tsamaster = FALSE;
2533 TextSeqIdPtr tsip;
2534 ValNodePtr vnp;
2535 Boolean wgsmaster = FALSE;
2536 Boolean wgstech = FALSE;
2537 Boolean willshowcage = FALSE;
2538 Boolean willshowcontig = FALSE;
2539 Boolean willshowgenome = FALSE;
2540 Boolean willshowsequence = FALSE;
2541 Boolean willshowtls = FALSE;
2542 Boolean willshowtsa = FALSE;
2543 Boolean willshowwgs = FALSE;
2544
2545 if (target == NULL || parent == NULL || bsp == NULL || awp == NULL) return;
2546 ajp = awp->ajp;
2547 if (ajp == NULL) return;
2548 bkmask = ajp->bkmask;
2549
2550 if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
2551 if (bsp->repr == Seq_repr_seg) {
2552 } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2553 } else if (bsp->repr == Seq_repr_ref) {
2554 } else if (bsp->repr == Seq_repr_map) {
2555 } else return;
2556 }
2557
2558 if (ajp->flags.suppressLocalID) {
2559 sip = SeqIdSelect (bsp->id, fasta_order, NUM_SEQID);
2560 if (sip == NULL || sip->choice == SEQID_LOCAL) return;
2561 }
2562
2563 if (seg == 0) {
2564 awp->basename[0] = '\0';
2565 } else if (seg == 1) {
2566 s_LocusGetBaseName (parent, bsp, awp->basename);
2567 }
2568
2569 asp = Asn2gbAddSection (awp);
2570 if (asp == NULL) return;
2571
2572 afp = awp->afp;
2573 if (afp != NULL) {
2574 afp->asp = asp;
2575 }
2576
2577 numsegs = awp->partcount;
2578 if (numsegs == 0 && SeqMgrGetBioseqContext (parent, &bcontext)) {
2579 numsegs = bcontext.numsegs;
2580 }
2581
2582 /* set working data fields */
2583
2584 awp->asp = asp;
2585
2586 awp->target = target;
2587 awp->parent = parent;
2588 awp->bsp = bsp;
2589 awp->refs = refs;
2590 awp->slp = slp;
2591 (awp->sectionCount)++;
2592 awp->currGi = 0;
2593 awp->currAccVer [0] = '\0';
2594 awp->seg = seg;
2595 awp->numsegs = numsegs;
2596 awp->from = from;
2597 awp->to = to;
2598 awp->contig = contig;
2599
2600 awp->firstfeat = TRUE;
2601 awp->featseen = FALSE;
2602 awp->featjustseen = FALSE;
2603 awp->wgsaccnlist = NULL;
2604
2605 if (ajp->manygaps != NULL) {
2606 gbsp = (BioseqPtr) ajp->manygaps->data.ptrvalue;
2607 if (gbsp != NULL) {
2608 gsap = gbsp->annot;
2609 if (gsap != NULL && gsap->type == 1) {
2610 awp->currfargap = (SeqFeatPtr) gsap->data;
2611 }
2612 }
2613 }
2614
2615 /* initialize empty blockList for this section */
2616
2617 awp->blockList = NULL;
2618 awp->lastblock = NULL;
2619
2620 /* and store section data into section fields */
2621
2622 asp->target = target;
2623 asp->bsp = bsp;
2624 asp->slp = slp;
2625 asp->seg = seg;
2626 asp->numsegs = numsegs;
2627 asp->from = from;
2628 asp->to = to;
2629
2630 iasp = (IntAsn2gbSectPtr) asp;
2631
2632 asp->blockArray = NULL;
2633 asp->numBlocks = 0;
2634
2635 /* WGS master and NS_ virtual records treated differently */
2636
2637 if (bsp->repr == Seq_repr_virtual) {
2638
2639 /* check for certain ID types */
2640
2641 for (sip = bsp->id; sip != NULL; sip = sip->next) {
2642 if (sip->choice == SEQID_GENBANK ||
2643 sip->choice == SEQID_EMBL ||
2644 sip->choice == SEQID_DDBJ ||
2645 sip->choice == SEQID_TPG ||
2646 sip->choice == SEQID_TPE ||
2647 sip->choice == SEQID_TPD) {
2648 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2649 if (tsip != NULL && tsip->accession != NULL) {
2650 acclen = StringLen (tsip->accession);
2651 if (acclen == 12) {
2652 if (StringCmp (tsip->accession + 6, "000000") == 0) {
2653 wgsmaster = TRUE;
2654 }
2655 } else if (acclen == 13) {
2656 if (StringCmp (tsip->accession + 6, "0000000") == 0) {
2657 wgsmaster = TRUE;
2658 }
2659 } else if (acclen == 14) {
2660 if (StringCmp (tsip->accession + 6, "00000000") == 0) {
2661 wgsmaster = TRUE;
2662 }
2663 }
2664 }
2665 } else if (sip->choice == SEQID_OTHER) {
2666 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2667 if (tsip != NULL && tsip->accession != NULL) {
2668 if (StringNICmp (tsip->accession, "NC_", 3) == 0) {
2669 wgsmaster = TRUE;
2670 } else if (StringNICmp (tsip->accession, "NS_", 3) == 0) {
2671 nsgenome = TRUE;
2672 } else if (StringNICmp (tsip->accession, "NZ_", 3) == 0) {
2673 if (StringLen (tsip->accession) == 15) {
2674 if (StringCmp (tsip->accession + 9, "000000") == 0) {
2675 wgsmaster = TRUE;
2676 }
2677 } else if (StringLen (tsip->accession) == 16) {
2678 if (StringCmp (tsip->accession + 9, "0000000") == 0) {
2679 wgsmaster = TRUE;
2680 }
2681 }
2682 }
2683 }
2684 }
2685 }
2686
2687 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
2688 if (sdp != NULL) {
2689 mip = (MolInfoPtr) sdp->data.ptrvalue;
2690 if (mip != NULL) {
2691 if (mip->tech == MI_TECH_wgs) {
2692 wgstech = TRUE;
2693 } else if (mip->tech == MI_TECH_tsa && bsp->repr == Seq_repr_virtual) {
2694 tsamaster = TRUE;
2695 } else if (mip->tech == MI_TECH_targeted && bsp->repr == Seq_repr_virtual) {
2696 tlsmaster = TRUE;
2697 } else if (mip->tech == MI_TECH_other && StringCmp (mip->techexp, "cage") == 0) {
2698 cagemaster = TRUE;
2699 }
2700 }
2701 }
2702 }
2703
2704 for (sip = bsp->id; sip != NULL; sip = sip->next) {
2705 if (sip->choice == SEQID_OTHER) {
2706 isRefSeq = TRUE;
2707 SeqIdWrite (sip, awp->currAccVer, PRINTID_TEXTID_ACC_VER, sizeof (awp->currAccVer) - 1);
2708 } else if (sip->choice == SEQID_GI) {
2709 awp->currGi = (BIG_ID) sip->data.intvalue;
2710 } else if (sip->choice == SEQID_GPIPE) {
2711 isGpipe = TRUE;
2712 SeqIdWrite (sip, awp->currAccVer, PRINTID_TEXTID_ACC_VER, sizeof (awp->currAccVer) - 1);
2713 } else if (sip->choice == SEQID_GENBANK ||
2714 sip->choice == SEQID_EMBL ||
2715 sip->choice == SEQID_DDBJ ||
2716 sip->choice == SEQID_TPG ||
2717 sip->choice == SEQID_TPE ||
2718 sip->choice == SEQID_TPD ||
2719 sip->choice == SEQID_PIR ||
2720 sip->choice == SEQID_SWISSPROT ||
2721 sip->choice == SEQID_PRF ||
2722 sip->choice == SEQID_PDB) {
2723 SeqIdWrite (sip, awp->currAccVer, PRINTID_TEXTID_ACC_VER, sizeof (awp->currAccVer) - 1);
2724 }
2725 }
2726
2727 GetAccVerForBioseq (bsp, awp->currAccVerLabel, sizeof (awp->currAccVerLabel), ajp->hideGI, TRUE);
2728
2729 /* start exploring and populating paragraphs */
2730
2731 if (awp->format == FTABLE_FMT) {
2732 AddFeatHeaderBlock (awp);
2733 if (awp->showFtableRefs) {
2734 AddReferenceBlock (awp, isRefSeq);
2735 }
2736 if (! awp->hideSources) {
2737 AddSourceFeatBlock (awp);
2738 }
2739 if (! awp->hideFeatures) {
2740 AddFeatureBlock (awp);
2741 AddRemainingGaps (awp);
2742 }
2743
2744 } else {
2745
2746 if (wgsmaster && wgstech) {
2747 willshowwgs = TRUE;
2748 } else if (tsamaster) {
2749 willshowtsa = TRUE;
2750 } else if (tlsmaster) {
2751 willshowtls = TRUE;
2752 } else if (cagemaster) {
2753 willshowcage = TRUE;
2754 } else if (nsgenome) {
2755 willshowgenome = TRUE;
2756 } else if (contig) {
2757 willshowcontig = TRUE;
2758 if (awp->showContigAndSeq) {
2759 if (! awp->hideSequence) {
2760 willshowsequence = TRUE;
2761 }
2762 }
2763 } else {
2764 if (awp->showContigAndSeq) {
2765 if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
2766 willshowcontig = TRUE;
2767 } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2768 willshowcontig = TRUE;
2769 } else if (bsp->repr == Seq_repr_ref) {
2770 willshowcontig = TRUE;
2771 }
2772 }
2773 if (! awp->hideSequence) {
2774 willshowsequence = TRUE;
2775 }
2776 }
2777
2778 AddLocusBlock (awp, willshowwgs, willshowtsa, willshowtls, willshowcage, willshowgenome, willshowcontig, willshowsequence);
2779
2780 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
2781
2782 AddDeflineBlock (awp);
2783 AddAccessionBlock (awp);
2784
2785 if (ISA_aa (bsp->mol)) {
2786 /*
2787 AddPidBlock (awp);
2788 */
2789 }
2790
2791 AddVersionBlock (awp);
2792
2793 /* if (ISA_na (bsp->mol)) { */
2794 AddDblinkBlock (awp);
2795 /* } */
2796
2797 if (ISA_aa (bsp->mol)) {
2798 AddDbsourceBlock (awp);
2799 }
2800
2801 } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2802
2803 AddAccessionBlock (awp);
2804
2805 if (ISA_na (bsp->mol)) {
2806 AddVersionBlock (awp);
2807 }
2808
2809 if (ISA_aa (bsp->mol)) {
2810 /* AddPidBlock (awp); */
2811 /* AddDbsourceBlock (awp); */
2812 }
2813
2814 AddDateBlock (awp);
2815
2816 AddDeflineBlock (awp);
2817 }
2818
2819 AddKeywordsBlock (awp);
2820
2821 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
2822 AddSegmentBlock (awp, onePartOfSeg, (Boolean) ISA_na (bsp->mol));
2823 }
2824
2825 AddSourceOrganismBlock (awp);
2826
2827 /*
2828 if (awp->showRefStats) {
2829 AddRefStatsBlock (awp);
2830 }
2831 */
2832
2833 if (! awp->hidePubs) {
2834
2835 /* !!! RELEASE_MODE should check return value of AddReferenceBlock !!! */
2836
2837 hasRefs = AddReferenceBlock (awp, isRefSeq);
2838 if (! hasRefs) {
2839 if (ajp->flags.needAtLeastOneRef) {
2840 /* RefSeq and Gpipe do not require a publication */
2841 if ((! isRefSeq) && (! isGpipe)) {
2842 awp->failed = TRUE;
2843 }
2844 }
2845 }
2846 }
2847
2848 AddCommentBlock (awp);
2849 AddPrimaryBlock (awp);
2850
2851 /*
2852 if (awp->showFeatStats) {
2853 AddFeatStatsBlock (awp);
2854 }
2855 */
2856
2857 AddFeatHeaderBlock (awp);
2858 if (! awp->hideSources) {
2859 AddSourceFeatBlock (awp);
2860 }
2861
2862 if (wgsmaster && wgstech) {
2863
2864 AddWGSBlock (awp);
2865
2866 } else if (tsamaster) {
2867
2868 AddTSABlock (awp);
2869
2870 } else if (tlsmaster) {
2871
2872 AddTLSBlock (awp);
2873
2874 } else if (cagemaster) {
2875
2876 AddCAGEBlock (awp);
2877
2878 } else if (nsgenome) {
2879
2880 AddGenomeBlock (awp);
2881
2882 } else if (contig) {
2883
2884 if (awp->showconfeats) {
2885 if (! awp->hideFeatures) {
2886 AddFeatureBlock (awp);
2887 AddRemainingGaps (awp);
2888 }
2889 } else if (awp->smartconfeats && bsp->length <= 1000000) {
2890 if (! awp->hideFeatures) {
2891 AddFeatureBlock (awp);
2892 AddRemainingGaps (awp);
2893 }
2894 }
2895 AddContigBlock (awp);
2896
2897 if (awp->showContigAndSeq) {
2898 if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
2899 if (awp->showBaseCount && bsp->repr != Seq_repr_map) {
2900 AddBasecountBlock (awp);
2901 }
2902 }
2903 if (bsp->repr != Seq_repr_map) {
2904 AddOriginBlock (awp);
2905 }
2906
2907 if (! awp->hideSequence) {
2908 if (bsp->repr != Seq_repr_map) {
2909 AddSequenceBlock (awp);
2910 }
2911 }
2912 }
2913
2914 } else {
2915
2916 if (! awp->hideFeatures) {
2917 AddFeatureBlock (awp);
2918 AddRemainingGaps (awp);
2919 }
2920
2921 if (awp->showContigAndSeq) {
2922 if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
2923 AddContigBlock (awp);
2924 } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2925 AddContigBlock (awp);
2926 } else if (bsp->repr == Seq_repr_ref) {
2927 AddContigBlock (awp);
2928 }
2929 }
2930
2931 if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
2932 if (awp->showBaseCount && bsp->repr != Seq_repr_map) {
2933 AddBasecountBlock (awp );
2934 }
2935 }
2936 if (bsp->repr != Seq_repr_map) {
2937 AddOriginBlock (awp);
2938 }
2939
2940 if (! awp->hideSequence) {
2941 if (bsp->repr != Seq_repr_map) {
2942 AddSequenceBlock (awp);
2943 }
2944 }
2945 }
2946
2947 AddSlashBlock (awp);
2948 }
2949
2950 /* allocate block array for this section */
2951
2952 numBlocks = ValNodeLen (awp->blockList);
2953 asp->numBlocks = numBlocks;
2954
2955 if (numBlocks > 0) {
2956 blockArray = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numBlocks + 1));
2957 asp->blockArray = blockArray;
2958
2959 if (blockArray != NULL) {
2960 for (vnp = awp->blockList, i = 0; vnp != NULL; vnp = vnp->next, i++) {
2961 blockArray [i] = (BaseBlockPtr) vnp->data.ptrvalue;
2962 }
2963 }
2964 }
2965
2966 /* free blockList, but leave data, now pointed to by blockArray elements */
2967
2968 awp->blockList = ValNodeFree (awp->blockList);
2969 awp->lastblock = NULL;
2970
2971 (awp->currsection)++;
2972 }
2973
2974 /* ********************************************************************** */
2975
2976 /*
2977 the following functions handle various kinds of input, all calling
2978 DoOneSection once for each component that gets its own report
2979 */
2980
Asn2Seg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)2981 static Boolean LIBCALLBACK Asn2Seg (
2982 SeqLocPtr slp,
2983 SeqMgrSegmentContextPtr context
2984 )
2985
2986 {
2987 Asn2gbWorkPtr awp;
2988 BioseqPtr bsp = NULL;
2989 Uint2 entityID;
2990 Int4 from;
2991 SeqLocPtr loc;
2992 BioseqPtr parent;
2993 SeqIdPtr sip;
2994 Int4 to;
2995
2996 if (slp == NULL || context == NULL) return FALSE;
2997 awp = (Asn2gbWorkPtr) context->userdata;
2998
2999 parent = context->parent;
3000
3001 from = context->cumOffset;
3002 to = from + context->to - context->from;
3003
3004 sip = SeqLocId (slp);
3005 if (sip == NULL) {
3006 loc = SeqLocFindNext (slp, NULL);
3007 if (loc != NULL) {
3008 sip = SeqLocId (loc);
3009 }
3010 }
3011 if (sip == NULL) return TRUE;
3012
3013 /* may remote fetch genome component if not already in memory */
3014
3015 bsp = BioseqLockById (sip);
3016
3017 if (bsp == NULL) return TRUE;
3018
3019 entityID = ObjMgrGetEntityIDForPointer (bsp);
3020
3021 if (entityID != awp->entityID) {
3022
3023 /* if segment not packaged in record, may need to feature index it */
3024
3025 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
3026 SeqMgrIndexFeatures (entityID, NULL);
3027 }
3028
3029 /* collect features indexed on the remote bioseq */
3030
3031 parent = bsp;
3032 from = 0;
3033 to = bsp->length - 1;
3034 }
3035
3036 if (bsp->repr != Seq_repr_virtual) {
3037 (awp->seg)++;
3038 DoOneSection (bsp, parent, bsp, bsp, /* slp */ NULL, awp->seg, from, to, FALSE, FALSE, awp);
3039 }
3040
3041 BioseqUnlock (bsp);
3042
3043 return TRUE;
3044 }
3045
CountRealParts(SeqLocPtr slp_head)3046 static Int4 CountRealParts (
3047 SeqLocPtr slp_head
3048 )
3049
3050 {
3051 SeqIdPtr id;
3052 Int4 numparts;
3053 BioseqPtr part;
3054 SeqIdPtr sip;
3055 SeqLocPtr slp;
3056
3057 numparts = 0;
3058 for (slp = (SeqLocPtr) slp_head; slp != NULL; slp = slp->next) {
3059 sip = SeqLocId (slp);
3060 if (sip == NULL) continue;
3061 if (sip->choice == SEQID_GI) {
3062 part = BioseqFind (sip);
3063 if (part == NULL) continue;
3064 for (id = part->id; id != NULL; id = id->next) {
3065 if (id->choice == SEQID_GIBBSQ ||
3066 id->choice == SEQID_GIBBMT ||
3067 id->choice == SEQID_GIIM) break;
3068 }
3069 if (id != NULL && part->repr == Seq_repr_virtual) continue;
3070 }
3071 numparts++;
3072 }
3073 return numparts;
3074 }
3075
3076 typedef struct findseg {
3077 BioseqPtr bsp;
3078 Uint2 seg;
3079 } FindSeg, PNTR FindSegPtr;
3080
FindSegForPart(SeqLocPtr slp,SeqMgrSegmentContextPtr context)3081 static Boolean LIBCALLBACK FindSegForPart (
3082 SeqLocPtr slp,
3083 SeqMgrSegmentContextPtr context
3084 )
3085
3086 {
3087 FindSegPtr fsp;
3088 BioseqPtr bsp = NULL;
3089 SeqLocPtr loc;
3090 SeqIdPtr sip;
3091
3092 if (slp == NULL || context == NULL) return TRUE;
3093 fsp = (FindSegPtr) context->userdata;
3094
3095 sip = SeqLocId (slp);
3096 if (sip == NULL) {
3097 loc = SeqLocFindNext (slp, NULL);
3098 if (loc != NULL) {
3099 sip = SeqLocId (loc);
3100 }
3101 }
3102 if (sip == NULL) return TRUE;
3103
3104 bsp = BioseqFind (sip);
3105 if (bsp == NULL) return TRUE;
3106
3107 if (bsp->repr != Seq_repr_virtual) {
3108 (fsp->seg)++;
3109 }
3110
3111 if (bsp != fsp->bsp) return TRUE;
3112
3113 return FALSE;
3114 }
3115
DoOneBioseq(BioseqPtr bsp,Pointer userdata)3116 NLM_EXTERN void DoOneBioseq (
3117 BioseqPtr bsp,
3118 Pointer userdata
3119 )
3120
3121 {
3122 IntAsn2gbJobPtr ajp;
3123 Asn2gbWorkPtr awp;
3124 BioseqSetPtr bssp;
3125 SeqMgrSegmentContext context;
3126 Boolean contig = FALSE;
3127 Int4 from;
3128 FindSeg fs;
3129 SeqEntryPtr oldscope;
3130 BioseqPtr parent;
3131 Boolean segmented = FALSE;
3132 SeqEntryPtr sep;
3133 Int4 to;
3134
3135 if (bsp == NULL) return;
3136 awp = (Asn2gbWorkPtr) userdata;
3137 if (awp == NULL) return;
3138 ajp = awp->ajp;
3139 if (ajp == NULL) return;
3140
3141 /* return if molecule not right for format */
3142
3143 if (ISA_na (bsp->mol)) {
3144 if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return;
3145
3146 /* only do mRNA feature tables in GPS if targeted to a specific mRNA */
3147
3148 if (ajp->format == FTABLE_FMT && ajp->skipMrnas) {
3149 if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
3150 bssp = (BioseqSetPtr) bsp->idx.parentptr;
3151 if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {
3152 if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
3153 bssp = (BioseqSetPtr) bssp->idx.parentptr;
3154 if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
3155 return;
3156 }
3157 }
3158 }
3159 }
3160 }
3161
3162 } else if (ISA_aa (bsp->mol)) {
3163 if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return;
3164
3165 /* only do protein feature tables if targeted to a specific protein */
3166
3167 if (ajp->format == FTABLE_FMT && ajp->skipProts) return;
3168 }
3169
3170 if (awp->style == SEGMENT_STYLE) {
3171 segmented = TRUE;
3172 }
3173 if (awp->style == CONTIG_STYLE) {
3174 contig = TRUE;
3175 }
3176 /* Never do segmented style in FTABLE format */
3177 if (awp->format == FTABLE_FMT) {
3178 segmented = FALSE;
3179 contig = FALSE;
3180 }
3181
3182 awp->partcount = 0;
3183
3184 if (bsp->repr == Seq_repr_seg && awp->style == NORMAL_STYLE) {
3185
3186 /* if bsp followed by parts set, then do not default to contig style */
3187
3188 if (SegHasParts (bsp)) {
3189 segmented = TRUE;
3190 contig = FALSE;
3191
3192 if (bsp->seq_ext_type == 1) {
3193
3194 /* count only non-virtual parts */
3195
3196 sep = GetTopSeqEntryForEntityID (awp->entityID);
3197 oldscope = SeqEntrySetScope (sep);
3198 awp->partcount = CountRealParts ((SeqLocPtr) bsp->seq_ext);
3199 SeqEntrySetScope (oldscope);
3200 }
3201 } else {
3202 segmented = FALSE;
3203 contig = TRUE;
3204 }
3205 }
3206 if (bsp->repr == Seq_repr_delta && awp->style == NORMAL_STYLE) {
3207 if (! DeltaLitOnly (bsp)) {
3208 contig = TRUE;
3209 if (awp->isRefSeq) {
3210 ajp->masterStyle = TRUE;
3211 }
3212 }
3213 }
3214
3215 if (bsp->repr == Seq_repr_seg) {
3216
3217 /* this is a segmented bioseq */
3218
3219 if (segmented) {
3220
3221 /* show all segments individually */
3222
3223 awp->seg = 0;
3224 SeqMgrExploreSegments (bsp, (Pointer) awp, Asn2Seg);
3225
3226 } else {
3227
3228 /* show as single bioseq */
3229
3230 parent = bsp;
3231 from = 0;
3232 to = bsp->length - 1;
3233
3234 DoOneSection (parent, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
3235 }
3236
3237 } else if (bsp->repr == Seq_repr_raw ||
3238 bsp->repr == Seq_repr_const ||
3239 bsp->repr == Seq_repr_delta ||
3240 bsp->repr == Seq_repr_ref ||
3241 bsp->repr == Seq_repr_map ||
3242 bsp->repr == Seq_repr_virtual) {
3243
3244 parent = SeqMgrGetParentOfPart (bsp, &context);
3245 if (parent != NULL) {
3246
3247 /* this is a part of an indexed segmented bioseq */
3248
3249 from = context.cumOffset;
3250 to = from + context.to - context.from;
3251
3252 s_LocusGetBaseName (parent, bsp, awp->basename);
3253
3254 fs.bsp = bsp;
3255 fs.seg = 0;
3256 SeqMgrExploreSegments (parent, (Pointer) &fs, FindSegForPart);
3257 awp->showAllFeats = TRUE;
3258
3259 DoOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, fs.seg, from, to, contig, TRUE, awp);
3260
3261 } else {
3262
3263 /* this is a regular non-segmented bioseq */
3264
3265 parent = bsp;
3266 from = 0;
3267 to = bsp->length - 1;
3268
3269 DoOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
3270 }
3271 }
3272 }
3273
/*
  DoBioseqSetList processes every SeqEntry in the chain starting at
  seq_set.  An entry that is a genbank, mut, pop, phy, eco, wgs, gen-prod,
  or small-genome set is handled by recursing into its own seq_set; any
  other entry has DoOneBioseq applied to its main sequences.  A set entry
  with no data pointer is skipped.
*/

static void DoBioseqSetList (
  SeqEntryPtr seq_set,
  Asn2gbWorkPtr awp
)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   entry;
  Boolean       recurse;

  if (seq_set == NULL || awp == NULL) return;

  /* iterate rather than recurse unless multiple nested sets > nuc-prot */

  for (entry = seq_set; entry != NULL; entry = entry->next) {

    if (IS_Bioseq_set (entry)) {
      bssp = (BioseqSetPtr) entry->data.ptrvalue;
      if (bssp == NULL) continue;

      switch (bssp->_class) {
        case BioseqseqSet_class_genbank :
        case BioseqseqSet_class_mut_set :
        case BioseqseqSet_class_pop_set :
        case BioseqseqSet_class_phy_set :
        case BioseqseqSet_class_eco_set :
        case BioseqseqSet_class_wgs_set :
        case BioseqseqSet_class_gen_prod_set :
        case BioseqseqSet_class_small_genome_set :
          recurse = TRUE;
          break;
        default :
          recurse = FALSE;
          break;
      }

      if (recurse) {

        /* if popset within genbank set, for example, recurse */

        DoBioseqSetList (bssp->seq_set, awp);
        continue;
      }
    }

    /* at most nuc-prot set, so do main bioseqs that fit the format */

    VisitSequencesInSep (entry, (Pointer) awp, VISIT_MAINS, DoOneBioseq);
  }
}
3315
/*
  DoOneBioseqSet processes a single SeqEntry.  If it is a genbank, mut,
  pop, phy, eco, wgs, gen-prod, or small-genome set, the members of the
  set are processed by DoBioseqSetList so that their reports are
  catenated; otherwise DoOneBioseq is applied to the entry's main
  sequences.  Nothing is done for a set entry with no data pointer.
*/

static void DoOneBioseqSet (
  SeqEntryPtr sep,
  Asn2gbWorkPtr awp
)

{
  BioseqSetPtr  bssp;

  if (sep == NULL || awp == NULL) return;

  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;

    switch (bssp->_class) {
      case BioseqseqSet_class_genbank :
      case BioseqseqSet_class_mut_set :
      case BioseqseqSet_class_pop_set :
      case BioseqseqSet_class_phy_set :
      case BioseqseqSet_class_eco_set :
      case BioseqseqSet_class_wgs_set :
      case BioseqseqSet_class_gen_prod_set :
      case BioseqseqSet_class_small_genome_set :

        /* this is a pop/phy/mut/eco set, catenate separate reports */

        DoBioseqSetList (bssp->seq_set, awp);
        return;

      default :
        break;
    }
  }

  /* at most nuc-prot set, so do main bioseqs that fit the format */

  VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, DoOneBioseq);
}
3351
3352 /* ********************************************************************** */
3353
/*
  RecordOneSection counts one section that will later be generated and,
  when a gi is available for it, appends that gi to the job's gi list
  (ajp->gihead / ajp->gitail).

  If bsp is not NULL its id chain is searched for a gi, replacing the sip
  argument; otherwise the sip argument is used as given.

  awp->sectionMax is incremented for every section, whether or not a gi
  was found.  DoOneSection creates and counts a section for a bioseq
  regardless of whether it carries a gi, so the count made here has to do
  the same; previously a bioseq without a gi was silently left out of
  sectionMax.
*/

static void RecordOneSection (
  Asn2gbWorkPtr awp,
  BioseqPtr bsp,
  SeqIdPtr sip
)

{
  IntAsn2gbJobPtr  ajp;
  ValNodePtr       vnp;

  if (awp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  /* prefer the gi from the bioseq itself; sip ends up NULL if it has none */

  if (bsp != NULL) {
    for (sip = bsp->id; sip != NULL; sip = sip->next) {
      if (sip->choice == SEQID_GI) break;
    }
  }

  if (sip != NULL && sip->choice == SEQID_GI) {
    vnp = ValNodeAddBigInt (&(ajp->gitail), 0, (BIG_ID) sip->data.intvalue);

    /* leave head and tail untouched if the node could not be allocated */

    if (vnp != NULL) {
      if (ajp->gihead == NULL) {
        ajp->gihead = vnp;
      }
      ajp->gitail = vnp;
    }
  }

  /* every section counts, with or without a gi */

  (awp->sectionMax)++;
}
3386
/*
  CountOneSection is the counting counterpart of DoOneSection.  It takes
  the same arguments and applies the same two filters (release mode contig
  style restricted to seg, far delta, ref, and map bioseqs; local id
  suppression), then records the section through RecordOneSection instead
  of building it.  The refs, slp, seg, from, to, contig, and onePartOfSeg
  arguments are not used.
*/

static void CountOneSection (
  BioseqPtr target,
  BioseqPtr parent,
  BioseqPtr bsp,
  BioseqPtr refs,
  SeqLocPtr slp,
  Uint2 seg,
  Int4 from,
  Int4 to,
  Boolean contig,
  Boolean onePartOfSeg,
  Asn2gbWorkPtr awp
)

{
  IntAsn2gbJobPtr  ajp;
  SeqIdPtr         best;
  Boolean          keep;

  if (target == NULL || parent == NULL || bsp == NULL || awp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  /* release mode contig style only reports seg, far delta, ref, and map */

  if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
    keep = (Boolean) (bsp->repr == Seq_repr_seg ||
                      (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) ||
                      bsp->repr == Seq_repr_ref ||
                      bsp->repr == Seq_repr_map);
    if (! keep) return;
  }

  /* optionally skip records whose best id is absent or local */

  if (ajp->flags.suppressLocalID) {
    best = SeqIdSelect (bsp->id, fasta_order, NUM_SEQID);
    if (best == NULL || best->choice == SEQID_LOCAL) return;
  }

  RecordOneSection (awp, bsp, NULL);
}
3424
3425
Count2Seg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)3426 static Boolean LIBCALLBACK Count2Seg (
3427 SeqLocPtr slp,
3428 SeqMgrSegmentContextPtr context
3429 )
3430
3431 {
3432 Asn2gbWorkPtr awp;
3433 BioseqPtr bsp = NULL;
3434 Int4 from;
3435 SeqLocPtr loc;
3436 BioseqPtr parent;
3437 SeqIdPtr sip;
3438 Int4 to;
3439
3440 if (slp == NULL || context == NULL) return FALSE;
3441 awp = (Asn2gbWorkPtr) context->userdata;
3442
3443 parent = context->parent;
3444
3445 from = context->cumOffset;
3446 to = from + context->to - context->from;
3447
3448 sip = SeqLocId (slp);
3449 if (sip == NULL) {
3450 loc = SeqLocFindNext (slp, NULL);
3451 if (loc != NULL) {
3452 sip = SeqLocId (loc);
3453 }
3454 }
3455 if (sip == NULL) return TRUE;
3456
3457 bsp = BioseqFindCore (sip);
3458 if (bsp != NULL && bsp->repr == Seq_repr_virtual) return TRUE;
3459
3460 RecordOneSection (awp, NULL, sip);
3461
3462 return TRUE;
3463 }
3464
/*
  CountOneBioseq is the counting counterpart of DoOneBioseq.  userdata is
  the Asn2gbWorkPtr for the job.  It applies the same molecule/format
  filters and the same segmented versus contig decisions, but records the
  sections through Count2Seg and CountOneSection instead of building them.

  The mRNA filter for feature tables must walk from the bioseq to its
  nuc-prot set and from there to that set's own parent, exactly as
  DoOneBioseq does.  Earlier code re-read the bioseq's parent at the second
  step, so the gen-prod set was never seen and such mRNAs were counted even
  though no section is generated for them.
*/

static void CountOneBioseq (
  BioseqPtr bsp,
  Pointer userdata
)

{
  IntAsn2gbJobPtr         ajp;
  Asn2gbWorkPtr           awp;
  BioseqSetPtr            bssp;
  SeqMgrSegmentContext    context;
  Boolean                 contig = FALSE;
  Int4                    from;
  BioseqPtr               parent;
  Boolean                 segmented = FALSE;
  Int4                    to;

  if (bsp == NULL) return;
  awp = (Asn2gbWorkPtr) userdata;
  if (awp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  /* return if molecule not right for format */

  if (ISA_na (bsp->mol)) {
    if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return;

    /* only count mRNA feature tables in GPS if targeted to a specific mRNA */

    if (ajp->format == FTABLE_FMT && ajp->skipMrnas) {
      if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
        bssp = (BioseqSetPtr) bsp->idx.parentptr;
        if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {

          /* step up from the nuc-prot set to the set that contains it */

          if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
            bssp = (BioseqSetPtr) bssp->idx.parentptr;
            if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
              return;
            }
          }
        }
      }
    }

  } else if (ISA_aa (bsp->mol)) {
    if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return;

    /* only count protein feature tables if targeted to a specific protein */

    if (ajp->format == FTABLE_FMT && ajp->skipProts) return;
  }

  if (awp->style == SEGMENT_STYLE) {
    segmented = TRUE;
  }
  if (awp->style == CONTIG_STYLE) {
    contig = TRUE;
  }

  /* never segmented or contig style in FTABLE format */

  if (awp->format == FTABLE_FMT) {
    segmented = FALSE;
    contig = FALSE;
  }

  if (bsp->repr == Seq_repr_seg && awp->style == NORMAL_STYLE) {

    /* if bsp followed by parts set, then do not default to contig style */

    if (SegHasParts (bsp)) {
      segmented = TRUE;
      contig = FALSE;
    } else {
      segmented = FALSE;
      contig = TRUE;
    }
  }
  if (bsp->repr == Seq_repr_delta && awp->style == NORMAL_STYLE) {
    if (! DeltaLitOnly (bsp)) {
      contig = TRUE;
    }
  }

  if (bsp->repr == Seq_repr_seg) {

    /* this is a segmented bioseq */

    if (segmented) {

      /* count all segments individually */

      SeqMgrExploreSegments (bsp, (Pointer) awp, Count2Seg);

    } else {

      /* count as single bioseq */

      parent = bsp;
      from = 0;
      to = bsp->length - 1;

      CountOneSection (parent, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
    }

  } else if (bsp->repr == Seq_repr_raw ||
             bsp->repr == Seq_repr_const ||
             bsp->repr == Seq_repr_delta ||
             bsp->repr == Seq_repr_ref ||
             bsp->repr == Seq_repr_map ||
             bsp->repr == Seq_repr_virtual) {

    parent = SeqMgrGetParentOfPart (bsp, &context);
    if (parent != NULL) {

      /* this is a part of an indexed segmented bioseq */

      from = context.cumOffset;
      to = from + context.to - context.from;

      CountOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, TRUE, awp);

    } else {

      /* this is a regular non-segmented bioseq */

      parent = bsp;
      from = 0;
      to = bsp->length - 1;

      CountOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
    }
  }
}
3577
3578
3579
/*
 * Walks a chain of Seq-entries.  Entries that are Bioseq-sets of one of the
 * listed container classes are descended into recursively; every other entry
 * is handed to VisitSequencesInSep with CountOneBioseq as the callback.
 * A Bioseq-set entry with no data pointer is skipped.
 */
static void CountBioseqSetList (
  SeqEntryPtr seq_set,
  Asn2gbWorkPtr awp
)

{
  SeqEntryPtr   entry;
  Boolean       isContainer;
  BioseqSetPtr  set;

  if (seq_set == NULL || awp == NULL) return;

  for (entry = seq_set; entry != NULL; entry = entry->next) {

    isContainer = FALSE;
    set = NULL;

    if (IS_Bioseq_set (entry)) {
      set = (BioseqSetPtr) entry->data.ptrvalue;
      if (set == NULL) continue;

      switch (set->_class) {
        case BioseqseqSet_class_genbank :
        case BioseqseqSet_class_mut_set :
        case BioseqseqSet_class_pop_set :
        case BioseqseqSet_class_phy_set :
        case BioseqseqSet_class_eco_set :
        case BioseqseqSet_class_wgs_set :
        case BioseqseqSet_class_gen_prod_set :
        case BioseqseqSet_class_small_genome_set :
          isContainer = TRUE;
          break;
        default :
          break;
      }
    }

    if (isContainer) {
      /* container set: count each of its members individually */
      CountBioseqSetList (set->seq_set, awp);
    } else {
      VisitSequencesInSep (entry, (Pointer) awp, VISIT_MAINS, CountOneBioseq);
    }
  }
}
3615
/*
 * Counts sections for a single Seq-entry.  If the entry is a Bioseq-set of
 * one of the listed container classes, its members are processed through
 * CountBioseqSetList; otherwise the entry itself is passed to
 * VisitSequencesInSep with CountOneBioseq as the callback.
 */
static void CountOneBioseqSet (
  SeqEntryPtr sep,
  Asn2gbWorkPtr awp
)

{
  BioseqSetPtr  set;

  if (sep == NULL || awp == NULL) return;

  if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set == NULL) return;

    switch (set->_class) {
      case BioseqseqSet_class_genbank :
      case BioseqseqSet_class_mut_set :
      case BioseqseqSet_class_pop_set :
      case BioseqseqSet_class_phy_set :
      case BioseqseqSet_class_eco_set :
      case BioseqseqSet_class_wgs_set :
      case BioseqseqSet_class_gen_prod_set :
      case BioseqseqSet_class_small_genome_set :
        /* container set: handle its members one by one and stop here */
        CountBioseqSetList (set->seq_set, awp);
        return;
      default :
        break;
    }
  }

  VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, CountOneBioseq);
}
3647
3648 /* ********************************************************************** */
3649
3650 /* public functions */
3651
SortParagraphByIDProc(VoidPtr vp1,VoidPtr vp2)3652 static int LIBCALLBACK SortParagraphByIDProc (
3653 VoidPtr vp1,
3654 VoidPtr vp2
3655 )
3656
3657 {
3658 BaseBlockPtr bbp1, bbp2;
3659
3660 if (vp1 == NULL || vp2 == NULL) return 0;
3661 bbp1 = *((BaseBlockPtr PNTR) vp1);
3662 bbp2 = *((BaseBlockPtr PNTR) vp2);
3663 if (bbp1 == NULL || bbp2 == NULL) return 0;
3664
3665 if (bbp1->entityID > bbp2->entityID) return 1;
3666 if (bbp1->entityID < bbp2->entityID) return -1;
3667
3668 if (bbp1->itemtype > bbp2->itemtype) return 1;
3669 if (bbp1->itemtype < bbp2->itemtype) return -1;
3670
3671 if (bbp1->itemID > bbp2->itemID) return 1;
3672 if (bbp1->itemID < bbp2->itemID) return -1;
3673
3674 if (bbp1->paragraph > bbp2->paragraph) return 1;
3675 if (bbp1->paragraph < bbp2->paragraph) return -1;
3676
3677 return 0;
3678 }
3679
/*
 * Bioseq visitor: sets the Boolean that userdata points to when the Bioseq
 * carries at least one Seq-id whose choice is SEQID_OTHER.  The flag is never
 * cleared here, so it accumulates across visited Bioseqs.
 */
static void IsBspRefseq (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BoolPtr   foundP;
  SeqIdPtr  id;

  if (bsp == NULL || userdata == NULL) return;

  foundP = (BoolPtr) userdata;
  id = bsp->id;
  while (id != NULL) {
    if (id->choice == SEQID_OTHER) {
      *foundP = TRUE;
    }
    id = id->next;
  }
}
3697
IsSepRefseq(SeqEntryPtr sep)3698 static Boolean IsSepRefseq (
3699 SeqEntryPtr sep
3700 )
3701
3702 {
3703 Boolean is_refseq = FALSE;
3704
3705 if (sep == NULL) return FALSE;
3706 VisitBioseqsInSep (sep, (Pointer) &is_refseq, IsBspRefseq);
3707 return is_refseq;
3708 }
3709
3710 typedef struct modeflags {
3711 Boolean flags [30];
3712 } ModeFlags, PNTR ModeFlagsPtr;
3713
3714 static ModeFlags flagTable [] = {
3715
3716 /* RELEASE_MODE */
3717 {TRUE, TRUE, TRUE, TRUE, TRUE,
3718 TRUE, TRUE, TRUE, TRUE, TRUE,
3719 TRUE, TRUE, TRUE, TRUE, TRUE,
3720 TRUE, TRUE, TRUE, TRUE, TRUE,
3721 TRUE, TRUE, TRUE, TRUE, TRUE,
3722 TRUE, TRUE, TRUE, TRUE, TRUE},
3723
3724 /* ENTREZ_MODE */
3725 {FALSE, TRUE, TRUE, TRUE, TRUE,
3726 FALSE, TRUE, TRUE, TRUE, TRUE,
3727 TRUE, TRUE, FALSE, TRUE, TRUE,
3728 TRUE, TRUE, FALSE, FALSE, TRUE,
3729 TRUE, TRUE, TRUE, TRUE, TRUE,
3730 TRUE, TRUE, TRUE, TRUE, FALSE},
3731
3732 /* SEQUIN_MODE */
3733 {FALSE, FALSE, FALSE, FALSE, FALSE,
3734 FALSE, FALSE, TRUE, FALSE, FALSE,
3735 FALSE, FALSE, FALSE, FALSE, FALSE,
3736 FALSE, FALSE, FALSE, FALSE, FALSE,
3737 FALSE, FALSE, TRUE, FALSE, FALSE,
3738 FALSE, TRUE, FALSE, FALSE, FALSE},
3739
3740 /* DUMP_MODE */
3741 {FALSE, FALSE, FALSE, FALSE, FALSE,
3742 FALSE, FALSE, FALSE, FALSE, FALSE,
3743 FALSE, FALSE, FALSE, FALSE, FALSE,
3744 FALSE, FALSE, FALSE, FALSE, FALSE,
3745 FALSE, FALSE, FALSE, FALSE, FALSE,
3746 FALSE, FALSE, FALSE, FALSE, FALSE}
3747 };
3748
/*
 * Fills ajp->flags from the flagTable row for the given mode, then applies
 * overrides for RefSeq records and for ajp->refseqConventions.
 *
 * A mode outside RELEASE_MODE..DUMP_MODE is treated as DUMP_MODE.
 */
static void SetFlagsFromMode (
  IntAsn2gbJobPtr ajp,
  ModType mode
)

{
  BoolPtr      modeFlags;
  Boolean      refseqRecord;
  SeqEntryPtr  topsep;

  if (ajp == NULL) return;

  if (mode < RELEASE_MODE || mode > DUMP_MODE) {
    mode = DUMP_MODE;
  }
  modeFlags = &(flagTable [(int) (mode - 1)].flags [0]);

  /* positions must stay in step with the column order of flagTable */

  ajp->flags.suppressLocalID       = modeFlags [0];
  ajp->flags.validateFeats         = modeFlags [1];
  ajp->flags.ignorePatPubs         = modeFlags [2];
  ajp->flags.dropShortAA           = modeFlags [3];
  ajp->flags.avoidLocusColl        = modeFlags [4];

  ajp->flags.iupacaaOnly           = modeFlags [5];
  ajp->flags.dropBadCitGens        = modeFlags [6];
  ajp->flags.noAffilOnUnpub        = modeFlags [7];
  ajp->flags.dropIllegalQuals      = modeFlags [8];
  ajp->flags.checkQualSyntax       = modeFlags [9];

  ajp->flags.needRequiredQuals     = modeFlags [10];
  ajp->flags.needOrganismQual      = modeFlags [11];
  ajp->flags.needAtLeastOneRef     = modeFlags [12];
  ajp->flags.citArtIsoJta          = modeFlags [13];
  ajp->flags.dropBadDbxref         = modeFlags [14];

  ajp->flags.useEmblMolType        = modeFlags [15];
  ajp->flags.hideBankItComment     = modeFlags [16];
  ajp->flags.checkCDSproductID     = modeFlags [17];
  ajp->flags.suppressSegLoc        = modeFlags [18];
  ajp->flags.srcQualsToNote        = modeFlags [19];

  ajp->flags.hideEmptySource       = modeFlags [20];
  ajp->flags.goQualsToNote         = modeFlags [21];
  ajp->flags.separateGeneSyns      = modeFlags [22];
  ajp->flags.refSeqQualsToNote     = modeFlags [23];
  ajp->flags.selenocysteineToNote  = modeFlags [24];

  ajp->flags.pyrrolysineToNote     = modeFlags [25];
  ajp->flags.extraProductsToNote   = modeFlags [26];
  ajp->flags.codonRecognizedToNote = modeFlags [27];
  ajp->flags.hideSpecificGeneMaps  = modeFlags [28];
  ajp->flags.forGbRelease          = modeFlags [29];

  /* RefSeq records (any Bioseq in the entity with a SEQID_OTHER id) keep
     these qualifiers on their own lines instead of folding them into notes */

  topsep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
  refseqRecord = IsSepRefseq (topsep);

  if (refseqRecord) {

    ajp->flags.srcQualsToNote = FALSE;
    ajp->flags.separateGeneSyns = FALSE;
    ajp->flags.codonRecognizedToNote = FALSE;

    /* selenocysteine always a separate qualifier for RefSeq */

    ajp->flags.selenocysteineToNote = FALSE;
    ajp->flags.pyrrolysineToNote = FALSE;

    /* unapproved qualifiers suppressed for flatfile, okay for GBSeq XML:
       these two are only cleared when no GBSeq object is being built */

    if (ajp->gbseq == NULL) {
      ajp->flags.goQualsToNote = FALSE;
      ajp->flags.refSeqQualsToNote = FALSE;
    }
  }

  /* for non-RefSeq flatfile output, srcQualsToNote, goQualsToNote and
     separateGeneSyns are deliberately left at their mode defaults (the
     forced-TRUE assignments that once lived here were relaxed) */

  if (ajp->refseqConventions) {
    ajp->flags.srcQualsToNote = FALSE;
    ajp->flags.separateGeneSyns = FALSE;
    ajp->flags.codonRecognizedToNote = FALSE;
    ajp->flags.goQualsToNote = FALSE;
    ajp->flags.refSeqQualsToNote = FALSE;
    ajp->flags.hideSpecificGeneMaps = FALSE;
  }
}
3863
/*
 * Bioseq visitor: sets the Boolean that userdata points to when the Bioseq
 * has a GI Seq-id together with a GenBank/EMBL/DDBJ/TPG/TPE/TPD Seq-id whose
 * version is 0.  The flag is never cleared here.
 *
 * Does nothing if bsp or userdata is NULL, matching the guards used by the
 * other visitor callbacks in this file.
 */
static void CheckVersionWithGi (BioseqPtr bsp, Pointer userdata)

{
  Boolean       hasGi = FALSE;
  BoolPtr       missingVersion;
  SeqIdPtr      sip;
  TextSeqIdPtr  tsip;
  Boolean       zeroVersion = FALSE;

  if (bsp == NULL || userdata == NULL) return;
  missingVersion = (BoolPtr) userdata;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_TPG :
      case SEQID_TPE :
      case SEQID_TPD :
      case SEQID_GENBANK :
      case SEQID_EMBL :
      case SEQID_DDBJ :
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL && tsip->version == 0) {
          zeroVersion = TRUE;
        }
        break;
      case SEQID_GI :
        hasGi = TRUE;
        break;
      default :
        break;
    }
  }

  if (hasGi && zeroVersion) {
    *missingVersion = TRUE;
  }
}
3898
3899
3900 typedef struct lookforids {
3901 Boolean isG;
3902 Boolean isGED;
3903 Boolean isED;
3904 Boolean isNTorNWorNG;
3905 Boolean isNC;
3906 Boolean isNZ;
3907 Boolean isRefSeq;
3908 Boolean isGeneral;
3909 Boolean isNCBIGenomes;
3910 Boolean isTPA;
3911 Boolean isTPG;
3912 Boolean isSP;
3913 Boolean isNuc;
3914 Boolean isProt;
3915 Boolean isFarProt;
3916 Boolean isLocal;
3917 Boolean isNonLocal;
3918 Boolean sourcePubFuse;
3919 } LookForIDs, PNTR LookForIDsPtr;
3920
/*
 * Bioseq visitor: records in the LookForIDs structure passed as userdata
 * which molecule classes and Seq-id types occur on this Bioseq, and decides
 * the sourcePubFuse policy from the Seq-id types.
 *
 * Does nothing if bsp or userdata is NULL, matching the guards used by the
 * other visitor callbacks in this file.
 */
static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)

{
  DbtagPtr       dbt;
  LookForIDsPtr  lfip;
  SeqIdPtr       sip;
  TextSeqIdPtr   tsip;

  if (bsp == NULL || userdata == NULL) return;
  lfip = (LookForIDsPtr) userdata;

  if (ISA_na (bsp->mol)) {
    lfip->isNuc = TRUE;
  }
  if (ISA_aa (bsp->mol)) {
    lfip->isProt = TRUE;
    if (bsp->repr == Seq_repr_delta) {
      if (! DeltaLitOnly (bsp)) {
        lfip->isFarProt = TRUE;
      }
    } else if (bsp->repr == Seq_repr_ref) {
      lfip->isFarProt = TRUE;
    }
  }

  for (sip = bsp->id; sip != NULL; sip = sip->next) {

    /* classify the Seq-id type */

    switch (sip->choice) {
      case SEQID_GENBANK :
        lfip->isG = TRUE;
        lfip->isGED = TRUE;
        lfip->isNonLocal = TRUE;
        break;
      case SEQID_EMBL :
      case SEQID_DDBJ :
        lfip->isED = TRUE;
        lfip->isGED = TRUE;
        lfip->isNonLocal = TRUE;
        break;
      case SEQID_SWISSPROT :
        /* note: does not mark the record as non-local */
        lfip->isSP = TRUE;
        break;
      case SEQID_TPG :
        lfip->isTPG = TRUE;
        /* and fall through to TPE and TPD */
      case SEQID_TPE :
      case SEQID_TPD :
        lfip->isTPA = TRUE;
        lfip->isNonLocal = TRUE;
        break;
      case SEQID_OTHER :
        lfip->isRefSeq = TRUE;
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL) {
          /* sub-classify by accession prefix */
          if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
            lfip->isNC = TRUE;
          } else if (StringNCmp (tsip->accession, "NT_", 3) == 0) {
            lfip->isNTorNWorNG = TRUE;
          } else if (StringNCmp (tsip->accession, "NW_", 3) == 0) {
            lfip->isNTorNWorNG = TRUE;
          } else if (StringNCmp (tsip->accession, "NG_", 3) == 0) {
            lfip->isNTorNWorNG = TRUE;
          } else if (StringNCmp (tsip->accession, "NZ_", 3) == 0) {
            lfip->isNZ = TRUE;
          }
        }
        lfip->isNonLocal = TRUE;
        break;
      case SEQID_GENERAL :
        dbt = (DbtagPtr) sip->data.ptrvalue;
        if (dbt != NULL && !IsSkippableDbtag(dbt)) {
          lfip->isGeneral = TRUE;
          lfip->isNonLocal = TRUE;
          if (StringCmp (dbt->db, "NCBI_GENOMES") == 0) {
            lfip->isNCBIGenomes = TRUE;
          }
        }
        break;
      case SEQID_LOCAL :
        lfip->isLocal = TRUE;
        break;
      default :
        lfip->isNonLocal = TRUE;
        break;
    }

    /* also set policy on sourcePubFuse */

    switch (sip->choice) {
      case SEQID_GIBBSQ :
      case SEQID_GIBBMT :
        lfip->sourcePubFuse = TRUE;
        break;
      case SEQID_EMBL :
      case SEQID_PIR :
      case SEQID_SWISSPROT :
      case SEQID_PATENT :
      case SEQID_DDBJ :
      case SEQID_PRF :
      case SEQID_PDB :
      case SEQID_TPE :
      case SEQID_TPD :
      case SEQID_GPIPE :
        lfip->sourcePubFuse = TRUE;
        break;
      case SEQID_GENBANK :
      case SEQID_TPG :
        /* only for accessions that are exactly six characters long */
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL) {
          if (StringLen (tsip->accession) == 6) {
            lfip->sourcePubFuse = TRUE;
          }
        }
        break;
      case SEQID_NOT_SET :
      case SEQID_LOCAL :
      case SEQID_OTHER :
      case SEQID_GENERAL :
        break;
      default :
        break;
    }
  }
}
4040
/*
 * Visits every Bioseq under topsep with LookForSeqIDs and copies the
 * collected results into the caller's Booleans.  *isOnlyLocal is TRUE when
 * a local Seq-id was seen and no non-local Seq-id was seen.
 *
 * All output pointers are written unconditionally and must be non-NULL.
 */
static void LookForGEDetc (
  SeqEntryPtr topsep,
  BoolPtr isG,
  BoolPtr isGED,
  BoolPtr isED,
  BoolPtr isNTorNWorNG,
  BoolPtr isNC,
  BoolPtr isNZ,
  BoolPtr isRefSeq,
  BoolPtr isGeneral,
  BoolPtr isNCBIGenomes,
  BoolPtr isTPA,
  BoolPtr isTPG,
  BoolPtr isSP,
  BoolPtr isNuc,
  BoolPtr isProt,
  BoolPtr isFarProt,
  BoolPtr isOnlyLocal,
  BoolPtr sourcePubFuse
)

{
  LookForIDs  found;

  MemSet ((Pointer) &found, 0, sizeof (found));
  VisitBioseqsInSep (topsep, (Pointer) &found, LookForSeqIDs);

  /* database of origin */
  *isG = found.isG;
  *isGED = found.isGED;
  *isED = found.isED;

  /* RefSeq and its accession classes */
  *isNTorNWorNG = found.isNTorNWorNG;
  *isNC = found.isNC;
  *isNZ = found.isNZ;
  *isRefSeq = found.isRefSeq;

  *isGeneral = found.isGeneral;
  *isNCBIGenomes = found.isNCBIGenomes;
  *isTPA = found.isTPA;
  *isTPG = found.isTPG;
  *isSP = found.isSP;

  /* molecule classes */
  *isNuc = found.isNuc;
  *isProt = found.isProt;
  *isFarProt = found.isFarProt;

  *isOnlyLocal = (Boolean) (found.isLocal && (! found.isNonLocal));
  *sourcePubFuse = found.sourcePubFuse;
}
4089
MakeGapFeatsBase(BioseqPtr bsp,Pointer userdata,Boolean isSP,Boolean rev_comp)4090 static void MakeGapFeatsBase (
4091 BioseqPtr bsp,
4092 Pointer userdata,
4093 Boolean isSP,
4094 Boolean rev_comp
4095 )
4096
4097 {
4098 Char buf [128];
4099 Int4 currpos = 0;
4100 BioseqPtr fakebsp = NULL;
4101 IntFuzzPtr fuzz;
4102 ValNodePtr PNTR gapvnp;
4103 ImpFeatPtr ifp;
4104 SeqFeatPtr last = NULL;
4105 SeqLitPtr litp;
4106 SeqAnnotPtr sap = NULL;
4107 SeqFeatPtr sfp;
4108 SeqIdPtr sip;
4109 SeqLocPtr slp;
4110 ValNodePtr vnp;
4111 SeqGapPtr seq_gap = NULL;
4112 Boolean gap_is_linked = FALSE;
4113 ValNodePtr evidvnp = NULL;
4114 Int4 linktype = 0;
4115 Boolean needs_evidence = FALSE;
4116
4117 if (bsp == NULL || bsp->repr != Seq_repr_delta) return;
4118 gapvnp = (ValNodePtr PNTR) userdata;
4119 if (gapvnp == NULL) return;
4120 sip = SeqIdFindBest (bsp->id, 0);
4121 if (sip == NULL) return;
4122 /* no longer suppress on far delta contigs */
4123 /* if (! DeltaLitOnly (bsp)) return; */
4124
4125 for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
4126 if (vnp->choice == 1) {
4127 slp = (SeqLocPtr) vnp->data.ptrvalue;
4128 if (slp == NULL) continue;
4129 currpos += SeqLocLen (slp);
4130 }
4131 if (vnp->choice == 2) {
4132 litp = (SeqLitPtr) vnp->data.ptrvalue;
4133 if (litp == NULL) continue;
4134 if (litp->seq_data == NULL || litp->seq_data_type == Seq_code_gap) {
4135 if (litp->length > 0 || (isSP && litp->length == 0) ) {
4136 seq_gap = (SeqGapPtr)litp->seq_data; /* might be NULL */
4137 if (fakebsp == NULL) {
4138 /* to be freed with MemFree, not BioseqFree */
4139 fakebsp = MemNew (sizeof (Bioseq));
4140 if (fakebsp == NULL) return;
4141 sap = SeqAnnotNew ();
4142 if (sap == NULL) return;
4143 sap->type = 1;
4144 fakebsp->annot = sap;
4145 ValNodeAddPointer (gapvnp, 0, (Pointer) fakebsp);
4146 }
4147 ifp = ImpFeatNew ();
4148 if (ifp == NULL) continue;
4149 ifp->key = StringSave ( "gap" );
4150 sfp = SeqFeatNew ();
4151 if (sfp == NULL) continue;
4152 sfp->data.choice = SEQFEAT_IMP;
4153 sfp->data.value.ptrvalue = (Pointer) ifp;
4154 sfp->idx.subtype = FEATDEF_gap;
4155 if (last != NULL) {
4156 last->next = sfp;
4157 } else {
4158 sap->data = (Pointer) sfp;
4159 }
4160 last = sfp;
4161 fuzz = litp->fuzz;
4162 if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
4163 AddQualifierToFeature (sfp, "estimated_length", "unknown");
4164 } else {
4165 sprintf (buf, "%ld", (long) litp->length);
4166 AddQualifierToFeature (sfp, "estimated_length", buf);
4167 }
4168 if (rev_comp) {
4169 sfp->location = AddIntervalToLocation (
4170 NULL, sip,
4171 ( litp->length == 0 ? currpos : currpos + litp->length - 1 ),
4172 ( litp->length == 0 ? currpos - 1 : currpos ),
4173 FALSE, FALSE);
4174 } else {
4175 sfp->location = AddIntervalToLocation (
4176 NULL, sip,
4177 ( litp->length == 0 ? currpos - 1 : currpos ),
4178 ( litp->length == 0 ? currpos : currpos + litp->length - 1 ),
4179 FALSE, FALSE);
4180 }
4181 if( isSP && litp->length == 0 ) {
4182 sfp->comment = StringSave ("Non-consecutive residues");
4183 }
4184 if( seq_gap != NULL ) {
4185 needs_evidence = FALSE;
4186 /* I can't seem to find pound-defines for
4187 some of these magic numbers below */
4188 gap_is_linked = ( seq_gap->linkage == 1 ); /* linked */
4189
4190 if (seq_gap->linkage_evidence != NULL) {
4191 gap_is_linked = TRUE; /* do not rely solely on seq_gap->linkage, which is not always set correctly */
4192 }
4193
4194 switch( seq_gap->type ) {
4195 case 0: /* unknown */
4196 /* no /gap_type label - policy changed at SQD-1801 */
4197 AddQualifierToFeature(sfp, "gap_type", "unknown" );
4198 needs_evidence = gap_is_linked;
4199 break;
4200 case 1: /* fragment */
4201 AddQualifierToFeature(sfp, "gap_type", "within scaffold" );
4202 needs_evidence = TRUE;
4203 break;
4204 case 2: /* clone */
4205 AddQualifierToFeature(sfp, "gap_type",
4206 ( gap_is_linked ?
4207 "within scaffold" :
4208 "between scaffolds" ) );
4209 needs_evidence = gap_is_linked;
4210 break;
4211 case 3: /* short-arm */
4212 AddQualifierToFeature(sfp, "gap_type", "short_arm" );
4213 break;
4214 case 4: /* heterochromatin */
4215 AddQualifierToFeature(sfp, "gap_type", "heterochromatin" );
4216 break;
4217 case 5: /* centromere */
4218 AddQualifierToFeature(sfp, "gap_type", "centromere" );
4219 break;
4220 case 6: /* telomere */
4221 AddQualifierToFeature(sfp, "gap_type", "telomere");
4222 break;
4223 case 7: /* repeat */
4224 AddQualifierToFeature(sfp, "gap_type",
4225 ( gap_is_linked ?
4226 "repeat within scaffold" :
4227 "repeat between scaffolds" ) );
4228 needs_evidence = gap_is_linked;
4229 break;
4230 case 8: /* contig */
4231 AddQualifierToFeature(sfp, "gap_type", "between scaffolds" );
4232 break;
4233 case 9: /* scaffold */
4234 AddQualifierToFeature(sfp, "gap_type", "within scaffold" );
4235 needs_evidence = gap_is_linked;
4236 break;
4237 case 255: /* other */
4238 AddQualifierToFeature(sfp, "gap_type", "other" );
4239 break;
4240 default:
4241 sprintf (buf, "(ERROR: UNRECOGNIZED_GAP_TYPE:%ld)", (long)seq_gap->type );
4242 AddQualifierToFeature(sfp, "gap_type", buf );
4243 break;
4244 }
4245
4246 /* Create the /linkage_evidence quals */
4247 if( needs_evidence ) {
4248 for( evidvnp = seq_gap->linkage_evidence; evidvnp; evidvnp = evidvnp->next ) {
4249 linktype = ((LinkageEvidencePtr)evidvnp->data.ptrvalue)->type;
4250 switch( linktype ) {
4251 case 0: /* paired-ends */
4252 AddQualifierToFeature(sfp, "linkage_evidence",
4253 "paired-ends" );
4254 break;
4255 case 1: /* align-genus */
4256 AddQualifierToFeature(sfp, "linkage_evidence",
4257 "align genus" );
4258 break;
4259 case 2: /* align-xgenus */
4260 AddQualifierToFeature(sfp, "linkage_evidence",
4261 "align xgenus" );
4262 break;
4263 case 3: /* align-trnscpt */
4264 AddQualifierToFeature(sfp, "linkage_evidence",
4265 "align trnscpt" );
4266 break;
4267 case 4: /* within-clone */
4268 AddQualifierToFeature(sfp, "linkage_evidence",
4269 "within clone" );
4270 break;
4271 case 5: /* clone-contig */
4272 AddQualifierToFeature(sfp, "linkage_evidence",
4273 "clone contig" );
4274 break;
4275 case 6: /* map */
4276 AddQualifierToFeature(sfp, "linkage_evidence",
4277 "map" );
4278 break;
4279 case 7: /* strobe */
4280 AddQualifierToFeature(sfp, "linkage_evidence",
4281 "strobe" );
4282 break;
4283 case 8: /* unspecified */
4284 AddQualifierToFeature(sfp, "linkage_evidence",
4285 "unspecified" );
4286 break;
4287 case 9: /* pcr */
4288 AddQualifierToFeature(sfp, "linkage_evidence",
4289 "pcr" );
4290 break;
4291 case 255: /* other */
4292 AddQualifierToFeature(sfp, "linkage_evidence",
4293 "other" );
4294 break;
4295 default:
4296 sprintf( buf, "(UNRECOGNIZED LINKAGE EVIDENCE:%ld)",
4297 (long)linktype );
4298 AddQualifierToFeature(
4299 sfp, "linkage_evidence",
4300 buf );
4301 break;
4302 }
4303 }
4304 /* if no linkage-evidence and needs some, add "unspecified" */
4305 if( NULL == seq_gap->linkage_evidence ) {
4306 AddQualifierToFeature( sfp, "linkage_evidence",
4307 "unspecified" );
4308 }
4309 }
4310 }
4311 }
4312 }
4313 currpos += litp->length;
4314 }
4315 }
4316 }
4317
/* Bioseq visitor: MakeGapFeatsBase with isSP on and rev_comp off. */
static void MakeSPGapFeats (BioseqPtr bsp, Pointer userdata)

{
  const Boolean  isSP = TRUE;
  const Boolean  rev_comp = FALSE;

  MakeGapFeatsBase (bsp, userdata, isSP, rev_comp);
}
4326
/* Bioseq visitor: MakeGapFeatsBase with isSP off and rev_comp on. */
static void MakeRCGapFeats (BioseqPtr bsp, Pointer userdata)

{
  const Boolean  isSP = FALSE;
  const Boolean  rev_comp = TRUE;

  MakeGapFeatsBase (bsp, userdata, isSP, rev_comp);
}
4335
/* Bioseq visitor: MakeGapFeatsBase with both isSP and rev_comp off. */
static void MakeGapFeats (BioseqPtr bsp, Pointer userdata)

{
  const Boolean  isSP = FALSE;
  const Boolean  rev_comp = FALSE;

  MakeGapFeatsBase (bsp, userdata, isSP, rev_comp);
}
4344
4345 typedef struct featpolicy {
4346 Boolean forceOnlyNearFeats;
4347 Boolean forceAllowFarFeats;
4348 } FeatPolicy, PNTR FeatPolicyPtr;
4349
/*
 * Descriptor visitor: looks for a user-object descriptor whose type string is
 * "FeatureFetchPolicy" and, for each of its fields labelled "Policy", sets
 * the matching Boolean in the FeatPolicy passed as userdata.  The policy
 * value is compared case-insensitively; unrecognized values are ignored.
 */
static void LookFarFeatFetchPolicy (
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  UserFieldPtr   field;
  ObjectIdPtr    label;
  FeatPolicyPtr  policy;
  UserObjectPtr  uop;
  CharPtr        value;

  if (sdp == NULL || sdp->choice != Seq_descr_user) return;

  policy = (FeatPolicyPtr) userdata;
  if (policy == NULL) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop == NULL || uop->type == NULL) return;
  if (StringCmp (uop->type->str, "FeatureFetchPolicy") != 0) return;

  for (field = uop->data; field != NULL; field = field->next) {
    label = field->label;
    /* NOTE(review): the field value is read as a string without looking at
       the field's choice - confirm that "Policy" fields are always strings */
    value = (CharPtr) field->data.ptrvalue;
    if (label == NULL || value == NULL) continue;
    if (StringCmp (label->str, "Policy") != 0) continue;

    if (StringICmp (value, "OnlyNearFeatures") == 0) {
      policy->forceOnlyNearFeats = TRUE;
    } else if (StringICmp (value, "AllowFarFeatures") == 0) {
      policy->forceAllowFarFeats = TRUE;
    }
  }
}
4385
/*
 * Feature visitor: sets the Boolean that userdata points to when a gene
 * feature has a location of type packed-int, packed-pnt, mix or equiv.
 * The flag is never cleared here.
 */
static void FindMultiIntervalGenes (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  SeqLocPtr  loc;
  BoolPtr    resultP;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;

  resultP = (BoolPtr) userdata;
  if (resultP == NULL) return;

  loc = sfp->location;
  if (loc == NULL) return;

  if (loc->choice == SEQLOC_PACKED_INT ||
      loc->choice == SEQLOC_PACKED_PNT ||
      loc->choice == SEQLOC_MIX ||
      loc->choice == SEQLOC_EQUIV) {
    *resultP = TRUE;
  }
}
4412
/*
 * Bioseq visitor: sets the Boolean that userdata points to when the Bioseq
 * has a segmented representation (Seq_repr_seg).  Never clears the flag.
 */
static void FindSegmentedBioseqs (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BoolPtr  resultP;

  if (bsp == NULL) return;
  if (bsp->repr != Seq_repr_seg) return;

  resultP = (BoolPtr) userdata;
  if (resultP != NULL) {
    *resultP = TRUE;
  }
}
4426
/*
 * Bioseq-set visitor: sets the Boolean that userdata points to when the set
 * is of class small_genome_set.  Never clears the flag.
 */
static void FindSmallGenomeSets (
  BioseqSetPtr bssp,
  Pointer userdata
)

{
  BoolPtr  resultP;

  if (bssp == NULL) return;
  if (bssp->_class != BioseqseqSet_class_small_genome_set) return;

  resultP = (BoolPtr) userdata;
  if (resultP != NULL) {
    *resultP = TRUE;
  }
}
4440
4441 static CharPtr bad_html_strings [] = {
4442 "<script", "<object", "<applet", "<embed", "<form", "javascript:", "vbscript:", NULL
4443 };
4444
4445 static CharPtr defHead = "\
4446 <!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\
4447 \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n\
4448 <html lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n\
4449 <head>\n\
4450 <meta http-equiv=\"Content-Type\" content=\"text/html; charset=us-ascii\" />\
4451 <title>GenBank entry</title>\n\
4452 </head>\n\
4453 <body>\n\
4454 <hr />";
4455
4456 static CharPtr defTail = "\
4457 <hr />\n\
4458 </body>\n\
4459 </html>\n";
4460
/* each mask is the union of a group of related option bits */

#define FAR_TRANS_MASK \
  (SHOW_FAR_TRANSLATION | \
   TRANSLATE_IF_NO_PRODUCT | \
   ALWAYS_TRANSLATE_CDS)

#define FEAT_FETCH_MASK \
  (ONLY_NEAR_FEATURES | \
   FAR_FEATURES_SUPPRESS | \
   NEAR_FEATURES_SUPPRESS | \
   FORCE_ALLOW_FAR_FEATS)

#define HTML_XML_ASN_MASK \
  (CREATE_HTML_FLATFILE | \
   CREATE_XML_GBSEQ_FILE | \
   CREATE_ASN_GBSEQ_FILE)

#define PUBLICATION_MASK \
  (HIDE_GENE_RIFS | \
   ONLY_GENE_RIFS | \
   ONLY_REVIEW_PUBS | \
   NEWEST_PUBS | \
   OLDEST_PUBS | \
   HIDE_ALL_PUBS)
4465
asn2gnbk_setup_ex(BioseqPtr bsp,BioseqSetPtr bssp,SeqLocPtr slp,FmtType format,ModType mode,StlType style,FlgType flags,LckType locks,CstType custom,XtraPtr extra,Boolean stream,FILE * fp,AsnIoPtr aip,AsnTypePtr atp)4466 static Asn2gbJobPtr asn2gnbk_setup_ex (
4467 BioseqPtr bsp,
4468 BioseqSetPtr bssp,
4469 SeqLocPtr slp,
4470 FmtType format,
4471 ModType mode,
4472 StlType style,
4473 FlgType flags,
4474 LckType locks,
4475 CstType custom,
4476 XtraPtr extra,
4477 Boolean stream,
4478 FILE *fp,
4479 AsnIoPtr aip,
4480 AsnTypePtr atp
4481 )
4482
4483 {
4484 Asn2gbFormat af;
4485 IntAsn2gbJobPtr ajp = NULL;
4486 Asn2gbSectPtr asp;
4487 Asn2gbWork aw;
4488 BaseBlockPtr bbp;
4489 BlockMask bkmask = (BlockMask) 0;
4490 BaseBlockPtr PNTR blockArray;
4491 Uint2 eID = 0;
4492 Uint2 entityID = 0;
4493 Uint2 item_type = 0;
4494 Uint4 item_id = 0;
4495 CharPtr ffhead = NULL;
4496 CharPtr fftail = NULL;
4497 Asn2gbWriteFunc ffwrite = NULL;
4498 FeatPolicy featpolicy;
4499 ValNodePtr gapvnp = NULL;
4500 GBSeqPtr gbseq = NULL;
4501 BioseqPtr gbsp;
4502 SeqAnnotPtr gsap;
4503 SeqFeatPtr gsfp;
4504 Int4 i;
4505 IndxPtr index = NULL;
4506 Boolean isFarProt;
4507 Boolean isG;
4508 Boolean isGED;
4509 Boolean isED;
4510 Boolean isGeneral;
4511 Boolean isNCBIGenomes;
4512 Boolean isNTorNWorNG;
4513 Boolean isNC;
4514 Boolean isNuc;
4515 Boolean isNZ;
4516 Boolean isOnlyLocal;
4517 Boolean isProt;
4518 Boolean isRefSeq;
4519 Boolean isSP;
4520 Boolean isTPA;
4521 Boolean isTPG;
4522 Int4 j;
4523 Int4 k;
4524 SeqLocPtr loc = NULL;
4525 Boolean lockFarComp;
4526 Boolean lockFarLocs;
4527 Boolean lockFarProd;
4528 Boolean lookupFarComp;
4529 Boolean lookupFarHist;
4530 Boolean lookupFarInf;
4531 Boolean lookupFarLocs;
4532 Boolean lookupFarOthers;
4533 Boolean lookupFarProd;
4534 ValNodePtr manygaps = NULL;
4535 Boolean missingVersion;
4536 Boolean multiIntervalGenes = FALSE;
4537 BIG_ID nextGi = 0;
4538 Boolean noLeft;
4539 Boolean noRight;
4540 Int4 numBlocks;
4541 Int4 numGaps;
4542 Int4 numSections;
4543 SeqEntryPtr oldscope;
4544 ObjMgrDataPtr omdp;
4545 Int4 numParagraphs;
4546 BaseBlockPtr PNTR paragraphArray;
4547 BaseBlockPtr PNTR paragraphByIDs;
4548 BioseqPtr parent = NULL;
4549 BIG_ID prevGi = 0;
4550 Int2 q;
4551 Boolean reindex = TRUE;
4552 Pointer remotedata = NULL;
4553 Asn2gbFreeFunc remotefree = NULL;
4554 Asn2gbLockFunc remotelock = NULL;
4555 ValNodePtr remotevnp = NULL;
4556 Int2 sat = 0;
4557 Int4 sat_key = 0;
4558 SubmitBlockPtr sbp;
4559 Asn2gbSectPtr PNTR sectionArray;
4560 Boolean segmentedBioseqs = FALSE;
4561 SeqEntryPtr sep;
4562 Boolean seqspans = FALSE;
4563 Boolean smallGenomeSet = FALSE;
4564 SeqIntPtr sintp;
4565 SeqIdPtr sip;
4566 Boolean skipMrnas = FALSE;
4567 Boolean skipProts = FALSE;
4568 Boolean sourcePubFuse;
4569 SeqSubmitPtr ssp;
4570 BioseqSetPtr topbssp;
4571 Pointer userdata = NULL;
4572 ValNodePtr vnp;
4573 Boolean was_slp = FALSE;
4574 Boolean rev_comp = FALSE;
4575 Boolean is_html = FALSE;
4576
4577 if (format == 0) {
4578 format = GENBANK_FMT;
4579 }
4580 if (mode == 0) {
4581 mode = SEQUIN_MODE;
4582 }
4583 if (style == 0) {
4584 style = NORMAL_STYLE;
4585 }
4586
4587 if (extra != NULL) {
4588 ffwrite = extra->ffwrite;
4589 ffhead = extra->ffhead;
4590 fftail = extra->fftail;
4591 index = extra->index;
4592 gbseq = extra->gbseq;
4593 userdata = extra->userdata;
4594 remotelock = extra->remotelock;
4595 remotefree = extra->remotefree;
4596 remotedata = extra->remotedata;
4597 prevGi = extra->prevGi;
4598 nextGi = extra->nextGi;
4599 bkmask = extra->bkmask;
4600 reindex = extra->reindex;
4601 seqspans = extra->seqspans;
4602 sat = extra->sat;
4603 sat_key = extra->sat_key;
4604 }
4605
4606 if ((custom & FORCE_SEQ_SPANS) != 0) {
4607 seqspans = TRUE;
4608 }
4609
4610 if (slp != NULL) {
4611 sip = SeqLocId (slp);
4612 bsp = BioseqFind (sip);
4613 if (bsp == NULL) {
4614 bsp = BioseqFindFromSeqLoc (slp);
4615 }
4616 if (bsp == NULL) return NULL;
4617
4618 /* if location is on part of segmented set, need to map to segmented bioseq */
4619
4620 if (slp->choice == SEQLOC_WHOLE) {
4621 /* Entrez server may pass in whole location on part instead of part bioseq */
4622 slp = NULL;
4623 } else if (sip == NULL) {
4624 parent = bsp;
4625 } else {
4626
4627 /* SeqMgrGetParentOfPart depends upon feature indexing */
4628
4629 eID = ObjMgrGetEntityIDForPointer (bsp);
4630 if (SeqMgrFeaturesAreIndexed (eID) == 0) {
4631 SeqMgrIndexFeatures (eID, NULL);
4632 }
4633
4634 parent = SeqMgrGetParentOfPart (bsp, NULL);
4635 }
4636 if (parent != NULL) {
4637 CheckSeqLocForPartial (slp, &noLeft, &noRight);
4638 loc = SeqLocMergeEx (parent, slp, NULL, FALSE, TRUE, FALSE, FALSE);
4639 slp = loc;
4640 FreeAllFuzz (slp);
4641 SetSeqLocPartial (slp, noLeft, noRight);
4642 }
4643
4644 /* if location is whole, generate normal bioseq report */
4645
4646 if (slp == NULL) {
4647 /* reality check in case SeqLocMergeEx fails and sets slp to NULL, or if was cleared above */
4648 } else if (slp->choice == SEQLOC_WHOLE) {
4649 slp = NULL;
4650 SeqLocFree (loc);
4651 loc = NULL;
4652 } else if (slp->choice == SEQLOC_INT) {
4653 sintp = (SeqIntPtr) slp->data.ptrvalue;
4654 if (sintp != NULL &&
4655 sintp->from == 0 &&
4656 sintp->to == bsp->length - 1 &&
4657 sintp->strand != Seq_strand_minus) {
4658 slp = NULL;
4659 SeqLocFree (loc);
4660 loc = NULL;
4661 } else if (sintp != NULL &&
4662 sintp->from == 0 &&
4663 sintp->to == bsp->length - 1 &&
4664 sintp->strand == Seq_strand_minus) {
4665 rev_comp = TRUE;
4666 }
4667 }
4668 }
4669
4670 if (slp != NULL && (! rev_comp)) {
4671 /* suppress gaps if using sub-location, but show gaps if location was whole or interval 0..length-1 on either strand */
4672 was_slp = TRUE;
4673 }
4674
4675 if (bsp != NULL) {
4676 bssp = NULL;
4677 entityID = ObjMgrGetEntityIDForPointer (bsp);
4678 item_type = OBJ_BIOSEQ;
4679 item_id = bsp->idx.itemID;
4680 } else if (bssp != NULL) {
4681 entityID = ObjMgrGetEntityIDForPointer (bssp);
4682 item_type = OBJ_BIOSEQSET;
4683 item_id = bssp->idx.itemID;
4684
4685 if (format == FTABLE_FMT) {
4686 skipProts = TRUE;
4687 skipMrnas = TRUE;
4688 }
4689 }
4690 if ((Boolean) ((custom & SHOW_PROT_FTABLE) != 0)) {
4691 skipProts = FALSE;
4692 skipMrnas = FALSE;
4693 }
4694
4695 if (entityID == 0) return NULL;
4696
4697 sep = GetTopSeqEntryForEntityID (entityID);
4698
4699 LookForGEDetc (sep, &isG, &isGED, &isED, &isNTorNWorNG, &isNC, &isNZ, &isRefSeq,
4700 &isGeneral, &isNCBIGenomes, &isTPA, &isTPG, &isSP, &isNuc,
4701 &isProt, &isFarProt, &isOnlyLocal, &sourcePubFuse);
4702
4703 if (mode == RELEASE_MODE) {
4704 missingVersion = FALSE;
4705 VisitBioseqsInSep (sep, (Pointer) &missingVersion, CheckVersionWithGi);
4706 if (missingVersion) return NULL;
4707 }
4708
4709 ajp = (IntAsn2gbJobPtr) MemNew (sizeof (IntAsn2gbJob));
4710 if (ajp == NULL) return NULL;
4711
4712 featpolicy.forceOnlyNearFeats = FALSE;
4713 featpolicy.forceAllowFarFeats = FALSE;
4714 VisitDescriptorsInSep (sep, (Pointer) &featpolicy, LookFarFeatFetchPolicy);
4715
4716 gapvnp = NULL;
4717 manygaps = NULL;
4718 remotevnp = NULL;
4719
4720 if (format != FTABLE_FMT && (! was_slp)) {
4721 if (isGED /* was isG */ || isTPG || isOnlyLocal || isRefSeq || isSP || (isGeneral && (! isGED))) {
4722 if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) {
4723 if (isSP) {
4724 VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeSPGapFeats);
4725 } else if (rev_comp) {
4726 VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeRCGapFeats);
4727 } else {
4728 VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeGapFeats);
4729 }
4730 }
4731 }
4732 }
4733
4734 numGaps = 0;
4735 if (gapvnp != NULL) {
4736 gbsp = (BioseqPtr) gapvnp->data.ptrvalue;
4737 if (gbsp != NULL) {
4738 gsap = gbsp->annot;
4739 if (gsap != NULL && gsap->type == 1) {
4740 for (gsfp = (SeqFeatPtr) gsap->data; gsfp != NULL; gsfp = gsfp->next) {
4741 numGaps++;
4742 }
4743 }
4744 }
4745 }
4746 if (isED) {
4747 if (numGaps > 0) {
4748 manygaps = gapvnp;
4749 gapvnp = NULL;
4750 }
4751 } else {
4752 if (numGaps > 1000) {
4753 manygaps = gapvnp;
4754 gapvnp = NULL;
4755 }
4756 }
4757
4758 ajp->gapvnp = gapvnp;
4759 ajp->manygaps = manygaps;
4760
4761 ajp->remotelock = remotelock;
4762 ajp->remotefree = remotefree;
4763 ajp->remotedata = remotedata;
4764 if (remotelock != NULL && bsp != NULL) {
4765 sip = SeqIdFindBest (bsp->id, SEQID_GI);
4766 if (sip != NULL) {
4767 remotevnp = remotelock (sip, remotedata);
4768 }
4769 }
4770
4771 ajp->remotevnp = remotevnp;
4772
4773 if (gapvnp != NULL || remotevnp != NULL) {
4774 /* if both gapvnp and remotevnp, link together so everything is indexed */
4775 if (gapvnp != NULL) {
4776 ValNodeLink(&gapvnp, remotevnp);
4777 } else {
4778 gapvnp = remotevnp;
4779 }
4780 SeqMgrClearFeatureIndexes (entityID, NULL);
4781 SeqMgrIndexFeaturesExEx (entityID, NULL, FALSE, FALSE, gapvnp);
4782 gapvnp->next = NULL;
4783 }
4784
4785 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
4786 SeqMgrIndexFeatures (entityID, NULL);
4787 }
4788
4789 is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
4790 if (is_html) {
4791 InitWWW(ajp);
4792 }
4793
4794 ajp->ajp.entityID = entityID;
4795 ajp->ajp.bsp = bsp;
4796 ajp->ajp.bssp = bssp;
4797
4798 if (loc != NULL) {
4799 ajp->ajp.slp = loc;
4800 } else if (slp != NULL) {
4801 ajp->ajp.slp = AsnIoMemCopy ((Pointer) slp,
4802 (AsnReadFunc) SeqLocAsnRead,
4803 (AsnWriteFunc) SeqLocAsnWrite);
4804 } else {
4805 ajp->ajp.slp = NULL;
4806 }
4807
4808 /* reality check on interval sublocation */
4809
4810 slp = ajp->ajp.slp;
4811 if (slp != NULL && slp->choice == SEQLOC_INT) {
4812 sintp = (SeqIntPtr) slp->data.ptrvalue;
4813 if (sintp != NULL) {
4814 bsp = BioseqFind (sintp->id);
4815 if (bsp != NULL) {
4816 if (sintp->from < 0) {
4817 sintp->from = 0;
4818 } else if (sintp->from > bsp->length - 1) {
4819 sintp->from = bsp->length - 1;
4820 }
4821 if (sintp->to < 0) {
4822 sintp->to = 0;
4823 } else if (sintp->to > bsp->length - 1) {
4824 sintp->to = bsp->length - 1;
4825 }
4826 }
4827 }
4828 }
4829
4830 /* if location specified, other than full reverse complement, normal defaults to master style */
4831
4832 if (ajp->ajp.slp != NULL && style == NORMAL_STYLE && (! rev_comp)) {
4833 style = MASTER_STYLE;
4834 }
4835
4836 ajp->format = format;
4837 ajp->mode = mode; /* for showing new qualifiers before quarantine ends */
4838
4839 ajp->index = index;
4840 ajp->gbseq = gbseq; /* gbseq output can relax srcQualsToNote or goQualsToNote strictness */
4841 if (bkmask == 0) {
4842 bkmask = (BlockMask) (0xFFFFFFFF - FEAT_STATS_MASK - REF_STATS_MASK);
4843 }
4844 ajp->bkmask = bkmask;
4845 ajp->reindex = reindex;
4846 ajp->seqspans = seqspans;
4847 ajp->sat = sat;
4848 ajp->sat_key = sat_key;
4849 ajp->aip = aip;
4850 ajp->atp = atp;
4851
4852 ajp->refseqConventions = (Boolean) ((flags & REFSEQ_CONVENTIONS) != 0);
4853
4854 SetFlagsFromMode (ajp, mode);
4855
4856 lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
4857 lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
4858 lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
4859
4860 if (lockFarComp || lockFarLocs || lockFarProd) {
4861
4862 /* lock all bioseqs in advance, including remote genome components */
4863
4864 if (ajp->ajp.slp != NULL && lockFarComp) {
4865 ajp->lockedBspList = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, ajp->ajp.slp);
4866 } else {
4867 ajp->lockedBspList = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
4868 }
4869 }
4870
4871 lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
4872 lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
4873 lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
4874 lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
4875 lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0);
4876 lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0);
4877
4878 if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) {
4879
4880 /* lookukp all far SeqIDs in advance */
4881
4882 LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers);
4883 }
4884
4885 ajp->showFarTransl = (Boolean) ((flags & FAR_TRANS_MASK) == SHOW_FAR_TRANSLATION);
4886 ajp->transIfNoProd = (Boolean) ((flags & FAR_TRANS_MASK) == TRANSLATE_IF_NO_PRODUCT);
4887 ajp->alwaysTranslCds = (Boolean) ((flags & FAR_TRANS_MASK) == ALWAYS_TRANSLATE_CDS);
4888 if (ajp->transIfNoProd || ajp->alwaysTranslCds) {
4889 ajp->showFarTransl = TRUE;
4890 }
4891
4892 ajp->masterStyle = (Boolean) (style == MASTER_STYLE);
4893
4894 ajp->showTranscript = (Boolean) ((flags & SHOW_TRANCRIPTION) != 0);
4895 ajp->showPeptide = (Boolean) ((flags & SHOW_PEPTIDE) != 0);
4896
4897 if (stream && (format == GENBANK_FMT || format == GENPEPT_FMT)) {
4898 ajp->specialGapFormat = (Boolean) ((flags & SPECIAL_GAP_DISPLAY) != 0);
4899 if (is_html && mode == ENTREZ_MODE) {
4900 ajp->specialGapFormat = TRUE;
4901 }
4902 if ((custom & EXPANDED_GAP_DISPLAY) != 0) {
4903 ajp->specialGapFormat = FALSE;
4904 }
4905 } else {
4906 ajp->specialGapFormat = FALSE;
4907 }
4908 ajp->seqGapCurrLen = 0;
4909
4910 ajp->relaxedMapping = (Boolean) ((flags & RELAXED_MAPPING) != 0);
4911 ajp->gpipdDeflines = (Boolean) ((flags & GPIPE_DEFLINES) != 0);
4912 ajp->hideProteinID = (Boolean) ((flags & HIDE_PROTEIN_ID) != 0);
4913
4914 ajp->produceInsdSeq = (Boolean) (((flags & PRODUCE_OLD_GBSEQ) == 0) && ((custom & OLD_GBSEQ_XML) == 0));
4915 ajp->oldXmlPolicy = (Boolean) ((custom & NEW_XML_POLICY) == 0);
4916
4917 ajp->gihead = NULL;
4918 ajp->gitail = NULL;
4919
4920 ajp->hideGoTerms = (Boolean) ((custom & HIDE_GO_TERMS) != 0);
4921 ajp->hideTranslation = (Boolean) ((custom & HIDE_TRANSLATION) != 0);
4922
4923 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
4924 ajp->newSourceOrg = TRUE;
4925 }
4926
4927 VisitFeaturesInSep (sep, (Pointer) &multiIntervalGenes, FindMultiIntervalGenes);
4928 ajp->multiIntervalGenes = multiIntervalGenes;
4929 VisitBioseqsInSep (sep, (Pointer) &segmentedBioseqs, FindSegmentedBioseqs);
4930 ajp->segmentedBioseqs = segmentedBioseqs;
4931 VisitSetsInSep (sep, (Pointer) &smallGenomeSet, FindSmallGenomeSets);
4932 ajp->smallGenomeSet = smallGenomeSet;
4933
4934 ajp->relModeError = FALSE;
4935 ajp->skipProts = skipProts;
4936 ajp->skipMrnas = skipMrnas;
4937
4938 MemSet ((Pointer) (&aw), 0, sizeof (Asn2gbWork));
4939 aw.ajp = ajp;
4940 aw.entityID = entityID;
4941
4942 aw.sectionList = NULL;
4943 aw.lastsection = NULL;
4944
4945 aw.currsection = 0;
4946 aw.showAllFeats = FALSE;
4947
4948 aw.showconfeats = (Boolean) ((flags & SHOW_CONTIG_FEATURES) != 0);
4949 aw.showconsource = (Boolean) ((flags & SHOW_CONTIG_SOURCES) != 0);
4950
4951 aw.format = format;
4952 aw.mode = mode;
4953 aw.style = style;
4954
4955 /* sectionCount used for hyperlinks */
4956
4957 aw.sectionCount = 0;
4958 aw.sectionMax = 0;
4959 aw.gilistpos = NULL;
4960
4961 aw.currGi = 0;
4962 aw.prevGi = prevGi;
4963 aw.nextGi = nextGi;
4964 aw.currAccVer [0] = '\0';
4965
4966 /* internal format pointer if writing at time of creation */
4967
4968 if (stream) {
4969 MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
4970 af.ajp = ajp;
4971 af.asp = NULL;
4972 af.qvp = NULL;
4973 af.format = format;
4974 af.ffwrite = ffwrite;
4975 af.userdata = userdata;
4976 af.fp = fp;
4977 af.aip = aip;
4978 af.atp = atp;
4979
4980 aw.afp = ⁡
4981 }
4982
4983 /* special types of records override feature fetching and contig display parameters */
4984
4985 if (mode == ENTREZ_MODE) {
4986 if (! aw.showconfeats) {
4987 aw.smartconfeats = TRUE; /* features suppressed if CONTIG style and length > 1 MB */
4988 aw.showconfeats = FALSE;
4989 aw.showconsource = FALSE;
4990 }
4991 }
4992
4993 aw.onlyNearFeats = FALSE;
4994 aw.farFeatsSuppress = FALSE;
4995 aw.nearFeatsSuppress = FALSE;
4996
4997 if (featpolicy.forceAllowFarFeats) {
4998
4999 /* do not set other flags */
5000
5001 } else if ((Boolean) ((flags & FEAT_FETCH_MASK) == FORCE_ALLOW_FAR_FEATS)) {
5002
5003 /* do not set other flags */
5004
5005 } else if (featpolicy.forceOnlyNearFeats) {
5006
5007 aw.onlyNearFeats = TRUE;
5008
5009 } else if (isNC) {
5010
5011 if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
5012 aw.onlyNearFeats = TRUE;
5013 } else if (featpolicy.forceOnlyNearFeats) {
5014 aw.onlyNearFeats = TRUE;
5015 } else {
5016 aw.nearFeatsSuppress = TRUE;
5017 }
5018
5019 } else if (isNTorNWorNG || isTPA) {
5020
5021 aw.onlyNearFeats = TRUE;
5022
5023 } else if (isNZ) {
5024
5025 aw.onlyNearFeats = TRUE;
5026
5027 } else if (format == GENPEPT_FMT && isFarProt ) {
5028
5029 aw.onlyNearFeats = TRUE;
5030
5031 } else if (isGED) {
5032
5033 if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
5034 aw.onlyNearFeats = TRUE;
5035 } else if (featpolicy.forceOnlyNearFeats) {
5036 aw.onlyNearFeats = TRUE;
5037 } else {
5038 aw.nearFeatsSuppress = TRUE;
5039 }
5040
5041 } else {
5042
5043 aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
5044 aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
5045 aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
5046 }
5047
5048 if (isGED || isRefSeq) {
5049 ajp->showFarTransl = TRUE;
5050 }
5051
5052 /* continue setting flags */
5053
5054 aw.showFeatStats = (Boolean) ((custom & SHOW_FEATURE_STATS) != 0);
5055 aw.showRefStats = (Boolean) ((custom & SHOW_REFERENCE_STATS) != 0);
5056 aw.hideFeatures = (Boolean) ((custom & HIDE_FEATURES) != 0);
5057
5058 aw.hideImpFeats = (Boolean) ((custom & HIDE_IMP_FEATS) != 0);
5059 aw.hideVariations = (Boolean) ((custom & HIDE_VARS_AND_REPT_REGNS) != 0);
5060 aw.hideRepeatRegions = (Boolean) ((custom & HIDE_VARS_AND_REPT_REGNS) != 0);
5061 aw.hideSitesBondsRegions = (Boolean) ((custom & HIDE_SITES_BONDS_REGIONS) != 0);
5062 aw.hideCddFeats = (Boolean) ((custom & HIDE_CDD_FEATS) != 0);
5063 aw.hideCdsProdFeats = (Boolean) ((custom & HIDE_CDS_PROD_FEATS) != 0);
5064
5065 ajp->hideEvidence = (Boolean) ((custom & HIDE_EVIDENCE_QUALS) != 0);
5066
5067 aw.hideGeneRIFs = (Boolean) ((custom & PUBLICATION_MASK) == HIDE_GENE_RIFS);
5068 aw.onlyGeneRIFs = (Boolean) ((custom & PUBLICATION_MASK) == ONLY_GENE_RIFS);
5069 aw.onlyReviewPubs = (Boolean) ((custom & PUBLICATION_MASK) == ONLY_REVIEW_PUBS);
5070 aw.newestPubs = (Boolean) ((custom & PUBLICATION_MASK) == NEWEST_PUBS);
5071 aw.oldestPubs = (Boolean) ((custom & PUBLICATION_MASK) == OLDEST_PUBS);
5072 aw.hidePubs = (Boolean) ((custom & PUBLICATION_MASK) == HIDE_ALL_PUBS);
5073
5074 aw.showFtableRefs = (Boolean) ((custom & SHOW_FTABLE_REFS) != 0);
5075 aw.hideSources = (Boolean) ((custom & HIDE_SOURCE_FEATS) != 0);
5076 aw.hideGaps = (Boolean) ((custom & HIDE_GAP_FEATS) != 0);
5077 aw.hideSequence = (Boolean) ((custom & HIDE_SEQUENCE) != 0);
5078
5079 aw.isGPS = FALSE;
5080 if (sep != NULL && IS_Bioseq_set (sep)) {
5081 topbssp = (BioseqSetPtr) sep->data.ptrvalue;
5082 if (topbssp != NULL && topbssp->_class == BioseqseqSet_class_gen_prod_set) {
5083 aw.isGPS = TRUE;
5084 aw.copyGpsCdsUp = (Boolean) ((flags & COPY_GPS_CDS_UP) != 0);
5085 aw.copyGpsGeneDown = (Boolean) ((flags & COPY_GPS_GENE_DOWN) != 0);
5086 }
5087 }
5088 aw.isNCBIGenomes = isNCBIGenomes;
5089 aw.isRefSeq = isRefSeq;
5090
5091 aw.showContigAndSeq = (Boolean) ((flags & SHOW_CONTIG_AND_SEQ) != 0);
5092 /*
5093 if (style != MASTER_STYLE && style != SEGMENT_STYLE) {
5094 aw.showContigAndSeq = FALSE;
5095 }
5096 */
5097
5098 aw.newLocusLine = TRUE;
5099 aw.showBaseCount = FALSE;
5100
5101 if ((Boolean) ((flags & DDBJ_VARIANT_FORMAT) != 0)) {
5102 aw.citSubsFirst = TRUE;
5103 aw.hideGeneFeats = TRUE;
5104 aw.newLocusLine = FALSE;
5105 aw.showBaseCount = TRUE;
5106 ajp->newSourceOrg = FALSE;
5107 }
5108 if (mode == SEQUIN_MODE || mode == DUMP_MODE) {
5109 aw.showBaseCount = TRUE;
5110 }
5111 aw.forcePrimaryBlock = (Boolean) ((flags & FORCE_PRIMARY_BLOCK) != 0);
5112
5113 aw.localFeatCount = VisitFeaturesInSep (sep, NULL, NULL);
5114
5115 aw.sourcePubFuse = sourcePubFuse;
5116
5117 aw.hup = FALSE;
5118 aw.ssp = NULL;
5119
5120 aw.failed = FALSE;
5121
5122 omdp = ObjMgrGetData (entityID);
5123 if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
5124 ssp = (SeqSubmitPtr) omdp->dataptr;
5125 if (ssp != NULL && ssp->datatype == 1) {
5126 aw.ssp = ssp;
5127 sbp = ssp->sub;
5128 if (sbp != NULL) {
5129 aw.hup = sbp->hup;
5130 }
5131 }
5132 }
5133
5134 ajp->hideGI = (Boolean) ((flags & HIDE_GI_NUMBERS) != 0);
5135 ajp->bad_html_fsa = TextFsaNew ();
5136
5137 for (q = 0; bad_html_strings [q] != NULL; q++) {
5138 TextFsaAdd (ajp->bad_html_fsa, bad_html_strings [q]);
5139 }
5140
5141 InitUrlAnchorFSA ();
5142
5143 oldscope = SeqEntrySetScope (sep);
5144
5145 if (stream) {
5146 /* send optional head string */
5147
5148 is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
5149 if (ffhead == NULL && is_html) {
5150 ffhead = defHead;
5151 }
5152 if (ffhead != NULL) {
5153 if (fp != NULL) {
5154 fprintf (fp, "%s", ffhead);
5155 }
5156 }
5157 if (ffwrite != NULL) {
5158 ffwrite (ffhead, userdata, HEAD_BLOCK, entityID, item_type, item_id, 0, 0);
5159 }
5160 if (is_html) {
5161 DoQuickLinkFormat (aw.afp, "<div class=\"sequence\">\n");
5162 }
5163 }
5164
5165 /* if Web Entrez, set awp->sectionMax to decide when Next hyperlink is needed */
5166
5167 if (is_html && mode == ENTREZ_MODE && stream &&
5168 (format == GENBANK_FMT || format == GENPEPT_FMT)) {
5169 /* add dummy node as prev id for first section */
5170 ajp->gihead = ValNodeAddInt (&(ajp->gitail), 0, (Int4) 0);
5171 ajp->gitail = ajp->gihead;
5172 if (bssp != NULL) {
5173 CountOneBioseqSet (SeqMgrGetSeqEntryForData (bssp), &aw);
5174 } else {
5175 CountOneBioseq (bsp, &aw);
5176 }
5177 }
5178
5179 if (bssp != NULL) {
5180
5181 /* handle all components of a pop/phy/mut/eco set */
5182
5183 sep = SeqMgrGetSeqEntryForData (bssp);
5184 DoOneBioseqSet (sep, &aw);
5185
5186 } else {
5187
5188 /* handle single bioseq, which may be segmented or a local part */
5189
5190 DoOneBioseq (bsp, &aw);
5191 }
5192
5193 if (stream) {
5194 if (is_html) {
5195 DoQuickLinkFormat (aw.afp, "</div>");
5196 }
5197
5198 /* send optional tail string */
5199
5200 if (fftail == NULL && is_html) {
5201 fftail = defTail;
5202 }
5203 if (fftail != NULL) {
5204 if (fp != NULL) {
5205 fprintf (fp, "%s", fftail);
5206 }
5207 }
5208 if (ffwrite != NULL) {
5209 ffwrite (fftail, userdata, TAIL_BLOCK, entityID, item_type, item_id, 0, 0);
5210 }
5211 }
5212
5213 SeqEntrySetScope (oldscope);
5214
5215 /* check for failure to populate anything */
5216
5217 numSections = ValNodeLen (aw.sectionList);
5218 ajp->ajp.numSections = numSections;
5219
5220 if (numSections == 0) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5221
5222 /* allocate section array for this job - needed for memory cleanup even if streamed */
5223
5224 sectionArray = (Asn2gbSectPtr PNTR) MemNew (sizeof (Asn2gbSectPtr) * (numSections + 1));
5225 ajp->ajp.sectionArray = sectionArray;
5226
5227 if (sectionArray == NULL) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5228
5229 /* fill in section and paragraph arrays */
5230
5231 numParagraphs = 0;
5232 for (vnp = aw.sectionList, i = 0; vnp != NULL && i < numSections; vnp = vnp->next, i++) {
5233 asp = (Asn2gbSectPtr) vnp->data.ptrvalue;
5234 sectionArray [i] = asp;
5235 if (asp != NULL) {
5236 numParagraphs += asp->numBlocks;
5237 }
5238 }
5239
5240 /* allocate paragraph array pointing to all blocks in all sections */
5241
5242 ajp->ajp.numParagraphs = numParagraphs;
5243 if (numParagraphs == 0) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5244
5245 paragraphArray = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numParagraphs + 1));
5246 ajp->ajp.paragraphArray = paragraphArray;
5247
5248 paragraphByIDs = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numParagraphs + 1));
5249 ajp->ajp.paragraphByIDs = paragraphByIDs;
5250
5251 if (paragraphArray == NULL || paragraphByIDs == NULL) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5252
5253 k = 0;
5254 for (i = 0; i < numSections; i++) {
5255 asp = sectionArray [i];
5256 if (asp != NULL) {
5257
5258 numBlocks = asp->numBlocks;
5259 blockArray = asp->blockArray;
5260 if (blockArray != NULL) {
5261
5262 for (j = 0; j < numBlocks; j++) {
5263 bbp = blockArray [j];
5264
5265 paragraphArray [k] = bbp;
5266 paragraphByIDs [k] = bbp;
5267 bbp->paragraph = k;
5268 k++;
5269 }
5270 }
5271 }
5272 }
5273
5274 /* sort paragraphByIDs array by entityID/itemtype/itemID/paragraph */
5275
5276 StableMergeSort (paragraphByIDs, (size_t) numParagraphs, sizeof (BaseBlockPtr), SortParagraphByIDProc);
5277
5278 /* free sectionList, but leave data, now pointed to by sectionArray elements */
5279
5280 ValNodeFree (aw.sectionList);
5281
5282 /* check for failure to to make legal flatfile */
5283
5284 if (ajp->flags.needAtLeastOneRef && aw.failed) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
5285
5286 return (Asn2gbJobPtr) ajp;
5287 }
5288
/*
 * asn2gnbk_setup
 *
 * Convenience entry point for creating a flatfile job.  All ten arguments
 * are forwarded unchanged to asn2gnbk_setup_ex; the four additional
 * trailing parameters of that function are supplied as FALSE, NULL, NULL
 * and NULL.  The return value is whatever asn2gnbk_setup_ex returns.
 */
NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
  BioseqPtr bsp,
  BioseqSetPtr bssp,
  SeqLocPtr slp,
  FmtType format,
  ModType mode,
  StlType style,
  FlgType flags,
  LckType locks,
  CstType custom,
  XtraPtr extra
)

{
  /* extended variant does all the work; no extra options are requested here */
  return asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
                            flags, locks, custom, extra,
                            FALSE, NULL, NULL, NULL);
}
5307
5308 /* ********************************************************************** */
5309
5310 /* format functions allocate printable string for given paragraph */
5311
DefaultFormatBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)5312 NLM_EXTERN CharPtr DefaultFormatBlock (
5313 Asn2gbFormatPtr afp,
5314 BaseBlockPtr bbp
5315 )
5316
5317 {
5318 if (afp == NULL || bbp == NULL) return NULL;
5319
5320 /* default format function assumes string pre-allocated by add block function */
5321
5322 return StringSaveNoNull (bbp->string);
5323 }
5324
/* common signature of every block formatter: format context and one block in, CharPtr out */
typedef CharPtr (*FormatProc) (Asn2gbFormatPtr afp, BaseBlockPtr bbp);

/*
 * Formatter table, 30 slots.  Slot positions match asn2gnbk_fmt_labels
 * (the label for each slot is noted alongside); slots 0, 1 and 29 are NULL.
 * NOTE(review): presumably indexed by BlockType, as the label table is in
 * asn2gnbk_block_label - confirm against the enum before reordering.
 */
static FormatProc asn2gnbk_fmt_functions [30] = {
  NULL,                   /*  0 */
  NULL,                   /*  1 */
  DefaultFormatBlock,     /*  2 locus */
  DefaultFormatBlock,     /*  3 defline */
  DefaultFormatBlock,     /*  4 accession */
  DefaultFormatBlock,     /*  5 version */
  DefaultFormatBlock,     /*  6 project */
  DefaultFormatBlock,     /*  7 pid */
  DefaultFormatBlock,     /*  8 dbsource */
  DefaultFormatBlock,     /*  9 date */
  DefaultFormatBlock,     /* 10 keywords */
  DefaultFormatBlock,     /* 11 segment */
  FormatSourceBlock,      /* 12 source */
  FormatOrganismBlock,    /* 13 organism */
  DefaultFormatBlock,     /* 14 refstats */
  FormatReferenceBlock,   /* 15 reference */
  DefaultFormatBlock,     /* 16 primary */
  FormatCommentBlock,     /* 17 comment */
  DefaultFormatBlock,     /* 18 featstats */
  FormatFeatHeaderBlock,  /* 19 featheader */
  FormatSourceFeatBlock,  /* 20 sourcefeat */
  FormatFeatureBlock,     /* 21 feature */
  FormatBasecountBlock,   /* 22 basecount */
  DefaultFormatBlock,     /* 23 origin */
  FormatSequenceBlock,    /* 24 sequence */
  FormatContigBlock,      /* 25 contig */
  DefaultFormatBlock,     /* 26 wgs */
  DefaultFormatBlock,     /* 27 genome */
  FormatSlashBlock,       /* 28 slash */
  NULL                    /* 29 */
};
5359
/*
 * Printable name for each block slot, 30 entries.  asn2gnbk_block_label
 * indexes this table directly with a BlockType value and only exposes the
 * range LOCUS_BLOCK..SLASH_BLOCK; slots 0, 1 and 29 are NULL.
 * NOTE(review): presumably LOCUS_BLOCK is 2 and SLASH_BLOCK is 28 so that
 * the range covers exactly the non-NULL names - confirm against the enum.
 */
static CharPtr asn2gnbk_fmt_labels [30] = {
  NULL,          /*  0 */
  NULL,          /*  1 */
  "locus",       /*  2 */
  "defline",     /*  3 */
  "accession",   /*  4 */
  "version",     /*  5 */
  "project",     /*  6 */
  "pid",         /*  7 */
  "dbsource",    /*  8 */
  "date",        /*  9 */
  "keywords",    /* 10 */
  "segment",     /* 11 */
  "source",      /* 12 */
  "organism",    /* 13 */
  "refstats",    /* 14 */
  "reference",   /* 15 */
  "primary",     /* 16 */
  "comment",     /* 17 */
  "featstats",   /* 18 */
  "featheader",  /* 19 */
  "sourcefeat",  /* 20 */
  "feature",     /* 21 */
  "basecount",   /* 22 */
  "origin",      /* 23 */
  "sequence",    /* 24 */
  "contig",      /* 25 */
  "wgs",         /* 26 */
  "genome",      /* 27 */
  "slash",       /* 28 */
  NULL           /* 29 */
};
5392
asn2gnbk_block_label(BlockType blocktype)5393 NLM_EXTERN CharPtr asn2gnbk_block_label (
5394 BlockType blocktype
5395 )
5396
5397 {
5398 if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return NULL;
5399 return asn2gnbk_fmt_labels [(int) blocktype];
5400 }
5401
PrintFtableIntervals(ValNodePtr PNTR head,BioseqPtr target,SeqLocPtr location,CharPtr label,Boolean relaxed)5402 NLM_EXTERN void PrintFtableIntervals (
5403 ValNodePtr PNTR head,
5404 BioseqPtr target,
5405 SeqLocPtr location,
5406 CharPtr label,
5407 Boolean relaxed
5408 )
5409
5410 {
5411 IntFuzzPtr ifp;
5412 Boolean is_circular;
5413 Boolean partial5;
5414 Boolean partial3;
5415 SeqLocPtr slp;
5416 SeqPntPtr spp;
5417 Int4 start;
5418 Int4 stop;
5419 Char str [64];
5420 Char str1 [32];
5421 Char str2 [32];
5422 SeqLocPtr tmp;
5423
5424 if (head == NULL || target == NULL || location == NULL || label == NULL) return;
5425
5426 if (location->choice == SEQLOC_PNT) {
5427 spp = (SeqPntPtr) location->data.ptrvalue;
5428 if (spp != NULL) {
5429 ifp = spp->fuzz;
5430 if (ifp != NULL && ifp->choice == 4 && ifp->a == 3) {
5431 sprintf (str, "%ld^\t%ld\t%s\n", (long) (spp->point + 1),
5432 (long) (spp->point + 2), label);
5433 ValNodeCopyStr (head, 0, str);
5434 return;
5435 }
5436 }
5437 }
5438
5439 slp = SeqLocFindNext (location, NULL);
5440 if (slp == NULL) return;
5441
5442 is_circular = (Boolean) (target->topology == TOPOLOGY_CIRCULAR);
5443
5444 start = GetOffsetInBioseqEx (slp, target, SEQLOC_START, is_circular, relaxed) + 1;
5445 stop = GetOffsetInBioseqEx (slp, target, SEQLOC_STOP, is_circular, relaxed) + 1;
5446 CheckSeqLocForPartial (slp, &partial5, &partial3);
5447 if (start == 0 || stop == 0) {
5448 tmp = TrimLocInSegment (target, slp, &partial5, &partial3);
5449 start = GetOffsetInBioseqEx (tmp, target, SEQLOC_START, is_circular, relaxed) + 1;
5450 stop = GetOffsetInBioseqEx (tmp, target, SEQLOC_STOP, is_circular, relaxed) + 1;
5451 SeqLocFree (tmp);
5452 }
5453 if (partial5) {
5454 sprintf (str1, "<%ld", (long) start);
5455 } else {
5456 sprintf (str1, "%ld", (long) start);
5457 }
5458 if (partial3) {
5459 sprintf (str2, ">%ld", (long) stop);
5460 } else {
5461 sprintf (str2, "%ld", (long) stop);
5462 }
5463 sprintf (str, "%s\t%s\t%s\n", str1, str2, label);
5464 ValNodeCopyStr (head, 0, str);
5465
5466 while ((slp = SeqLocFindNext (location, slp)) != NULL) {
5467 start = GetOffsetInBioseqEx (slp, target, SEQLOC_START, is_circular, relaxed) + 1;
5468 stop = GetOffsetInBioseqEx (slp, target, SEQLOC_STOP, is_circular, relaxed) + 1;
5469 CheckSeqLocForPartial (slp, &partial5, &partial3);
5470 if (start == 0 || stop == 0) {
5471 tmp = TrimLocInSegment (target, slp, &partial5, &partial3);
5472 start = GetOffsetInBioseqEx (tmp, target, SEQLOC_START, is_circular, relaxed) + 1;
5473 stop = GetOffsetInBioseqEx (tmp, target, SEQLOC_STOP, is_circular, relaxed) + 1;
5474 SeqLocFree (tmp);
5475 }
5476 if (partial5) {
5477 sprintf (str1, "<%ld", (long) start);
5478 } else {
5479 sprintf (str1, "%ld", (long) start);
5480 }
5481 if (partial3) {
5482 sprintf (str2, ">%ld", (long) stop);
5483 } else {
5484 sprintf (str2, "%ld", (long) stop);
5485 }
5486 if (start != 0 && stop != 0) {
5487 sprintf (str, "%s\t%s\n", str1, str2);
5488 ValNodeCopyStr (head, 0, str);
5489 }
5490 }
5491
5492 if (LocationHasNullsBetween (location)) {
5493 ValNodeCopyStr (head, 0, "\t\t\torder\n");
5494 }
5495 }
5496
/*
 * Feature-table qualifier names written for Gene Ontology terms.
 * PrintGeneOntologyUserFld indexes this array with the position at which
 * the user field label matched in goQualType (defined elsewhere in the
 * file), so the two arrays must be kept in the same order.
 */
static CharPtr goQualList [] = {
  "", "go_process", "go_component", "go_function", NULL
};
5500
/*
 * PrintGeneOntologyUserFld
 *
 * Callback that turns one Gene Ontology user field into feature-table
 * qualifier lines.  userdata is the ValNodePtr PNTR list that receives
 * the lines.
 *
 * ufp must have choice 11 and a label matching an entry of goQualType;
 * the index of that match selects the qualifier name from goQualList.
 * Each child of ufp (also choice 11) is one GO term whose own children
 * are matched against goFieldType:
 *   index 1 -> text string, index 2 -> GO id (string or integer),
 *   index 3 -> PubMed id,   index 4 -> GO reference, index 5 -> evidence.
 *
 * One line is written per term:
 *   "\t\t\t<qual>\t<text>|<goid>|<pmid or goref>|<evidence>\n"
 * with trailing '|' characters removed before the newline.
 */
static void PrintGeneOntologyUserFld (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  UserFieldPtr     entry;
  CharPtr          evidence;
  UserFieldPtr     fld;
  Int2             fldidx;
  CharPtr          goid;
  Char             goidbuf [32];
  CharPtr          goref;
  ObjectIdPtr      lbl;
  CharPtr          line;
  ValNodePtr PNTR  list;
  Int4             pmid;
  Char             pmidbuf [16];
  Int2             qualidx;
  size_t           size;
  CharPtr          textstr;
  size_t           used;

  if (ufp == NULL || ufp->choice != 11) return;
  lbl = ufp->label;
  if (lbl == NULL) return;

  /* find which kind of GO qualifier this field represents */

  qualidx = 0;
  while (goQualType [qualidx] != NULL &&
         StringICmp (lbl->str, goQualType [qualidx]) != 0) {
    qualidx++;
  }
  if (goQualType [qualidx] == NULL) return;

  list = (ValNodePtr PNTR) userdata;

  /* each child is one GO term; stop at the first child that is not a field list */

  for (entry = (UserFieldPtr) ufp->data.ptrvalue;
       entry != NULL && entry->choice == 11;
       entry = entry->next) {

    textstr = NULL;
    goid = NULL;
    pmid = 0;
    goref = NULL;
    evidence = NULL;

    /* collect the recognized sub-fields of this term */

    for (fld = (UserFieldPtr) entry->data.ptrvalue; fld != NULL; fld = fld->next) {
      lbl = fld->label;
      if (lbl == NULL) continue;

      fldidx = 0;
      while (goFieldType [fldidx] != NULL &&
             StringICmp (lbl->str, goFieldType [fldidx]) != 0) {
        fldidx++;
      }
      if (goFieldType [fldidx] == NULL) continue;

      if (fldidx == 1) {
        if (fld->choice == 1) {
          textstr = (CharPtr) fld->data.ptrvalue;
        }
      } else if (fldidx == 2) {
        if (fld->choice == 1) {
          goid = (CharPtr) fld->data.ptrvalue;
        } else if (fld->choice == 2) {
          /* integer GO id is rendered as decimal text */
          sprintf (goidbuf, "%ld", (long) (Int4) fld->data.intvalue);
          goid = (CharPtr) goidbuf;
        }
      } else if (fldidx == 3) {
        if (fld->choice == 2) {
          pmid = (Int4) fld->data.intvalue;
        }
      } else if (fldidx == 4) {
        if (fld->choice == 1) {
          goref = (CharPtr) fld->data.ptrvalue;
        }
      } else if (fldidx == 5) {
        if (fld->choice == 1) {
          evidence = (CharPtr) fld->data.ptrvalue;
        }
      }
    }

    /* 40 extra bytes cover tabs, qualifier name, separators, pmid text and terminator */

    size = StringLen (textstr) + StringLen (goid) + StringLen (goref) + StringLen (evidence) + 40;
    line = (CharPtr) MemNew (size);
    if (line == NULL) return;

    StringCpy (line, "\t\t\t");
    StringCat (line, goQualList [qualidx]);
    StringCat (line, "\t");
    StringCat (line, textstr);

    /* GO id column */
    StringCat (line, "|");
    if (StringDoesHaveText (goid)) {
      StringCat (line, goid);
    }

    /* reference column - PubMed id wins over GO reference */
    if (pmid != 0) {
      sprintf (pmidbuf, "|%ld", (long) pmid);
      StringCat (line, pmidbuf);
    } else {
      StringCat (line, "|");
      if (StringDoesHaveText (goref)) {
        StringCat (line, goref);
      }
    }

    /* evidence column */
    if (StringDoesHaveText (evidence)) {
      StringCat (line, "|");
      StringCat (line, evidence);
    }

    /* drop separators left dangling by empty trailing columns */
    used = StringLen (line);
    while (used > 0 && line [used - 1] == '|') {
      used--;
      line [used] = '\0';
    }
    StringCat (line, "\n");

    ValNodeCopyStr (list, 0, line);
    MemFree (line);
  }
}
5620
/*
 * PrintNomenclatureUserObject
 *
 * Writes one feature-table "nomenclature" qualifier line for a user
 * object whose type string is exactly "OfficialNomenclature" (this check
 * is case-sensitive).  userdata is the ValNodePtr PNTR list that receives
 * the line.
 *
 * String fields (choice 1) labelled Symbol, Name, DataSource and Status
 * are picked up by case-insensitive label match; values without text are
 * ignored, and when a label repeats the last value with text wins.  A
 * missing Status becomes "Unclassified".  Nothing is written when there
 * is no Symbol.
 *
 * Line format: "\t\t\tnomenclature\t<status>|<symbol>|<name>|<datasource>\n"
 */
static void PrintNomenclatureUserObject (
  UserObjectPtr uop,
  Pointer userdata
)

{
  CharPtr          datasource = NULL;
  UserFieldPtr     fld;
  ObjectIdPtr      id;
  CharPtr          line;
  size_t           linelen;
  ValNodePtr PNTR  list;
  CharPtr          name = NULL;
  CharPtr          status = NULL;
  CharPtr          symbol = NULL;
  CharPtr          val;

  if (uop == NULL) return;
  id = uop->type;
  if (id == NULL) return;
  if (StringCmp (id->str, "OfficialNomenclature") != 0) return;

  for (fld = uop->data; fld != NULL; fld = fld->next) {
    id = fld->label;
    if (id == NULL || id->str == NULL) continue;

    /* only string-valued fields that actually contain text are of interest */
    if (fld->choice != 1) continue;
    val = (CharPtr) fld->data.ptrvalue;
    if (! StringDoesHaveText (val)) continue;

    if (StringICmp (id->str, "Symbol") == 0) {
      symbol = val;
    } else if (StringICmp (id->str, "Name") == 0) {
      name = val;
    } else if (StringICmp (id->str, "DataSource") == 0) {
      datasource = val;
    } else if (StringICmp (id->str, "Status") == 0) {
      status = val;
    }
  }

  if (status == NULL) {
    status = "Unclassified";
  }

  /* a symbol is mandatory for the qualifier */

  if (StringHasNoText (symbol)) return;

  linelen = StringLen (datasource) + StringLen (status) + StringLen (name) + StringLen (symbol) + 80;
  line = (CharPtr) MemNew (linelen);
  if (line == NULL) return;

  StringCpy (line, "\t\t\tnomenclature\t");
  StringCat (line, status);
  StringCat (line, "|");
  StringCat (line, symbol);
  StringCat (line, "|");
  if (StringDoesHaveText (name)) {
    StringCat (line, name);
  }
  StringCat (line, "|");
  if (StringDoesHaveText (datasource)) {
    StringCat (line, datasource);
  }
  StringCat (line, "\n");

  list = (ValNodePtr PNTR) userdata;
  ValNodeCopyStr (list, 0, line);
  MemFree (line);
}
5700
/*
 * PrintFTUserObj
 *
 * Dispatches a user object to the matching feature-table printer based
 * on a case-insensitive comparison of its type string:
 *   "GeneOntology"         -> PrintGeneOntologyUserFld on each user field
 *   "OfficialNomenclature" -> PrintNomenclatureUserObject
 * Any other object, or one without a type, is ignored.  userdata is
 * passed through unchanged to the printer.
 */
static void PrintFTUserObj (
  UserObjectPtr uop,
  Pointer userdata
)

{
  CharPtr  kind;

  if (uop == NULL || uop->type == NULL) return;
  kind = uop->type->str;

  if (StringICmp (kind, "GeneOntology") == 0) {
    VisitUserFieldsInUop (uop, userdata, PrintGeneOntologyUserFld);
    return;
  }

  if (StringICmp (kind, "OfficialNomenclature") == 0) {
    PrintNomenclatureUserObject (uop, userdata);
  }
}
5718
PrintFTCodeBreakEx(ValNodePtr PNTR head,CodeBreakPtr cbp,BioseqPtr target,Boolean masterStyle,Boolean relaxed,SeqLocPtr subloc)5719 NLM_EXTERN void PrintFTCodeBreakEx (
5720 ValNodePtr PNTR head,
5721 CodeBreakPtr cbp,
5722 BioseqPtr target,
5723 Boolean masterStyle,
5724 Boolean relaxed,
5725 SeqLocPtr subloc
5726 )
5727
5728 {
5729 Char buf [128];
5730 Choice cbaa;
5731 IntAsn2gbJob iaj;
5732 SeqLocPtr newloc;
5733 CharPtr ptr;
5734 Uint1 residue;
5735 SeqCodeTablePtr sctp;
5736 Uint1 seqcode;
5737 SeqIdPtr sip;
5738 SeqLocPtr slp;
5739 CharPtr str;
5740
5741 seqcode = 0;
5742 sctp = NULL;
5743 cbaa = cbp->aa;
5744 switch (cbaa.choice) {
5745 case 1 :
5746 seqcode = Seq_code_ncbieaa;
5747 break;
5748 case 2 :
5749 seqcode = Seq_code_ncbi8aa;
5750 break;
5751 case 3 :
5752 seqcode = Seq_code_ncbistdaa;
5753 break;
5754 default :
5755 break;
5756 }
5757 if (seqcode == 0) return;
5758 sctp = SeqCodeTableFind (seqcode);
5759 if (sctp == NULL) return;
5760
5761 MemSet ((Pointer) &iaj, 0, sizeof (IntAsn2gbJob));
5762 iaj.flags.iupacaaOnly = FALSE;
5763 iaj.relModeError = FALSE;
5764
5765 slp = cbp->loc;
5766 if (slp != NULL) {
5767 str = NULL;
5768 if (subloc != NULL) {
5769 sip = SeqIdParse ("lcl|dummy");
5770 newloc = SeqLocReMapEx (sip, subloc, slp, 0, FALSE, masterStyle, relaxed);
5771
5772 SeqIdFree (sip);
5773 if (newloc != NULL) {
5774 A2GBSeqLocReplaceID (newloc, subloc);
5775 str = FFFlatLoc (&iaj, target, newloc, masterStyle, FALSE);
5776 SeqLocFree (newloc);
5777 }
5778 } else {
5779 str = FFFlatLoc (&iaj, target, slp, masterStyle, FALSE);
5780 }
5781 if (str != NULL) {
5782 residue = cbaa.value.intvalue;
5783 ptr = Get3LetterSymbol (&iaj, seqcode, sctp, residue);
5784 if (ptr == NULL) {
5785 ptr = "OTHER";
5786 }
5787 sprintf (buf, "\t\t\ttransl_except\t(pos:%s,aa:%s)\n", str, ptr);
5788 ValNodeCopyStr (head, 0, buf);
5789 MemFree (str);
5790 }
5791 }
5792 }
5793
PrintFTCodeBreak(ValNodePtr PNTR head,CodeBreakPtr cbp,BioseqPtr target)5794 NLM_EXTERN void PrintFTCodeBreak (
5795 ValNodePtr PNTR head,
5796 CodeBreakPtr cbp,
5797 BioseqPtr target
5798 )
5799
5800 {
5801 PrintFTCodeBreakEx (head, cbp, target, FALSE, FALSE, NULL);
5802 }
5803
/*
 * Writes the identifier(s) used for protein_id / transcript_id feature
 * table qualifiers into buf (at most buflen bytes, via StringNCpy_0).
 *
 * The id chain starting at sip is scanned and the following are written,
 * each in PRINTID_FASTA_SHORT form:
 *   - the accession-type id (GenBank, EMBL, PIR, SwissProt, DDBJ, PRF,
 *     TPG, TPE, TPD, other, gpipe), if present
 *   - the general id, if present and not rejected by IsSkippableDbtag,
 *     separated from a preceding accession by "|"
 *   - the local id, only when nothing was written so far and
 *     ajp->flags.suppressLocalID is not set
 *   - the gi, only when nothing was written so far and giOK is TRUE
 * When several ids of one kind occur in the chain the last one wins.
 * Patent and PDB ids are not written.
 *
 * Returns TRUE when buf ends up holding text, FALSE otherwise (including
 * when sip, buf or ajp is NULL).
 */
static Boolean SeqIdWriteForTable (SeqIdPtr sip, CharPtr buf, size_t buflen, IntAsn2gbJobPtr ajp, Boolean giOK)

{
  SeqIdPtr  accn = NULL, local = NULL, general = NULL, gi = NULL;
  DbtagPtr  dbt;
  /* str must hold two ids of up to sizeof (id) - 1 characters plus "|" */
  Char      id [128], str [264];
  Int2      numids;

  if (sip == NULL || buf == NULL || ajp == NULL) return FALSE;

  while (sip != NULL) {
    switch (sip->choice) {
      case SEQID_LOCAL :
        local = sip;
        break;
      case SEQID_GENBANK :
      case SEQID_EMBL :
      case SEQID_PIR :
      case SEQID_SWISSPROT :
      case SEQID_DDBJ :
      case SEQID_PRF :
      case SEQID_TPG :
      case SEQID_TPE :
      case SEQID_TPD :
      case SEQID_OTHER :
      case SEQID_GPIPE :
        accn = sip;
        break;
      case SEQID_GENERAL :
        dbt = (DbtagPtr) sip->data.ptrvalue;
        if (dbt != NULL && ! IsSkippableDbtag(dbt)) {
          general = sip;
        }
        break;
      case SEQID_GI :
        gi = sip;
        break;
      case SEQID_PATENT :
      case SEQID_PDB :
        /* recognized but never written to the table */
        break;
      default :
        break;
    }
    sip = sip->next;
  }

  str [0] = '\0';
  numids = 0;

  if (accn != NULL &&
      SeqIdWrite (accn, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
    StringCat (str, id);
    numids++;
  }

  if (general != NULL &&
      SeqIdWrite (general, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
    if (numids > 0) {
      StringCat (str, "|");
    }
    StringCat (str, id);
    numids++;
  }

  if (numids == 0 && local != NULL && (! ajp->flags.suppressLocalID) &&
      SeqIdWrite (local, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
    StringCat (str, id);
    numids++;
  }

  /* last resort: the gi itself (previously the accession pointer was
     passed here by mistake, so the gi fallback never produced output) */
  if (numids == 0 && gi != NULL && giOK &&
      SeqIdWrite (gi, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
    StringCat (str, id);
  }

  StringNCpy_0 (buf, str, buflen);
  if (StringHasNoText (buf)) return FALSE;

  return TRUE;
}
5899
PrintBioSourceFtableEntry(ValNodePtr PNTR head,BioSourcePtr biop)5900 static void PrintBioSourceFtableEntry (
5901 ValNodePtr PNTR head,
5902 BioSourcePtr biop
5903 )
5904 {
5905 OrgModPtr mod;
5906 SubSourcePtr ssp;
5907 Char str [256];
5908
5909 if (head == NULL || biop == NULL) return;
5910
5911 if (biop->org != NULL && ! StringHasNoText (biop->org->taxname))
5912 {
5913 sprintf (str, "\t\t\torganism\t%s\n", biop->org->taxname);
5914 ValNodeCopyStr (head, 0, str);
5915 }
5916
5917 /* add OrgMods */
5918 if (biop->org != NULL && biop->org->orgname != NULL)
5919 {
5920 for (mod = biop->org->orgname->mod;
5921 mod != NULL;
5922 mod = mod->next)
5923 {
5924 switch (mod->subtype)
5925 {
5926 case ORGMOD_strain :
5927 sprintf (str, "\t\t\tstrain\t");
5928 break;
5929 case ORGMOD_substrain :
5930 sprintf (str, "\t\t\tsubstrain\t");
5931 break;
5932 case ORGMOD_type :
5933 sprintf (str, "\t\t\ttype\t");
5934 break;
5935 case ORGMOD_subtype :
5936 sprintf (str, "\t\t\tsubtype\t");
5937 break;
5938 case ORGMOD_variety :
5939 sprintf (str, "\t\t\tvariety\t");
5940 break;
5941 case ORGMOD_serotype :
5942 sprintf (str, "\t\t\tserotype\t");
5943 break;
5944 case ORGMOD_serogroup :
5945 sprintf (str, "\t\t\tserogroup\t");
5946 break;
5947 case ORGMOD_serovar :
5948 sprintf (str, "\t\t\tserovar\t");
5949 break;
5950 case ORGMOD_cultivar :
5951 sprintf (str, "\t\t\tcultivar\t");
5952 break;
5953 case ORGMOD_pathovar :
5954 sprintf (str, "\t\t\tpathovar\t");
5955 break;
5956 case ORGMOD_chemovar :
5957 sprintf (str, "\t\t\tchemovar\t");
5958 break;
5959 case ORGMOD_biovar :
5960 sprintf (str, "\t\t\tbiovar\t");
5961 break;
5962 case ORGMOD_biotype :
5963 sprintf (str, "\t\t\tbiotype\t");
5964 break;
5965 case ORGMOD_group :
5966 sprintf (str, "\t\t\tgroup\t");
5967 break;
5968 case ORGMOD_subgroup :
5969 sprintf (str, "\t\t\tsubgroup\t");
5970 break;
5971 case ORGMOD_isolate :
5972 sprintf (str, "\t\t\tisolate\t");
5973 break;
5974 case ORGMOD_common :
5975 sprintf (str, "\t\t\tcommon\t");
5976 break;
5977 case ORGMOD_acronym :
5978 sprintf (str, "\t\t\tacronym\t");
5979 break;
5980 case ORGMOD_dosage :
5981 sprintf (str, "\t\t\tdosage\t");
5982 break;
5983 case ORGMOD_nat_host :
5984 sprintf (str, "\t\t\tnat_host\t");
5985 break;
5986 case ORGMOD_sub_species :
5987 sprintf (str, "\t\t\tsub_species\t");
5988 break;
5989 case ORGMOD_specimen_voucher :
5990 sprintf (str, "\t\t\tspecimen_voucher\t");
5991 break;
5992 case ORGMOD_authority :
5993 sprintf (str, "\t\t\tauthority\t");
5994 break;
5995 case ORGMOD_forma :
5996 sprintf (str, "\t\t\tforma\t");
5997 break;
5998 case ORGMOD_forma_specialis :
5999 sprintf (str, "\t\t\tforma_specialis\t");
6000 break;
6001 case ORGMOD_ecotype :
6002 sprintf (str, "\t\t\tecotype\t");
6003 break;
6004 case ORGMOD_synonym :
6005 sprintf (str, "\t\t\tsynonym\t");
6006 break;
6007 case ORGMOD_anamorph :
6008 sprintf (str, "\t\t\tanamorph\t");
6009 break;
6010 case ORGMOD_teleomorph :
6011 sprintf (str, "\t\t\tteleomorph\t");
6012 break;
6013 case ORGMOD_breed :
6014 sprintf (str, "\t\t\tbreed\t");
6015 break;
6016 case ORGMOD_gb_acronym :
6017 sprintf (str, "\t\t\tgb_acronym\t");
6018 break;
6019 case ORGMOD_gb_anamorph :
6020 sprintf (str, "\t\t\tgb_anamorph\t");
6021 break;
6022 case ORGMOD_culture_collection :
6023 sprintf (str, "\t\t\tculture_collection\t");
6024 break;
6025 case ORGMOD_bio_material :
6026 sprintf (str, "\t\t\tbio_material\t");
6027 break;
6028 case ORGMOD_metagenome_source :
6029 sprintf (str, "\t\t\tmetagenome_source\t");
6030 break;
6031 case ORGMOD_type_material :
6032 sprintf (str, "\t\t\ttype_material\t");
6033 break;
6034 case ORGMOD_old_lineage :
6035 sprintf (str, "\t\t\told_lineage\t");
6036 break;
6037 case ORGMOD_old_name :
6038 sprintf (str, "\t\t\told_name\t");
6039 break;
6040 case ORGMOD_other :
6041 sprintf (str, "\t\t\tnote\t");
6042 break;
6043 default :
6044 str [0] = 0;
6045 }
6046 if ( str [0] == 0) continue;
6047 if (! StringHasNoText (mod->subname))
6048 {
6049 StringNCat (str, mod->subname, sizeof (str) - StringLen (str) - 2);
6050 str [sizeof (str) - 2] = 0;
6051 }
6052 StringCat (str, "\n");
6053 ValNodeCopyStr (head, 0, str);
6054 }
6055 }
6056
6057 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
6058 {
6059 switch (ssp->subtype)
6060 {
6061 case SUBSRC_chromosome :
6062 sprintf (str, "\t\t\tchromosome\t");
6063 break;
6064 case SUBSRC_map :
6065 sprintf (str, "\t\t\tmap\t");
6066 break;
6067 case SUBSRC_clone :
6068 sprintf (str, "\t\t\tclone\t");
6069 break;
6070 case SUBSRC_haplotype :
6071 sprintf (str, "\t\t\thaplotype\t");
6072 break;
6073 case SUBSRC_genotype :
6074 sprintf (str, "\t\t\tgenotype\t");
6075 break;
6076 case SUBSRC_sex :
6077 sprintf (str, "\t\t\tsex\t");
6078 break;
6079 case SUBSRC_cell_line :
6080 sprintf (str, "\t\t\tcell_line\t");
6081 break;
6082 case SUBSRC_cell_type :
6083 sprintf (str, "\t\t\tcell_type\t");
6084 break;
6085 case SUBSRC_tissue_type :
6086 sprintf (str, "\t\t\ttissue_type\t");
6087 break;
6088 case SUBSRC_clone_lib :
6089 sprintf (str, "\t\t\tclone_lib\t");
6090 break;
6091 case SUBSRC_dev_stage :
6092 sprintf (str, "\t\t\tdev_stage\t");
6093 break;
6094 case SUBSRC_frequency :
6095 sprintf (str, "\t\t\tfrequency\t");
6096 break;
6097 case SUBSRC_germline :
6098 sprintf (str, "\t\t\tgermline\t");
6099 break;
6100 case SUBSRC_rearranged :
6101 sprintf (str, "\t\t\trearranged\t");
6102 break;
6103 case SUBSRC_lab_host :
6104 sprintf (str, "\t\t\tlab_host\t");
6105 break;
6106 case SUBSRC_pop_variant :
6107 sprintf (str, "\t\t\tpop_variant\t");
6108 break;
6109 case SUBSRC_tissue_lib :
6110 sprintf (str, "\t\t\ttissue_lib\t");
6111 break;
6112 case SUBSRC_plasmid_name :
6113 sprintf (str, "\t\t\tplasmid_name\t");
6114 break;
6115 case SUBSRC_transposon_name :
6116 sprintf (str, "\t\t\ttransposon_name\t");
6117 break;
6118 case SUBSRC_insertion_seq_name :
6119 sprintf (str, "\t\t\tinsertion_seq_name\t");
6120 break;
6121 case SUBSRC_plastid_name :
6122 sprintf (str, "\t\t\tplastid_name\t");
6123 break;
6124 case SUBSRC_country :
6125 sprintf (str, "\t\t\tcountry\t");
6126 break;
6127 case SUBSRC_segment :
6128 sprintf (str, "\t\t\tsegment\t");
6129 break;
6130 case SUBSRC_endogenous_virus_name :
6131 sprintf (str, "\t\t\tendogenous_virus_name\t");
6132 break;
6133 case SUBSRC_transgenic :
6134 sprintf (str, "\t\t\ttransgenic\t");
6135 break;
6136 case SUBSRC_environmental_sample :
6137 sprintf (str, "\t\t\tenvironmental_sample\t");
6138 break;
6139 case SUBSRC_isolation_source :
6140 sprintf (str, "\t\t\tisolation_source\t");
6141 break;
6142 case SUBSRC_lat_lon :
6143 sprintf (str, "\t\t\tlat_lon\t");
6144 break;
6145 case SUBSRC_collection_date :
6146 sprintf (str, "\t\t\tcollection_date\t");
6147 break;
6148 case SUBSRC_collected_by :
6149 sprintf (str, "\t\t\tcollected_by\t");
6150 break;
6151 case SUBSRC_identified_by :
6152 sprintf (str, "\t\t\tidentified_by\t");
6153 break;
6154 case SUBSRC_fwd_primer_seq :
6155 sprintf (str, "\t\t\tfwd_pcr_primer_seq\t");
6156 break;
6157 case SUBSRC_rev_primer_seq :
6158 sprintf (str, "\t\t\trev_pcr_primer_seq\t");
6159 break;
6160 case SUBSRC_fwd_primer_name :
6161 sprintf (str, "\t\t\tfwd_pcr_primer_name\t");
6162 break;
6163 case SUBSRC_rev_primer_name :
6164 sprintf (str, "\t\t\trev_pcr_primer_name\t");
6165 break;
6166 case SUBSRC_metagenomic :
6167 sprintf (str, "\t\t\tmetagenomic\t");
6168 break;
6169 case SUBSRC_mating_type :
6170 sprintf (str, "\t\t\tmating_type\t");
6171 break;
6172 case SUBSRC_linkage_group :
6173 sprintf (str, "\t\t\tlinkage_group\t");
6174 break;
6175 case SUBSRC_haplogroup :
6176 sprintf (str, "\t\t\thaplogroup\t");
6177 break;
6178 case SUBSRC_phenotype :
6179 sprintf (str, "\t\t\tphenotype\t");
6180 break;
6181 case SUBSRC_altitude :
6182 sprintf (str, "\t\t\taltitude\t");
6183 break;
6184 case SUBSRC_other :
6185 sprintf (str, "\t\t\tnote\t");
6186 break;
6187 default :
6188 str [0] = 0;
6189 }
6190 if ( str [0] == 0) continue;
6191 if (! StringHasNoText (ssp->name))
6192 {
6193 StringNCat (str, ssp->name, sizeof (str) - StringLen (str) - 2);
6194 str [sizeof (str) - 2] = 0;
6195 }
6196 StringCat (str, "\n");
6197 ValNodeCopyStr (head, 0, str);
6198 }
6199 }
6200
/*
 * Appends the feature-table line "\t\t\t<qual>\t<val>\n" to head.
 * Does nothing when head is NULL or when qual or val has no text.
 * The qualifier names "orig_protein_id" and "orig_transcript_id" are
 * written as "protein_id" and "transcript_id".  The line is allocated
 * to fit, so val may be of any length.
 */
static void AddOneFtableQual (
  ValNodePtr PNTR head,
  CharPtr qual,
  CharPtr val
)

{
  CharPtr  line;
  size_t   needed;

  if (head == NULL || StringHasNoText (qual) || StringHasNoText (val)) return;

  if (StringCmp (qual, "orig_protein_id") == 0) {
    qual = "protein_id";
  } else if (StringCmp (qual, "orig_transcript_id") == 0) {
    qual = "transcript_id";
  }

  /* tabs, newline and terminator need 6 bytes; 10 leaves headroom */
  needed = StringLen (qual) + StringLen (val) + 10;
  line = (CharPtr) MemNew (sizeof (Char) * needed);
  if (line == NULL) return;

  sprintf (line, "\t\t\t%s\t%s\n", qual, val);

  /* line is handed to ValNodeAddStr as-is and is not freed here */
  ValNodeAddStr (head, 0, line);
}
6233
6234
GetGeneticCodeNumber(ValNodePtr gcp)6235 static Int4 GetGeneticCodeNumber (ValNodePtr gcp)
6236 {
6237 Int4 gcode = 0;
6238 ValNodePtr vnp, tmp;
6239
6240 if (gcp == NULL) {
6241 return 0;
6242 }
6243 for (vnp = (ValNodePtr) gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
6244 if (vnp->choice == 2) {
6245 gcode = vnp->data.intvalue;
6246 }
6247 }
6248 if (gcode == 0) {
6249 for (vnp = (ValNodePtr) gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
6250 if (vnp->choice == 1) {
6251 tmp = GeneticCodeFind (0, vnp->data.ptrvalue);
6252 gcode = GetGeneticCodeNumber(tmp);
6253 }
6254 }
6255 }
6256 return gcode;
6257 }
6258
6259
6260
PrintFtableLocAndQuals(IntAsn2gbJobPtr ajp,ValNodePtr PNTR head,BioseqPtr target,SeqFeatPtr sfp,SeqMgrFeatContextPtr context)6261 NLM_EXTERN void PrintFtableLocAndQuals (
6262 IntAsn2gbJobPtr ajp,
6263 ValNodePtr PNTR head,
6264 BioseqPtr target,
6265 SeqFeatPtr sfp,
6266 SeqMgrFeatContextPtr context
6267 )
6268
6269 {
6270 CharPtr aa;
6271 Char anticodon [8];
6272 Int2 bondidx;
6273 BioseqSetPtr bssp;
6274 CodeBreakPtr cbp;
6275 BioseqPtr cdna;
6276 SeqFeatPtr cds;
6277 Char ch;
6278 CdRegionPtr crp;
6279 SeqMgrDescContext dcontext;
6280 DbtagPtr dbt;
6281 SeqMgrFeatContext fcontext;
6282 GBQualPtr gbq;
6283 ValNodePtr geneorprotdb;
6284 GeneRefPtr grp;
6285 Boolean is_gps_genomic = FALSE;
6286 CharPtr label;
6287 MolInfoPtr mip;
6288 SeqLocPtr newloc;
6289 Char numbuf [32];
6290 Int2 numcodons;
6291 ObjectIdPtr oip;
6292 BioseqPtr prod;
6293 SeqFeatPtr prot;
6294 ProtRefPtr prp = NULL;
6295 Boolean pseudo;
6296 CharPtr pseudogene = NULL;
6297 CharPtr ptr;
6298 RNAGenPtr rgp;
6299 RNAQualPtr rqp;
6300 RnaRefPtr rrp;
6301 SeqDescrPtr sdp;
6302 Int4 sec_str;
6303 SeqIdPtr sip;
6304 SeqIdPtr sip2;
6305 Int2 siteidx;
6306 SeqLocPtr slp;
6307 Char str [256];
6308 Char tmp [512];
6309 CharPtr tmpx;
6310 CharPtr tmpy;
6311 tRNAPtr trp;
6312 ValNodePtr vnp;
6313 Int4 gcode;
6314
6315 if (head == NULL || target == NULL || sfp == NULL || context == NULL) return;
6316 /* label = (CharPtr) FeatDefTypeLabel (sfp); */
6317 label = FindKeyFromFeatDefType (sfp->idx.subtype, FALSE);
6318 if (StringCmp (label, "Gene") == 0) {
6319 label = "gene";
6320 }
6321 else if (StringCmp (label, "Src") == 0) {
6322 label = "source";
6323 }
6324 if (StringHasNoText (label)) {
6325 label = "???";
6326 }
6327
6328 /* check if genomic sequence in genomic product set */
6329
6330 if (target->idx.parenttype == OBJ_BIOSEQSET) {
6331 bssp = (BioseqSetPtr) target->idx.parentptr;
6332 if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
6333 sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_molinfo, &dcontext);
6334 if (sdp != NULL) {
6335 mip = (MolInfoPtr) sdp->data.ptrvalue;
6336 if (mip != NULL && mip->biomol == MOLECULE_TYPE_GENOMIC) {
6337 is_gps_genomic = TRUE;
6338 }
6339 }
6340 }
6341 }
6342
6343 PrintFtableIntervals (head, target, sfp->location, label, ajp->relaxedMapping);
6344
6345 geneorprotdb = NULL;
6346 pseudo = sfp->pseudo;
6347
6348 switch (context->seqfeattype) {
6349 case SEQFEAT_GENE :
6350 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
6351 if (grp != NULL) {
6352 geneorprotdb = grp->db;
6353 pseudo |= grp->pseudo;
6354
6355 StringNCpy_0 (str, (CharPtr) grp->locus, sizeof (str));
6356 if (! StringHasNoText (str)) {
6357 sprintf (tmp, "\t\t\tgene\t%s\n", str);
6358 ValNodeCopyStr (head, 0, tmp);
6359 }
6360 if (! StringHasNoText (grp->allele)) {
6361 sprintf (tmp, "\t\t\tallele\t%s\n", grp->allele);
6362 ValNodeCopyStr (head, 0, tmp);
6363 }
6364 for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
6365 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6366 if (! StringHasNoText (str)) {
6367 sprintf (tmp, "\t\t\tgene_syn\t%s\n", str);
6368 ValNodeCopyStr (head, 0, tmp);
6369 }
6370 }
6371 if (! StringHasNoText (grp->desc)) {
6372 sprintf (tmp, "\t\t\tgene_desc\t%s\n", grp->desc);
6373 ValNodeCopyStr (head, 0, tmp);
6374 }
6375 if (! StringHasNoText (grp->maploc)) {
6376 sprintf (tmp, "\t\t\tmap\t%s\n", grp->maploc);
6377 ValNodeCopyStr (head, 0, tmp);
6378 }
6379 if (! StringHasNoText (grp->locus_tag)) {
6380 sprintf (tmp, "\t\t\tlocus_tag\t%s\n", grp->locus_tag);
6381 ValNodeCopyStr (head, 0, tmp);
6382 }
6383 }
6384 break;
6385 case SEQFEAT_CDREGION :
6386 prod = BioseqFind (SeqLocId (sfp->product));
6387 prot = SeqMgrGetBestProteinFeature (prod, NULL);
6388 if (prot != NULL) {
6389 prp = (ProtRefPtr) prot->data.value.ptrvalue;
6390 }
6391 if (prp == NULL) {
6392 prp = SeqMgrGetProtXref (sfp);
6393 }
6394 if (prp != NULL) {
6395 geneorprotdb = prp->db;
6396 if (prp->name != NULL) {
6397 for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
6398 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6399 if (! StringHasNoText (str)) {
6400 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6401 ValNodeCopyStr (head, 0, tmp);
6402 }
6403 }
6404 }
6405 if (prp->desc != NULL) {
6406 StringNCpy_0 (str, prp->desc, sizeof (str));
6407 if (! StringHasNoText (str)) {
6408 sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
6409 ValNodeCopyStr (head, 0, tmp);
6410 }
6411 }
6412 for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
6413 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6414 if (! StringHasNoText (str)) {
6415 sprintf (tmp, "\t\t\tfunction\t%s\n", str);
6416 ValNodeCopyStr (head, 0, tmp);
6417 }
6418 }
6419 for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
6420 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6421 if (! StringHasNoText (str)) {
6422 sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
6423 ValNodeCopyStr (head, 0, tmp);
6424 }
6425 }
6426 }
6427 if (prot != NULL) {
6428 AddOneFtableQual (head, "prot_note", prot->comment);
6429 /*
6430 StringNCpy_0 (str, prot->comment, sizeof (str));
6431 if (! StringHasNoText (str)) {
6432 sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
6433 ValNodeCopyStr (head, 0, tmp);
6434 }
6435 */
6436 }
6437 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
6438 if (crp != NULL) {
6439 if (crp->frame > 1 && crp->frame <= 3) {
6440 sprintf (tmp, "\t\t\tcodon_start\t%d\n", (int) crp->frame);
6441 ValNodeCopyStr (head, 0, tmp);
6442 }
6443 for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
6444 PrintFTCodeBreakEx (head, cbp, target, ajp->masterStyle, ajp->relaxedMapping, ajp->ajp.slp);
6445 }
6446 gcode = GetGeneticCodeNumber(crp->genetic_code);
6447 if (gcode > 0) {
6448 sprintf (tmp, "\t\t\ttransl_table\t%d\n", gcode);
6449 ValNodeCopyStr (head, 0, tmp);
6450 }
6451 }
6452 if (prod != NULL && ! ajp->hideProteinID) {
6453 if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
6454 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6455 ValNodeCopyStr (head, 0, tmp);
6456 }
6457 if (is_gps_genomic) {
6458 cds = SeqMgrGetCDSgivenProduct (prod, NULL);
6459 if (cds != NULL) {
6460 cdna = BioseqFindFromSeqLoc (cds->location);
6461 if (cdna != NULL) {
6462 if (SeqIdWriteForTable (cdna->id, str, sizeof (str), ajp, FALSE)) {
6463 sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
6464 ValNodeCopyStr (head, 0, tmp);
6465 }
6466 }
6467 }
6468 }
6469 } else if (sfp->product != NULL && ! ajp->hideProteinID) {
6470 sip = SeqLocId (sfp->product);
6471 if (sip != NULL) {
6472 if (sip->choice == SEQID_GI) {
6473 sip2 = GetSeqIdForGI (sip->data.intvalue);
6474 if (sip2 != NULL) {
6475 sip = sip2;
6476 }
6477 }
6478 if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
6479 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6480 ValNodeCopyStr (head, 0, tmp);
6481 }
6482 }
6483 }
6484 break;
6485 case SEQFEAT_RNA :
6486 prod = BioseqFind (SeqLocId (sfp->product));
6487 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
6488 if (rrp != NULL) {
6489 switch (rrp->ext.choice) {
6490 case 1 :
6491 StringNCpy_0 (str, (CharPtr) rrp->ext.value.ptrvalue, sizeof (str));
6492 if (! StringHasNoText (str)) {
6493 if (rrp->type == 255 &&
6494 (StringICmp (str, "misc_RNA") == 0 ||
6495 StringICmp (str, "ncRNA") == 0 ||
6496 StringICmp (str, "tmRNA") == 0)) {
6497 /* type other now uses name for type, product gbqual for product name */
6498 } else {
6499 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6500 ValNodeCopyStr (head, 0, tmp);
6501 }
6502 }
6503 break;
6504 case 2 :
6505 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
6506 if (trp != NULL) {
6507 FeatDefLabel (sfp, str, sizeof (str) - 1, OM_LABEL_CONTENT);
6508 if (! StringHasNoText (str)) {
6509 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6510 ValNodeCopyStr (head, 0, tmp);
6511 }
6512 numcodons = ComposeCodonsRecognizedString (trp, numbuf, sizeof (numbuf));
6513 if (numcodons > 0 && StringDoesHaveText (numbuf)) {
6514 sprintf (tmp, "\t\t\tcodon_recognized\t%s\n", numbuf);
6515 ValNodeCopyStr (head, 0, tmp);
6516 }
6517 slp = trp->anticodon;
6518 newloc = NULL;
6519 if (slp != NULL && ajp->ajp.slp != NULL) {
6520 sip = SeqIdParse ("lcl|dummy");
6521 newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle, ajp->relaxedMapping);
6522 SeqIdFree (sip);
6523 slp = newloc;
6524 if (newloc != NULL) {
6525 A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
6526 }
6527 }
6528 aa = str;
6529 if (StringNICmp (aa, "tRNA-", 5) == 0) {
6530 aa += 5;
6531 }
6532 if (slp != NULL && StringDoesHaveText (aa)) {
6533 anticodon [0] = '\0';
6534 if (SeqLocLen (slp) == 3) {
6535 tmpy = GetSequenceByLocation (slp);
6536 if (tmpy != NULL) {
6537 ptr = tmpy;
6538 ch = *ptr;
6539 while (ch != '\0') {
6540 ch = TO_LOWER(ch);
6541 *ptr = ch;
6542 ptr++;
6543 ch = *ptr;
6544 }
6545 if (! StringHasNoText (tmpy)) {
6546 StringNCpy_0 (anticodon, tmpy, sizeof (anticodon));
6547 }
6548 MemFree (tmpy);
6549 }
6550 }
6551
6552 tmpx = FFFlatLoc (ajp, target, slp, ajp->masterStyle, FALSE);
6553 if (tmpx != NULL) {
6554 if (StringDoesHaveText (anticodon)) {
6555 sprintf (tmp, "\t\t\tanticodon\t(pos:%s,aa:%s,seq:%s)\n", tmpx, aa, anticodon);
6556 } else {
6557 sprintf (tmp, "\t\t\tanticodon\t(pos:%s,aa:%s)\n", tmpx, aa);
6558 }
6559 ValNodeCopyStr (head, 0, tmp);
6560 }
6561 MemFree (tmpx);
6562 }
6563 if (newloc != NULL) {
6564 SeqLocFree (newloc);
6565 }
6566 }
6567 break;
6568 case 3 :
6569 rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
6570 if (rgp != NULL) {
6571 StringNCpy_0 (str, rgp->_class, sizeof (str));
6572 if (StringDoesHaveText (str)) {
6573 sprintf (tmp, "\t\t\tncRNA_class\t%s\n", str);
6574 ValNodeCopyStr (head, 0, tmp);
6575 }
6576 StringNCpy_0 (str, rgp->product, sizeof (str));
6577 if (StringDoesHaveText (str)) {
6578 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6579 ValNodeCopyStr (head, 0, tmp);
6580 }
6581 for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
6582 if (StringDoesHaveText (rqp->qual) && StringDoesHaveText (rqp->val)) {
6583 AddOneFtableQual (head, rqp->qual, rqp->val);
6584 }
6585 }
6586 }
6587 default :
6588 break;
6589 }
6590 }
6591 if (prod != NULL && ! ajp->hideProteinID) {
6592 if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
6593 sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
6594 ValNodeCopyStr (head, 0, tmp);
6595 }
6596 if (is_gps_genomic) {
6597 cds = SeqMgrGetNextFeature (prod, NULL, SEQFEAT_CDREGION, 0, &fcontext);
6598 if (cds != NULL && SeqMgrGetNextFeature (prod, cds, SEQFEAT_CDREGION, 0, &fcontext) == NULL) {
6599 prod = BioseqFindFromSeqLoc (cds->product);
6600 if (prod != NULL) {
6601 if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
6602 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6603 ValNodeCopyStr (head, 0, tmp);
6604 }
6605 }
6606 }
6607 }
6608 } else if (sfp->product != NULL && ! ajp->hideProteinID) {
6609 sip = SeqLocId (sfp->product);
6610 if (sip != NULL) {
6611 if (sip->choice == SEQID_GI) {
6612 sip2 = GetSeqIdForGI (sip->data.intvalue);
6613 if (sip2 != NULL) {
6614 sip = sip2;
6615 }
6616 }
6617 if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
6618 sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
6619 ValNodeCopyStr (head, 0, tmp);
6620 }
6621 }
6622 }
6623 break;
6624 case SEQFEAT_PROT :
6625 prod = BioseqFind (SeqLocId (sfp->product));
6626 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
6627 if (prp != NULL) {
6628 if (prp->name != NULL) {
6629 for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
6630 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6631 if (! StringHasNoText (str)) {
6632 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
6633 ValNodeCopyStr (head, 0, tmp);
6634 }
6635 }
6636 }
6637 if (prp->desc != NULL) {
6638 StringNCpy_0 (str, prp->desc, sizeof (str));
6639 if (! StringHasNoText (str)) {
6640 sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
6641 ValNodeCopyStr (head, 0, tmp);
6642 }
6643 }
6644 for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
6645 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6646 if (! StringHasNoText (str)) {
6647 sprintf (tmp, "\t\t\tfunction\t%s\n", str);
6648 ValNodeCopyStr (head, 0, tmp);
6649 }
6650 }
6651 for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
6652 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
6653 if (! StringHasNoText (str)) {
6654 sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
6655 ValNodeCopyStr (head, 0, tmp);
6656 }
6657 }
6658 }
6659 AddOneFtableQual (head, "prot_note", sfp->comment);
6660 /*
6661 StringNCpy_0 (str, sfp->comment, sizeof (str));
6662 if (! StringHasNoText (str)) {
6663 sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
6664 ValNodeCopyStr (head, 0, tmp);
6665 }
6666 */
6667 if (prod != NULL && ! ajp->hideProteinID) {
6668 if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
6669 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6670 ValNodeCopyStr (head, 0, tmp);
6671 }
6672 } else if (sfp->product != NULL && ! ajp->hideProteinID) {
6673 sip = SeqLocId (sfp->product);
6674 if (sip != NULL) {
6675 if (sip->choice == SEQID_GI) {
6676 sip2 = GetSeqIdForGI (sip->data.intvalue);
6677 if (sip2 != NULL) {
6678 sip = sip2;
6679 }
6680 }
6681 if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
6682 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
6683 ValNodeCopyStr (head, 0, tmp);
6684 }
6685 }
6686 }
6687 break;
6688 case SEQFEAT_REGION :
6689 StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
6690 if (! StringHasNoText (str)) {
6691 sprintf (tmp, "\t\t\tregion_name\t%s\n", str);
6692 ValNodeCopyStr (head, 0, tmp);
6693 }
6694 break;
6695 case SEQFEAT_BOND :
6696 bondidx = (Int2) sfp->data.value.intvalue;
6697 if (bondidx == 255) {
6698 bondidx = 5;
6699 }
6700 if (bondidx > 0 && bondidx < 6) {
6701 sprintf (tmp, "\t\t\tbond_type\t%s\n", bondList [bondidx]);
6702 ValNodeCopyStr (head, 0, tmp);
6703 }
6704 break;
6705 case SEQFEAT_SITE :
6706 siteidx = (Int2) sfp->data.value.intvalue;
6707 if (siteidx == 255) {
6708 siteidx = 26;
6709 }
6710 if (siteidx > 0 && siteidx < 27) {
6711 sprintf (tmp, "\t\t\tsite_type\t%s\n", siteList [siteidx]);
6712 ValNodeCopyStr (head, 0, tmp);
6713 }
6714 break;
6715 case SEQFEAT_PSEC_STR :
6716 sec_str = (Int2) sfp->data.value.intvalue;
6717 if (sec_str > 0 && sec_str <= 3) {
6718 sprintf (tmp, "\t\t\tsec_str_type\t%s\n", secStrText [sec_str]);
6719 ValNodeCopyStr (head, 0, tmp);
6720 }
6721 break;
6722 case SEQFEAT_HET :
6723 StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
6724 if (! StringHasNoText (str)) {
6725 sprintf (tmp, "\t\t\theterogen\t%s\n", str);
6726 ValNodeCopyStr (head, 0, tmp);
6727 }
6728 break;
6729 case SEQFEAT_BIOSRC :
6730 PrintBioSourceFtableEntry (head, sfp->data.value.ptrvalue);
6731 break;
6732 default :
6733 break;
6734 }
6735 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
6736 if (StringICmp (gbq->qual, "pseudogene") != 0) continue;
6737 pseudogene = gbq->val;
6738 if (StringICmp (pseudogene, "processed") == 0 ||
6739 StringICmp (pseudogene, "unprocessed") == 0 ||
6740 StringICmp (pseudogene, "unitary") == 0 ||
6741 StringICmp (pseudogene, "allelic") == 0 ||
6742 StringICmp (pseudogene, "unknown") == 0) {
6743 pseudo = FALSE;
6744 }
6745 }
6746 if (pseudo) {
6747 ValNodeCopyStr (head, 0, "\t\t\tpseudo\n");
6748 }
6749 grp = SeqMgrGetGeneXref (sfp);
6750 if (grp != NULL) {
6751 if (SeqMgrGeneIsSuppressed (grp)) {
6752 ValNodeCopyStr (head, 0, "\t\t\tgene\t-\n");
6753 } else {
6754 if (StringDoesHaveText (grp->locus)) {
6755 sprintf (tmp, "\t\t\tgene\t%s\n", grp->locus);
6756 ValNodeCopyStr (head, 0, tmp);
6757 }
6758 if (StringDoesHaveText (grp->locus_tag)) {
6759 sprintf (tmp, "\t\t\tlocus_tag\t%s\n", grp->locus_tag);
6760 ValNodeCopyStr (head, 0, tmp);
6761 }
6762 }
6763 }
6764 if (! StringHasNoText (sfp->comment)) {
6765 ValNodeCopyStr (head, 0, "\t\t\tnote\t");
6766 ValNodeCopyStr (head, 0, sfp->comment);
6767 ValNodeCopyStr (head, 0, "\n");
6768 }
6769 switch (sfp->exp_ev) {
6770 case 1 :
6771 ValNodeCopyStr (head, 0, "\t\t\tevidence\texperimental\n");
6772 break;
6773 case 2 :
6774 ValNodeCopyStr (head, 0, "\t\t\tevidence\tnot_experimental\n");
6775 break;
6776 default :
6777 break;
6778 }
6779 if (! StringHasNoText (sfp->except_text)) {
6780 ValNodeCopyStr (head, 0, "\t\t\texception\t");
6781 ValNodeCopyStr (head, 0, sfp->except_text);
6782 ValNodeCopyStr (head, 0, "\n");
6783 } else if (sfp->excpt) {
6784 ValNodeCopyStr (head, 0, "\t\t\texception\n");
6785 }
6786 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
6787 AddOneFtableQual (head, gbq->qual, gbq->val);
6788 /*
6789 if (! StringHasNoText (gbq->qual)) {
6790 if (! StringHasNoText (gbq->val)) {
6791 sprintf (tmp, "\t\t\t%s\t%s\n", gbq->qual, gbq->val);
6792 ValNodeCopyStr (head, 0, tmp);
6793 }
6794 }
6795 */
6796 }
6797 VisitUserObjectsInUop (sfp->ext, (Pointer) head, PrintFTUserObj);
6798 for (vnp = geneorprotdb; vnp != NULL; vnp = vnp->next) {
6799 dbt = (DbtagPtr) vnp->data.ptrvalue;
6800 if (dbt != NULL) {
6801 if (! StringHasNoText (dbt->db)) {
6802 oip = dbt->tag;
6803 if (oip->str != NULL && (! StringHasNoText (oip->str))) {
6804 sprintf (tmp, "\t\t\tdb_xref\t%s:%s\n", dbt->db, oip->str);
6805 ValNodeCopyStr (head, 0, tmp);
6806 } else {
6807 sprintf (tmp, "\t\t\tdb_xref\t%s:%ld\n", dbt->db, (long) oip->id);
6808 ValNodeCopyStr (head, 0, tmp);
6809 }
6810 }
6811 }
6812 }
6813 for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
6814 dbt = (DbtagPtr) vnp->data.ptrvalue;
6815 if (dbt != NULL) {
6816 if (! StringHasNoText (dbt->db)) {
6817 oip = dbt->tag;
6818 if (oip->str != NULL && (! StringHasNoText (oip->str))) {
6819 sprintf (tmp, "\t\t\tdb_xref\t%s:%s\n", dbt->db, oip->str);
6820 ValNodeCopyStr (head, 0, tmp);
6821 } else {
6822 sprintf (tmp, "\t\t\tdb_xref\t%s:%ld\n", dbt->db, (long) oip->id);
6823 ValNodeCopyStr (head, 0, tmp);
6824 }
6825 }
6826 }
6827 }
6828 }
6829
FindFirstBioseq(SeqEntryPtr sep)6830 static BioseqPtr FindFirstBioseq (SeqEntryPtr sep)
6831
6832 {
6833 BioseqPtr bsp;
6834 BioseqSetPtr bssp;
6835
6836 if (sep == NULL || sep->data.ptrvalue == NULL ||
6837 /* sep->choice < 0 || */ sep->choice > 2) return NULL;
6838 if (IS_Bioseq (sep)) {
6839 bsp = (BioseqPtr) sep->data.ptrvalue;
6840 return bsp;
6841 }
6842 bssp = (BioseqSetPtr) sep->data.ptrvalue;
6843 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
6844 bsp = FindFirstBioseq (sep);
6845 if (bsp != NULL) return bsp;
6846 }
6847 return NULL;
6848 }
6849
BioseqLockAndIndexByEntity(Uint2 entityID)6850 static BioseqPtr BioseqLockAndIndexByEntity (Uint2 entityID)
6851
6852 {
6853 BioseqPtr bsp;
6854 SeqEntryPtr sep;
6855 SeqIdPtr sip;
6856
6857 if (entityID < 1) return NULL;
6858
6859 sep = SeqMgrGetSeqEntryForEntityID (entityID);
6860 if (sep == NULL) return NULL;
6861
6862 bsp = FindFirstBioseq (sep);
6863 if (bsp == NULL) return NULL;
6864
6865 sip = SeqIdFindBest (bsp->id, 0);
6866 if (sip == NULL) return NULL;
6867
6868 bsp = BioseqLockById (sip);
6869 if (bsp == NULL) return NULL;
6870
6871 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
6872 SeqMgrIndexFeatures (entityID, NULL);
6873 }
6874
6875 return bsp;
6876 }
6877
FormatFtableSourceFeatBlock(BaseBlockPtr bbp,BioseqPtr target)6878 NLM_EXTERN CharPtr FormatFtableSourceFeatBlock (
6879 BaseBlockPtr bbp,
6880 BioseqPtr target
6881 )
6882
6883 {
6884 SeqFeatPtr sfp;
6885 SeqDescPtr sdp;
6886 SeqMgrDescContext dcontext;
6887 SeqMgrFeatContext fcontext;
6888 BioSourcePtr biop;
6889 ValNodePtr head;
6890 IntSrcBlockPtr isp;
6891 CharPtr str;
6892
6893 if (bbp == NULL) return NULL;
6894
6895 isp = (IntSrcBlockPtr) bbp;
6896 head = NULL;
6897 biop = NULL;
6898
6899 if (bbp->itemtype == OBJ_SEQDESC) {
6900 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID,
6901 0, NULL, &dcontext);
6902 if (sdp == NULL) return NULL;
6903 biop = sdp->data.ptrvalue;
6904 } else if (bbp->itemtype == OBJ_SEQFEAT) {
6905 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
6906 if (sfp == NULL) return NULL;
6907 biop = sfp->data.value.ptrvalue;
6908 }
6909 if (biop == NULL) return NULL;
6910 PrintFtableIntervals (&head, target, isp->loc, "source", FALSE);
6911 PrintBioSourceFtableEntry (&head, biop);
6912
6913 str = MergeFFValNodeStrs (head);
6914 ValNodeFreeData (head);
6915
6916 return str;
6917 }
6918
DoImmediateFormat(Asn2gbFormatPtr afp,BaseBlockPtr bbp)6919 NLM_EXTERN void DoImmediateFormat (
6920 Asn2gbFormatPtr afp,
6921 BaseBlockPtr bbp
6922 )
6923
6924 {
6925 IntAsn2gbJobPtr ajp;
6926 BlockType blocktype;
6927 BioseqPtr bsp;
6928 FormatProc fmt;
6929 IntFeatBlockPtr ifp;
6930 Boolean is_www;
6931 Int4 left = 0;
6932 size_t max;
6933 SeqEntryPtr oldscope;
6934 QualValPtr qv = NULL;
6935 Int4 right = 0;
6936 SeqEntryPtr sep;
6937 CharPtr str = NULL;
6938 Uint2 itemtype;
6939 Uint2 itemID;
6940
6941 if (afp == NULL || bbp == NULL) return;
6942 ajp = afp->ajp;
6943 if (ajp == NULL) return;
6944 is_www = GetWWW (ajp);
6945
6946 blocktype = bbp->blocktype;
6947 if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return;
6948 fmt = asn2gnbk_fmt_functions [(int) blocktype];
6949 if (fmt == NULL) return;
6950
6951 max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
6952 qv = MemNew (sizeof (QualVal) * (max + 5));
6953 if (qv == NULL) return;
6954
6955 sep = GetTopSeqEntryForEntityID (bbp->entityID);
6956
6957 bsp = BioseqLockAndIndexByEntity (bbp->entityID);
6958 oldscope = SeqEntrySetScope (sep);
6959
6960 afp->qvp = qv;
6961 str = fmt (afp, bbp);
6962 afp->qvp = NULL;
6963
6964 if (bbp->itemtype == 0 && ajp->ajp.bsp != NULL) {
6965 itemtype = ajp->ajp.bsp->idx.itemtype;
6966 itemID = ajp->ajp.bsp->idx.itemID;
6967 } else {
6968 itemtype = bbp->itemtype;
6969 itemID = bbp->itemID;
6970 }
6971
6972 SeqEntrySetScope (oldscope);
6973 BioseqUnlock (bsp);
6974
6975 if (blocktype == FEATURE_BLOCK && afp->ffwrite != NULL) {
6976 ifp = (IntFeatBlockPtr) bbp;
6977 left = ifp->left + 1;
6978 right = ifp->right + 1;
6979 }
6980
6981 if (str != NULL) {
6982 if (afp->fp != NULL) {
6983 fprintf (afp->fp, "%s", str);
6984 }
6985 if (afp->ffwrite != NULL) {
6986 afp->ffwrite (str, afp->userdata, blocktype, bbp->entityID, itemtype, itemID, left, right);
6987 }
6988 } else {
6989 if (afp->fp != NULL) {
6990 fprintf (afp->fp, "?\n");
6991 }
6992 if (afp->ffwrite != NULL) {
6993 afp->ffwrite ("?\n", afp->userdata, blocktype, bbp->entityID, itemtype, itemID, left, right);
6994 }
6995 }
6996
6997 MemFree (str);
6998 MemFree (qv);
6999 }
7000
DoQuickLinkFormat(Asn2gbFormatPtr afp,CharPtr str)7001 NLM_EXTERN void DoQuickLinkFormat (
7002 Asn2gbFormatPtr afp,
7003 CharPtr str
7004 )
7005
7006 {
7007 Uint2 entityID = 0, item_type = 0;
7008 Uint4 itemID = 0;
7009
7010 if (afp == NULL || StringHasNoText (str)) return;
7011
7012 if (afp->ajp != NULL) {
7013 if (afp->ajp->ajp.bsp != NULL) {
7014 entityID = afp->ajp->ajp.bsp->idx.entityID;
7015 item_type = OBJ_BIOSEQ;
7016 itemID = afp->ajp->ajp.bsp->idx.itemID;
7017 } else if (afp->ajp->ajp.bssp != NULL) {
7018 entityID = afp->ajp->ajp.bssp->idx.entityID;
7019 item_type = OBJ_BIOSEQSET;
7020 itemID = afp->ajp->ajp.bssp->idx.itemID;
7021 }
7022 }
7023
7024 if (str != NULL) {
7025 if (afp->fp != NULL) {
7026 fprintf (afp->fp, "%s", str);
7027 }
7028 if (afp->ffwrite != NULL) {
7029 afp->ffwrite (str, afp->userdata, (BlockType) 0, entityID, item_type, itemID, 0, 0);
7030 }
7031 }
7032 }
7033
asn2gnbk_format(Asn2gbJobPtr ajp,Int4 paragraph)7034 NLM_EXTERN CharPtr asn2gnbk_format (
7035 Asn2gbJobPtr ajp,
7036 Int4 paragraph
7037 )
7038
7039 {
7040 Asn2gbFormat af;
7041 Asn2gbSectPtr asp;
7042 BaseBlockPtr bbp;
7043 BlockType blocktype;
7044 BioseqPtr bsp;
7045 FormatProc fmt;
7046 IntAsn2gbJobPtr iajp;
7047 size_t max;
7048 SeqEntryPtr oldscope;
7049 QualValPtr qv;
7050 Int4 section;
7051 SeqEntryPtr sep;
7052 CharPtr str = NULL;
7053
7054 /* qv must hold MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) */
7055
7056 iajp = (IntAsn2gbJobPtr) ajp;
7057 if (iajp == NULL || ajp->sectionArray == NULL || ajp->paragraphArray == NULL) return NULL;
7058 if (paragraph < 0 || paragraph >= ajp->numParagraphs) return NULL;
7059
7060 bbp = ajp->paragraphArray [paragraph];
7061 if (bbp == NULL) return NULL;
7062
7063 section = bbp->section;
7064 if (section < 0 || section >= ajp->numSections) return NULL;
7065
7066 asp = ajp->sectionArray [section];
7067 if (asp == NULL) return NULL;
7068
7069 blocktype = bbp->blocktype;
7070 if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return NULL;
7071
7072 max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
7073 qv = MemNew (sizeof (QualVal) * (max + 5));
7074 if (qv == NULL) return NULL;
7075
7076 MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
7077 af.ajp = (IntAsn2gbJobPtr) ajp;
7078 af.asp = asp;
7079 af.qvp = qv;
7080 af.format = iajp->format;
7081 af.aip = iajp->aip;
7082 af.atp = iajp->atp;
7083
7084 sep = GetTopSeqEntryForEntityID (bbp->entityID);
7085
7086 fmt = asn2gnbk_fmt_functions [(int) blocktype];
7087 if (fmt == NULL) return NULL;
7088
7089 bsp = BioseqLockAndIndexByEntity (bbp->entityID);
7090 oldscope = SeqEntrySetScope (sep);
7091
7092 str = fmt (&af, bbp);
7093
7094 SeqEntrySetScope (oldscope);
7095 BioseqUnlock (bsp);
7096
7097 if (str == NULL) {
7098 str = StringSave ("???\n");
7099 }
7100
7101 MemFree (qv);
7102
7103 return str;
7104 }
7105
asn2gnbk_cleanup(Asn2gbJobPtr ajp)7106 NLM_EXTERN Asn2gbJobPtr asn2gnbk_cleanup (
7107 Asn2gbJobPtr ajp
7108 )
7109
7110 {
7111 Asn2gbSectPtr asp;
7112 BaseBlockPtr bbp;
7113 BaseBlockPtr PNTR blockArray;
7114 BioseqPtr bsp;
7115 ValNodePtr gapvnp;
7116 Int4 i;
7117 IntAsn2gbJobPtr iajp;
7118 IntAsn2gbSectPtr iasp;
7119 IntCdsBlockPtr icp;
7120 IntFeatBlockPtr ifp;
7121 IntRefBlockPtr irp;
7122 IntSrcBlockPtr isp;
7123 Int4 j;
7124 Int4 numBlocks;
7125 Int4 numSections;
7126 RefBlockPtr rbp;
7127 Asn2gbFreeFunc remotefree;
7128 ValNodePtr remotevnp;
7129 SeqAnnotPtr sap;
7130 SeqAnnotPtr sapnext;
7131 Asn2gbSectPtr PNTR sectionArray;
7132 StringItemPtr sip, nxt;
7133 SeqBlockPtr sbp;
7134 ValNodePtr vnp;
7135
7136 iajp = (IntAsn2gbJobPtr) ajp;
7137 if (iajp == NULL) return NULL;
7138
7139 SeqLocFree (iajp->ajp.slp);
7140
7141 numSections = ajp->numSections;
7142 sectionArray = ajp->sectionArray;
7143
7144 if (sectionArray != NULL) {
7145
7146 for (i = 0; i < numSections; i++) {
7147 asp = sectionArray [i];
7148 if (asp != NULL) {
7149 iasp = (IntAsn2gbSectPtr) asp;
7150
7151 numBlocks = asp->numBlocks;
7152 blockArray = asp->blockArray;
7153 if (blockArray != NULL) {
7154
7155 for (j = 0; j < numBlocks; j++) {
7156 bbp = blockArray [j];
7157 if (bbp != NULL) {
7158
7159 MemFree (bbp->string);
7160
7161 if (bbp->blocktype == REFERENCE_BLOCK) {
7162 rbp = (RefBlockPtr) bbp;
7163 MemFree (rbp->uniquestr);
7164 irp = (IntRefBlockPtr) rbp;
7165 DateFree (irp->date);
7166 SeqLocFree (irp->loc);
7167 MemFree (irp->authstr);
7168 MemFree (irp->fig);
7169 MemFree (irp->maploc);
7170
7171 } else if (bbp->blocktype == SOURCEFEAT_BLOCK) {
7172
7173 isp = (IntSrcBlockPtr) bbp;
7174 SeqLocFree (isp->loc);
7175
7176 } else if (bbp->blocktype == FEATURE_BLOCK) {
7177
7178 ifp = (IntFeatBlockPtr) bbp;
7179 if (ifp->isCDS) {
7180 icp = (IntCdsBlockPtr) ifp;
7181 MemFree (icp->fig);
7182 MemFree (icp->maploc);
7183 }
7184
7185 } else if (bbp->blocktype == SEQUENCE_BLOCK) {
7186
7187 sbp = (SeqBlockPtr) bbp;
7188 MemFree (sbp->bases);
7189 }
7190
7191 MemFree (bbp);
7192 }
7193 }
7194 }
7195 MemFree (asp->blockArray);
7196 MemFree (asp->referenceArray);
7197 MemFree (asp);
7198 }
7199 }
7200 }
7201
7202 MemFree (ajp->sectionArray);
7203 MemFree (ajp->paragraphArray);
7204 MemFree (ajp->paragraphByIDs);
7205
7206 sip = iajp->pool;
7207 while (sip != NULL) {
7208 nxt = sip->next;
7209 MemFree (sip);
7210 sip = nxt;
7211 }
7212
7213 if (iajp->lockedBspList != NULL) {
7214 UnlockFarComponents (iajp->lockedBspList);
7215 }
7216
7217 if (iajp->manygaps != NULL) {
7218 ValNodeFreeData (iajp->manygaps);
7219 }
7220
7221 if (iajp->gapvnp != NULL || iajp->remotevnp != NULL) {
7222 SeqMgrClearFeatureIndexes (ajp->entityID, NULL);
7223 if (iajp->reindex) {
7224 SeqMgrIndexFeaturesExEx (ajp->entityID, NULL, FALSE, FALSE, NULL);
7225 }
7226 }
7227
7228 if (iajp->gapvnp != NULL) {
7229 gapvnp = iajp->gapvnp;
7230 gapvnp->next = NULL; /* unlink in case remotevnp still linked after gapvnp */
7231 bsp = (BioseqPtr) gapvnp->data.ptrvalue;
7232 if (bsp != NULL) {
7233 sap = bsp->annot;
7234 while (sap != NULL) {
7235 sapnext = sap->next;
7236 SeqAnnotFree (sap);
7237 sap = sapnext;
7238 }
7239 }
7240 /* frees fake Bioseq that was created by MemNew, not BioseqNew */
7241 ValNodeFreeData (gapvnp);
7242 }
7243
7244 if (iajp->remotevnp != NULL) {
7245 remotevnp = iajp->remotevnp;
7246 remotefree = iajp->remotefree;
7247 if (remotefree != NULL) {
7248 /* if remotefree exists, it is responsible for all freeing */
7249 remotefree (remotevnp, iajp->remotedata);
7250 } else {
7251 /* otherwise free Bioseqs and ValNode chain ourselves */
7252 for (vnp = remotevnp; vnp != NULL; vnp = vnp->next) {
7253 bsp = (BioseqPtr) vnp->data.ptrvalue;
7254 if (bsp != NULL) {
7255 BioseqFree (bsp);
7256 }
7257 }
7258 ValNodeFree (remotevnp);
7259 }
7260 }
7261
7262 TextFsaFree (iajp->bad_html_fsa);
7263
7264 FreeUrlAnchorFSA ();
7265
7266 ValNodeFree (iajp->gihead);
7267
7268 free_buff ();
7269 FiniWWW (iajp);
7270
7271 MemFree (iajp);
7272
7273 return NULL;
7274 }
7275
SeqEntryToGnbk(SeqEntryPtr sep,SeqLocPtr slp,FmtType format,ModType mode,StlType style,FlgType flags,LckType locks,CstType custom,XtraPtr extra,FILE * fp)7276 NLM_EXTERN Boolean SeqEntryToGnbk (
7277 SeqEntryPtr sep,
7278 SeqLocPtr slp,
7279 FmtType format,
7280 ModType mode,
7281 StlType style,
7282 FlgType flags,
7283 LckType locks,
7284 CstType custom,
7285 XtraPtr extra,
7286 FILE *fp
7287 )
7288
7289 {
7290 AsnIoPtr aip = NULL;
7291 AsnIoPtr aipfree = NULL;
7292 Asn2gbJobPtr ajp;
7293 AsnTypePtr atp = NULL;
7294 BioseqPtr bsp = NULL;
7295 BioseqSetPtr bssp = NULL;
7296 Boolean do_gbseq_asn = FALSE;
7297 Boolean do_gbseq_xml = FALSE;
7298 Asn2gbWriteFunc ffwrite = NULL;
7299 GBSeqPtr gbseq = NULL;
7300 GBSeq gbsq;
7301 IntAsn2gbJobPtr iajp;
7302 Boolean rsult = FALSE;
7303 Int1 type = ASNIO_TEXT_OUT;
7304 Pointer userdata = NULL;
7305 XtraBlock xtra;
7306 /*
7307 BaseBlockPtr bbp;
7308 BlockType block;
7309 CharPtr ffhead = NULL;
7310 CharPtr fftail = NULL;
7311 Int4 i;
7312 Boolean is_html;
7313 Int4 numParagraphs;
7314 BaseBlockPtr PNTR paragraphArray;
7315 CharPtr str;
7316 */
7317 #ifdef WIN_MAC
7318 #if __profile__
7319 ValNodePtr bsplist = NULL;
7320 Uint2 entityID;
7321 Boolean lockFarComp;
7322 Boolean lockFarLocs;
7323 Boolean lockFarProd;
7324 Boolean lookupFarComp;
7325 Boolean lookupFarHist;
7326 Boolean lookupFarInf;
7327 Boolean lookupFarLocs;
7328 Boolean lookupFarOthers;
7329 Boolean lookupFarProd;
7330 #endif
7331 #endif
7332
7333 if (extra != NULL) {
7334 ffwrite = extra->ffwrite;
7335 /*
7336 ffhead = extra->ffhead;
7337 fftail = extra->fftail;
7338 */
7339 gbseq = extra->gbseq;
7340 aip = extra->aip;
7341 atp = extra->atp;
7342 userdata = extra->userdata;
7343 }
7344 if (fp == NULL && ffwrite == NULL && aip == NULL) return FALSE;
7345 if (sep == NULL && slp == NULL) return FALSE;
7346 if (sep != NULL) {
7347 if (IS_Bioseq (sep)) {
7348 bsp = (BioseqPtr) sep->data.ptrvalue;
7349 } else if (IS_Bioseq_set (sep)) {
7350 bssp = (BioseqSetPtr) sep->data.ptrvalue;
7351 }
7352 }
7353
7354 #ifdef WIN_MAC
7355 #if __profile__
7356 /* this allows profiling of just the formatter, without feature indexing, on the Mac */
7357
7358 if (sep != NULL) {
7359 entityID = ObjMgrGetEntityIDForPointer (sep->data.ptrvalue);
7360 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
7361 SeqMgrIndexFeatures (entityID, NULL);
7362 }
7363 }
7364
7365 lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
7366 lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
7367 lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
7368
7369 if (lockFarComp || lockFarLocs || lockFarProd) {
7370 locks = locks ^ (LOCK_FAR_COMPONENTS | LOCK_FAR_LOCATIONS | LOCK_FAR_PRODUCTS);
7371 if (slp != NULL && lockFarComp) {
7372 bsplist = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, slp);
7373 } else {
7374 bsplist = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
7375 }
7376 }
7377
7378 lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
7379 lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
7380 lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
7381 lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
7382 lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0);
7383 lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0);
7384
7385 if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) {
7386 locks = locks ^ (LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS | LOOKUP_FAR_HISTORY | LOOKUP_FAR_INFERENCE | LOOKUP_FAR_OTHERS);
7387 LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers);
7388 }
7389
7390 ProfilerSetStatus (TRUE);
7391 #endif
7392 #endif
7393
7394 do_gbseq_xml = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_XML_GBSEQ_FILE);
7395 do_gbseq_asn = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_ASN_GBSEQ_FILE);
7396
7397 if (do_gbseq_xml || do_gbseq_asn) {
7398 if (fp != NULL && aip == NULL) {
7399 if (do_gbseq_xml) {
7400 type |= ASNIO_XML;
7401 }
7402 aip = AsnIoNew (type, fp, NULL, NULL, NULL);
7403 aipfree = aip;
7404 fp = NULL;
7405 }
7406 if (extra == NULL) {
7407 MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
7408 extra = &xtra;
7409 }
7410 if (extra->gbseq == NULL) {
7411 MemSet ((Pointer) &gbsq, 0, sizeof (GBSeq));
7412 extra->gbseq = &gbsq;
7413 gbseq = extra->gbseq;
7414 }
7415 }
7416
7417 /* pass TRUE for stream to do immediate write at time of creation for speed */
7418
7419 ajp = asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
7420 flags, locks, custom, extra,
7421 TRUE, fp, aip, atp);
7422
7423 if (ajp != NULL) {
7424 rsult = TRUE;
7425 iajp = (IntAsn2gbJobPtr) ajp;
7426
7427 #if 0
7428 /* if streaming, all output was written in setup function, otherwise output here */
7429
7430 if (! stream) {
7431
7432 /* send optional head string */
7433
7434 is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
7435 if (ffhead == NULL && is_html) {
7436 ffhead = defHead;
7437 }
7438 if (ffhead != NULL) {
7439 if (fp != NULL) {
7440 fprintf (fp, ffhead);
7441 }
7442 }
7443 if (ffwrite != NULL) {
7444 ffwrite (ffhead, userdata, HEAD_BLOCK, 0, 0, 0, 0, 0, 0, 0);
7445 }
7446
7447 /* send each paragraph */
7448
7449 numParagraphs = ajp->numParagraphs;
7450 paragraphArray = ajp->paragraphArray;
7451
7452 for (i = 0; i < numParagraphs; i++) {
7453 str = asn2gnbk_format (ajp, i);
7454 block = (BlockType) 0;
7455 if (paragraphArray != NULL) {
7456 bbp = paragraphArray [i];
7457 if (bbp != NULL) {
7458 block = bbp->blocktype;
7459 }
7460 }
7461 if (str != NULL) {
7462 if (fp != NULL) {
7463 fprintf (fp, "%s", str);
7464 }
7465 if (ffwrite != NULL) {
7466 ffwrite (str, userdata, block, 0, 0, 0, 0, 0);
7467 }
7468 } else {
7469 if (fp != NULL) {
7470 fprintf (fp, "?\n");
7471 }
7472 if (ffwrite != NULL) {
7473 ffwrite ("?\n", userdata, block, 0, 0, 0, 0, 0);
7474 }
7475 }
7476
7477 MemFree (str);
7478 }
7479
7480 /* send optional tail string */
7481
7482 if (fftail == NULL && is_html) {
7483 fftail = defTail;
7484 }
7485 if (fftail != NULL) {
7486 if (fp != NULL) {
7487 fprintf (fp, fftail);
7488 }
7489 }
7490 if (ffwrite != NULL) {
7491 ffwrite (fftail, userdata, TAIL_BLOCK, 0, 0, 0, 0, 0);
7492 }
7493 }
7494 #endif
7495
7496 /* if RELEASE_MODE, warn if unresolved gi numbers, missing translation, etc. */
7497
7498 if (iajp->relModeError && mode == RELEASE_MODE) {
7499 rsult = FALSE;
7500 }
7501
7502 asn2gnbk_cleanup (ajp);
7503 }
7504
7505 if (aipfree != NULL) {
7506 AsnIoFree (aipfree, FALSE);
7507 }
7508
7509 #ifdef WIN_MAC
7510 #if __profile__
7511 ProfilerSetStatus (FALSE);
7512
7513 UnlockFarComponents (bsplist);
7514 #endif
7515 #endif
7516
7517 return rsult;
7518 }
7519
BioseqToGnbk(BioseqPtr bsp,SeqLocPtr slp,FmtType format,ModType mode,StlType style,FlgType flags,LckType locks,CstType custom,XtraPtr extra,FILE * fp)7520 NLM_EXTERN Boolean BioseqToGnbk (
7521 BioseqPtr bsp,
7522 SeqLocPtr slp,
7523 FmtType format,
7524 ModType mode,
7525 StlType style,
7526 FlgType flags,
7527 LckType locks,
7528 CstType custom,
7529 XtraPtr extra,
7530 FILE *fp
7531 )
7532
7533 {
7534 SeqEntryPtr sep = NULL;
7535
7536 if (bsp == NULL && slp == NULL) return FALSE;
7537 if (bsp != NULL) {
7538 sep = SeqMgrGetSeqEntryForData (bsp);
7539 }
7540 return SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, fp);
7541 }
7542
7543
7544