1 /* asn2gnb2.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gnb2.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 * Mati Shomrat
30 *
31 * Version Creation Date: 10/21/98
32 *
33 * $Revision: 1.238 $
34 *
35 * File Description: New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gather.h>
55 #include <gbfeat.h>
56 #include <gbftdef.h>
57 #include <edutil.h>
58 #include <validerr.h>
59 #include <objvalid.h>
60 #include <valapi.h>
61 #include <asn2gnbi.h>
62 #include <asn2gnbi.h>
63
64 #ifdef WIN_MAC
65 #if __profile__
66 #include <Profiler.h>
67 #endif
68 #endif
69
70 static CharPtr link_projid = "https://www.ncbi.nlm.nih.gov/bioproject/";
71
72 static CharPtr link_bioproj = "https://www.ncbi.nlm.nih.gov/bioproject/";
73
74 static CharPtr link_biosamp = "https://www.ncbi.nlm.nih.gov/biosample/";
75
76 static CharPtr link_assembl = "https://www.ncbi.nlm.nih.gov/assembly/";
77
78 static CharPtr link_srr = "https://www.ncbi.nlm.nih.gov/sra/";
79 static CharPtr link_srz = "https://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?analysis=";
80
81 static CharPtr link_accn = "https://www.ncbi.nlm.nih.gov/sites/entrez?";
82
83 static CharPtr link_wgs = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";
84 static CharPtr link_wgsscaf = "https://www.ncbi.nlm.nih.gov/nuccore?";
85
86 static CharPtr link_tls = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";
87
88 static CharPtr link_tsa = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";
89
90 static CharPtr link_cage = "https://www.ncbi.nlm.nih.gov/sites/entrez?";
91
92 static CharPtr link_sp = "http://www.uniprot.org/uniprot/";
93
94 static CharPtr link_mmdb = "https://www.ncbi.nlm.nih.gov/Structure/mmdb/mmdbsrv.cgi?uid=";
95
96 /*
97 static CharPtr link_featn = "https://www.ncbi.nlm.nih.gov/nuccore/";
98 static CharPtr link_featp = "https://www.ncbi.nlm.nih.gov/protein/";
99 */
100
101 static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
102 static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";
103
104 /*
105 static CharPtr link_omim = "https://www.ncbi.nlm.nih.gov/omim/";
106 */
107
108
109
110 /* ********************************************************************** */
111
112 /* add functions allocate specific blocks, populate with paragraph print info */
113
114 static CharPtr strd [4] = {
115 " ", "ss-", "ds-", "ms-"
116 };
117
118 static CharPtr gnbk_mol [16] = {
119 " ", "DNA ", "RNA ", "mRNA", "rRNA", "tRNA", /* "snRNA" */ "RNA", /* "scRNA" */ "RNA",
120 " AA ", "DNA ", "DNA ", "cRNA ", /* "snoRNA" */ "RNA", "RNA ", "RNA ", "tmRNA "
121 };
122
123 /* EMBL_FMT in RELEASE_MODE or ENTREZ_MODE, otherwise use gnbk_mol */
124
125 static CharPtr embl_mol [16] = {
126 "xxx", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA", "RNA",
127 "AA ", "DNA", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA"
128 };
129
130 static CharPtr embl_divs [18] = {
131 "FUN", "INV", "MAM", "ORG", "PHG", "PLN", "PRI", "PRO", "ROD"
132 "SYN", "UNA", "VRL", "VRT", "PAT", "EST", "STS", "HUM", "HTC"
133 };
134
135 static Uint1 imolToMoltype [16] = {
136 0, 1, 2, 5, 4, 3, 6, 7, 9, 1, 1, 2, 8, 2, 10, 11
137 };
138
139 static CharPtr gbseq_strd [4] = {
140 NULL, "single", "double", "mixed"
141 };
142
143 static CharPtr gbseq_mol [16] = {
144 "?", "DNA", "RNA", "mRNA", "rRNA", "tRNA", /* "snRNA" */ "RNA", /* "scRNA" */ "RNA",
145 "AA", "DNA", "DNA", "cRNA", /* "snoRNA" */ "RNA", "RNA", /* "ncRNA" */ "RNA", "tmRNA "
146 };
147
148 static CharPtr gbseq_top [3] = {
149 NULL, "linear", "circular"
150 };
151
/* GetBestDate: choose between two dates.
   - If either argument is NULL the other one is returned (NULL if both are).
   - Otherwise a is returned only when DateMatch (a, b, FALSE) yields 1
     (presumably meaning a is the later date - confirm against DateMatch);
     every other result selects b. */
static DatePtr GetBestDate (
  DatePtr a,
  DatePtr b
)

{
  if (a == NULL || b == NULL) {
    return (a == NULL) ? b : a;
  }

  return (DateMatch (a, b, FALSE) == 1) ? a : b;
}
168
169 /*--------------------------------------------------------*/
170 /* */
171 /* s_IsSeperatorNeeded() */
172 /* */
173 /*--------------------------------------------------------*/
174
s_IsSeperatorNeeded(CharPtr baseString,Int4 baseLength,Int2 suffixLength)175 static Boolean s_IsSeperatorNeeded(CharPtr baseString, Int4 baseLength, Int2 suffixLength)
176 {
177 Char lastChar;
178 Char nextToLastChar;
179
180 lastChar = baseString[baseLength - 1];
181 nextToLastChar = baseString[baseLength - 2];
182
183 /* This first check put here to emulate what may be a */
184 /* bug in the original code (in CheckLocusLength() ) */
185 /* which adds an 'S' segment seperator only if it */
186 /* DOES make the string longer than the max. */
187
188 if (baseLength + suffixLength < 16)
189 return FALSE;
190
191 /* If the last character is not a digit */
192 /* then don't use a seperator. */
193
194 if (!IS_DIGIT(lastChar))
195 return FALSE;
196
197 /* If the last two characters are a non-digit */
198 /* followed by a '0', then don't use seperator. */
199
200 if ((lastChar == '0') && (!IS_DIGIT(nextToLastChar)))
201 return FALSE;
202
203 /* If we made it to here, use a seperator */
204
205 return TRUE;
206 }
207
208 /*--------------------------------------------------------*/
209 /* */
210 /* s_LocusAddSuffix() - */
211 /* */
212 /*--------------------------------------------------------*/
213
s_LocusAddSuffix(CharPtr locus,Asn2gbWorkPtr awp)214 static Boolean s_LocusAddSuffix (CharPtr locus, Asn2gbWorkPtr awp)
215 {
216 size_t buflen;
217 Char ch;
218 Char segCountStr[6];
219 Int2 segCountStrLen;
220 Char segSuffix[5];
221
222 buflen = StringLen (locus);
223
224 /* If there's one or less segments, */
225 /* no suffix is needed. */
226
227 if (awp->numsegs <= 1)
228 return FALSE;
229
230 /* If the basestring has one or less */
231 /* characters, no suffix is needed. */
232
233 if (buflen <=1)
234 return FALSE;
235
236 /* Add the suffix */
237
238 ch = locus[buflen-1];
239 sprintf(segCountStr,"%d",awp->numsegs);
240 segCountStrLen = StringLen(segCountStr);
241 segSuffix[0] = '\0';
242
243 if (s_IsSeperatorNeeded(locus,buflen,segCountStrLen) == TRUE)
244 sprintf(segSuffix,"S%0*d",segCountStrLen,awp->seg);
245 else
246 sprintf(segSuffix,"%0*d",segCountStrLen,awp->seg);
247 StringCat(locus,segSuffix);
248
249 /* Return successfully */
250
251 return TRUE;
252 }
253
254 /*--------------------------------------------------------*/
255 /* */
256 /* s_LocusAdjustLength() - */
257 /* */
258 /*--------------------------------------------------------*/
259
s_LocusAdjustLength(CharPtr locus,Int2 maxLength)260 static Boolean s_LocusAdjustLength(CharPtr locus, Int2 maxLength)
261 {
262 Int2 trimCount;
263 Int2 buflen;
264 CharPtr buftmp;
265
266 buflen = StringLen (locus);
267 if (buflen <= maxLength) return FALSE;
268
269 buftmp = (CharPtr) MemNew(maxLength + 1);
270
271 /* If the sequence id is an NCBI locus of the */
272 /* form HSU00001, then make sure that if */
273 /* there is trimming the HS gets trimmed off */
274 /* as a unit, never just the 'H'. */
275
276 trimCount = buflen - maxLength;
277 if (trimCount == 1)
278 if (IS_ALPHA(locus[0]) != 0 &&
279 IS_ALPHA(locus[1]) != 0 &&
280 IS_ALPHA(locus[2]) != 0 &&
281 IS_DIGIT(locus[3]) != 0 &&
282 IS_DIGIT(locus[4]) != 0 &&
283 IS_DIGIT(locus[5]) != 0 &&
284 IS_DIGIT(locus[6]) != 0 &&
285 IS_DIGIT(locus[7]) != 0 &&
286 locus[8] == 'S' &&
287 locus[9] == '\0')
288 trimCount++;
289
290 /* Left truncate the sequence id */
291
292 StringCpy(buftmp, &locus[trimCount]);
293 StringCpy(locus, buftmp);
294
295 MemFree(buftmp);
296 return TRUE;
297 }
298
299 /*--------------------------------------------------------*/
300 /* */
/* Helper functions preceding AddLocusBlock() */
302 /* */
303 /*--------------------------------------------------------*/
304
GetBestDateForBsp(BioseqPtr bsp)305 static DatePtr GetBestDateForBsp (
306 BioseqPtr bsp
307 )
308
309 {
310 DatePtr best_date = NULL;
311 SeqMgrDescContext dcontext;
312 DatePtr dp;
313 EMBLBlockPtr ebp;
314 GBBlockPtr gbp;
315 PdbBlockPtr pdp;
316 PdbRepPtr prp;
317 SeqDescrPtr sdp;
318 SPBlockPtr spp;
319
320 if (bsp == NULL) return NULL;
321
322 dp = NULL;
323 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
324 if (sdp != NULL) {
325 dp = (DatePtr) sdp->data.ptrvalue;
326 best_date = GetBestDate (dp, best_date);
327 }
328
329 /* !!! temporarily also look at genbank block entry date !!! */
330
331 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
332 if (sdp != NULL) {
333 gbp = (GBBlockPtr) sdp->data.ptrvalue;
334 if (gbp != NULL) {
335 dp = gbp->entry_date;
336 best_date = GetBestDate (dp, best_date);
337 }
338 }
339
340 /* more complicated code for dates from various objects goes here */
341
342 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_embl, &dcontext);
343 if (sdp != NULL) {
344 ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
345 if (ebp != NULL) {
346 dp = ebp->creation_date;
347 best_date = GetBestDate (dp, best_date);
348 dp = ebp->update_date;
349 best_date = GetBestDate (dp, best_date);
350 }
351 }
352
353 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_sp, &dcontext);
354 if (sdp != NULL) {
355 spp = (SPBlockPtr) sdp->data.ptrvalue;
356 if (spp != NULL) {
357 dp = spp->created;
358 if (dp != NULL && dp->data [0] == 1) {
359 best_date = GetBestDate (dp, best_date);
360 }
361 dp = spp->sequpd;
362 if (dp != NULL && dp->data [0] == 1) {
363 best_date = GetBestDate (dp, best_date);
364 }
365 dp = spp->annotupd;
366 if (dp != NULL && dp->data [0] == 1) {
367 best_date = GetBestDate (dp, best_date);
368 }
369 }
370 }
371
372 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pdb, &dcontext);
373 if (sdp != NULL) {
374 pdp = (PdbBlockPtr) sdp->data.ptrvalue;
375 if (pdp != NULL) {
376 dp = pdp->deposition;
377 if (dp != NULL && dp->data [0] == 1) {
378 best_date = GetBestDate (dp, best_date);
379 }
380 prp = pdp->replace;
381 if (prp != NULL) {
382 dp = prp->date;
383 if (dp != NULL && dp->data[0] == 1) {
384 best_date = GetBestDate (dp, best_date);
385 }
386 }
387 }
388 }
389
390 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
391 if (sdp != NULL) {
392 dp = (DatePtr) sdp->data.ptrvalue;
393 if (dp != NULL) {
394 best_date = GetBestDate (dp, best_date);
395 }
396 }
397
398 return best_date;
399 }
400
LocusHasBadChars(CharPtr locus)401 static Boolean LocusHasBadChars (
402 CharPtr locus
403 )
404
405 {
406 Char ch;
407 CharPtr ptr;
408
409 ptr = locus;
410 ch = *ptr;
411 while (ch != '\0') {
412 if (! (IS_ALPHA(ch) || IS_DIGIT(ch) || ch == '_')) {
413 return TRUE;
414 }
415 ptr++;
416 ch = *ptr;
417 }
418 return FALSE;
419 }
420
/* LookupAccnForNavLink: fill seqid with the accession.version text for a gi.

   gi      identifier to look up; values <= 0 skip the lookup
   seqid   output buffer, always reset to an empty string first
   len     size of the seqid buffer in bytes, including the terminator
   dfault  fallback text used when the lookup fails; may be NULL

   Lookup order: GetAccnVerFromServer, then GetSeqIdForGI + SeqIdWrite, then
   dfault.  If everything fails and dfault is NULL, seqid keeps whatever the
   failed attempts left in it.

   The fallback copy is bounded by len (StringNCpy_0 takes the buffer size,
   as in its other uses in this file), so an over-long dfault is truncated
   instead of overrunning the caller's buffer.

   NOTE(review): GetAccnVerFromServer receives no buffer size; its bound on
   seqid cannot be checked from here. */
static void LookupAccnForNavLink (
  BIG_ID gi,
  CharPtr seqid,
  size_t len,
  CharPtr dfault
)

{
  SeqIdPtr  sip;

  /* a zero-length buffer cannot even hold the terminator */

  if (seqid == NULL || len == 0) return;
  *seqid = '\0';

  if (gi > 0) {
    if (GetAccnVerFromServer (gi, seqid)) return;
    sip = GetSeqIdForGI (gi);
    if (sip != NULL) {
      if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, len) != NULL) {
        SeqIdFree (sip);
        return;
      }
      SeqIdFree (sip);
    }
  }

  if (dfault == NULL) return;
  StringNCpy_0 (seqid, dfault, len);
}
447
AddLocusBlock(Asn2gbWorkPtr awp,Boolean willshowwgs,Boolean willshowtsa,Boolean willshowtls,Boolean willshowcage,Boolean willshowgenome,Boolean willshowcontig,Boolean willshowsequence)448 NLM_EXTERN void AddLocusBlock (
449 Asn2gbWorkPtr awp,
450 Boolean willshowwgs,
451 Boolean willshowtsa,
452 Boolean willshowtls,
453 Boolean willshowcage,
454 Boolean willshowgenome,
455 Boolean willshowcontig,
456 Boolean willshowsequence
457 )
458
459 {
460 size_t acclen;
461 IntAsn2gbJobPtr ajp;
462 Asn2gbSectPtr asp;
463 BaseBlockPtr bbp;
464 DatePtr best_date = NULL;
465 BioSourcePtr biop;
466 Int2 bmol = 0;
467 BioseqPtr bsp;
468 Char buf [1024];
469 Boolean cagemaster = FALSE;
470 SeqFeatPtr cds;
471 Char ch1, ch2, ch3;
472 BIG_ID currGi;
473 Char dataclass [10];
474 Char date [40];
475 SeqMgrDescContext dcontext;
476 Char div [10];
477 BioseqPtr dna;
478 DatePtr dp;
479 CharPtr ebmol;
480 EMBLBlockPtr ebp;
481 Char embldiv [10];
482 SeqMgrFeatContext fcontext;
483 StringItemPtr ffstring;
484 GBBlockPtr gbp;
485 Char gene [32];
486 Boolean genome_view;
487 GBSeqPtr gbseq;
488 ValNodePtr gilistpos;
489 SeqIdPtr gpp = NULL;
490 Boolean has_next_pref_ul = FALSE;
491 Boolean hasComment;
492 Char id [41];
493 Int2 imol = 0;
494 IndxPtr index;
495 Int2 istrand;
496 Boolean is_nm = FALSE;
497 Boolean is_np = FALSE;
498 Boolean is_nz = FALSE;
499 Boolean is_env_sample = FALSE;
500 Boolean is_transgenic = FALSE;
501 Boolean is_tpa = FALSE;
502 Char len [32];
503 Int4 length;
504 size_t loclen;
505 Char locus [41];
506 MolInfoPtr mip;
507 Char mol [64];
508 BIG_ID nextGi;
509 BioseqPtr nm = NULL;
510 BioseqPtr nuc;
511 ObjectIdPtr oip;
512 OrgNamePtr onp;
513 Uint1 origin;
514 CharPtr original_id = NULL;
515 OrgRefPtr orp;
516 BioseqPtr parent;
517 BIG_ID prevGi;
518 CharPtr ptr;
519 SeqDescrPtr sdp;
520 Char sect [128];
521 Char seg [32];
522 Char seqid [128];
523 SeqFeatPtr sfp;
524 SeqHistPtr hist;
525 SeqIdPtr sip;
526 SubSourcePtr ssp;
527 CharPtr str;
528 CharPtr suffix = NULL;
529 Uint1 tech;
530 Boolean tlsmaster = FALSE;
531 Uint1 topology;
532 Boolean tsamaster = FALSE;
533 TextSeqIdPtr tsip;
534 UserObjectPtr uop;
535 Char ver [16];
536 Int2 version;
537 ValNodePtr vnp;
538 Boolean wgsmaster = FALSE;
539 Int2 moltype, strandedness, topol;
540 /*
541 BIG_ID gi = 0;
542 Char gi_buf [32];
543 Boolean is_aa;
544 CharPtr prefix = NULL;
545 */
546
547 if (awp == NULL) return;
548 ajp = awp->ajp;
549 if (ajp == NULL) return;
550 bsp = awp->bsp;
551 if (bsp == NULL) return;
552 asp = awp->asp;
553 if (asp == NULL) return;
554
555 bbp = Asn2gbAddBlock (awp, LOCUS_BLOCK, sizeof (BaseBlock));
556 if (bbp == NULL) return;
557
558 ffstring = FFGetString(ajp);
559 if ( ffstring == NULL ) return;
560
561 mol [0] = '\0';
562 len [0] = '\0';
563 div [0] = '\0';
564 embldiv [0] = '\0';
565 dataclass [0] = '\0';
566 date [0] = '\0';
567 gene [0] = '\0';
568 locus [0] = '\0';
569
570 genome_view = FALSE;
571 if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
572 genome_view = TRUE;
573
574 }
575 if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
576 genome_view = TRUE;
577 }
578
579 /* locus id */
580
581 if (ShouldUseOriginalID (bsp)) {
582 original_id = FastaGetOriginalId (bsp);
583 }
584
585 sip = NULL;
586 version = 0;
587 for (sip = bsp->id; sip != NULL; sip = sip->next) {
588 if (sip->choice == SEQID_OTHER) {
589 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
590 if (tsip != NULL) {
591 version = tsip->version;
592 if (StringNCmp (tsip->accession, "NM_", 3) == 0 ||
593 StringNCmp (tsip->accession, "NR_", 3) == 0 ||
594 StringNCmp (tsip->accession, "XM_", 3) == 0 ||
595 StringNCmp (tsip->accession, "XR_", 3) == 0) {
596 is_nm = TRUE;
597 nm = bsp;
598 } else if (StringNCmp (tsip->accession, "NP_", 3) == 0 ||
599 StringNCmp (tsip->accession, "XP_", 3) == 0) {
600 is_np = TRUE;
601 } else if (StringNCmp (tsip->accession, "NZ_", 3) == 0) {
602 is_nz = TRUE;
603 }
604 }
605 break;
606 }
607 if (sip->choice == SEQID_GENBANK ||
608 sip->choice == SEQID_EMBL ||
609 sip->choice == SEQID_DDBJ) {
610 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
611 if (tsip != NULL) {
612 version = tsip->version;
613 }
614 break;
615 }
616 if (sip->choice == SEQID_TPG ||
617 sip->choice == SEQID_TPE ||
618 sip->choice == SEQID_TPD) {
619 is_tpa = TRUE;
620 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
621 if (tsip != NULL) {
622 version = tsip->version;
623 }
624 break;
625 }
626 if (sip->choice == SEQID_PIR ||
627 sip->choice == SEQID_SWISSPROT ||
628 sip->choice == SEQID_PRF ||
629 sip->choice == SEQID_PDB) break;
630 if (sip->choice == SEQID_GPIPE) {
631 gpp = sip;
632 }
633 }
634 if (sip == NULL) {
635 sip = gpp;
636 }
637 if (sip == NULL) {
638 sip = SeqIdFindBest (bsp->id, SEQID_GENBANK);
639 }
640 sprintf (ver, "%d", (int) version);
641
642 if (original_id != NULL) {
643 StringNCpy_0 (locus, original_id, sizeof (locus));
644 } else if (genome_view) {
645 SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
646 } else {
647 SeqIdWrite (sip, locus, PRINTID_TEXTID_LOCUS, sizeof (locus) - 1);
648 if (LocusHasBadChars (locus)) {
649 SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
650 }
651 }
652
653 if (sip != NULL && sip->choice == SEQID_PDB) {
654 ptr = StringChr (locus, '_');
655 if (ptr != NULL) {
656 ch1 = ptr [1];
657 if (ch1 != '\0') {
658 ch2 = ptr [2];
659 if (ch2 != '\0') {
660 ch3 = ptr [3];
661 if (ch3 == '\0') {
662 if (ch1 == ch2) {
663 if (IS_UPPER (ch1)) {
664 ptr [1] = TO_LOWER (ch1);
665 ptr [2] = '\0';
666 }
667 }
668 }
669 }
670 }
671 }
672 }
673
674 if (is_np) {
675 sfp = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
676 if (sfp != NULL && fcontext.bsp != NULL) {
677 nm = fcontext.bsp;
678 for (sip = nm->id; sip != NULL; sip = sip->next) {
679 if (sip->choice == SEQID_OTHER) {
680 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
681 if (tsip != NULL) {
682 if (StringNCmp (tsip->accession, "NM_", 3) == 0 ||
683 StringNCmp (tsip->accession, "XM_", 3) == 0) {
684 is_nm = TRUE;
685 }
686 }
687 }
688 }
689 if (! is_nm) {
690 nm = NULL;
691 }
692 }
693 }
694 if (nm != NULL) {
695 /*
696 sfp = SeqMgrGetNextFeature (nm, NULL, SEQFEAT_GENE, 0, &fcontext);
697 if (sfp != NULL) {
698 StringNCpy_0 (gene, fcontext.label, sizeof (gene));
699 if (SeqMgrGetNextFeature (nm, sfp, SEQFEAT_GENE, 0, &fcontext) != NULL) {
700 gene [0] = '\0';
701 }
702 if (StringLen (gene) > 15) {
703 gene [0] = '\0';
704 }
705 }
706 */
707 }
708
709 /* more complicated code to get parent locus, if segmented, goes here */
710
711 if (awp->slp != NULL) {
712 length = SeqLocLen (awp->slp);
713 } else {
714 length = bsp->length;
715 }
716
717 mip = NULL;
718 tech = MI_TECH_standard;
719 origin = 0;
720 bmol = bsp->mol;
721 if (bmol > Seq_mol_aa) {
722 bmol = 0;
723 }
724 istrand = bsp->strand;
725 if (istrand > 3) {
726 istrand = 0;
727 }
728
729 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
730 if (sdp != NULL) {
731 bbp->entityID = dcontext.entityID;
732 bbp->itemID = dcontext.itemID;
733 bbp->itemtype = OBJ_SEQDESC;
734
735 mip = (MolInfoPtr) sdp->data.ptrvalue;
736 if (mip != NULL) {
737 if (mip->biomol <= MOLECULE_TYPE_TMRNA) {
738 imol = (Int2) mip->biomol;
739 }
740 tech = mip->tech;
741
742 if (tech == MI_TECH_wgs && bsp->repr == Seq_repr_virtual) {
743
744 /* check for WGS master record */
745
746 for (sip = bsp->id; sip != NULL; sip = sip->next) {
747 switch (sip->choice) {
748 case SEQID_GENBANK :
749 case SEQID_EMBL :
750 case SEQID_DDBJ :
751 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
752 if (tsip != NULL && tsip->accession != NULL) {
753 acclen = StringLen (tsip->accession);
754 if (acclen == 12) {
755 if (StringCmp (tsip->accession + 6, "000000") == 0) {
756 wgsmaster = TRUE;
757 }
758 } else if (acclen == 13) {
759 if (StringCmp (tsip->accession + 6, "0000000") == 0) {
760 wgsmaster = TRUE;
761 }
762 } else if (acclen == 14) {
763 if (StringCmp (tsip->accession + 6, "00000000") == 0) {
764 wgsmaster = TRUE;
765 }
766 }
767 }
768 break;
769 case SEQID_OTHER :
770 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
771 if (tsip != NULL && tsip->accession != NULL) {
772 if (StringLen (tsip->accession) == 15) {
773 if (StringCmp (tsip->accession + 9, "000000") == 0) {
774 wgsmaster = TRUE;
775 }
776 }
777 }
778 break;
779 default :
780 break;
781 }
782 }
783 }
784
785 if (tech == MI_TECH_tsa && bsp->repr == Seq_repr_virtual) {
786
787 /* check for TSA master record */
788
789 for (sip = bsp->id; sip != NULL; sip = sip->next) {
790 switch (sip->choice) {
791 case SEQID_GENBANK :
792 case SEQID_EMBL :
793 case SEQID_DDBJ :
794 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
795 if (tsip != NULL && tsip->accession != NULL) {
796 acclen = StringLen (tsip->accession);
797 if (acclen == 12) {
798 if (StringCmp (tsip->accession + 6, "000000") == 0) {
799 tsamaster = TRUE;
800 }
801 } else if (acclen == 13) {
802 if (StringCmp (tsip->accession + 6, "0000000") == 0) {
803 tsamaster = TRUE;
804 }
805 } else if (acclen == 14) {
806 if (StringCmp (tsip->accession + 6, "00000000") == 0) {
807 tsamaster = TRUE;
808 }
809 }
810 }
811 break;
812 default :
813 break;
814 }
815 }
816 }
817
818 if (tech == MI_TECH_other && willshowcage && bsp->repr == Seq_repr_virtual) {
819
820 /* check for TAG master record */
821
822 for (sip = bsp->id; sip != NULL; sip = sip->next) {
823 switch (sip->choice) {
824 case SEQID_GENBANK :
825 case SEQID_EMBL :
826 case SEQID_DDBJ :
827 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
828 if (tsip != NULL && tsip->accession != NULL) {
829 acclen = StringLen (tsip->accession);
830 if (acclen == 12) {
831 if (StringCmp (tsip->accession + 5, "0000000") == 0) {
832 cagemaster = TRUE;
833 }
834 }
835 }
836 break;
837 default :
838 break;
839 }
840 }
841 }
842
843 if (tech == MI_TECH_targeted && bsp->repr == Seq_repr_virtual) {
844 tlsmaster = TRUE;
845 }
846 }
847 }
848
849 /* check inst.mol if mol-type is not-set or genomic */
850
851 if (imol <= MOLECULE_TYPE_GENOMIC) {
852 if (bmol == Seq_mol_aa) {
853 imol = MOLECULE_TYPE_PEPTIDE;
854 } else if (bmol == Seq_mol_na) {
855 imol = 0;
856 } else if (bmol == Seq_mol_rna) {
857 imol = 2;
858 } else {
859 imol = 1;
860 }
861 } else if (imol == MOLECULE_TYPE_OTHER_GENETIC_MATERIAL) {
862 if (bmol == Seq_mol_aa) {
863 imol = MOLECULE_TYPE_PEPTIDE;
864 } else if (bmol == Seq_mol_rna) {
865 imol = 2;
866 }
867 }
868
869 /* if ds-DNA don't show ds */
870
871 if (bmol == Seq_mol_dna && istrand == 2) {
872 istrand = 0;
873 }
874
875 /* ss=any RNA don't show ss */
876
877 if ((bmol > Seq_mol_rna ||
878 (imol >= MOLECULE_TYPE_MRNA && imol <= MOLECULE_TYPE_PEPTIDE) ||
879 (imol >= MOLECULE_TYPE_CRNA && imol <= MOLECULE_TYPE_TMRNA)) &&
880 istrand == 1) {
881 istrand = 0;
882 }
883
884 topology = bsp->topology;
885 if (awp->slp != NULL) {
886 topology = TOPOLOGY_LINEAR;
887 }
888
889 /* length, topology, and molecule type */
890
891 if (awp->format == GENBANK_FMT) {
892
893 if (awp->newLocusLine) {
894
895 if (wgsmaster && (! is_nz)) {
896 sprintf (len, "%ld rc", (long) length);
897 } else if (tsamaster) {
898 sprintf (len, "%ld rc", (long) length);
899 } else if (cagemaster) {
900 sprintf (len, "%ld rc", (long) length);
901 } else if (tlsmaster) {
902 sprintf (len, "%ld rc", (long) length);
903 } else {
904 sprintf (len, "%ld bp", (long) length);
905 }
906 sprintf (mol, "%s%-4s", strd [istrand], gnbk_mol [imol]);
907
908 } else {
909
910 if (topology == TOPOLOGY_CIRCULAR) {
911 sprintf (len, "%7ld bp", (long) length);
912 sprintf (mol, "%s%-4s circular", strd [istrand], gnbk_mol [imol]);
913 } else {
914 sprintf (len, "%7ld bp", (long) length);
915 sprintf (mol, "%s%-4s ", strd [istrand], gnbk_mol [imol]);
916 }
917 }
918
919 } else if (awp->format == GENPEPT_FMT) {
920
921 if (awp->newLocusLine) {
922 sprintf (len, "%ld aa", (long) length);
923 } else {
924 sprintf (len, "%7ld aa", (long) length);
925 }
926
927 } else if (awp->format == EMBL_FMT) {
928
929 if (imol < MOLECULE_TYPE_PEPTIDE) {
930 if (ajp->flags.useEmblMolType) {
931 ebmol = embl_mol [imol];
932 } else {
933 ebmol = gnbk_mol [imol];
934 }
935
936 if (topology == TOPOLOGY_CIRCULAR) {
937 sprintf (mol, "circular %s", ebmol);
938 sprintf (len, "%ld BP.", (long) length);
939 } else {
940 sprintf (mol, "%s", ebmol);
941 sprintf (len, "%ld BP.", (long) length);
942 }
943 }
944 }
945
946 /* division */
947
948 biop = NULL;
949 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
950 if (sdp != NULL) {
951 biop = (BioSourcePtr) sdp->data.ptrvalue;
952 } else {
953 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
954 if (sfp != NULL) {
955 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
956 } else if (ISA_aa (bsp->mol)) {
957
958 /* if protein with no sources, get sources applicable to DNA location of CDS */
959
960 cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
961 if (cds != NULL) {
962 sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
963 if (sfp != NULL) {
964 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
965 } else {
966 dna = BioseqFindFromSeqLoc (cds->location);
967 if (dna != NULL) {
968 sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
969 if (sdp != NULL) {
970 biop = (BioSourcePtr) sdp->data.ptrvalue;
971 }
972 }
973 }
974 }
975 }
976 }
977 if (biop != NULL) {
978 origin = biop->origin;
979 orp = biop->org;
980 if (orp != NULL) {
981 onp = orp->orgname;
982 if (onp != NULL) {
983 StringNCpy_0 (div, onp->div, sizeof (div));
984 StringNCpy_0 (embldiv, onp->div, sizeof (embldiv));
985 }
986 }
987 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
988 if (ssp->subtype == SUBSRC_transgenic) {
989 is_transgenic = TRUE;
990 } else if (ssp->subtype == SUBSRC_environmental_sample) {
991 is_env_sample = TRUE;
992 }
993 }
994 }
995
996 StringCpy (dataclass, "STD");
997 if (is_tpa) {
998 StringCpy (dataclass, "TPA");
999 }
1000
1001 switch (tech) {
1002 case MI_TECH_est :
1003 StringCpy (div, "EST");
1004 StringCpy (dataclass, "EST");
1005 break;
1006 case MI_TECH_sts :
1007 StringCpy (div, "STS");
1008 StringCpy (dataclass, "STS");
1009 break;
1010 case MI_TECH_survey :
1011 StringCpy (div, "GSS");
1012 StringCpy (dataclass, "GSS");
1013 break;
1014 case MI_TECH_htgs_0 :
1015 case MI_TECH_htgs_1 :
1016 case MI_TECH_htgs_2 :
1017 StringCpy (div, "HTG");
1018 StringCpy (dataclass, "HTG");
1019 break;
1020 case MI_TECH_htc :
1021 StringCpy (div, "HTC");
1022 StringCpy (dataclass, "HTC");
1023 break;
1024 case MI_TECH_tsa :
1025 StringCpy (div, "TSA");
1026 StringCpy (dataclass, "TSA");
1027 break;
1028 default :
1029 break;
1030 }
1031
1032 if (origin == ORG_MUT ||
1033 origin == ORG_ARTIFICIAL ||
1034 origin == ORG_SYNTHETIC ||
1035 is_transgenic) {
1036 StringCpy (div, "SYN");
1037 StringCpy (embldiv, "SYN");
1038 } else if (is_env_sample) {
1039 if (tech == MI_TECH_unknown ||
1040 tech == MI_TECH_standard ||
1041 tech == MI_TECH_other ||
1042 tech == MI_TECH_wgs ||
1043 tech == MI_TECH_htgs_3) {
1044 StringCpy (div, "ENV");
1045 StringCpy (embldiv, "ENV");
1046 }
1047 }
1048
1049 if (is_transgenic && tech == MI_TECH_survey) {
1050 StringCpy (div, "GSS");
1051 StringCpy (dataclass, "GSS");
1052 }
1053
1054 sip = SeqIdFindBest (bsp->id, SEQID_PATENT);
1055 if (sip != NULL && sip->choice == SEQID_PATENT) {
1056 StringCpy (div, "PAT");
1057 StringCpy (dataclass, "PAT");
1058 }
1059
1060 /* if protein is encoded by a patent nucleotide, use PAT division */
1061
1062 if (ISA_aa (bsp->mol)) {
1063 cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
1064 if (cds != NULL) {
1065 nuc = BioseqFindFromSeqLoc (cds->location);
1066 if (nuc != NULL) {
1067 for (sip = nuc->id; sip != NULL; sip = sip->next) {
1068 if (sip->choice == SEQID_PATENT) {
1069 StringCpy (div, "PAT");
1070 StringCpy (dataclass, "PAT");
1071 }
1072 }
1073 }
1074 }
1075 }
1076
1077 /* more complicated code for division, if necessary, goes here */
1078
1079 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
1080 while (sdp != NULL) {
1081 gbp = (GBBlockPtr) sdp->data.ptrvalue;
1082 if (gbp != NULL) {
1083 if (StringHasNoText (div) && gbp->div != NULL) {
1084 StringCpy (div, gbp->div);
1085 StringCpy (embldiv, gbp->div);
1086 } else if (StringCmp(gbp->div, "PAT") == 0) {
1087 StringCpy (div, gbp->div);
1088 StringCpy (dataclass, gbp->div);
1089 } else if (StringCmp(gbp->div, "SYN") == 0 ) {
1090 StringCpy (div, gbp->div);
1091 StringCpy (embldiv, gbp->div);
1092 }
1093 }
1094 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &dcontext);
1095 }
1096
1097 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1098
1099 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_embl, &dcontext);
1100 if (sdp != NULL) {
1101 ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
1102 if (ebp != NULL) {
1103 if (ebp->div == 255) {
1104 if (mip == NULL) {
1105 StringCpy (div, "HUM");
1106 StringCpy (embldiv, "HUM");
1107 }
1108 } else if (ebp->div < 18) {
1109 StringCpy (div, embl_divs [ebp->div]);
1110 StringCpy (embldiv, embl_divs [ebp->div]);
1111 }
1112 }
1113 }
1114
1115 if (StringHasNoText (div)) {
1116 StringCpy (div, "UNA");
1117 StringCpy (embldiv, "UNA");
1118 }
1119 }
1120
1121 /* empty division field if unable to find anything */
1122
1123 if (StringHasNoText (div)) {
1124 StringCpy (div, " ");
1125 }
1126 if (StringHasNoText (embldiv)) {
1127 StringCpy (embldiv, " ");
1128 }
1129
1130 /* contig style (old genome_view flag) forces CON division */
1131
1132 if (awp->contig) {
1133 StringCpy (div, "CON");
1134 StringCpy (dataclass, "CON");
1135 }
1136
1137 if (genome_view) {
1138 StringCpy (div, "CON");
1139 StringCpy (dataclass, "CON");
1140 }
1141
1142 if (StringCmp (dataclass, "CON") == 0) {
1143 if (DeltaLitOnly (bsp)) {
1144 if (SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext) != NULL) {
1145 StringCpy (dataclass, "ANN");
1146 }
1147 }
1148 }
1149
1150 /* date */
1151
1152 best_date = GetBestDateForBsp (bsp);
1153
1154 if (best_date == NULL) {
1155
1156 /* if bsp is product of CDS or mRNA feature, get date from sfp->location bsp */
1157
1158 sfp = NULL;
1159 if (ISA_na (bsp->mol)) {
1160 sfp = SeqMgrGetRNAgivenProduct (bsp, NULL);
1161 } else if (ISA_aa (bsp->mol)) {
1162 sfp = SeqMgrGetCDSgivenProduct (bsp, NULL);
1163 }
1164 if (sfp != NULL) {
1165 parent = BioseqFindFromSeqLoc (sfp->location);
1166 if (parent != NULL) {
1167 best_date = GetBestDateForBsp (parent);
1168 }
1169 }
1170 }
1171
1172 /* convert best date */
1173
1174 if (best_date != NULL) {
1175 DateToFF (date, best_date, FALSE);
1176 }
1177 if (StringHasNoText (date)) {
1178 StringCpy (date, "01-JAN-1900");
1179 }
1180
1181 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
1182
1183 /* Create the proper locus name */
1184
1185 parent = awp->parent;
1186 if (parent->repr == Seq_repr_seg) {
1187
1188 if (! StringHasNoText (awp->basename)) {
1189 StringCpy (locus, awp->basename);
1190 s_LocusAddSuffix (locus, awp);
1191 }
1192 }
1193
1194 /* Print the "LOCUS_NEW" line, if requested */
1195
1196 if (awp->newLocusLine) {
1197
1198 FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
1199 parent = awp->parent;
1200
1201 if (parent->repr == Seq_repr_seg)
1202 s_LocusAdjustLength (locus,16);
1203
1204 if (is_nm && (! StringHasNoText (gene))) {
1205 FFAddOneString (ffstring, gene, FALSE, FALSE, TILDE_IGNORE);
1206 } else {
1207 FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1208 }
1209 FFAddNChar(ffstring, ' ', 43 - StringLen(len)- ffstring->curr->pos, FALSE);
1210 FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1211 FFAddNChar(ffstring, ' ', 44 - ffstring->curr->pos, FALSE);
1212 FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1213 FFAddNChar(ffstring, ' ', 55 - ffstring->curr->pos, FALSE);
1214 if (topology == TOPOLOGY_CIRCULAR) {
1215 FFAddOneString (ffstring, "circular", FALSE, FALSE, TILDE_IGNORE);
1216 } else {
1217 FFAddOneString (ffstring, "linear ", FALSE, FALSE, TILDE_IGNORE);
1218 }
1219 FFAddNChar(ffstring, ' ', 64 - ffstring->curr->pos, FALSE);
1220 FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1221 FFAddNChar(ffstring, ' ', 68 - ffstring->curr->pos, FALSE);
1222 FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
1223 }
1224
1225 /* Else print the "LOCUS" line */
1226
1227 else {
1228
1229 FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
1230
1231 if (parent->repr == Seq_repr_seg)
1232 s_LocusAdjustLength (locus,16);
1233
1234 FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1235 FFAddNChar(ffstring, ' ', 32 - StringLen(len) - ffstring->curr->pos, FALSE);
1236 FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1237 FFAddNChar(ffstring, ' ', 33 - ffstring->curr->pos, FALSE);
1238 FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1239 FFAddNChar(ffstring, ' ', 52 - ffstring->curr->pos, FALSE);
1240 FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1241 FFAddNChar(ffstring, ' ', 62 - ffstring->curr->pos, FALSE);
1242 FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
1243 }
1244
1245 } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1246
1247 if (awp->newLocusLine) {
1248
1249 str = GetMolTypeQual (bsp);
1250 if (str == NULL) {
1251 switch (bsp->mol) {
1252 case Seq_mol_dna :
1253 str = "unassigned DNA";
1254 break;
1255 case Seq_mol_rna :
1256 str = "unassigned RNA";
1257 break;
1258 case Seq_mol_aa :
1259 break;
1260 default :
1261 str = "unassigned DNA";
1262 break;
1263 }
1264 }
1265 if (StringCmp (str, "viral cRNA") == 0) {
1266 str = "other RNA";
1267 }
1268 if (StringICmp (str, "ncRNA") == 0) {
1269 str = "RNA";
1270 }
1271 StringNCpy_0 (mol, str, sizeof (mol));
1272
1273 FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
1274
1275 FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1276 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1277 FFAddOneString (ffstring, "SV ", FALSE, FALSE, TILDE_IGNORE);
1278 FFAddOneString (ffstring, ver, FALSE, FALSE, TILDE_IGNORE);
1279 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1280 if (topology == TOPOLOGY_CIRCULAR) {
1281 FFAddOneString (ffstring, "circular", FALSE, FALSE, TILDE_IGNORE);
1282 } else {
1283 FFAddOneString (ffstring, "linear", FALSE, FALSE, TILDE_IGNORE);
1284 }
1285 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1286 FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1287 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1288 FFAddOneString (ffstring, dataclass, FALSE, FALSE, TILDE_IGNORE);
1289 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1290 FFAddOneString (ffstring, embldiv, FALSE, FALSE, TILDE_IGNORE);
1291 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1292 FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1293
1294 } else {
1295
1296 FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
1297
1298 FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1299 loclen = StringLen(locus);
1300 if (14 - 5 - loclen > 0) {
1301 FFAddNChar(ffstring, ' ', 14 - 5 - loclen, FALSE);
1302 }
1303 if (awp->hup) {
1304 FFAddOneString (ffstring, " confidential; ", FALSE, FALSE, TILDE_IGNORE);
1305 } else {
1306 FFAddOneString (ffstring, " standard; ", FALSE, FALSE, TILDE_IGNORE);
1307 }
1308 FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1309 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1310
1311 /* conditional code to make div "UNA" goes here */
1312
1313 FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1314 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1315 FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1316 }
1317 }
1318
1319 /* optionally populate indexes for NCBI internal database */
1320
1321 if (ajp->index) {
1322 index = &asp->index;
1323 } else {
1324 index = NULL;
1325 }
1326
1327 if (index != NULL) {
1328 Char tmp [20];
1329 index->locus = StringSave (locus);
1330 index->div = StringSave (div);
1331 sprintf (tmp, "%ld", (long) length);
1332 index->base_cnt = StringSave (tmp);
1333 }
1334
1335 /* optionally populate gbseq for XML-ized GenBank format */
1336
1337 if (ajp->gbseq) {
1338 gbseq = &asp->gbseq;
1339 } else {
1340 gbseq = NULL;
1341 }
1342
1343 if (gbseq != NULL) {
1344 gbseq->locus = StringSave (locus);
1345 gbseq->length = length;
1346 gbseq->division = StringSave (div);
1347
1348 gbseq->moltype = StringSave (gbseq_mol [imol]);
1349
1350 strandedness = (Int2) bsp->strand;
1351 if (strandedness < 0 || strandedness > 3) {
1352 strandedness = 0;
1353 }
1354 if (strandedness == 0) {
1355 moltype = (Int2) imolToMoltype [imol];
1356 if (moltype < 0 || moltype > 11) {
1357 moltype = 0;
1358 }
1359 if (moltype == 1) {
1360 strandedness = 2; /* default to double strand for DNA */
1361 } else if ((moltype >= 2 && moltype <= 8) || moltype >= 10 && moltype <= 11) {
1362 strandedness = 1; /* default to single strand for RNA */
1363 }
1364 }
1365 gbseq->strandedness = StringSaveNoNull (gbseq_strd [strandedness]);
1366
1367 topol = (Int2) bsp->topology;
1368 if (topol < 0 || topol > 2) {
1369 topol = 0;
1370 }
1371 if (topol == 0) {
1372 topol = 1; /* default to linear if not set */
1373 }
1374 gbseq->topology = StringSaveNoNull (gbseq_top [topol]);
1375
1376 for (sip = bsp->id; sip != NULL; sip = sip->next) {
1377 SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id));
1378 ValNodeCopyStr (&gbseq->other_seqids, 0, id);
1379 }
1380
1381 date [0] = '\0';
1382 dp = NULL;
1383 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
1384 if (sdp != NULL) {
1385 dp = (DatePtr) sdp->data.ptrvalue;
1386 }
1387 if (dp != NULL) {
1388 DateToFF (date, dp, FALSE);
1389 if (StringDoesHaveText (date)) {
1390 gbseq->create_date = StringSave (date);
1391 }
1392 }
1393 /*
1394 if (StringHasNoText (date)) {
1395 StringCpy (date, "01-JAN-1900");
1396 }
1397 gbseq->create_date = StringSave (date);
1398 */
1399
1400 date [0] = '\0';
1401 dp = NULL;
1402 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
1403 if (sdp != NULL) {
1404 dp = (DatePtr) sdp->data.ptrvalue;
1405 }
1406 if (dp != NULL) {
1407 DateToFF (date, dp, FALSE);
1408 }
1409 if (StringHasNoText (date)) {
1410 StringCpy (date, "01-JAN-1900");
1411 }
1412 gbseq->update_date = StringSave (date);
1413 }
1414
1415 suffix = FFEndPrint(ajp, ffstring, awp->format, 12, 0, 5, 0, "ID");
1416 FFRecycleString(ajp, ffstring);
1417
1418 bbp->string = suffix;
1419
1420 /*
1421 if (awp->contig && (! awp->showconfeats) && awp->smartconfeats && GetWWW (ajp) &&
1422 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
1423 is_aa = ISA_aa (bsp->mol);
1424 gi = 0;
1425 for (sip = bsp->id; sip != NULL; sip = sip->next) {
1426 if (sip->choice == SEQID_GI) {
1427 gi = (BIG_ID) sip->data.intvalue;
1428 }
1429 }
1430 if (gi > 0) {
1431 ffstring = FFGetString(ajp);
1432
1433 sprintf(gi_buf, "%ld", (long) gi);
1434 FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1435 if (is_aa) {
1436 FF_Add_NCBI_Base_URL(ffstring, link_featp);
1437 } else {
1438 FF_Add_NCBI_Base_URL(ffstring, link_featn);
1439 }
1440 FFAddOneString(ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
1441 if ( is_aa ) {
1442 FFAddOneString(ffstring, "?report=gpwithparts", FALSE, FALSE, TILDE_IGNORE);
1443 } else {
1444 FFAddOneString(ffstring, "?report=gbwithparts", FALSE, FALSE, TILDE_IGNORE);
1445 }
1446 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1447 if (bsp->length > 1000000) {
1448 FFAddOneString(ffstring, "Click here to see all features and the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
1449 } else {
1450 FFAddOneString(ffstring, "Click here to see the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
1451 }
1452 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1453
1454 prefix = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "??");
1455
1456 FFRecycleString(ajp, ffstring);
1457
1458 if (awp->afp != NULL) {
1459 DoQuickLinkFormat (awp->afp, prefix);
1460 }
1461 MemFree (prefix);
1462 }
1463 }
1464 */
1465
1466 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
1467 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
1468
1469 sprintf (buf, "<a name=\"locus_%s\"></a>", awp->currAccVerLabel);
1470 DoQuickLinkFormat (awp->afp, buf);
1471
1472 buf [0] = '\0';
1473 hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext) != NULL);
1474 if (! hasComment) {
1475 hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_region, &dcontext) != NULL);
1476 }
1477 if (! hasComment) {
1478 hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_maploc, &dcontext) != NULL);
1479 }
1480 if (! hasComment) {
1481 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
1482 while (sdp != NULL) {
1483 uop = (UserObjectPtr) sdp->data.ptrvalue;
1484 if (uop != NULL) {
1485 oip = uop->type;
1486 if (oip != NULL) {
1487 if (StringCmp (oip->str, "RefGeneTracking") == 0) {
1488 hasComment = TRUE;
1489 } else if (StringCmp (oip->str, "GenomeBuild") == 0) {
1490 hasComment = TRUE;
1491 } else if (StringCmp (oip->str, "ENCODE") == 0) {
1492 hasComment = TRUE;
1493 }
1494 }
1495 }
1496 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
1497 }
1498 }
1499 if (! hasComment) {
1500 hist = bsp->hist;
1501 if (hist != NULL) {
1502 if (hist->replaced_by_ids != NULL && hist->replaced_by_date != NULL) {
1503 hasComment = TRUE;
1504 } else if (hist->replace_ids != NULL && hist->replace_date != NULL) {
1505 hasComment = TRUE;
1506 }
1507 }
1508 }
1509
1510 buf [0] = '\0';
1511 StringCpy (buf, "<div class=\"localnav\"><ul class=\"locals\">");
1512
1513 if (hasComment) {
1514 sprintf (sect, "<li><a href=\"#comment_%s\" title=\"Jump to the comment section of this record\">Comment</a></li>",
1515 awp->currAccVerLabel);
1516 StringCat (buf, sect);
1517 }
1518 sprintf (sect, "<li><a href=\"#feature_%s\" title=\"Jump to the feature table of this record\">Features</a></li>",
1519 awp->currAccVerLabel);
1520 StringCat (buf, sect);
1521 if (willshowwgs) {
1522 sprintf (sect, "<li><a href=\"#wgs_%s\" title=\"Jump to WGS section of this record\">WGS</a></li>",
1523 awp->currAccVerLabel);
1524 StringCat (buf, sect);
1525 }
1526 if (willshowgenome) {
1527 sprintf (sect, "<li><a href=\"#genome_%s\" title=\"Jump to the genome section of this record\">Genome</a></li>",
1528 awp->currAccVerLabel);
1529 StringCat (buf, sect);
1530 }
1531 if (willshowcontig) {
1532 sprintf (sect, "<li><a href=\"#contig_%s\" title=\"Jump to the contig section of this record\">Contig</a></li>",
1533 awp->currAccVerLabel);
1534 StringCat (buf, sect);
1535 }
1536 if (willshowsequence) {
1537 sprintf (sect, "<li><a href=\"#sequence_%s\" title=\"Jump to the sequence of this record\">Sequence</a></li>",
1538 awp->currAccVerLabel);
1539 StringCat (buf, sect);
1540 }
1541
1542 StringCat (buf, "</ul>");
1543
1544 prevGi = 0;
1545 currGi = 0;
1546 nextGi = 0;
1547 gilistpos = awp->gilistpos;
1548 if (gilistpos == NULL) {
1549 gilistpos = ajp->gihead;
1550 }
1551 do {
1552 vnp = gilistpos;
1553 if (vnp != NULL) {
1554 prevGi = (BIG_ID) vnp->data.intvalue;
1555 vnp = vnp->next;
1556 gilistpos = vnp;
1557 if (vnp != NULL) {
1558 currGi = (BIG_ID) vnp->data.intvalue;
1559 vnp = vnp->next;
1560 if (vnp != NULL) {
1561 nextGi = (BIG_ID) vnp->data.intvalue;
1562 }
1563 }
1564 }
1565 } while (gilistpos != NULL && currGi != awp->currGi);
1566
1567 has_next_pref_ul = FALSE;
1568
1569 if (currGi == awp->currGi && nextGi > 0 && awp->sectionCount < awp->sectionMax) {
1570 if (! has_next_pref_ul) {
1571 StringCat (buf, "<ul class=\"nextprevlinks\">");
1572 has_next_pref_ul = TRUE;
1573 }
1574 LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
1575 if (awp->seg + 1 > 0 && awp->numsegs > 0 && awp->seg + 1 <= awp->numsegs) {
1576 sprintf (seg, " (segment %d of %ld)", (int) (awp->seg + 1), (long) awp->numsegs);
1577 StringCat (seqid, seg);
1578 }
1579 sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) nextGi, seqid);
1580 StringCat (buf, sect);
1581 } else if (awp->nextGi > 0) {
1582 if (! has_next_pref_ul) {
1583 StringCat (buf, "<ul class=\"nextprevlinks\">");
1584 has_next_pref_ul = TRUE;
1585 }
1586 LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
1587 sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) awp->nextGi, seqid);
1588 StringCat (buf, sect);
1589 }
1590 if (currGi == awp->currGi && prevGi > 0 && awp->sectionCount > 1) {
1591 if (! has_next_pref_ul) {
1592 StringCat (buf, "<ul class=\"nextprevlinks\">");
1593 has_next_pref_ul = TRUE;
1594 }
1595 LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
1596 if (awp->seg - 1 > 0 && awp->numsegs > 0 && awp->seg - 1 <= awp->numsegs) {
1597 sprintf (seg, " (segment %d of %ld)", (int) (awp->seg - 1), (long) awp->numsegs);
1598 StringCat (seqid, seg);
1599 }
1600 sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) prevGi, seqid);
1601 StringCat (buf, sect);
1602 } else if (awp->prevGi > 0) {
1603 if (! has_next_pref_ul) {
1604 StringCat (buf, "<ul class=\"nextprevlinks\">");
1605 has_next_pref_ul = TRUE;
1606 }
1607 LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
1608 sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) awp->prevGi, seqid);
1609 StringCat (buf, sect);
1610 }
1611 if (has_next_pref_ul) {
1612 StringCat (buf, "</ul>");
1613 }
1614 StringCat (buf, "</div>\n");
1615 StringCat (buf, "<pre class=\"genbank\">");
1616 DoQuickLinkFormat (awp->afp, buf);
1617 } else if (GetWWW (ajp)) {
1618 buf [0] = '\0';
1619 StringCat (buf, "<pre>");
1620 DoQuickLinkFormat (awp->afp, buf);
1621 }
1622
1623 if (awp->afp != NULL) {
1624 DoImmediateFormat (awp->afp, bbp);
1625 }
1626 }
1627
AddDeflineBlock(Asn2gbWorkPtr awp)1628 NLM_EXTERN void AddDeflineBlock (
1629 Asn2gbWorkPtr awp
1630 )
1631
1632 {
1633 IntAsn2gbJobPtr ajp;
1634 Asn2gbSectPtr asp;
1635 BaseBlockPtr bbp;
1636 BioseqPtr bsp;
1637 GBSeqPtr gbseq;
1638 ItemInfo ii;
1639 StringItemPtr ffstring;
1640 CharPtr title;
1641
1642 if (awp == NULL) return;
1643 ajp = awp->ajp;
1644 if (ajp == NULL) return;
1645 bsp = awp->bsp;
1646 if (bsp == NULL) return;
1647 asp = awp->asp;
1648 if (asp == NULL) return;
1649
1650 bbp = Asn2gbAddBlock (awp, DEFLINE_BLOCK, sizeof (BaseBlock));
1651 if (bbp == NULL) return;
1652
1653 ffstring = FFGetString(ajp);
1654 if ( ffstring == NULL ) return;
1655
1656 MemSet ((Pointer) (&ii), 0, sizeof (ItemInfo));
1657
1658 /* create default defline */
1659
1660 title = NewCreateDefLineEx (&ii, bsp, ajp->gpipdDeflines, FALSE, ajp->gpipdDeflines);
1661
1662 FFStartPrint (ffstring, awp->format, 0, 12, "DEFINITION", 12, 5, 5, "DE", TRUE);
1663
1664 if (StringDoesHaveText (title)) {
1665 bbp->entityID = ii.entityID;
1666 bbp->itemID = ii.itemID;
1667 bbp->itemtype = ii.itemtype;
1668
1669 FFAddOneString (ffstring, title, TRUE, TRUE, TILDE_IGNORE);
1670 } else {
1671 FFAddOneChar (ffstring, '.', FALSE);
1672 }
1673
1674 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "DE");
1675
1676 /* optionally populate gbseq for XML-ized GenBank format */
1677
1678 if (ajp->gbseq) {
1679 gbseq = &asp->gbseq;
1680 } else {
1681 gbseq = NULL;
1682 }
1683
1684 if (gbseq != NULL) {
1685 gbseq->definition = StringSave (title);
1686 }
1687
1688 MemFree (title);
1689
1690 FFRecycleString(ajp, ffstring);
1691
1692 /*
1693 if (bbp->itemtype == 0) {
1694 bbp->entityID = bsp->idx.entityID;
1695 bbp->itemtype = bsp->idx.itemtype;
1696 bbp->itemID = bsp->idx.itemID;
1697 }
1698 */
1699
1700 if (awp->afp != NULL) {
1701 DoImmediateFormat (awp->afp, bbp);
1702 }
1703 }
1704
/*
 * FF_www_accession
 *
 * Appends the accession text cstring to ffstring.  When GetWWW (ajp) is
 * true the text is wrapped in an HTML anchor whose href is the base URL
 * selected by is_na (link_seqn when TRUE, link_seqp otherwise) followed
 * by the accession itself; otherwise the bare text is appended.
 *
 * Does nothing if cstring or ffstring is NULL.
 */
static void FF_www_accession (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  CharPtr cstring,
  Boolean is_na
)
{
  if (ffstring == NULL || cstring == NULL) return;

  /* plain-text output: just the accession */

  if (! GetWWW (ajp)) {
    FFAddOneString (ffstring, cstring, FALSE, FALSE, TILDE_IGNORE);
    return;
  }

  /* HTML output: <a href="BASE-URL + accession">accession</a> */

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (ffstring, is_na ? link_seqn : link_seqp);
  FFAddTextToString (ffstring, /* "val=" */ NULL, cstring, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, cstring, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
1729
1730 /* Check if acc directly follows prev */
IsSuccessor(CharPtr acc,CharPtr prev)1731 static Boolean IsSuccessor(CharPtr acc, CharPtr prev)
1732 {
1733 CharPtr accp, prevp;
1734 Int4 acc_num, prev_num;
1735
1736 if (acc == NULL || prev == NULL) return FALSE;
1737
1738 if (StringLen(acc) != StringLen(prev)) return FALSE;
1739
1740 accp = acc;
1741 prevp = prev;
1742 while (accp != '\0' && prevp != '\0') {
1743 if (*accp != *prevp) return FALSE;
1744 if (IS_DIGIT(*accp)) {
1745 acc_num = (Int4)atol(accp);
1746 prev_num = (Int4)atol(prevp);
1747 return (acc_num == prev_num + 1);
1748 }
1749 ++accp;
1750 ++prevp;
1751 }
1752 return FALSE;
1753 }
1754
1755
IsProjectAccn(CharPtr acc)1756 static Boolean IsProjectAccn(CharPtr acc)
1757 {
1758 Int2 letters;
1759 Char digits[3];
1760 CharPtr ptr;
1761
1762 if (acc == NULL) {
1763 return FALSE;
1764 }
1765 digits[0] = '\0';
1766
1767 for (ptr = acc, letters = 0; ptr != '\0' && IS_ALPHA(*ptr); ++ptr, ++letters) continue;
1768 if (letters != 4 || StringLen(ptr) < 2) {
1769 return FALSE;
1770 }
1771 digits[0] = *ptr++;
1772 digits[1] = *ptr++;
1773 digits[2] = '\0';
1774 if (atoi(digits) < 1) {
1775 return FALSE;
1776 }
1777 while (*ptr != '\0') {
1778 if (*ptr != '0') {
1779 return FALSE;
1780 }
1781 ++ptr;
1782 }
1783 return TRUE;
1784 }
1785
1786
/*
 * GetSecondaryAccessions
 *
 * Produces a new ValNode list of strings (added with ValNodeCopyStr)
 * describing the secondary accessions in extra_access.
 *
 * Fewer than EXTRA_ACCESSION_CUTOFF entries: every entry is copied as is.
 *
 * Otherwise entries rejected by ValidateAccn are dropped and the rest
 * are grouped:
 *   - an accession accepted by IsProjectAccn becomes a one-element group
 *     that is pushed onto the FRONT of the group list; it neither
 *     changes the group currently being filled nor the "previous"
 *     accession used for the successor test;
 *   - any other accession extends the current group when IsSuccessor
 *     says it follows the previous one, and otherwise opens a new group
 *     at the END of the group list.
 * A group with more than BIN_ACCESSION_CUTOFF members is emitted as a
 * single "first-last" string; smaller groups are emitted member by
 * member.
 *
 * The caller owns the returned list.
 */
static ValNodePtr GetSecondaryAccessions(ValNodePtr extra_access)
{
#define EXTRA_ACCESSION_CUTOFF 20
#define BIN_ACCESSION_CUTOFF 5

  ValNodePtr  groups = NULL;   /* one node per group; data.ptrvalue heads the member list */
  ValNodePtr  group = NULL;    /* group currently being extended */
  ValNodePtr  members;
  ValNodePtr  project;
  ValNodePtr  node;
  ValNodePtr  out = NULL;
  CharPtr     accn;
  CharPtr     previous = NULL;
  CharPtr     lo, hi;
  Char        span [40];

  /* short lists are passed through untouched */

  if (ValNodeLen (extra_access) < EXTRA_ACCESSION_CUTOFF) {
    node = extra_access;
    while (node != NULL) {
      ValNodeCopyStr (&out, 0, (CharPtr) node->data.ptrvalue);
      node = node->next;
    }
    return out;
  }

  /*
   * Pass 1 - distribute the accessions over groups.  The member lists
   * are built with ValNodeAddStr and later released with ValNodeFree,
   * so they presumably only borrow the caller's strings - verify.
   */

  for (node = extra_access; node != NULL; node = node->next) {
    accn = (CharPtr) node->data.ptrvalue;
    if (ValidateAccn (accn) != 0) continue;

    if (IsProjectAccn (accn)) {
      project = ValNodeNew (NULL);
      ValNodeAddStr ((ValNodePtr PNTR) &(project->data.ptrvalue), 0, accn);
      project->next = groups;
      groups = project;
      continue;
    }

    if (! IsSuccessor (accn, previous)) {
      group = ValNodeAdd (&groups);
    }
    if (group != NULL) {
      members = (ValNodePtr) group->data.ptrvalue;
      ValNodeAddStr (&members, 0, accn);
      group->data.ptrvalue = members;
    }

    previous = accn;
  }

  /* Pass 2 - turn each group into output strings and release its member list */

  group = groups;
  while (group != NULL) {
    members = (ValNodePtr) group->data.ptrvalue;

    if (ValNodeLen (members) <= BIN_ACCESSION_CUTOFF) {
      for (node = members; node != NULL; node = node->next) {
        ValNodeCopyStr (&out, 0, (CharPtr) node->data.ptrvalue);
      }
    } else {
      lo = NULL;
      hi = NULL;
      for (node = members; node != NULL; node = node->next) {
        hi = (CharPtr) node->data.ptrvalue;
        if (lo == NULL) {
          lo = hi;
        }
      }
      span [0] = '\0';
      StringCat (span, lo);
      StringCat (span, "-");
      StringCat (span, hi);
      ValNodeCopyStr (&out, 0, span);
    }

    group->data.ptrvalue = ValNodeFree (members);
    group = group->next;
  }

  ValNodeFreeData (groups);
  return out;
}
1857
1858
1859 /* !!! this definitely needs more work to support all classes, use proper SeqId !!! */
1860
AddAccessionBlock(Asn2gbWorkPtr awp)1861 NLM_EXTERN void AddAccessionBlock (
1862 Asn2gbWorkPtr awp
1863 )
1864
1865 {
1866 size_t acclen;
1867 SeqIdPtr accn = NULL;
1868 IntAsn2gbJobPtr ajp;
1869 Asn2gbSectPtr asp;
1870 BaseBlockPtr bbp;
1871 BioseqPtr bsp;
1872 Char buf [41];
1873 Char ch1, ch2, ch3;
1874 SeqMgrDescContext dcontext;
1875 EMBLBlockPtr ebp;
1876 ValNodePtr extra_access;
1877 CharPtr flatloc;
1878 GBBlockPtr gbp;
1879 SeqIdPtr gi = NULL;
1880 GBSeqPtr gbseq;
1881 SeqIdPtr gnl = NULL;
1882 SeqIdPtr gpp = NULL;
1883 IndxPtr index;
1884 Boolean is_na;
1885 SeqIdPtr lcl = NULL;
1886 size_t len = 0;
1887 MolInfoPtr mip;
1888 CharPtr ptr;
1889 SeqDescrPtr sdp;
1890 ValNodePtr secondary_acc;
1891 CharPtr separator = " ";
1892 SeqIdPtr sip;
1893 TextSeqIdPtr tsip;
1894 ValNodePtr vnp;
1895 CharPtr wgsaccn = NULL;
1896 CharPtr xtra;
1897 StringItemPtr ffstring;
1898
1899 if (awp == NULL) return;
1900 ajp = awp->ajp;
1901 if (ajp == NULL) return;
1902 bsp = awp->bsp;
1903 if (bsp == NULL) return;
1904 asp = awp->asp;
1905 if (asp == NULL) return;
1906
1907 ffstring = FFGetString(ajp);
1908 if ( ffstring == NULL ) return;
1909
1910 is_na = ISA_na (bsp->mol);
1911
1912 for (sip = bsp->id; sip != NULL; sip = sip->next) {
1913 switch (sip->choice) {
1914 case SEQID_GI :
1915 gi = sip;
1916 break;
1917 case SEQID_GENBANK :
1918 case SEQID_EMBL :
1919 case SEQID_DDBJ :
1920 case SEQID_TPG :
1921 case SEQID_TPE :
1922 case SEQID_TPD :
1923 accn = sip;
1924 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1925 if (tsip != NULL) {
1926 acclen = StringLen (tsip->accession);
1927 if (acclen == 12) {
1928 wgsaccn = tsip->accession;
1929 len = 12;
1930 } else if (acclen == 13) {
1931 wgsaccn = tsip->accession;
1932 len = 13;
1933 } else if (acclen == 14) {
1934 wgsaccn = tsip->accession;
1935 len = 14;
1936 } else if (acclen == 15) {
1937 wgsaccn = tsip->accession;
1938 len = 15;
1939 }
1940 }
1941 break;
1942 case SEQID_OTHER :
1943 accn = sip;
1944 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1945 if (tsip != NULL) {
1946 if (StringLen (tsip->accession) == 15) {
1947 wgsaccn = tsip->accession;
1948 len = 15;
1949 }
1950 }
1951 break;
1952 case SEQID_PIR :
1953 case SEQID_SWISSPROT :
1954 case SEQID_PRF :
1955 case SEQID_PDB :
1956 accn = sip;
1957 break;
1958 case SEQID_GPIPE :
1959 /* should not override better accession */
1960 gpp = sip;
1961 break;
1962 case SEQID_GENERAL :
1963 /* should not override better accession */
1964 gnl = sip;
1965 break;
1966 case SEQID_LOCAL :
1967 lcl = sip;
1968 break;
1969 default :
1970 break;
1971 }
1972 }
1973
1974 sip = NULL;
1975 if (accn == NULL) {
1976 accn = gpp;
1977 gpp = NULL;
1978 }
1979 if (accn != NULL) {
1980 sip = accn;
1981 } else if (gnl != NULL) {
1982 sip = gnl;
1983 } else if (lcl != NULL) {
1984 sip = lcl;
1985 } else if (gi != NULL) {
1986 sip = gi;
1987 }
1988
1989 if (sip == NULL) return;
1990
1991 SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
1992
1993 if (sip->choice == SEQID_PDB) {
1994 ptr = StringChr (buf, '_');
1995 if (ptr != NULL) {
1996 ch1 = ptr [1];
1997 if (ch1 != '\0') {
1998 ch2 = ptr [2];
1999 if (ch2 != '\0') {
2000 ch3 = ptr [3];
2001 if (ch3 == '\0') {
2002 if (ch1 == ch2) {
2003 if (IS_UPPER (ch1)) {
2004 ptr [1] = TO_LOWER (ch1);
2005 ptr [2] = '\0';
2006 }
2007 }
2008 }
2009 }
2010 }
2011 }
2012 }
2013
2014 bbp = Asn2gbAddBlock (awp, ACCESSION_BLOCK, sizeof (BaseBlock));
2015 if (bbp == NULL) return;
2016
2017 bbp->entityID = awp->entityID;
2018
2019 if (accn == NULL) {
2020
2021 /* if no accession, do not show local or general in ACCESSION */
2022
2023 if (ajp->mode == ENTREZ_MODE || ajp->mode == SEQUIN_MODE) {
2024 buf [0] = '\0';
2025 }
2026 }
2027
2028 FFStartPrint (ffstring, awp->format, 0, 12, "ACCESSION", 12, 5, 5, "AC", TRUE);
2029
2030 if (awp->hup && accn != NULL) {
2031 FFAddOneString (ffstring, ";", FALSE, FALSE, TILDE_TO_SPACES);
2032
2033 } else if (ajp->ajp.slp != NULL) {
2034 FF_www_accession (ajp, ffstring, buf, is_na);
2035 flatloc = FFFlatLoc (ajp, bsp, ajp->ajp.slp, ajp->masterStyle, FALSE);
2036 FFAddTextToString (ffstring, " REGION: ", flatloc, NULL, FALSE, FALSE, TILDE_TO_SPACES);
2037 MemFree (flatloc);
2038 } else {
2039 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
2040 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2041 FFAddOneChar(ffstring, ';', FALSE);
2042 }
2043 }
2044
2045 /* optionally populate indexes for NCBI internal database */
2046
2047 if (ajp->index) {
2048 index = &asp->index;
2049 } else {
2050 index = NULL;
2051 }
2052
2053 if (index != NULL) {
2054 index->accession = StringSave (buf);
2055 }
2056
2057 /* optionally populate gbseq for XML-ized GenBank format */
2058
2059 if (ajp->gbseq) {
2060 gbseq = &asp->gbseq;
2061 } else {
2062 gbseq = NULL;
2063 }
2064
2065 if (gbseq != NULL) {
2066 gbseq->primary_accession = StringSave (buf);
2067 }
2068
2069 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
2070 separator = " ";
2071 } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2072 separator = " ";
2073 }
2074
2075 if (gpp != NULL) {
2076 SeqIdWrite (gpp, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
2077 FFAddTextToString(ffstring, separator, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
2078 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2079 FFAddOneChar(ffstring, ';', FALSE);
2080 }
2081 }
2082
2083 if (ajp->ajp.slp == NULL) {
2084 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
2085 if (sdp != NULL && wgsaccn != NULL) {
2086 mip = (MolInfoPtr) sdp->data.ptrvalue;
2087 if (mip != NULL && mip->tech == MI_TECH_wgs) {
2088 StringNCpy_0 (buf, wgsaccn, sizeof (buf));
2089 acclen = StringLen (buf);
2090 if (acclen == 12 && StringCmp (buf + len - 6, "000000") != 0) {
2091 StringCpy (buf + len - 6, "000000");
2092 } else if (acclen == 13 && StringCmp (buf + len - 7, "0000000") != 0) {
2093 StringCpy (buf + len - 7, "0000000");
2094 } else if (acclen == 14 && StringCmp (buf + len - 8, "00000000") != 0) {
2095 StringCpy (buf + len - 8, "00000000");
2096 } else if (acclen == 15 && StringCmp (buf + len - 8, "00000000") != 0) {
2097 StringCpy (buf + len - 8, "00000000");
2098 } else {
2099 buf [0] = '\0';
2100 }
2101 if (! StringHasNoText (buf)) {
2102 if ( GetWWW(ajp) ) {
2103 FFAddTextToString(ffstring, separator, "<a href=\"", NULL, FALSE, FALSE, TILDE_IGNORE);
2104 FF_Add_NCBI_Base_URL (ffstring, link_accn);
2105 FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", buf, "\">", FALSE, FALSE, TILDE_IGNORE);
2106 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
2107 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
2108 } else {
2109 FFAddTextToString(ffstring, separator, buf, NULL, FALSE, FALSE, TILDE_TO_SPACES);
2110 }
2111 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2112 FFAddOneChar(ffstring, ';', FALSE);
2113 }
2114 }
2115 }
2116 }
2117
2118 sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
2119 while (sdp != NULL) {
2120
2121 extra_access = NULL;
2122
2123 switch (dcontext.seqdesctype) {
2124 case Seq_descr_genbank :
2125 gbp = (GBBlockPtr) sdp->data.ptrvalue;
2126 if (gbp != NULL) {
2127 extra_access = gbp->extra_accessions;
2128 }
2129 break;
2130 case Seq_descr_embl :
2131 ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
2132 if (ebp != NULL) {
2133 extra_access = ebp->extra_acc;
2134 }
2135 break;
2136 default :
2137 break;
2138 }
2139
2140 if (extra_access != NULL) {
2141 bbp->entityID = dcontext.entityID;
2142 bbp->itemID = dcontext.itemID;
2143 bbp->itemtype = OBJ_SEQDESC;
2144
2145
2146 secondary_acc = GetSecondaryAccessions(extra_access);
2147 for (vnp = secondary_acc; vnp != NULL; vnp = vnp->next) {
2148 xtra = (CharPtr)vnp->data.ptrvalue;
2149 FFAddTextToString(ffstring, separator, xtra, NULL, FALSE, FALSE, TILDE_IGNORE);
2150 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2151 FFAddOneChar(ffstring, ';', FALSE);
2152 }
2153
2154 /* optionally populate indexes for NCBI internal database */
2155
2156 if (index != NULL) {
2157 ValNodeCopyStrToHead (&(index->secondaries), 0, xtra);
2158 }
2159
2160 /* optionally populate gbseq for XML-ized GenBank format */
2161
2162 if (gbseq != NULL) {
2163 ValNodeCopyStr (&(gbseq->secondary_accessions), 0, xtra);
2164 }
2165 }
2166 ValNodeFreeData(secondary_acc);
2167 }
2168
2169 sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &dcontext);
2170 }
2171 }
2172
2173 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "AC");
2174 FFRecycleString(ajp, ffstring);
2175
2176 if (bbp->itemtype == 0) {
2177 bbp->entityID = bsp->idx.entityID;
2178 bbp->itemtype = bsp->idx.itemtype;
2179 bbp->itemID = bsp->idx.itemID;
2180 }
2181
2182 if (awp->afp != NULL) {
2183 DoImmediateFormat (awp->afp, bbp);
2184 }
2185 }
2186
AddVersionBlock(Asn2gbWorkPtr awp)2187 NLM_EXTERN void AddVersionBlock (
2188 Asn2gbWorkPtr awp
2189 )
2190
2191 {
2192 SeqIdPtr accn = NULL;
2193 IntAsn2gbJobPtr ajp;
2194 Asn2gbSectPtr asp;
2195 BaseBlockPtr bbp;
2196 BioseqPtr bsp;
2197 Char buf [41];
2198 Char ch1, ch2, ch3;
2199 Uint1 format = PRINTID_TEXTID_ACC_VER;
2200 GBSeqPtr gbseq;
2201 BIG_ID gi = -1;
2202 SeqIdPtr gpp = NULL;
2203 IntAsn2gbSectPtr iasp;
2204 IndxPtr index;
2205 CharPtr ptr;
2206 SeqIdPtr sip;
2207 Char tmp [41];
2208 Char version [64];
2209 StringItemPtr ffstring;
2210
2211 if (awp == NULL) return;
2212 ajp = awp->ajp;
2213 if (ajp == NULL) return;
2214 bsp = awp->bsp;
2215 if (bsp == NULL) return;
2216 asp = awp->asp;
2217 if (asp == NULL) return;
2218
2219 ffstring = FFGetString(ajp);
2220 if ( ffstring == NULL ) return;
2221
2222 iasp = (IntAsn2gbSectPtr) asp;
2223
2224 for (sip = bsp->id; sip != NULL; sip = sip->next) {
2225 switch (sip->choice) {
2226 case SEQID_GI :
2227 gi = (BIG_ID) sip->data.intvalue;
2228 break;
2229 case SEQID_GENBANK :
2230 case SEQID_EMBL :
2231 case SEQID_DDBJ :
2232 case SEQID_OTHER :
2233 accn = sip;
2234 break;
2235 case SEQID_PIR :
2236 case SEQID_SWISSPROT :
2237 case SEQID_PRF :
2238 case SEQID_PDB :
2239 accn = sip;
2240 break;
2241 case SEQID_TPG :
2242 case SEQID_TPE :
2243 case SEQID_TPD :
2244 accn = sip;
2245 break;
2246 case SEQID_GPIPE :
2247 /* should not override better accession */
2248 gpp = sip;
2249 break;
2250 default :
2251 break;
2252 }
2253 }
2254
2255 if (accn == NULL) {
2256 accn = gpp;
2257 /*
2258 format = PRINTID_TEXTID_ACC_ONLY;
2259 */
2260 }
2261
2262 /* if (gi < 1 && accn == NULL) return; */
2263
2264 /* display of GI in VERSION line is now under control of HIDE_GI_NUMBERS bit in flags argument */
2265
2266 if (ajp->hideGI) {
2267 gi = 0;
2268 }
2269
2270 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2271 if ( accn == NULL ) return;
2272 if (awp->newLocusLine) return;
2273 }
2274
2275 bbp = Asn2gbAddBlock (awp, VERSION_BLOCK, sizeof (BaseBlock));
2276 if (bbp == NULL) return;
2277
2278 bbp->entityID = awp->entityID;
2279
2280 /* no longer displaying NID */
2281
2282 /*
2283 if (gi > 0) {
2284 sprintf (version, "g%ld", (long) gi);
2285
2286 gb_StartPrint (awp->format, needInitBuff, 0, 12, "NID", 13, 5, 5, "NI", TRUE);
2287 needInitBuff = FALSE;
2288
2289 gb_AddString (NULL, version, NULL, FALSE, FALSE, TILDE_TO_SPACES);
2290
2291 ff_EndPrint();
2292 needEndPrint = FALSE;
2293 }
2294 */
2295
2296 version [0] = '\0';
2297
2298 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2299 SeqIdWrite (accn, version, format, sizeof (version) - 1);
2300
2301 FFStartPrint (ffstring, awp->format, 0, 12, "VERSION", 12, 5, 5, "SV", TRUE);
2302
2303 FFAddOneString (ffstring, version, FALSE, FALSE, TILDE_TO_SPACES);
2304
2305 FFAddOneChar(ffstring, '\n', FALSE);
2306
2307 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
2308 FFRecycleString(ajp, ffstring);
2309
2310 if (awp->afp != NULL) {
2311 DoImmediateFormat (awp->afp, bbp);
2312 }
2313
2314 return;
2315 }
2316
2317 if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) {
2318 sprintf (version, "%ld", (long) gi);
2319 iasp->gi = StringSave (version);
2320 }
2321
2322 if (accn != NULL) {
2323
2324 buf [0] = '\0';
2325 SeqIdWrite (accn, buf, format, sizeof (buf) - 1);
2326
2327 if (accn->choice == SEQID_PDB) {
2328 ptr = StringChr (buf, '_');
2329 if (ptr != NULL) {
2330 ch1 = ptr [1];
2331 if (ch1 != '\0') {
2332 ch2 = ptr [2];
2333 if (ch2 != '\0') {
2334 ch3 = ptr [3];
2335 if (ch3 == '\0') {
2336 if (ch1 == ch2) {
2337 if (IS_UPPER (ch1)) {
2338 ptr [1] = TO_LOWER (ch1);
2339 ptr [2] = '\0';
2340 }
2341 }
2342 }
2343 }
2344 }
2345 }
2346 }
2347
2348 if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) {
2349 iasp->acc = StringSave (buf);
2350 ptr = StringChr (iasp->acc, '.');
2351 if (ptr != NULL) {
2352 *ptr = '\0';
2353 }
2354 }
2355
2356 if (gi > 0) {
2357 sprintf (version, "%s GI:%lld", buf, (long long) gi);
2358 } else {
2359 sprintf (version, "%s", buf);
2360 }
2361
2362 FFStartPrint (ffstring, awp->format, 0, 12, "VERSION", 12, 5, 5, "SV", TRUE);
2363
2364 FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
2365 /* optionally populate indexes for NCBI internal database */
2366
2367 if (ajp->index) {
2368 index = &asp->index;
2369 } else {
2370 index = NULL;
2371 }
2372
2373 if (index != NULL) {
2374 ptr = StringChr (buf, '.');
2375 if (ptr != NULL) {
2376 ptr++;
2377 index->version = StringSave (ptr);
2378 }
2379 if (gi > 0) {
2380 sprintf (tmp, "%lld", (long long) gi);
2381 index->gi = StringSave (tmp);
2382 }
2383 }
2384
2385 /* optionally populate gbseq for XML-ized GenBank format */
2386
2387 if (ajp->gbseq) {
2388 gbseq = &asp->gbseq;
2389 } else {
2390 gbseq = NULL;
2391 }
2392
2393 if (gbseq != NULL) {
2394 ptr = StringChr (buf, '.');
2395 if (ptr != NULL) {
2396 gbseq->accession_version = StringSave (buf);
2397 } else if (StringDoesHaveText (gbseq->primary_accession)) {
2398 gbseq->accession_version = StringSave (gbseq->primary_accession);
2399 }
2400 }
2401
2402 } else if (gi > 0) {
2403
2404 FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 12, 5, 5, "SV", TRUE);
2405
2406 sprintf (version, " GI:%lld", (long long) gi);
2407
2408 FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
2409
2410 } else {
2411
2412 FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 0, 5, 5, "SV", TRUE);
2413 FFAddOneChar(ffstring, '\n', FALSE);
2414 }
2415
2416 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
2417 FFRecycleString(ajp, ffstring);
2418
2419 if (bbp->itemtype == 0) {
2420 bbp->itemtype = bsp->idx.itemtype;
2421 bbp->itemID = bsp->idx.itemID;
2422 }
2423
2424 if (awp->afp != NULL) {
2425 DoImmediateFormat (awp->afp, bbp);
2426 }
2427 }
2428
/*
 * FF_asn2gb_www_projID
 *
 * Appends to ffstring an HTML anchor whose href is the link_projid base
 * URL followed by projID, and whose visible text is projID.
 */
static void FF_asn2gb_www_projID (
  StringItemPtr ffstring,
  CharPtr projID
)

{
  CharPtr  pieces [4];
  Int2     n;

  /* everything that follows the base URL, in output order */
  pieces [0] = projID;
  pieces [1] = "\">";
  pieces [2] = projID;
  pieces [3] = "</a>";

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (ffstring, link_projid);
  for (n = 0; n < 4; n++) {
    FFAddOneString (ffstring, pieces [n], FALSE, FALSE, TILDE_IGNORE);
  }
}
2442
/*
 * FF_asn2gb_www_SRR
 *
 * Builds an HTML anchor for the identifier in str and concatenates it
 * onto buf.  Identifiers starting with "SRZ", "DRZ" or "ERZ" use the
 * link_srz base URL; all others use link_srr.
 *
 * Newlines, carriage returns and tabs in the formatted anchor are turned
 * into spaces and surrounding spaces are trimmed before it is appended.
 * buf is written with StringCat, so it must already hold a terminated
 * string and be large enough - no length is checked here.
 * Does nothing when ajp or buf is NULL or str has no text.
 */
static void FF_asn2gb_www_SRR (
  IntAsn2gbJobPtr ajp,
  CharPtr buf,
  CharPtr str
)

{
  StringItemPtr  item;
  CharPtr        link;
  CharPtr        scan;
  Boolean        use_srz_link;

  if (ajp == NULL) return;
  if (buf == NULL) return;
  if (StringHasNoText (str)) return;

  item = FFGetString (ajp);
  if (item == NULL) return;

  use_srz_link = (Boolean) (StringNCmp (str, "SRZ", 3) == 0 ||
                            StringNCmp (str, "DRZ", 3) == 0 ||
                            StringNCmp (str, "ERZ", 3) == 0);

  FFAddOneString (item, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  if (use_srz_link) {
    FF_Add_NCBI_Base_URL (item, link_srz);
  } else {
    FF_Add_NCBI_Base_URL (item, link_srr);
  }
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "</a>", FALSE, FALSE, TILDE_IGNORE);

  link = FFEndPrint (ajp, item, ajp->format, 21, 21, 21, 21, NULL);
  FFRecycleString (ajp, item);
  if (link == NULL) return;

  /* flatten the formatted text onto one line */
  for (scan = link; *scan != '\0'; scan++) {
    if (*scan == '\n' || *scan == '\r' || *scan == '\t') {
      *scan = ' ';
    }
  }
  TrimSpacesAroundString (link);

  StringCat (buf, link);
  MemFree (link);
}
2489
/*
 * FF_asn2gb_www_BP
 *
 * Builds an HTML anchor for the identifier in str, using the
 * link_bioproj base URL, and concatenates it onto buf.
 *
 * Newlines, carriage returns and tabs in the formatted anchor are turned
 * into spaces and surrounding spaces are trimmed before it is appended.
 * buf is written with StringCat, so it must already hold a terminated
 * string and be large enough - no length is checked here.
 * Does nothing when ajp or buf is NULL or str has no text.
 */
static void FF_asn2gb_www_BP (
  IntAsn2gbJobPtr ajp,
  CharPtr buf,
  CharPtr str
)

{
  StringItemPtr  item;
  CharPtr        link;
  CharPtr        scan;

  if (ajp == NULL) return;
  if (buf == NULL) return;
  if (StringHasNoText (str)) return;

  item = FFGetString (ajp);
  if (item == NULL) return;

  FFAddOneString (item, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (item, link_bioproj);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "</a>", FALSE, FALSE, TILDE_IGNORE);

  link = FFEndPrint (ajp, item, ajp->format, 21, 21, 21, 21, NULL);
  FFRecycleString (ajp, item);
  if (link == NULL) return;

  /* flatten the formatted text onto one line */
  for (scan = link; *scan != '\0'; scan++) {
    switch (*scan) {
      case '\n' :
      case '\r' :
      case '\t' :
        *scan = ' ';
        break;
      default :
        break;
    }
  }
  TrimSpacesAroundString (link);

  StringCat (buf, link);
  MemFree (link);
}
2532
/*
 * FF_asn2gb_www_BS
 *
 * Builds an HTML anchor for the identifier in str, using the
 * link_biosamp base URL, and concatenates it onto buf.
 *
 * Newlines, carriage returns and tabs in the formatted anchor are turned
 * into spaces and surrounding spaces are trimmed before it is appended.
 * buf is written with StringCat, so it must already hold a terminated
 * string and be large enough - no length is checked here.
 * Does nothing when ajp or buf is NULL or str has no text.
 */
static void FF_asn2gb_www_BS (
  IntAsn2gbJobPtr ajp,
  CharPtr buf,
  CharPtr str
)

{
  StringItemPtr  item;
  CharPtr        link;
  CharPtr        scan;

  if (ajp == NULL) return;
  if (buf == NULL) return;
  if (StringHasNoText (str)) return;

  item = FFGetString (ajp);
  if (item == NULL) return;

  FFAddOneString (item, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (item, link_biosamp);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "</a>", FALSE, FALSE, TILDE_IGNORE);

  link = FFEndPrint (ajp, item, ajp->format, 21, 21, 21, 21, NULL);
  FFRecycleString (ajp, item);
  if (link == NULL) return;

  /* flatten the formatted text onto one line */
  for (scan = link; *scan != '\0'; scan++) {
    if (*scan == '\n' || *scan == '\r' || *scan == '\t') {
      *scan = ' ';
    }
  }
  TrimSpacesAroundString (link);

  StringCat (buf, link);
  MemFree (link);
}
2575
/*
 * FF_asn2gb_www_AS
 *
 * Builds an HTML anchor for the identifier in str, using the
 * link_assembl base URL, and concatenates it onto buf.
 *
 * Newlines, carriage returns and tabs in the formatted anchor are turned
 * into spaces and surrounding spaces are trimmed before it is appended.
 * buf is written with StringCat, so it must already hold a terminated
 * string and be large enough - no length is checked here.
 * Does nothing when ajp or buf is NULL or str has no text.
 */
static void FF_asn2gb_www_AS (
  IntAsn2gbJobPtr ajp,
  CharPtr buf,
  CharPtr str
)

{
  StringItemPtr  item;
  CharPtr        link;
  CharPtr        scan;

  if (ajp == NULL) return;
  if (buf == NULL) return;
  if (StringHasNoText (str)) return;

  item = FFGetString (ajp);
  if (item == NULL) return;

  FFAddOneString (item, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (item, link_assembl);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, str, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (item, "</a>", FALSE, FALSE, TILDE_IGNORE);

  link = FFEndPrint (ajp, item, ajp->format, 21, 21, 21, 21, NULL);
  FFRecycleString (ajp, item);
  if (link == NULL) return;

  /* flatten the formatted text onto one line */
  for (scan = link; *scan != '\0'; scan++) {
    if (*scan == '\n' || *scan == '\r' || *scan == '\t') {
      *scan = ' ';
    }
  }
  TrimSpacesAroundString (link);

  StringCat (buf, link);
  MemFree (link);
}
2618
/*
 * AddGbseqXref
 *
 * Creates a GBXref with dbname = db and appends it to the list tracked
 * by *headP / *tailP.  The xref id is a copy of str when str has text,
 * otherwise the decimal form of id.
 *
 * Nothing is added when headP or tailP is NULL, when db has no text,
 * when str has no text and id is 0, or when the GBXref cannot be
 * allocated.  On return *tailP is the new node.
 */
static void AddGbseqXref (
  GBXrefPtr PNTR headP,
  GBXrefPtr PNTR tailP,
  CharPtr db,
  CharPtr str,
  Int4 id
)

{
  GBXrefPtr  last;
  Char       numbuf [32];
  GBXrefPtr  xref;

  if (headP == NULL || tailP == NULL || StringHasNoText (db)) return;
  if (StringHasNoText (str) && id == 0) return;

  xref = GBXrefNew ();
  if (xref == NULL) return;

  xref->dbname = StringSave (db);
  if (StringDoesHaveText (str)) {
    xref->id = StringSave (str);
  } else {
    sprintf (numbuf, "%ld", (long) id);
    xref->id = StringSave (numbuf);
  }

  if (*headP == NULL) {
    *headP = xref;
  }

  /* walk forward from the recorded tail in case it is not the true end */
  last = *tailP;
  if (last != NULL) {
    for ( ; last->next != NULL; last = last->next) {
      continue;
    }
    last->next = xref;
  }

  *tailP = xref;
}
2658
/*
 * Selects which output argument of GetDBLinkString, if any, also
 * receives the value of a string-valued DBLink field.
 */
typedef enum {
  eDbLinkStrOutputDest_Nothing,       /* value only goes into the DBLINK text */
  eDbLinkStrOutputDest_bioProjectIDP  /* value is also stored through bioProjectIDP */
} EDbLinkStrOutputDest;

/*
 * Formatter that appends an HTML link for one DBLink value.
 * Arguments are (job, output buffer, value), matching the
 * FF_asn2gb_www_XX functions referenced by the DBLink table.
 */
typedef void (*TDbLinkWWWFormatter)(IntAsn2gbJobPtr, CharPtr, CharPtr);

/* One row of the table describing a string-valued DBLink field. */
typedef struct dblinkinfo {
  EDbLinkStrOutputDest output_dest;
  CharPtr pchName; /* e.g. "Assembly" */
  TDbLinkWWWFormatter pWWWFormatFunc; /* e.g. & FF_asn2gb_www_BP */
  Uint4 uBufIdx; /* index into bufs array in GetDBLinkString */
} DbLinkInfoForStr;
2672
GetDBLinkString(IntAsn2gbJobPtr ajp,UserObjectPtr uop,CharPtr PNTR bioProjectIDP,GBXrefPtr PNTR dblinkP)2673 static CharPtr GetDBLinkString (
2674 IntAsn2gbJobPtr ajp,
2675 UserObjectPtr uop,
2676 CharPtr PNTR bioProjectIDP,
2677 GBXrefPtr PNTR dblinkP
2678 )
2679
2680 {
2681 Char frm [256], tmp [256];
2682 CharPtr bufs[6];
2683 CharPtr PNTR cpp;
2684 GBXrefPtr dbhead = NULL, dbtail = NULL;
2685 ValNodePtr head, tail;
2686 Int4 i, j;
2687 Int4Ptr ip;
2688 size_t len;
2689 ObjectIdPtr oip;
2690 CharPtr prefix;
2691 CharPtr str;
2692 UserFieldPtr ufp;
2693 Int4 val;
2694
2695 const Uint4 num_bufs = sizeof(bufs)/sizeof(bufs[0]);
2696
2697 const static DbLinkInfoForStr str_dblink_infos[] = {
2698 /* Yes, 4 is missing for uBufIdx because that's
2699 handled by "Trace Assembly Archive" which is an
2700 int. */
2701 { eDbLinkStrOutputDest_Nothing, "Assembly", & FF_asn2gb_www_AS, 5 },
2702 { eDbLinkStrOutputDest_bioProjectIDP, "BioProject", & FF_asn2gb_www_BP, 0 },
2703 { eDbLinkStrOutputDest_Nothing, "BioSample", & FF_asn2gb_www_BS, 1 },
2704 { eDbLinkStrOutputDest_Nothing, "ProbeDB", NULL, 2 },
2705 { eDbLinkStrOutputDest_Nothing, "Sequence Read Archive", & FF_asn2gb_www_SRR, 3 }
2706 };
2707
2708 const Uint4 num_link_infos_for_str =
2709 sizeof(str_dblink_infos) /
2710 sizeof(str_dblink_infos[0]);
2711
2712 if (bioProjectIDP != NULL) {
2713 *bioProjectIDP = NULL;
2714 }
2715 if (dblinkP != NULL) {
2716 *dblinkP = NULL;
2717 }
2718 if (uop == NULL) return NULL;
2719
2720 for( i = 0; i < num_bufs; ++i ) {
2721 bufs[i] = NULL;
2722 }
2723
2724 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2725 oip = ufp->label;
2726 if (oip == NULL || oip->str == NULL) continue;
2727
2728 cpp = NULL;
2729 str = NULL;
2730 head = NULL;
2731 tail = NULL;
2732 if (ufp->choice == 1) {
2733 str = (CharPtr) ufp->data.ptrvalue;
2734 } else if (ufp->choice == 7) {
2735 cpp = (CharPtr PNTR) ufp->data.ptrvalue;
2736 if (cpp != NULL && ufp->num > 0) {
2737 str = cpp [0];
2738 }
2739 }
2740
2741 if (StringICmp (oip->str, "Trace Assembly Archive") == 0) {
2742 if (ufp->choice == 2) {
2743 val = (Int4) ufp->data.intvalue;
2744 if (val > 0) {
2745 sprintf (tmp, "Trace Assembly Archive: %ld", (long) val);
2746 bufs[4] = StringSave (tmp);
2747 AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
2748 }
2749 } else if (ufp->choice == 8) {
2750 ip = (Int4Ptr) ufp->data.ptrvalue;
2751 if (ufp->num > 0 && ip != NULL) {
2752 val = ip [0];
2753 if (val > 0) {
2754 sprintf (tmp, "Trace Assembly Archive: %ld", (long) val);
2755 ValNodeCopyStrEx (&head, &tail, 0, tmp);
2756 AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
2757 for (i = 1; i < ufp->num; i++) {
2758 val = ip [i];
2759 if (val > 0) {
2760 sprintf (tmp, "%ld", (long) val);
2761 ValNodeCopyStrEx (&head, &tail, 0, tmp);
2762 AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
2763 }
2764 }
2765 bufs[4] = ValNodeMergeStrsEx (head, ", ");
2766 ValNodeFreeData (head);
2767 }
2768 }
2769 }
2770 } else if ( StringDoesHaveText(str) || cpp != NULL ) {
2771
2772 /* this handles DBLink entries where User-field.data is "str" or "strs" */
2773
2774 /* check which DBLink this is (for ones that are strings) */
2775 for( i = 0; i < num_link_infos_for_str ; ++i ) {
2776 /* check if this is the DBLink name that matches */
2777 if( 0 != StringICmp(oip->str, str_dblink_infos[i].pchName) ) {
2778 continue;
2779 }
2780
2781 /* str, possibly with HTML */
2782 if( StringDoesHaveText(str) ) {
2783 frm [0] = '\0';
2784 if (ajp != NULL && GetWWW (ajp) &&
2785 str_dblink_infos[i].pWWWFormatFunc )
2786 {
2787 (*str_dblink_infos[i].pWWWFormatFunc) (ajp, frm, str);
2788 } else {
2789 StringCpy (frm, str);
2790 }
2791 sprintf (tmp, "%s", frm);
2792 ValNodeCopyStrEx (&head, &tail, 0, tmp);
2793 AddGbseqXref (&dbhead, &dbtail, str_dblink_infos[i].pchName, str, 0);
2794 }
2795 if (cpp != NULL && ufp->num > 1) {
2796 for (j = 1; j < ufp->num; j++) {
2797 str = cpp [j];
2798 if (StringDoesHaveText (str)) {
2799 tmp [0] = '\0';
2800 if (ajp != NULL && GetWWW (ajp) &&
2801 str_dblink_infos[i].pWWWFormatFunc )
2802 {
2803 (*str_dblink_infos[i].pWWWFormatFunc) (
2804 ajp, tmp, str);
2805 } else {
2806 StringCpy (tmp, str);
2807 }
2808 ValNodeCopyStrEx (&head, &tail, 0, tmp);
2809 AddGbseqXref (&dbhead, &dbtail, str_dblink_infos[i].pchName, str, 0);
2810 }
2811 }
2812 }
2813
2814 /* load output variable for some types */
2815 switch( str_dblink_infos[i].output_dest ) {
2816 case eDbLinkStrOutputDest_Nothing:
2817 /* nothing to do */
2818 break;
2819 case eDbLinkStrOutputDest_bioProjectIDP:
2820 if (cpp == NULL || ufp->num == 1) {
2821 if (bioProjectIDP != NULL) {
2822 *bioProjectIDP = str;
2823 }
2824 }
2825 break;
2826 }
2827
2828 /* write output buf */
2829 if( head != NULL ) {
2830 ASSERT( str_dblink_infos[i].uBufIdx < num_bufs );
2831 sprintf (tmp, "%s: ", str_dblink_infos[i].pchName);
2832 bufs[str_dblink_infos[i].uBufIdx] = ValNodeMergeStrsExEx (head, ", ", tmp, NULL);
2833 ValNodeFreeData (head);
2834 }
2835 }
2836 }
2837 }
2838
2839 if (dblinkP != NULL) {
2840 *dblinkP = dbhead;
2841 }
2842
2843 len = 0;
2844 for( i = 0; i < num_bufs ; ++i ) {
2845 len += StringLen(bufs[i]);
2846 }
2847 if( 0 == len ) {
2848 /* all bufs are empty */
2849 return NULL;
2850 }
2851
2852 str = (CharPtr) MemNew (sizeof (Char) * (len + (num_bufs * 4))); /* not sure why exactly a "4" is used */
2853 if (str == NULL) return NULL;
2854
2855 prefix = "";
2856
2857 for( i = 0; i < num_bufs ; ++i ) {
2858 if( StringDoesHaveText(bufs[i]) ) {
2859 StringCat (str, prefix);
2860 StringCat (str, bufs[i]);
2861 bufs[i] = MemFree(bufs[i]);
2862 prefix = "\n";
2863 }
2864 }
2865
2866 return str;
2867 }
2868
AddDblinkBlock(Asn2gbWorkPtr awp)2869 NLM_EXTERN void AddDblinkBlock (
2870 Asn2gbWorkPtr awp
2871 )
2872
2873 {
2874 IntAsn2gbJobPtr ajp;
2875 Asn2gbSectPtr asp;
2876 BaseBlockPtr bbp;
2877 CharPtr bioProjectID;
2878 BioseqPtr bsp;
2879 Char buf [32];
2880 UserFieldPtr curr;
2881 Uint4 dbitemID = 0;
2882 GBXrefPtr dblinkHead = NULL;
2883 UserObjectPtr dbuop = NULL;
2884 SeqMgrDescContext dcontext;
2885 Boolean first = TRUE;
2886 StringItemPtr ffstring;
2887 GBSeqPtr gbseq;
2888 Uint4 gpitemID = 0;
2889 UserObjectPtr gpuop = NULL;
2890 ValNodePtr head = NULL;
2891 ObjectIdPtr oip;
2892 Int4 parentID;
2893 CharPtr prefix;
2894 Int4 projectID;
2895 SeqDescrPtr sdp;
2896 CharPtr str;
2897 UserObjectPtr uop;
2898 Int4 val;
2899
2900 if (awp == NULL) return;
2901 ajp = awp->ajp;
2902 if (ajp == NULL) return;
2903 bsp = awp->bsp;
2904 if (bsp == NULL) return;
2905 asp = awp->asp;
2906 if (asp == NULL) return;
2907
2908 if (ISA_na (bsp->mol) && awp->format != GENBANK_FMT) return;
2909 if (ISA_aa (bsp->mol) && awp->format != GENPEPT_FMT) return;
2910
2911 /*
2912 if (! ISA_na (bsp->mol)) return;
2913 if (awp->format != GENBANK_FMT) return;
2914 */
2915
2916 if (ajp->gbseq) {
2917 gbseq = &asp->gbseq;
2918 } else {
2919 gbseq = NULL;
2920 }
2921
2922 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
2923 while (sdp != NULL) {
2924 uop = (UserObjectPtr) sdp->data.ptrvalue;
2925 if (uop != NULL) {
2926 oip = uop->type;
2927 if (oip != NULL && StringICmp (oip->str, "GenomeProjectsDB") == 0) {
2928 gpuop = uop;
2929 gpitemID = dcontext.itemID;
2930 }
2931 if (oip != NULL && StringICmp (oip->str, "DBLink") == 0) {
2932 dbuop = uop;
2933 dbitemID = dcontext.itemID;
2934 }
2935 }
2936 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
2937 }
2938 if (gpuop == NULL && dbuop == NULL) return;
2939
2940 ffstring = FFGetString (ajp);
2941 if ( ffstring == NULL ) return;
2942
2943 if (gpuop != NULL) {
2944 bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
2945 if (bbp == NULL) return;
2946
2947 bbp->entityID = awp->entityID;
2948 bbp->itemID = gpitemID;
2949 bbp->itemtype = OBJ_SEQDESC;
2950
2951 if (first) {
2952 FFStartPrint (ffstring, awp->format, 0, 12, "DBLINK", 12, 5, 5, "XX", TRUE);
2953 } else {
2954 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "XX", TRUE);
2955 }
2956 first = FALSE;
2957
2958 prefix = "Project: ";
2959 projectID = 0;
2960 parentID = 0;
2961 for (curr = gpuop->data; curr != NULL; curr = curr->next) {
2962 oip = curr->label;
2963 if (oip == NULL) continue;
2964 if (StringICmp (oip->str, "ProjectID") == 0) {
2965 if (curr->choice == 2) {
2966 val = (Int4) curr->data.intvalue;
2967 if (projectID > 0) {
2968 sprintf (buf, "%ld", (long) projectID);
2969 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2970 if (GetWWW (ajp)) {
2971 FF_asn2gb_www_projID (ffstring, buf);
2972 } else {
2973 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
2974 }
2975 /*
2976 FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
2977 */
2978 if (gbseq != NULL) {
2979 if (head == NULL) {
2980 sprintf (buf, "%ld", (long) projectID);
2981 } else {
2982 sprintf (buf, ", %ld", (long) projectID);
2983 }
2984 ValNodeCopyStr (&head, 0, buf);
2985 }
2986 prefix = ", ";
2987 parentID = 0;
2988 }
2989 projectID = val;
2990 }
2991 } else if (StringICmp (oip->str, "ParentID") == 0) {
2992 if (curr->choice == 2) {
2993 val = (Int4) curr->data.intvalue;
2994 parentID = val;
2995 }
2996 }
2997 }
2998 if (projectID > 0) {
2999 sprintf (buf, "%ld", (long) projectID);
3000 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3001 if (GetWWW (ajp)) {
3002 FF_asn2gb_www_projID (ffstring, buf);
3003 } else {
3004 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
3005 }
3006 /*
3007 FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
3008 */
3009 if (gbseq != NULL) {
3010 if (head == NULL) {
3011 sprintf (buf, "%ld", (long) projectID);
3012 } else {
3013 sprintf (buf, ", %ld", (long) projectID);
3014 }
3015 ValNodeCopyStr (&head, 0, buf);
3016 }
3017 }
3018
3019 bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
3020 FFRecycleString (ajp, ffstring);
3021 ffstring = FFGetString (ajp);
3022
3023 if (awp->afp != NULL) {
3024 DoImmediateFormat (awp->afp, bbp);
3025 }
3026 }
3027
3028 if (dbuop != NULL) {
3029 str = GetDBLinkString (ajp, dbuop, &bioProjectID, &dblinkHead);
3030 if (StringDoesHaveText (str)) {
3031 bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
3032 if (bbp == NULL) return;
3033
3034 bbp->entityID = awp->entityID;
3035 bbp->itemID = dbitemID;
3036 bbp->itemtype = OBJ_SEQDESC;
3037
3038 if (first) {
3039 FFStartPrint (ffstring, awp->format, 0, 12, "DBLINK", 12, 5, 5, "XX", TRUE);
3040 } else {
3041 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "XX", TRUE);
3042 }
3043 first = FALSE;
3044
3045 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
3046
3047 if (gbseq != NULL && StringDoesHaveText (bioProjectID)) {
3048 ValNodeCopyStr (&head, 0, bioProjectID);
3049 }
3050
3051 bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
3052 MemFree (str);
3053
3054 if (awp->afp != NULL) {
3055 DoImmediateFormat (awp->afp, bbp);
3056 }
3057 }
3058 }
3059
3060 FFRecycleString (ajp, ffstring);
3061
3062 if (gbseq != NULL) {
3063 if (head != NULL) {
3064 gbseq->project = MergeFFValNodeStrs (head);
3065 ValNodeFreeData (head);
3066 }
3067 }
3068
3069 if (dblinkHead != NULL) {
3070 if (gbseq != NULL) {
3071 gbseq->xrefs = dblinkHead;
3072 } else {
3073 AsnGenericUserSeqOfFree (dblinkHead, (AsnOptFreeFunc) GBXrefFree);
3074 }
3075 }
3076 }
3077
3078 /* only displaying PID in GenPept format */
3079
3080 /*
3081 static void AddPidBlock (Asn2gbWorkPtr awp)
3082
3083 {
3084 IntAsn2gbJobPtr ajp;
3085 BaseBlockPtr bbp;
3086 BioseqPtr bsp;
3087 BIG_ID gi = -1;
3088 SeqIdPtr sip;
3089 Char version [64];
3090 StringItemPtr ffstring;
3091
3092 if (awp == NULL) return;
3093 ajp = awp->ajp;
3094 if (ajp == NULL) return;
3095 bsp = awp->bsp;
3096 if (bsp == NULL) return;
3097
3098 for (sip = bsp->id; sip != NULL; sip = sip->next) {
3099 switch (sip->choice) {
3100 case SEQID_GI :
3101 gi = (BIG_ID) sip->data.intvalue;
3102 break;
3103 default :
3104 break;
3105 }
3106 }
3107
3108 if (gi < 1) return;
3109
3110 bbp = Asn2gbAddBlock (awp, PID_BLOCK, sizeof (BaseBlock));
3111 if (bbp == NULL) return;
3112
3113 ffstring = FFGetString(ajp);
3114 if ( ffstring == NULL ) return;
3115
3116 FFStartPrint (ffstring, awp->format, 0, 12, "PID", 12, 5, 5, NULL, TRUE);
3117
3118 sprintf (version, "g%ld", (long) gi);
3119 FFAddOneString (ffstring, version, FALSE, FALSE, TILDE_TO_SPACES);
3120
3121 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, NULL);
3122 FFRecycleString(ajp, ffstring);
3123 }
3124 */
3125
/*
 * Table with one Uint1 value per Seq-id choice, indexed by the choice
 * number given in each entry's comment.
 * NOTE(review): the name suggests this is a ranking used to pick which
 * Seq-id is shown on the DBSOURCE line (presumably lower value = more
 * preferred) - confirm at the point of use.
 */
static Uint1 dbsource_fasta_order [NUM_SEQID] = {
  33, /* 0 = not set */
  20, /* 1 = local Object-id */
  15, /* 2 = gibbsq */
  16, /* 3 = gibbmt */
  30, /* 4 = giim Giimport-id */
  10, /* 5 = genbank */
  10, /* 6 = embl */
  10, /* 7 = pir */
  10, /* 8 = swissprot */
  15, /* 9 = patent */
  10, /* 10 = other = refseq */
  20, /* 11 = general Dbtag */
  31, /* 12 = gi */
  10, /* 13 = ddbj */
  10, /* 14 = prf */
  12, /* 15 = pdb */
  10, /* 16 = tpg */
  10, /* 17 = tpe */
  10, /* 18 = tpd */
  15, /* 19 = gpp */
  15 /* 20 = nat */
};
3149
/*
 * AddToUniqueSipList
 *
 * Appends sip to the ValNode list at *list unless an entry that
 * SeqIdMatch reports as matching is already present.  The list stores
 * the pointer itself, not a copy.  Does nothing when list or sip is NULL.
 */
static void AddToUniqueSipList (
  ValNodePtr PNTR list,
  SeqIdPtr sip
)

{
  ValNodePtr  node;
  Boolean     seen = FALSE;

  if (list == NULL || sip == NULL) return;

  node = *list;
  while (node != NULL && ! seen) {
    if (SeqIdMatch (sip, (SeqIdPtr) node->data.ptrvalue)) {
      seen = TRUE;
    }
    node = node->next;
  }

  if (! seen) {
    ValNodeAddPointer (list, 0, (Pointer) sip);
  }
}
3164
WriteDbsourceID(SeqIdPtr sip,CharPtr str,BoolPtr is_na_p)3165 static Boolean WriteDbsourceID (
3166 SeqIdPtr sip,
3167 CharPtr str,
3168 BoolPtr is_na_p
3169 )
3170
3171 {
3172 Boolean check_na = FALSE;
3173 DbtagPtr db;
3174 CharPtr dt;
3175 BIG_ID gi;
3176 ObjectIdPtr oip;
3177 CharPtr pfx;
3178 PDBSeqIdPtr psip = NULL;
3179 CharPtr prefix;
3180 Boolean rsult = FALSE;
3181 CharPtr sfx;
3182 CharPtr suffix;
3183 Char tmp [32];
3184 TextSeqIdPtr tsip = NULL;
3185
3186 if (is_na_p != NULL) {
3187 *is_na_p = FALSE;
3188 }
3189 if (sip == NULL || str == NULL) return FALSE;
3190 *str = '\0';
3191 switch (sip->choice) {
3192 case SEQID_LOCAL :
3193 oip = (ObjectIdPtr) sip->data.ptrvalue;
3194 if (oip == NULL) return FALSE;
3195 if (! StringHasNoText (oip->str)) {
3196 StringCat (str, oip->str);
3197 return TRUE;
3198 } else if (oip->id > 0) {
3199 sprintf (tmp, "%ld", (long) oip->id);
3200 StringCat (str, tmp);
3201 return TRUE;
3202 }
3203 return FALSE;
3204 case SEQID_GI :
3205 gi = (BIG_ID) sip->data.intvalue;
3206 if (gi == 0) return FALSE;
3207 sprintf (tmp, "gi: %lld", (long long) gi);
3208 StringCat (str, tmp);
3209 return TRUE;
3210 case SEQID_GENERAL :
3211 db = (DbtagPtr) sip->data.ptrvalue;
3212 if (db == NULL) return FALSE;
3213 /* !!! still need to implement this !!! */
3214 return FALSE;
3215 case SEQID_GENBANK :
3216 case SEQID_EMBL :
3217 case SEQID_DDBJ :
3218 case SEQID_OTHER :
3219 case SEQID_TPG :
3220 case SEQID_TPE :
3221 case SEQID_TPD :
3222 case SEQID_GPIPE :
3223 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3224 if (tsip == NULL) return FALSE;
3225 check_na = TRUE;
3226 break;
3227 case SEQID_PIR :
3228 case SEQID_SWISSPROT :
3229 case SEQID_PRF :
3230 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3231 if (tsip == NULL) return FALSE;
3232 break;
3233 case SEQID_PDB :
3234 psip = (PDBSeqIdPtr) sip->data.ptrvalue;
3235 if (psip == NULL) return FALSE;
3236 break;
3237 default :
3238 break;
3239 }
3240 prefix = " ";
3241 suffix = NULL;
3242 switch (sip->choice) {
3243 case SEQID_EMBL :
3244 StringCat (str, "embl ");
3245 suffix = ",";
3246 break;
3247 case SEQID_OTHER :
3248 StringCat (str, "REFSEQ: ");
3249 break;
3250 case SEQID_SWISSPROT :
3251 StringCat (str, "UniProtKB: ");
3252 suffix = ",";
3253 break;
3254 case SEQID_PIR :
3255 StringCat (str, "UniProtKB: ");
3256 break;
3257 case SEQID_PRF :
3258 StringCat (str, "prf: ");
3259 break;
3260 case SEQID_PDB :
3261 StringCat (str, "pdb: ");
3262 suffix = ",";
3263 break;
3264 default :
3265 break;
3266 }
3267 pfx = NULL;
3268 sfx = NULL;
3269 if (tsip != NULL) {
3270 if (! StringHasNoText (tsip->name)) {
3271 StringCat (str, sfx);
3272 StringCat (str, pfx);
3273 StringCat (str, "locus ");
3274 StringCat (str, tsip->name);
3275 sfx = suffix;
3276 pfx = prefix;
3277 rsult = TRUE;
3278 }
3279 if (! StringHasNoText (tsip->accession)) {
3280 StringCat (str, sfx);
3281 StringCat (str, pfx);
3282 StringCat (str, "accession ");
3283 StringCat (str, tsip->accession);
3284 sfx = suffix;
3285 pfx = prefix;
3286 rsult = TRUE;
3287 if (check_na && is_na_p != NULL) {
3288 *is_na_p = IS_ntdb_accession (tsip->accession);
3289 }
3290 }
3291 if (tsip->version > 0 && sip->choice != SEQID_SWISSPROT) {
3292 sprintf (tmp, ".%d", (int) tsip->version);
3293 StringCat (str, tmp);
3294 sfx = suffix;
3295 pfx = prefix;
3296 }
3297 if (! StringHasNoText (tsip->release) && sip->choice != SEQID_SWISSPROT) {
3298 StringCat (str, pfx);
3299 StringCat (str, "release ");
3300 StringCat (str, tsip->release);
3301 sfx = suffix;
3302 pfx = prefix;
3303 }
3304 if (sip->choice == SEQID_SWISSPROT || sip->choice == SEQID_PIR || sip->choice == SEQID_PRF) {
3305 StringCat (str, ";");
3306 }
3307 return rsult;
3308 }
3309 if (psip != NULL) {
3310 if (! StringHasNoText (psip->mol)) {
3311 StringCat (str, "molecule ");
3312 StringCat (str, psip->mol);
3313 sfx = suffix;
3314 pfx = prefix;
3315 rsult = TRUE;
3316 }
3317 if (psip->chain > 0) {
3318 StringCat (str, sfx);
3319 StringCat (str, pfx);
3320 sprintf (tmp, "chain %d", (int) psip->chain);
3321 StringCat (str, tmp);
3322 sfx = suffix;
3323 pfx = prefix;
3324 rsult = TRUE;
3325 }
3326 if (psip->rel != NULL) {
3327 StringCat (str, sfx);
3328 StringCat (str, pfx);
3329 StringCat (str, "release ");
3330 dt = asn2gb_PrintDate (psip->rel);
3331 StringCat (str, dt);
3332 MemFree (dt);
3333 sfx = suffix;
3334 pfx = prefix;
3335 rsult = TRUE;
3336 }
3337 StringCat (str, ";");
3338 return rsult;
3339 }
3340 return rsult;
3341 }
3342
3343
AddSPBlock(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,BioseqPtr bsp)3344 static void AddSPBlock (
3345 IntAsn2gbJobPtr ajp,
3346 StringItemPtr ffstring,
3347 BioseqPtr bsp
3348 )
3349
3350 {
3351 CharPtr acc;
3352 Char buf [64];
3353 DbtagPtr db;
3354 SeqMgrDescContext dcontext;
3355 Boolean first;
3356 BIG_ID gi;
3357 Boolean has_link;
3358 Char id [42];
3359 ObjectIdPtr oip;
3360 CharPtr ptr;
3361 SeqDescrPtr sdp;
3362 SeqIdPtr sid;
3363 SeqIdPtr sif;
3364 SeqIdPtr sip;
3365 SPBlockPtr spb;
3366 CharPtr string;
3367 ValNodePtr vnp;
3368 CharPtr str;
3369 Char numbuf[40];
3370
3371 if (bsp == NULL) return;
3372 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_sp, &dcontext);
3373 if (sdp == NULL) return;
3374 spb = (SPBlockPtr) sdp->data.ptrvalue;
3375 if (spb == NULL) return;
3376
3377 if (spb->_class == 1) {
3378 FFAddOneString (ffstring, "class: standard.", FALSE, FALSE, TILDE_IGNORE);
3379 FFAddNewLine(ffstring);
3380 } else if (spb->_class == 2) {
3381 FFAddOneString (ffstring, "class: preliminary.", FALSE, FALSE, TILDE_IGNORE);
3382 FFAddNewLine(ffstring);
3383 }
3384
3385 if (spb->extra_acc) {
3386 FFAddOneString (ffstring, "extra accessions:", FALSE, FALSE, TILDE_IGNORE);
3387 for (vnp = spb->extra_acc; vnp != NULL; vnp = vnp->next) {
3388 FFAddOneString (ffstring, (CharPtr) vnp->data.ptrvalue, FALSE, FALSE, TILDE_IGNORE);
3389 if (vnp->next != NULL) {
3390 FFAddOneChar (ffstring, ',', FALSE );
3391 }
3392 }
3393 FFAddNewLine(ffstring);
3394 }
3395
3396 if (spb->imeth) {
3397 FFAddOneString (ffstring, "seq starts with Met", FALSE, FALSE, TILDE_IGNORE);
3398 }
3399
3400 if (spb->plasnm != NULL) {
3401 FFAddOneString (ffstring, "plasmid:", FALSE, FALSE, TILDE_IGNORE);
3402 for (vnp = spb->plasnm; vnp != NULL; vnp = vnp->next) {
3403 FFAddOneString (ffstring, (CharPtr) vnp->data.ptrvalue, FALSE, FALSE, TILDE_IGNORE);
3404 FFAddOneChar (ffstring, ',', FALSE );
3405 }
3406 }
3407
3408 if (spb->created) {
3409 string = PrintDate (spb->created);
3410 FFAddOneString (ffstring, "created: ", FALSE, FALSE, TILDE_IGNORE);
3411 FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
3412
3413 MemFree (string);
3414 }
3415
3416 if (spb->sequpd) {
3417 string = PrintDate (spb->sequpd);
3418 FFAddOneString (ffstring, "sequence updated: ", FALSE, FALSE, TILDE_IGNORE);
3419 FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
3420 MemFree (string);
3421 }
3422
3423 if (spb->annotupd) {
3424 string = PrintDate (spb->annotupd);
3425 FFAddOneString (ffstring, "annotation updated: ", FALSE, FALSE, TILDE_IGNORE);
3426 FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
3427 MemFree (string);
3428 }
3429
3430 if (spb->seqref) {
3431 FFAddOneString (ffstring, "xrefs: ", FALSE, FALSE, TILDE_IGNORE);
3432 first = TRUE;
3433 for (sid = spb->seqref; sid != NULL; sid = sid->next) {
3434 acc = NULL;
3435 has_link = FALSE;
3436 if (first == FALSE) {
3437 FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3438 }
3439 first = FALSE;
3440 sip = sid;
3441 sif = NULL;
3442 id [0] = '\0';
3443 if (sip->choice == SEQID_GI) {
3444 gi = (BIG_ID) sid->data.intvalue;
3445 if (! GetAccnVerFromServer (gi, id)) {
3446 sif = GetSeqIdForGI (gi);
3447 if (sif != NULL) {
3448 sip = sif;
3449 }
3450 }
3451 }
3452 if (id [0] == '\0') {
3453 SeqIdWrite (sip, id, PRINTID_TEXTID_ACC_VER, sizeof (id) - 1);
3454 }
3455 if (sid->choice == SEQID_GI) {
3456 has_link = TRUE;
3457 }
3458 if (StringDoesHaveText (id)) {
3459 acc = id;
3460 }
3461 if (acc != NULL) {
3462 if ( GetWWW(ajp) && has_link ) {
3463 sprintf(numbuf, "%ld", (long) sid->data.intvalue);
3464 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3465 StringNCpy_0 (buf, acc, sizeof (buf));
3466 ptr = StringChr (buf, '.');
3467 if (ptr != NULL) {
3468 *ptr = '\0';
3469 }
3470 if (IS_ntdb_accession (buf)) {
3471 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
3472 } else {
3473 FF_Add_NCBI_Base_URL (ffstring, link_seqp);
3474 }
3475 FFAddTextToString(ffstring, /* "val=" */ NULL, numbuf, "\">", FALSE, FALSE, TILDE_IGNORE);
3476 FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
3477 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3478 } else {
3479 FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
3480 }
3481 }
3482 if (sif != NULL) {
3483 SeqIdFree (sif);
3484 }
3485 }
3486 }
3487
3488 first = TRUE;
3489 for (vnp = spb->dbref; vnp != NULL; vnp = vnp->next) {
3490 db = (DbtagPtr) vnp->data.ptrvalue;
3491 if (db == NULL) continue;
3492 oip = db->tag;
3493 if (oip == NULL) continue;
3494 has_link = FALSE;
3495 if (first) {
3496 FFAddNewLine(ffstring);
3497 FFAddOneString (ffstring, "xrefs (non-sequence databases): ", FALSE, FALSE, TILDE_IGNORE);
3498 first = FALSE;
3499 } else {
3500 FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3501 }
3502
3503 str = NULL;
3504 if ( oip->str != NULL ) {
3505 str = oip->str;
3506 if (StringCmp (db->db, "GO") == 0 && StringNCmp (str, "GO:", 3) == 0) {
3507 str += 3;
3508 } else if (StringNCmp (str, "MGI:", 4) == 0) {
3509 str += 4;
3510 } else if (StringCmp (db->db, "HGNC") == 0 && StringNCmp (str, "HGNC:", 5) == 0) {
3511 str += 5;
3512 } else if (StringCmp (db->db, "VGNC") == 0 && StringNCmp (str, "VGNC:", 5) == 0) {
3513 str += 5;
3514 } else if (StringCmp (db->db, "DIP") == 0 && StringNCmp (str, "DIP:", 4) == 0) {
3515 str += 4;
3516 }
3517 } else if ( oip->id > 0 ) {
3518 sprintf (numbuf, "%d", oip->id);
3519 str = numbuf;
3520 }
3521
3522 FF_www_db_xref (ajp, ffstring, db->db, str, bsp);
3523
3524 /*
3525 if (StringCmp (db->db, "MGD") == 0 || StringCmp (db->db, "MGI") == 0) {
3526 FFAddOneString (ffstring, "MGI", FALSE, FALSE, TILDE_IGNORE);
3527 } else {
3528 FFAddOneString (ffstring, db->db, FALSE, FALSE, TILDE_IGNORE);
3529 }
3530 if (StringCmp (db->db, "MIM") == 0) {
3531 has_link = TRUE;
3532 }
3533
3534 str = NULL;
3535 if ( oip->str != NULL ) {
3536 str = oip->str;
3537 if (StringNCmp (str, "GO:", 3) == 0) {
3538 str += 3;
3539 } else if (StringNCmp (str, "MGI:", 4) == 0) {
3540 str += 4;
3541 } else if (StringNCmp (str, "HGNC:", 5) == 0) {
3542 str += 5;
3543 } else if (StringNCmp (str, "VGNC:", 5) == 0) {
3544 str += 5;
3545 }
3546 } else if ( oip->id > 0 ) {
3547 sprintf(numbuf, "%d", oip->id);
3548 str = numbuf;
3549 }
3550
3551 if ( !StringHasNoText(str) ) {
3552 FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
3553 if ( GetWWW(ajp) && has_link) {
3554 FFAddOneChar (ffstring, ' ', FALSE);
3555 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3556 FF_Add_NCBI_Base_URL (ffstring, link_omim);
3557 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3558 FFAddTextToString(ffstring, "\">", str, "</a>", FALSE, FALSE, TILDE_IGNORE);
3559 } else {
3560 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3561 }
3562 }
3563 */
3564 }
3565 }
3566
/*
*  AddPIRBlock
*
*  Writes the first Seq_descr_pir descriptor found on bsp into ffstring.
*  Returns without writing anything when bsp is NULL, when no such
*  descriptor is found, or when the descriptor has no PirBlock attached.
*  The ajp argument is not referenced.
*/
static void AddPIRBlock (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp
)

{
  /* labels for the text fields that follow "host:", in output order */
  static CharPtr  labels [] = {
    "source: ", "summary: ", "genetic: ", "includes: ",
    "placement: ", "superfamily: ", "xref: ", "PIR dates: ", NULL
  };

  SeqMgrDescContext  context;
  SeqDescrPtr        desc;
  Char               idbuf [41];
  Int2               idx;
  CharPtr            label;
  Boolean            need_comma;
  PirBlockPtr        pir;
  SeqIdPtr           ref;
  CharPtr            separator = NULL;
  CharPtr            values [8];

  if (bsp == NULL) return;
  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pir, &context);
  if (desc == NULL) return;
  pir = (PirBlockPtr) desc->data.ptrvalue;
  if (pir == NULL) return;

  /* host is the only field emitted without a leading separator and newline */
  if (pir->host != NULL) {
    FFAddTextToString (ffstring, "host:", pir->host, "\n", FALSE, TRUE, TILDE_IGNORE);
    separator = ";";
  }

  /* values [] parallels labels [] */
  values [0] = pir->source;
  values [1] = pir->summary;
  values [2] = pir->genetic;
  values [3] = pir->includes;
  values [4] = pir->placement;
  values [5] = pir->superfamily;
  values [6] = pir->cross_reference;
  values [7] = pir->date;

  for (idx = 0; labels [idx] != NULL; idx++) {
    if (values [idx] == NULL) continue;
    FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddTextToString (ffstring, labels [idx], values [idx], "\n", FALSE, TRUE, TILDE_IGNORE);
    separator = ";";
  }

  if (pir->had_punct) {
    FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddOneString (ffstring, "punctuation in sequence", FALSE, FALSE, TILDE_IGNORE);
    separator = ";";
  }

  if (pir->seqref) {
    FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddOneString (ffstring, "xrefs: ", FALSE, FALSE, TILDE_IGNORE);

    need_comma = FALSE;
    for (ref = pir->seqref; ref != NULL; ref = ref->next) {
      /* the comma is written before the id type is examined, so an id of
         an unlisted type still contributes a separator */
      if (need_comma) {
        FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
      }
      need_comma = TRUE;

      SeqIdWrite (ref, idbuf, PRINTID_TEXTID_ACC_VER, sizeof (idbuf) - 1);

      switch (ref->choice) {
        case SEQID_GENBANK :
          label = "genbank ";
          break;
        case SEQID_EMBL :
          label = "embl ";
          break;
        case SEQID_PIR :
        case SEQID_SWISSPROT :
          label = "UniProtKB ";
          break;
        case SEQID_DDBJ :
          label = "ddbj ";
          break;
        case SEQID_PRF :
          label = "prf ";
          break;
        case SEQID_GI :
          label = "gi: ";
          break;
        default :
          label = NULL;
          break;
      }
      if (label == NULL) continue;

      FFAddOneString (ffstring, label, FALSE, FALSE, TILDE_IGNORE);
      FFAddOneString (ffstring, idbuf, FALSE, FALSE, TILDE_IGNORE);
    }
  }

  FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
}
3705
/*
*  AddPRFBlock
*
*  Writes the extra_src fields of the first Seq_descr_prf descriptor found
*  on bsp into ffstring, followed by a period.  Nothing is written when bsp
*  or ffstring is NULL, when no descriptor or PrfBlock is found, or when the
*  PrfBlock has no extra_src.  The ajp argument is not referenced.
*/
static void AddPRFBlock (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp
)

{
  /* field labels in output order; NULL terminates the table */
  static CharPtr  labels [] = {
    "host:", "part: ", "state: ", "strain: ", "taxonomy: ", NULL
  };

  PrfBlockPtr        block;
  SeqMgrDescContext  context;
  SeqDescrPtr        desc;
  Int2               idx;
  CharPtr            separator = NULL;
  PrfExtSrcPtr       src;
  CharPtr            values [5];

  if (bsp == NULL) return;
  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_prf, &context);
  if (desc == NULL) return;
  block = (PrfBlockPtr) desc->data.ptrvalue;
  if (block == NULL) return;
  if (ffstring == NULL) return;

  src = block->extra_src;
  if (src == NULL) return;

  /* values [] parallels labels [] */
  values [0] = src->host;
  values [1] = src->part;
  values [2] = src->state;
  values [3] = src->strain;
  values [4] = src->taxon;

  for (idx = 0; labels [idx] != NULL; idx++) {
    if (values [idx] == NULL) continue;
    /* the host field (index 0) is never preceded by a separator call */
    if (idx > 0) {
      FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
    }
    FFAddTextToString (ffstring, labels [idx], values [idx], NULL, FALSE, TRUE, TILDE_IGNORE);
    separator = ";\n";
  }

  FFAddOneChar (ffstring, '.', FALSE);
}
3758
/*
*  AddPDBBlock
*
*  Writes the first Seq_descr_pdb descriptor found on bsp into ffstring:
*  deposition date, class, source list, experimental method, replaced ids
*  and replacement date, each present field on its own line, ending with a
*  period.  When GetWWW (ajp) is true, a source entry of the form
*  "Mmdb_id:" followed by optional spaces and digits only is written as a
*  hyperlink built from link_mmdb.
*/
static void AddPDBBlock (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp
)

{
  SeqMgrDescContext  context;
  CharPtr            datestr;
  SeqDescrPtr        desc;
  CharPtr            digits;
  Boolean            linked;
  ValNodePtr         node;
  PdbBlockPtr        pdb;
  PdbRepPtr          rep;
  CharPtr            separator = NULL;
  CharPtr            text;

  if (bsp == NULL) return;
  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pdb, &context);
  if (desc == NULL) return;
  pdb = (PdbBlockPtr) desc->data.ptrvalue;
  if (pdb == NULL) return;

  if (pdb->deposition != NULL) {
    datestr = asn2gb_PrintDate (pdb->deposition);
    FFAddTextToString (ffstring, "deposition: ", datestr, NULL, FALSE, TRUE, TILDE_IGNORE);
    MemFree (datestr);
    separator = ";";
  }

  if (pdb->pdbclass != NULL) {
    FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddTextToString (ffstring, "class: ", pdb->pdbclass, NULL, FALSE, TRUE, TILDE_IGNORE);
    separator = ";";
  }

  if (pdb->source != NULL) {
    FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddOneString (ffstring, "source: ", FALSE, TRUE, TILDE_IGNORE);

    /* within the list the separator becomes ", " after the first entry */
    separator = NULL;
    for (node = pdb->source; node != NULL; node = node->next) {
      text = (CharPtr) node->data.ptrvalue;
      if (StringHasNoText (text)) continue;

      linked = FALSE;
      digits = NULL;
      if (GetWWW (ajp) && StringNICmp (text, "Mmdb_id:", 8) == 0) {
        /* skip blanks between the tag and the identifier */
        digits = text + 8;
        while (*digits == ' ') {
          digits++;
        }
        if (StringIsAllDigits (digits)) {
          linked = TRUE;
        }
      }

      if (linked) {
        FFAddTextToString (ffstring, separator, "Mmdb_id:", NULL, FALSE, TRUE, TILDE_IGNORE);
        FFAddTextToString (ffstring, NULL, " ", NULL, FALSE, TRUE, TILDE_IGNORE);
        FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, link_mmdb, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, digits, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, digits, FALSE, FALSE, TILDE_IGNORE);
        FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
      } else {
        FFAddTextToString (ffstring, separator, text, NULL, FALSE, TRUE, TILDE_IGNORE);
      }
      separator = ", ";
    }
    separator = ";";
  }

  if (pdb->exp_method != NULL) {
    FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
    FFAddNewLine (ffstring);
    FFAddTextToString (ffstring, "Exp. method: ", pdb->exp_method, NULL, FALSE, TRUE, TILDE_IGNORE);
    separator = ";";
  }

  rep = pdb->replace;
  if (rep != NULL) {
    if (rep->ids != NULL) {
      FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
      FFAddNewLine (ffstring);
      FFAddOneString (ffstring, "ids replaced: ", FALSE, TRUE, TILDE_IGNORE);

      separator = NULL;
      for (node = rep->ids; node != NULL; node = node->next) {
        text = (CharPtr) node->data.ptrvalue;
        if (StringHasNoText (text)) continue;
        FFAddTextToString (ffstring, separator, text, NULL, FALSE, TRUE, TILDE_IGNORE);
        separator = ", ";
      }
      separator = ";";
    }

    if (rep->date != NULL) {
      FFAddOneString (ffstring, separator, FALSE, FALSE, TILDE_IGNORE);
      FFAddNewLine (ffstring);

      datestr = asn2gb_PrintDate (rep->date);
      FFAddTextToString (ffstring, "replacement date: ", datestr, NULL, FALSE, TRUE, TILDE_IGNORE);
      MemFree (datestr);
      separator = ";";
    }
  }

  FFAddOneChar (ffstring, '.', FALSE);
}
3868
/*
*  TxtSave
*
*  Returns a newly allocated, NUL-terminated copy of the first len bytes of
*  text.  Returns NULL when text is NULL, when len is 0, or when the
*  allocation fails.  Exactly len bytes are copied, so text must provide at
*  least that many.  The visible caller releases the result with MemFree.
*/
static CharPtr TxtSave (CharPtr text, size_t len)

{
  CharPtr str = NULL;

  if ((text == NULL) || (len == 0))
    return str;

  str = (CharPtr) MemNew((size_t)(len + 1));
  if (str == NULL)
    return NULL;     /* do not copy into a failed allocation */

  MemCopy(str, text, (size_t)len);
  str [len] = '\0';  /* terminate explicitly instead of relying on the allocator */

  return (str);
}
3882
/*
*  FF_www_dbsource
*
*  Appends the DBSOURCE text in str to ffstring.
*
*  Returns FALSE (writing nothing) when sip is NULL, TRUE otherwise.
*
*  When GetWWW (ajp) is false, or sip->choice is not one of the id types
*  handled below, str is passed to AddStringWithTildes unchanged.
*  Otherwise, if str contains the word "accession", everything after that
*  word (and any following blanks) up to the end of str - excluding one
*  trailing ';' - is wrapped in an <a href> element; the ';' is re-added
*  after the link.  For SWISSPROT the href ends with that same text; for the
*  other types the href ends with the gi from GetGIForSeqId when it is
*  positive, else with the text.  If "accession" does not occur, str is
*  written as is.  In both of these last two cases ", " is written first
*  when first is FALSE.
*/
static Boolean FF_www_dbsource (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  CharPtr str,
  Boolean first,
  SeqIdPtr sip,
  Boolean is_na
)

{
  CharPtr  acc_end;
  CharPtr  acc_start;
  CharPtr  cursor;
  BIG_ID   gi = 0;
  Char     gibuf [32];
  CharPtr  keyword;
  CharPtr  lead;
  CharPtr  link = NULL;
  Uint1    which;

  if (sip == NULL) return FALSE;
  which = sip->choice;

  if (! GetWWW (ajp)) {
    AddStringWithTildes (ffstring, str);
    return TRUE;
  }

  /* choose the base link for this id type; unlisted types get plain text */
  switch (which) {
    case SEQID_SWISSPROT :
      link = link_sp;
      break;
    case SEQID_PIR :
    case SEQID_PDB :
    case SEQID_PRF :
      link = link_seqp;
      break;
    case SEQID_EMBL :
    case SEQID_GENBANK :
    case SEQID_DDBJ :
    case SEQID_GIBBSQ :
    case SEQID_GIBBMT :
    case SEQID_GI :
    case SEQID_GIIM :
    case SEQID_OTHER :
    case SEQID_TPG :
    case SEQID_TPE :
    case SEQID_TPD :
    case SEQID_GPIPE :
      if (is_na) {
        link = link_seqn;
      } else {
        link = link_seqp;
      }
      break;
    default :
      AddStringWithTildes (ffstring, str);
      return TRUE;
  }

  keyword = StringStr (str, "accession");
  if (keyword == NULL) {
    if (! first) {
      FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
    }
    FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
    return TRUE;
  }

  /* acc_start: first character after "accession" and any blanks */
  acc_start = keyword + 9;
  while (*acc_start == ' ') {
    acc_start++;
  }

  if (! first) {
    FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
  }

  /* text preceding the keyword */
  lead = TxtSave (str, (size_t) (keyword - str));
  FFAddOneString (ffstring, lead, FALSE, FALSE, TILDE_IGNORE);
  MemFree (lead);

  /* the keyword itself plus the blanks that follow it */
  for (cursor = keyword; cursor != acc_start; cursor++) {
    FFAddOneChar (ffstring, *cursor, FALSE);
  }

  /* acc_end: one past the linked text; stays on a trailing ';' if present */
  acc_end = acc_start + StringLen (acc_start) - 1;
  if (*acc_end != ';') {
    acc_end++;
  }

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (ffstring, link);
  if (which != SEQID_SWISSPROT) {
    gi = GetGIForSeqId (sip);
  }
  if (gi > 0) {
    sprintf (gibuf, "%ld", (long) gi);
    FFAddOneString (ffstring, gibuf, FALSE, FALSE, TILDE_IGNORE);
  } else {
    for (cursor = acc_start; cursor != acc_end; cursor++) {
      FFAddOneChar (ffstring, *cursor, FALSE);
    }
  }
  FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);

  for (cursor = acc_start; cursor != acc_end; cursor++) {
    FFAddOneChar (ffstring, *cursor, FALSE);
  }
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);

  if (*acc_end == ';') {
    FFAddOneChar (ffstring, ';', FALSE);
  }

  return TRUE;
}
3987
AddDbsourceBlock(Asn2gbWorkPtr awp)3988 NLM_EXTERN void AddDbsourceBlock (
3989 Asn2gbWorkPtr awp
3990 )
3991
3992 {
3993 IntAsn2gbJobPtr ajp;
3994 Asn2gbSectPtr asp;
3995 BaseBlockPtr bbp;
3996 BioseqPtr bsp;
3997 Char buf [256];
3998 SeqFeatPtr cds;
3999 DbtagPtr db;
4000 GBSeqPtr gbseq;
4001 SeqIdPtr id;
4002 Boolean is_na;
4003 ValNodePtr list = NULL;
4004 BioseqPtr nuc;
4005 SeqEntryPtr sep;
4006 SeqIdPtr sip;
4007 SeqLocPtr slp;
4008 CharPtr str;
4009 TextSeqIdPtr tsip;
4010 Boolean unknown = TRUE;
4011 ValNodePtr vnp;
4012 StringItemPtr ffstring;
4013
4014 if (awp == NULL) return;
4015 ajp = awp->ajp;
4016 if (ajp == NULL) return;
4017 asp = awp->asp;
4018 if (asp == NULL) return;
4019 bsp = awp->bsp;
4020 if (bsp == NULL) return;
4021
4022 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4023 if (sip->choice != SEQID_OTHER) continue;
4024 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
4025 if (tsip == NULL) continue;
4026 if (StringNCmp (tsip->accession, "WP_", 3) == 0) return;
4027 }
4028
4029 bbp = Asn2gbAddBlock (awp, DBSOURCE_BLOCK, sizeof (BaseBlock));
4030 if (bbp == NULL) return;
4031
4032 bbp->entityID = awp->entityID;
4033
4034 ffstring = FFGetString(ajp);
4035 if ( ffstring == NULL ) return;
4036
4037 FFStartPrint (ffstring, awp->format, 0, 12, "DBSOURCE", 12, 5, 5, NULL, TRUE);
4038
4039 sip = SeqIdSelect (bsp->id, dbsource_fasta_order, NUM_SEQID);
4040
4041 if (sip != NULL) {
4042
4043 switch (sip->choice) {
4044 case SEQID_PIR :
4045 case SEQID_SWISSPROT :
4046 case SEQID_PRF :
4047 case SEQID_PDB :
4048 if (WriteDbsourceID (sip, buf, &is_na)) {
4049 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
4050 FFAddNewLine(ffstring);
4051 unknown = FALSE;
4052 }
4053 break;
4054 case SEQID_GENERAL :
4055 db = (DbtagPtr) sip->data.ptrvalue;
4056 if (db == NULL) {
4057 break;
4058 }
4059 if (StringNCmp (db->db, "PIDe", 4) != 0 &&
4060 StringNCmp (db->db, "PIDd", 4) != 0 &&
4061 StringNCmp (db->db, "PID", 3) != 0) {
4062 break;
4063 }
4064 /* if (ChoicePID) found, continue on to next set of cases */
4065 case SEQID_EMBL :
4066 case SEQID_GENBANK :
4067 case SEQID_DDBJ :
4068 case SEQID_GIBBSQ :
4069 case SEQID_GIBBMT :
4070 case SEQID_OTHER :
4071 case SEQID_TPG :
4072 case SEQID_TPE :
4073 case SEQID_TPD :
4074 case SEQID_GPIPE :
4075 case SEQID_GI :
4076 case SEQID_GIIM :
4077 cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
4078 if (cds == NULL) {
4079 /* now may also be protein product of mature peptide feature */
4080 cds = SeqMgrGetPROTgivenProduct (bsp, NULL);
4081 }
4082 if (cds != NULL) {
4083 nuc = BioseqFindFromSeqLoc (cds->location);
4084 if (nuc != NULL) {
4085 slp = SeqLocFindNext (cds->location, NULL);
4086 while (slp != NULL) {
4087 sip = SeqLocId (slp);
4088 AddToUniqueSipList (&list, sip);
4089 slp = SeqLocFindNext (cds->location, slp);
4090 }
4091 for (vnp = list; vnp != NULL; vnp = vnp->next) {
4092 id = (SeqIdPtr) vnp->data.ptrvalue;
4093 nuc = BioseqFindCore (id);
4094 sip = NULL;
4095 if (nuc != NULL) {
4096 sip = SeqIdSelect (nuc->id, dbsource_fasta_order, NUM_SEQID);
4097 } else if (id != NULL && id->choice == SEQID_GI) {
4098 sip = GetSeqIdForGI (id->data.intvalue);
4099 }
4100 if (sip == NULL) {
4101 sip = id;
4102 }
4103 if (sip != NULL) {
4104 if (WriteDbsourceID (sip, buf, &is_na)) {
4105 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
4106 FFAddNewLine(ffstring);
4107 unknown = FALSE;
4108 }
4109 }
4110 }
4111 ValNodeFree (list);
4112 } else {
4113 sep = GetTopSeqEntryForEntityID (awp->entityID);
4114 if (sep != NULL && IS_Bioseq (sep)) {
4115 /* special case for coded_by CDS packed on retcode 1 protein */
4116 id = SeqLocId (cds->location);
4117 if (id != NULL && id->choice == SEQID_GI) {
4118 sip = GetSeqIdForGI (id->data.intvalue);
4119 if (sip == NULL) {
4120 sip = id;
4121 }
4122 }
4123 if (WriteDbsourceID (sip, buf, &is_na)) {
4124 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
4125 FFAddNewLine(ffstring);
4126 unknown = FALSE;
4127 }
4128 }
4129 }
4130 } else {
4131 if (WriteDbsourceID (sip, buf, &is_na)) {
4132 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
4133 FFAddNewLine(ffstring);
4134 unknown = FALSE;
4135 }
4136 }
4137 break;
4138 default :
4139 break;
4140 }
4141
4142 if (sip != NULL) {
4143 switch (sip->choice) {
4144 case SEQID_PIR :
4145 AddPIRBlock (ajp, ffstring, bsp);
4146 break;
4147 case SEQID_SWISSPROT :
4148 AddSPBlock (ajp, ffstring, bsp);
4149 break;
4150 case SEQID_PRF :
4151 AddPRFBlock (ajp, ffstring, bsp);
4152 break;
4153 case SEQID_PDB :
4154 AddPDBBlock (ajp, ffstring, bsp);
4155 break;
4156 default :
4157 break;
4158 }
4159 }
4160 }
4161
4162 if (unknown) {
4163 FFAddOneString (ffstring, "UNKNOWN", FALSE, FALSE, TILDE_TO_SPACES);
4164 }
4165
4166 str = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, NULL);
4167
4168 /* optionally populate gbseq for XML-ized GenBank format */
4169
4170 if (ajp->gbseq) {
4171 gbseq = &asp->gbseq;
4172 } else {
4173 gbseq = NULL;
4174 }
4175
4176 if (gbseq != NULL) {
4177 if (StringNCmp (str, "DBSOURCE ", 12) == 0) {
4178 gbseq->source_db = StringSave (str + 12);
4179 } else {
4180 gbseq->source_db = StringSave (str);
4181 }
4182 CleanQualValue (gbseq->source_db);
4183 Asn2gnbkCompressSpaces (gbseq->source_db);
4184 }
4185
4186 bbp->string = str;
4187 FFRecycleString(ajp, ffstring);
4188
4189 if (awp->afp != NULL) {
4190 DoImmediateFormat (awp->afp, bbp);
4191 }
4192 }
4193
AddDateBlock(Asn2gbWorkPtr awp)4194 NLM_EXTERN void AddDateBlock (
4195 Asn2gbWorkPtr awp
4196 )
4197
4198 {
4199 IntAsn2gbJobPtr ajp;
4200 BaseBlockPtr bbp;
4201 BioseqPtr bsp;
4202 Char date [40];
4203 SeqMgrDescContext dcontext;
4204 DatePtr dp;
4205 SeqDescrPtr sdp;
4206 StringItemPtr ffstring;
4207
4208 if (awp == NULL) return;
4209 ajp = awp->ajp;
4210 if (ajp == NULL) return;
4211 bsp = awp->bsp;
4212 if (bsp == NULL) return;
4213
4214 ffstring = FFGetString(ajp);
4215 if ( ffstring == NULL ) return;
4216
4217 bbp = Asn2gbAddBlock (awp, DATE_BLOCK, sizeof (BaseBlock));
4218 if (bbp == NULL) return;
4219
4220 date [0] = '\0';
4221
4222 dp = NULL;
4223 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
4224 if (sdp != NULL) {
4225 dp = (DatePtr) sdp->data.ptrvalue;
4226 }
4227 if (dp != NULL) {
4228 DateToFF (date, dp, FALSE);
4229 }
4230 if (StringHasNoText (date)) {
4231 StringCpy (date, "01-JAN-1900");
4232 }
4233
4234 FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 5, "DT", TRUE);
4235 FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
4236
4237 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
4238 FFRecycleString(ajp, ffstring);
4239
4240 bbp = Asn2gbAddBlock (awp, DATE_BLOCK, sizeof (BaseBlock));
4241 if (bbp == NULL) return;
4242
4243 ffstring = FFGetString(ajp);
4244
4245 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
4246 if (sdp != NULL) {
4247 dp = (DatePtr) sdp->data.ptrvalue;
4248 }
4249 if (dp != NULL) {
4250 DateToFF (date, dp, FALSE);
4251 }
4252
4253 FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 5, "DT", FALSE);
4254 FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
4255
4256 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
4257 FFRecycleString(ajp, ffstring);
4258
4259 if (awp->afp != NULL) {
4260 DoImmediateFormat (awp->afp, bbp);
4261 }
4262 }
4263
4264
4265 #define TOTAL_ESTKW 11
4266 #define TOTAL_GSSKW 2
4267 #define TOTAL_STSKW 5
4268
4269 static CharPtr EST_kw_array[ TOTAL_ESTKW] = {
4270 "EST", "EST PROTO((expressed sequence tag)", "expressed sequence tag",
4271 "EST (expressed sequence tag)", "EST(expressed sequence tag)",
4272 "partial cDNA sequence", "transcribed sequence fragment", "TSR",
4273 "putatively transcribed partial sequence", "UK putts"
4274 };
4275
4276 static CharPtr GSS_kw_array [TOTAL_GSSKW] = {
4277 "GSS", "trapped exon"
4278 };
4279
4280 static CharPtr STS_kw_array[TOTAL_STSKW] = {
4281 "STS", "STS(sequence tagged site)", "STS (sequence tagged site)",
4282 "STS sequence", "sequence tagged site"
4283 };
4284
/*
*  MatchArrayString
*
*  Returns the index of the first of the leading totalstr entries of
*  array_string that StringCmp reports equal to text, or -1 when text is
*  NULL or no entry matches.
*/
static Int2 MatchArrayString (
  CharPtr array_string [],
  Int2 totalstr,
  CharPtr text
)

{
  Int2  idx;

  if (text == NULL) return (-1);

  for (idx = 0; idx < totalstr; idx++) {
    if (StringCmp (array_string [idx], text) != 0) continue;
    return (idx);
  }

  return (-1);
}
4302
/*
*  CheckSpecialKeyword
*
*  Returns FALSE when kwd is NULL, or when kwd appears in the keyword table
*  of one of the other two record kinds than a kind flagged by is_est,
*  is_sts or is_gss (for example an STS or GSS keyword while is_est is set).
*  Returns TRUE otherwise.
*/
static Boolean CheckSpecialKeyword (
  Boolean is_est,
  Boolean is_sts,
  Boolean is_gss,
  CharPtr kwd
)

{
  Boolean  in_est;
  Boolean  in_gss;
  Boolean  in_sts;

  if (kwd == NULL) return FALSE;

  /* table membership does not depend on the flags, so look it up once */
  in_est = (Boolean) (MatchArrayString (EST_kw_array, TOTAL_ESTKW, kwd) != -1);
  in_sts = (Boolean) (MatchArrayString (STS_kw_array, TOTAL_STSKW, kwd) != -1);
  in_gss = (Boolean) (MatchArrayString (GSS_kw_array, TOTAL_GSSKW, kwd) != -1);

  if ((is_est && (in_sts || in_gss)) ||
      (is_sts && (in_est || in_gss)) ||
      (is_gss && (in_sts || in_est))) {
    return FALSE;
  }

  return TRUE;
}
4330
KeywordAlreadyInList(ValNodePtr head,CharPtr kwd)4331 static Boolean KeywordAlreadyInList (
4332 ValNodePtr head,
4333 CharPtr kwd
4334 )
4335
4336 {
4337 ValNodePtr vnp;
4338
4339 for (vnp = head; vnp != NULL; vnp = vnp->next) {
4340 if (StringICmp ((CharPtr) vnp->data.ptrvalue, kwd) == 0) return TRUE;
4341 }
4342
4343 return FALSE;
4344 }
4345
/* one finishing-status entry: the spelling matched by GetFinishingStatus
   (inuserobj) and the keyword string it returns for it (inkeyword) */
typedef struct finstatdata {
  CharPtr  inuserobj;
  CharPtr  inkeyword;
} FinStatData, PNTR FinStatPtr;

/* lookup table scanned by GetFinishingStatus; the {NULL, NULL} entry
   terminates the scan */
static FinStatData finStatKywds [] = {
  {"Standard-draft",                  "STANDARD_DRAFT"},
  {"High-quality-draft",              "HIGH_QUALITY_DRAFT"},
  {"Improved-high-quality-draft",     "IMPROVED_HIGH_QUALITY_DRAFT"},
  {"Annotation-directed-improvement", "ANNOTATION_DIRECTED_IMPROVEMENT"},
  {"Noncontiguous-finished",          "NONCONTIGUOUS_FINISHED"},
  /*
  {"Finished",                        "FINISHED"},
  */
  {NULL, NULL}
};
4362
/*
*  GetFinishingStatus
*
*  Looks str up in finStatKywds and returns the matching inkeyword string,
*  or NULL when str has no text or nothing matches.  Before the lookup, str
*  is copied into a 64 byte buffer with StringNCpy_0 and every blank in the
*  copy is replaced by '-'; the comparison uses StringICmp.
*/
static CharPtr GetFinishingStatus (
  CharPtr str
)

{
  FinStatPtr  entry;
  Char        normalized [64];
  CharPtr     scan;

  if (StringHasNoText (str)) return NULL;

  StringNCpy_0 (normalized, str, sizeof (normalized));

  /* table entries are spelled with hyphens where the input may have blanks */
  for (scan = normalized; *scan != '\0'; scan++) {
    if (*scan == ' ') {
      *scan = '-';
    }
  }

  for (entry = finStatKywds; entry->inuserobj != NULL; entry++) {
    if (StringICmp (normalized, entry->inuserobj) == 0) {
      return entry->inkeyword;
    }
  }

  return NULL;
}
4394
AddKeywordsBlock(Asn2gbWorkPtr awp)4395 NLM_EXTERN void AddKeywordsBlock (
4396 Asn2gbWorkPtr awp
4397 )
4398
4399 {
4400 Boolean add_encode = FALSE;
4401 IntAsn2gbJobPtr ajp;
4402 Asn2gbSectPtr asp;
4403 BaseBlockPtr bbp;
4404 BioSourcePtr biop;
4405 BioseqPtr bsp;
4406 BioseqSetPtr bssp;
4407 UserFieldPtr curr;
4408 SeqMgrDescContext dcontext;
4409 EMBLBlockPtr ebp;
4410 CharPtr field;
4411 CharPtr finishing_status = NULL;
4412 GBBlockPtr gbp;
4413 GBSeqPtr gbseq;
4414 ValNodePtr head = NULL;
4415 IndxPtr index;
4416 Boolean is_cross_kingdom = FALSE;
4417 Boolean is_est = FALSE;
4418 Boolean is_gss = FALSE;
4419 Boolean is_sts = FALSE;
4420 Boolean is_env_sample = FALSE;
4421 Boolean is_genome_assembly = FALSE;
4422 Boolean is_tsa = FALSE;
4423 Boolean is_unverified = FALSE;
4424 Boolean is_unv_organism = FALSE;
4425 Boolean is_unv_misassembled = FALSE;
4426 Boolean is_wp = FALSE;
4427 Boolean this_is_gen_asm;
4428 ValNodePtr keywords;
4429 CharPtr kwd;
4430 ValNodePtr ky_head;
4431 MolInfoPtr mip;
4432 BioseqPtr nbsp;
4433 Int2 num_super_kingdom = 0;
4434 ObjectIdPtr oip;
4435 OrgNamePtr onp;
4436 OrgRefPtr orp;
4437 PirBlockPtr pir;
4438 PrfBlockPtr prf;
4439 CharPtr sc_keyword;
4440 SeqDescrPtr sdp;
4441 SeqEntryPtr sep;
4442 SeqIdPtr sip;
4443 SPBlockPtr sp;
4444 SubSourcePtr ssp;
4445 CharPtr str;
4446 Boolean super_kingdoms_different = FALSE;
4447 CharPtr super_kingdom_name = NULL;
4448 TaxElementPtr tep;
4449 TextSeqIdPtr tsip;
4450 UserFieldPtr ufp;
4451 UserObjectPtr uop;
4452 ValNodePtr vnp;
4453 StringItemPtr ffstring;
4454
4455 if (awp == NULL) return;
4456 ajp = awp->ajp;
4457 if (ajp == NULL) return;
4458 bsp = awp->bsp;
4459 if (bsp == NULL) return;
4460 asp = awp->asp;
4461 if (asp == NULL) return;
4462
4463 bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, KEYWORDS_BLOCK, sizeof (BaseBlock));
4464 if (bbp == NULL) return;
4465
4466 ffstring = FFGetString(ajp);
4467 if ( ffstring == NULL ) return;
4468
4469 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4470 if (sip->choice == SEQID_OTHER) {
4471 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
4472 if (tsip != NULL && StringNICmp (tsip->accession, "WP_", 3) == 0) {
4473 is_wp = TRUE;
4474 }
4475 }
4476 }
4477
4478 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
4479 while (sdp != NULL) {
4480 biop = (BioSourcePtr) sdp->data.ptrvalue;
4481 if (biop != NULL) {
4482 orp = biop->org;
4483 if (orp != NULL) {
4484 onp = orp->orgname;
4485 if (onp != NULL) {
4486 if (onp->choice == 5) {
4487 for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
4488 if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
4489 num_super_kingdom++;
4490 if (super_kingdom_name == NULL) {
4491 super_kingdom_name = tep->name;
4492 } else if (StringICmp (super_kingdom_name, tep->name) != 0) {
4493 super_kingdoms_different = TRUE;
4494 }
4495 }
4496 }
4497 }
4498 }
4499 }
4500 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
4501 if (ssp->subtype == SUBSRC_environmental_sample) {
4502 is_env_sample = TRUE;
4503 }
4504 }
4505 }
4506 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext);
4507 }
4508
4509 if (num_super_kingdom > 1 && super_kingdoms_different) {
4510 is_cross_kingdom = TRUE;
4511 }
4512
4513 if (bsp->repr == Seq_repr_map) {
4514 if (head != NULL) {
4515 ValNodeCopyStr (&head, 0, "; ");
4516 }
4517 ValNodeCopyStr (&head, 0, "Whole_Genome_Map");
4518 }
4519
4520 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
4521 if (sdp != NULL) {
4522 bbp->entityID = dcontext.entityID;
4523 bbp->itemID = dcontext.itemID;
4524 bbp->itemtype = OBJ_SEQDESC;
4525
4526 mip = (MolInfoPtr) sdp->data.ptrvalue;
4527 if (mip != NULL) {
4528 switch (mip->tech) {
4529 case MI_TECH_htgs_1 :
4530 if (head != NULL) {
4531 ValNodeCopyStr (&head, 0, "; ");
4532 }
4533 ValNodeCopyStr (&head, 0, "HTG");
4534 ValNodeCopyStr (&head, 0, "; ");
4535 ValNodeCopyStr (&head, 0, "HTGS_PHASE1");
4536 break;
4537 case MI_TECH_htgs_2 :
4538 if (head != NULL) {
4539 ValNodeCopyStr (&head, 0, "; ");
4540 }
4541 ValNodeCopyStr (&head, 0, "HTG");
4542 ValNodeCopyStr (&head, 0, "; ");
4543 ValNodeCopyStr (&head, 0, "HTGS_PHASE2");
4544 break;
4545 case MI_TECH_htgs_3 :
4546 if (head != NULL) {
4547 ValNodeCopyStr (&head, 0, "; ");
4548 }
4549 ValNodeCopyStr (&head, 0, "HTG");
4550 break;
4551 case MI_TECH_est :
4552 if (head != NULL) {
4553 ValNodeCopyStr (&head, 0, "; ");
4554 }
4555 is_est = TRUE;
4556 ValNodeCopyStr (&head, 0, "EST");
4557 if (is_env_sample) {
4558 if (head != NULL) {
4559 ValNodeCopyStr (&head, 0, "; ");
4560 }
4561 ValNodeCopyStr (&head, 0, "ENV");
4562 }
4563 break;
4564 case MI_TECH_sts :
4565 if (head != NULL) {
4566 ValNodeCopyStr (&head, 0, "; ");
4567 }
4568 is_sts = TRUE;
4569 ValNodeCopyStr (&head, 0, "STS");
4570 break;
4571 case MI_TECH_survey :
4572 if (head != NULL) {
4573 ValNodeCopyStr (&head, 0, "; ");
4574 }
4575 is_gss = TRUE;
4576 ValNodeCopyStr (&head, 0, "GSS");
4577 if (is_env_sample) {
4578 if (head != NULL) {
4579 ValNodeCopyStr (&head, 0, "; ");
4580 }
4581 ValNodeCopyStr (&head, 0, "ENV");
4582 }
4583 break;
4584 case MI_TECH_fli_cdna :
4585 if (head != NULL) {
4586 ValNodeCopyStr (&head, 0, "; ");
4587 }
4588 ValNodeCopyStr (&head, 0, "FLI_CDNA");
4589 break;
4590 case MI_TECH_htgs_0 :
4591 if (head != NULL) {
4592 ValNodeCopyStr (&head, 0, "; ");
4593 }
4594 ValNodeCopyStr (&head, 0, "HTG");
4595 ValNodeCopyStr (&head, 0, "; ");
4596 ValNodeCopyStr (&head, 0, "HTGS_PHASE0");
4597 break;
4598 case MI_TECH_htc :
4599 if (head != NULL) {
4600 ValNodeCopyStr (&head, 0, "; ");
4601 }
4602 ValNodeCopyStr (&head, 0, "HTC");
4603 break;
4604 case MI_TECH_wgs :
4605 if (head != NULL) {
4606 ValNodeCopyStr (&head, 0, "; ");
4607 }
4608 ValNodeCopyStr (&head, 0, "WGS");
4609 break;
4610 /*
4611 case MI_TECH_barcode :
4612 if (head != NULL) {
4613 ValNodeCopyStr (&head, 0, "; ");
4614 }
4615 ValNodeCopyStr (&head, 0, "BARCODE");
4616 break;
4617 */
4618 case MI_TECH_tsa :
4619 if (head != NULL) {
4620 ValNodeCopyStr (&head, 0, "; ");
4621 }
4622 ValNodeCopyStr (&head, 0, "TSA");
4623 ValNodeCopyStr (&head, 0, "; ");
4624 ValNodeCopyStr (&head, 0, "Transcriptome Shotgun Assembly");
4625 is_tsa = TRUE;
4626 break;
4627 case MI_TECH_targeted :
4628 if (head != NULL) {
4629 ValNodeCopyStr (&head, 0, "; ");
4630 }
4631 ValNodeCopyStr (&head, 0, "TLS");
4632 ValNodeCopyStr (&head, 0, "; ");
4633 ValNodeCopyStr (&head, 0, "Targeted Locus Study");
4634 break;
4635 case MI_TECH_unknown :
4636 case MI_TECH_standard :
4637 case MI_TECH_other :
4638 if (is_env_sample) {
4639 if (head != NULL) {
4640 ValNodeCopyStr (&head, 0, "; ");
4641 }
4642 ValNodeCopyStr (&head, 0, "ENV");
4643 }
4644 break;
4645 default :
4646 break;
4647 }
4648 }
4649 }
4650
4651 if (ISA_aa (bsp->mol) && (! is_tsa)) {
4652 if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
4653 bssp = (BioseqSetPtr) bsp->idx.parentptr;
4654 if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {
4655 sep = bssp->seq_set;
4656 if (sep != NULL && IS_Bioseq (sep)) {
4657 nbsp = (BioseqPtr) sep->data.ptrvalue;
4658 if (nbsp != NULL) {
4659 sdp = SeqMgrGetNextDescriptor (nbsp, NULL, Seq_descr_molinfo, &dcontext);
4660 if (sdp != NULL) {
4661 mip = (MolInfoPtr) sdp->data.ptrvalue;
4662 if (mip != NULL) {
4663 if (mip->tech == MI_TECH_tsa) {
4664 if (head != NULL) {
4665 ValNodeCopyStr (&head, 0, "; ");
4666 }
4667 ValNodeCopyStr (&head, 0, "TSA");
4668 ValNodeCopyStr (&head, 0, "; ");
4669 ValNodeCopyStr (&head, 0, "Transcriptome Shotgun Assembly");
4670 }
4671 }
4672 }
4673 }
4674 }
4675 }
4676 }
4677 }
4678
4679 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
4680 while (sdp != NULL) {
4681 uop = (UserObjectPtr) sdp->data.ptrvalue;
4682 if (uop != NULL) {
4683 oip = uop->type;
4684 if (oip != NULL && StringICmp (oip->str, "ENCODE") == 0) {
4685 add_encode = TRUE;
4686 } else if (oip != NULL && StringICmp (oip->str, "StructuredComment") == 0) {
4687 this_is_gen_asm = FALSE;
4688 for (curr = uop->data; curr != NULL; curr = curr->next) {
4689 if (curr->choice != 1) continue;
4690 oip = curr->label;
4691 if (oip == NULL) continue;
4692 field = oip->str;
4693 if (StringHasNoText (field)) continue;
4694 if (StringCmp (field, "StructuredCommentPrefix") == 0) {
4695 if (StringCmp ((CharPtr) curr->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) {
4696 is_genome_assembly = TRUE;
4697 this_is_gen_asm = TRUE;
4698 }
4699 }
4700 }
4701 if (this_is_gen_asm) {
4702 for (curr = uop->data; curr != NULL; curr = curr->next) {
4703 if (curr->choice != 1) continue;
4704 oip = curr->label;
4705 if (oip == NULL) continue;
4706 field = oip->str;
4707 if (StringHasNoText (field)) continue;
4708 if (StringCmp (field, "Current Finishing Status") == 0) {
4709 finishing_status = GetFinishingStatus ((CharPtr) curr->data.ptrvalue);
4710 }
4711 }
4712 }
4713 sc_keyword = KeywordForStructuredCommentName (uop);
4714 if (sc_keyword != NULL) {
4715 if (IsStructuredCommentValid (uop, NULL, NULL) == eFieldValid_Valid) {
4716 ky_head = SplitStringAtSemicolon (sc_keyword);
4717 if (ky_head != NULL) {
4718 for (vnp = ky_head; vnp != NULL; vnp = vnp->next) {
4719 kwd = (CharPtr) vnp->data.ptrvalue;
4720 if (StringHasNoText (kwd)) continue;
4721 if (head != NULL) {
4722 ValNodeCopyStr (&head, 0, "; ");
4723 }
4724 ValNodeCopyStr (&head, 0, kwd);
4725 }
4726 ValNodeFreeData (ky_head);
4727 }
4728 }
4729 MemFree (sc_keyword);
4730 }
4731 } else if (oip != NULL && StringICmp (oip->str, "Unverified") == 0) {
4732 is_unverified = TRUE;
4733 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
4734 oip = ufp->label;
4735 if (oip != NULL && StringCmp (oip->str, "Type") == 0 && ufp->choice == 1) {
4736 str = (CharPtr) ufp->data.ptrvalue;
4737 if (StringICmp (str, "Organism") == 0) {
4738 is_unv_organism = TRUE;
4739 } else if (StringICmp (str, "Misassembled") == 0) {
4740 is_unv_misassembled = TRUE;
4741 }
4742 }
4743 }
4744 }
4745 }
4746 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
4747 }
4748 if (is_unverified) {
4749 if (head != NULL) {
4750 ValNodeCopyStr (&head, 0, "; ");
4751 }
4752 if (is_unv_organism) {
4753 ValNodeCopyStr (&head, 0, "UNVERIFIED_ORGANISM");
4754 } else if (is_unv_misassembled) {
4755 ValNodeCopyStr (&head, 0, "UNVERIFIED_MISASSEMBLY");
4756 } else {
4757 ValNodeCopyStr (&head, 0, "UNVERIFIED");
4758 }
4759 }
4760 if (add_encode) {
4761 if (head != NULL) {
4762 ValNodeCopyStr (&head, 0, "; ");
4763 }
4764 ValNodeCopyStr (&head, 0, "ENCODE");
4765 }
4766 if (is_genome_assembly && StringDoesHaveText (finishing_status)) {
4767 if (head != NULL) {
4768 ValNodeCopyStr (&head, 0, "; ");
4769 }
4770 ValNodeCopyStr (&head, 0, finishing_status);
4771 }
4772
4773 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4774 if (sip->choice == SEQID_TPG || sip->choice == SEQID_TPE || sip->choice == SEQID_TPD) {
4775 if (head != NULL) {
4776 ValNodeCopyStr (&head, 0, "; ");
4777 }
4778 ValNodeCopyStr (&head, 0, "Third Party Data");
4779 ValNodeCopyStr (&head, 0, "; ");
4780 ValNodeCopyStr (&head, 0, "TPA");
4781 } else if (sip->choice == SEQID_OTHER) {
4782 if (head != NULL) {
4783 ValNodeCopyStr (&head, 0, "; ");
4784 }
4785 ValNodeCopyStr (&head, 0, "RefSeq");
4786 }
4787 }
4788
4789 if (is_cross_kingdom && is_wp) {
4790 if (head != NULL) {
4791 ValNodeCopyStr (&head, 0, "; ");
4792 }
4793 ValNodeCopyStr (&head, 0, "CROSS_KINGDOM");
4794 }
4795
4796 sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
4797 while (sdp != NULL) {
4798
4799 keywords = NULL;
4800
4801 switch (dcontext.seqdesctype) {
4802 case Seq_descr_genbank :
4803 gbp = (GBBlockPtr) sdp->data.ptrvalue;
4804 if (gbp != NULL) {
4805 keywords = gbp->keywords;
4806 }
4807 break;
4808 case Seq_descr_embl :
4809 ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
4810 if (ebp != NULL) {
4811 keywords = ebp->keywords;
4812 }
4813 break;
4814 case Seq_descr_pir :
4815 pir = (PirBlockPtr) sdp->data.ptrvalue;
4816 if (pir != NULL) {
4817 keywords = pir->keywords;
4818 }
4819 break;
4820 case Seq_descr_prf :
4821 prf = (PrfBlockPtr) sdp->data.ptrvalue;
4822 if (prf != NULL) {
4823 keywords = prf->keywords;
4824 }
4825 break;
4826 case Seq_descr_sp :
4827 sp = (SPBlockPtr) sdp->data.ptrvalue;
4828 if (sp != NULL) {
4829 keywords = sp->keywords;
4830 }
4831 break;
4832 default :
4833 break;
4834 }
4835
4836 if (keywords != NULL) {
4837 bbp->entityID = dcontext.entityID;
4838 bbp->itemID = dcontext.itemID;
4839 bbp->itemtype = OBJ_SEQDESC;
4840 }
4841
4842 for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
4843 kwd = (CharPtr) vnp->data.ptrvalue;
4844 if (CheckSpecialKeyword (is_est, is_sts, is_gss, kwd)) {
4845 if (! KeywordAlreadyInList (head, kwd)) {
4846 if (head != NULL) {
4847 ValNodeCopyStr (&head, 0, "; ");
4848 }
4849 ValNodeCopyStr (&head, 0, kwd);
4850 }
4851 }
4852 }
4853
4854 sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &dcontext);
4855 }
4856
4857 FFStartPrint( ffstring, awp->format, 0, 12, "KEYWORDS", 12, 5, 5, "KW", TRUE);
4858 str = MergeFFValNodeStrs (head);
4859
4860 /* if no keywords were found, period will still be added by this call */
4861 if ( str != NULL ) {
4862 FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_TO_SPACES);
4863 } else {
4864 FFAddOneChar(ffstring, '.', FALSE);
4865 }
4866
4867 MemFree (str);
4868
4869 /* optionally populate indexes for NCBI internal database */
4870
4871 if (ajp->index) {
4872 index = &asp->index;
4873 } else {
4874 index = NULL;
4875 }
4876
4877 if (index != NULL) {
4878 for (vnp = head; vnp != NULL; vnp = vnp->next) {
4879 kwd = (CharPtr) vnp->data.ptrvalue;
4880 if (StringCmp (kwd, "; ") == 0) continue;
4881 ValNodeCopyStrToHead (&(index->keywords), 0, kwd);
4882 }
4883 }
4884
4885 /* optionally populate gbseq for XML-ized GenBank format */
4886
4887 if (ajp->gbseq) {
4888 gbseq = &asp->gbseq;
4889 } else {
4890 gbseq = NULL;
4891 }
4892
4893 if (gbseq != NULL) {
4894 for (vnp = head; vnp != NULL; vnp = vnp->next) {
4895 kwd = (CharPtr) vnp->data.ptrvalue;
4896 if (StringCmp (kwd, "; ") == 0) continue;
4897 ValNodeCopyStr (&(gbseq->keywords), 0, kwd);
4898 }
4899 }
4900
4901 ValNodeFreeData (head);
4902
4903 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "KW");
4904
4905 FFRecycleString(ajp, ffstring);
4906
4907 if (awp->afp != NULL) {
4908 DoImmediateFormat (awp->afp, bbp);
4909 }
4910 }
4911
AddSegmentBlock(Asn2gbWorkPtr awp,Boolean onePartOfSeg,Boolean is_na)4912 NLM_EXTERN void AddSegmentBlock (
4913 Asn2gbWorkPtr awp,
4914 Boolean onePartOfSeg,
4915 Boolean is_na
4916 )
4917
4918 {
4919 Char acc [41];
4920 IntAsn2gbJobPtr ajp;
4921 Asn2gbSectPtr asp;
4922 BaseBlockPtr bbp;
4923 Char buf [32];
4924 GBSeqPtr gbseq;
4925 StringItemPtr ffstring;
4926
4927 if (awp == NULL) return;
4928 ajp = awp->ajp;
4929 if (ajp == NULL) return;
4930 asp = awp->asp;
4931 if (asp == NULL) return;
4932
4933 if (awp->seg < 1 || awp->numsegs < 1) return;
4934
4935 bbp = Asn2gbAddBlock (awp, SEGMENT_BLOCK, sizeof (BaseBlock));
4936 if (bbp == NULL) return;
4937
4938 ffstring = FFGetString(ajp);
4939 if ( ffstring == NULL ) return;
4940
4941
4942 FFStartPrint (ffstring, awp->format, 0, 12, "SEGMENT", 12, 5, 5, "XX", FALSE);
4943
4944 if ( GetWWW(ajp) && awp->parent != NULL && onePartOfSeg) {
4945 sprintf (buf, "%d of ", (int) awp->seg);
4946 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4947 SeqIdWrite (awp->parent->id, acc, PRINTID_TEXTID_ACC_VER, sizeof (acc) - 1);
4948
4949 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4950 if (is_na) {
4951 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
4952 } else {
4953 FF_Add_NCBI_Base_URL (ffstring, link_seqp);
4954 }
4955 FFAddTextToString(ffstring, /* "val=" */ NULL, acc, "\">", FALSE, FALSE, TILDE_IGNORE);
4956
4957 sprintf (buf, "%ld", (long) awp->numsegs);
4958 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4959 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4960 } else {
4961 sprintf (buf, "%d of %ld", (int) awp->seg, (long) awp->numsegs);
4962 FFAddOneString (ffstring, buf, FALSE, TRUE, TILDE_TO_SPACES);
4963 }
4964
4965 /* optionally populate gbseq for XML-ized GenBank format */
4966
4967 if (ajp->gbseq) {
4968 gbseq = &asp->gbseq;
4969 } else {
4970 gbseq = NULL;
4971 }
4972
4973 if (gbseq != NULL) {
4974 sprintf (buf, "%d of %ld", (int) awp->seg, (long) awp->numsegs);
4975 gbseq->segment = StringSave (buf);
4976 }
4977
4978 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
4979 FFRecycleString(ajp, ffstring);
4980
4981 if (awp->afp != NULL) {
4982 DoImmediateFormat (awp->afp, bbp);
4983 }
4984 }
4985
/*
 * AddSrcBlk
 *
 * Appends a SOURCE_BLOCK to the work structure and records on it the
 * entityID/itemID/itemtype triple of the object it is derived from.
 * If awp->afp is set, the new block is handed to DoImmediateFormat.
 */
static void AddSrcBlk (
  Asn2gbWorkPtr awp,
  Uint2 entityID,
  Uint4 itemID,
  Uint2 itemtype
)

{
  BaseBlockPtr  block = NULL;

  if (awp != NULL) {
    block = Asn2gbAddBlock (awp, SOURCE_BLOCK, sizeof (BaseBlock));
  }
  if (block == NULL) return;

  block->entityID = entityID;
  block->itemID = itemID;
  block->itemtype = itemtype;

  if (awp->afp == NULL) return;
  DoImmediateFormat (awp->afp, block);
}
5008
/*
 * AddOrgBlk
 *
 * Appends an ORGANISM_BLOCK to the work structure and records on it the
 * entityID/itemID/itemtype triple of the object it is derived from.
 * If awp->afp is set, the new block is handed to DoImmediateFormat.
 */
static void AddOrgBlk (
  Asn2gbWorkPtr awp,
  Uint2 entityID,
  Uint4 itemID,
  Uint2 itemtype
)

{
  BaseBlockPtr  block = NULL;

  if (awp != NULL) {
    block = Asn2gbAddBlock (awp, ORGANISM_BLOCK, sizeof (BaseBlock));
  }
  if (block == NULL) return;

  block->entityID = entityID;
  block->itemID = itemID;
  block->itemtype = itemtype;

  if (awp->afp == NULL) return;
  DoImmediateFormat (awp->afp, block);
}
5031
x_NotSpecialTaxName(CharPtr taxname)5032 static Boolean x_NotSpecialTaxName (
5033 CharPtr taxname
5034 )
5035
5036 {
5037 if (StringHasNoText (taxname)) return TRUE;
5038
5039 if (StringICmp (taxname, "synthetic construct") == 0) return FALSE;
5040 if (StringICmp (taxname, "artificial sequence") == 0) return FALSE;
5041 if (StringStr (taxname, "vector") != NULL) return FALSE;
5042 if (StringStr (taxname, "Vector") != NULL) return FALSE;
5043
5044 return TRUE;
5045 }
5046
AddSourceOrganismBlock(Asn2gbWorkPtr awp)5047 NLM_EXTERN void AddSourceOrganismBlock (
5048 Asn2gbWorkPtr awp
5049 )
5050
5051 {
5052 IntAsn2gbJobPtr ajp;
5053 BioSourcePtr biop;
5054 BioseqPtr bsp;
5055 SeqFeatPtr cds;
5056 CharPtr common;
5057 SeqMgrDescContext dcontext;
5058 BioseqPtr dna;
5059 SeqMgrFeatContext fcontext;
5060 GBBlockPtr gbp = NULL;
5061 SeqDescrPtr gbsdp = NULL;
5062 Boolean is_wp = FALSE;
5063 Boolean loop = FALSE;
5064 Int2 num_super_kingdom = 0;
5065 Boolean okay = FALSE;
5066 OrgNamePtr onp;
5067 OrgRefPtr orp;
5068 ObjValNodePtr ovp;
5069 SeqDescrPtr sdp;
5070 ValNodePtr sdplist = NULL;
5071 SeqFeatPtr sfp;
5072 SeqIntPtr sintp;
5073 SeqIdPtr sip;
5074 SeqLocPtr slp, slpx;
5075 SeqPntPtr spp;
5076 Boolean super_kingdoms_different = FALSE;
5077 CharPtr super_kingdom_name = NULL;
5078 CharPtr taxname;
5079 TaxElementPtr tep;
5080 TextSeqIdPtr tsip;
5081 ValNodePtr vnp;
5082
5083 if (awp == NULL) return;
5084 ajp = awp->ajp;
5085 if (ajp == NULL) return;
5086 bsp = awp->bsp;
5087 if (bsp == NULL) return;
5088
5089 for (sip = bsp->id; sip != NULL; sip = sip->next) {
5090 if (sip->choice == SEQID_SWISSPROT) {
5091 loop = TRUE;
5092 } else if (sip->choice == SEQID_OTHER) {
5093 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
5094 if (tsip != NULL && StringNICmp (tsip->accession, "WP_", 3) == 0) {
5095 is_wp = TRUE;
5096 }
5097 }
5098 }
5099
5100 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
5101 if (sdp != NULL && (! ajp->newSourceOrg)) {
5102 gbp = (GBBlockPtr) sdp->data.ptrvalue;
5103 if (gbp != NULL && StringDoesHaveText (gbp->source)) {
5104 gbsdp = sdp;
5105 }
5106 }
5107
5108 if (ISA_aa (bsp->mol)) {
5109
5110 /* if protein, get sources applicable to DNA location of CDS */
5111
5112 sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
5113 if (sdp != NULL && sdp->choice == Seq_descr_source) {
5114 biop = (BioSourcePtr) sdp->data.ptrvalue;
5115 if (biop != NULL) {
5116 orp = biop->org;
5117 if (orp != NULL) {
5118 taxname = orp->taxname;
5119 if (StringHasNoText (taxname) || x_NotSpecialTaxName (taxname)) {
5120 cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
5121 if (cds != NULL) {
5122 dna = BioseqFindFromSeqLoc (cds->location);
5123 if (dna != NULL) {
5124 slp = AsnIoMemCopy ((Pointer) cds->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
5125 if (slp != NULL) {
5126 for (slpx = SeqLocFindNext (slp, NULL); slpx != NULL; slpx = SeqLocFindNext (slp, slpx)) {
5127 if (slpx->choice == SEQLOC_INT) {
5128 sintp = (SeqIntPtr) slpx->data.ptrvalue;
5129 if (sintp != NULL) {
5130 sintp->strand = Seq_strand_both;
5131 }
5132 } else if (slpx->choice == SEQLOC_PNT) {
5133 spp = (SeqPntPtr) slpx->data.ptrvalue;
5134 if (spp != NULL) {
5135 spp->strand = Seq_strand_both;
5136 }
5137 }
5138 }
5139 }
5140 sfp = SeqMgrGetOverlappingSource (slp, &fcontext);
5141 SeqLocFree (slp);
5142 if (sfp != NULL) {
5143 AddSrcBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5144 AddOrgBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5145 return;
5146 }
5147 }
5148 }
5149 }
5150 }
5151 }
5152 }
5153 }
5154
5155 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
5156 while (sdp != NULL) {
5157 ValNodeAddPointer (&sdplist, 0, (Pointer) sdp);
5158 biop = (BioSourcePtr) sdp->data.ptrvalue;
5159 if (biop != NULL) {
5160 orp = biop->org;
5161 if (orp != NULL) {
5162 taxname = orp->taxname;
5163 common = orp->common;
5164 onp = orp->orgname;
5165 if (onp != NULL) {
5166 if (onp->choice == 5) {
5167 for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
5168 if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
5169 num_super_kingdom++;
5170 if (super_kingdom_name == NULL) {
5171 super_kingdom_name = tep->name;
5172 } else if (StringICmp (super_kingdom_name, tep->name) != 0) {
5173 super_kingdoms_different = TRUE;
5174 }
5175 }
5176 }
5177 }
5178 }
5179 }
5180 }
5181 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext);
5182 }
5183
5184 if (sdplist != NULL && ((num_super_kingdom > 1 && super_kingdoms_different && is_wp) || loop)) {
5185
5186 for (vnp = sdplist; vnp != NULL; vnp = vnp->next) {
5187 sdp = (SeqDescrPtr) vnp->data.ptrvalue;
5188
5189 if (gbsdp != NULL) {
5190 if (gbsdp->extended != 0) {
5191 ovp = (ObjValNodePtr) gbsdp;
5192 AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5193 okay = TRUE;
5194 }
5195 } else if (sdp->extended != 0) {
5196 ovp = (ObjValNodePtr) sdp;
5197 AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5198 okay = TRUE;
5199 }
5200
5201 if (sdp->extended != 0) {
5202 ovp = (ObjValNodePtr) sdp;
5203 AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5204 okay = TRUE;
5205 }
5206 }
5207
5208 } else if (sdplist != NULL) {
5209
5210 sdp = (SeqDescrPtr) sdplist->data.ptrvalue;
5211
5212 if (gbsdp != NULL) {
5213 if (gbsdp->extended != 0) {
5214 ovp = (ObjValNodePtr) gbsdp;
5215 AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5216 okay = TRUE;
5217 }
5218 } else if (sdp->extended != 0) {
5219 ovp = (ObjValNodePtr) sdp;
5220 AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5221 okay = TRUE;
5222 }
5223
5224 if (sdp->extended != 0) {
5225 ovp = (ObjValNodePtr) sdp;
5226 AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5227 }
5228
5229 } else {
5230
5231 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
5232 if (sfp != NULL) {
5233 AddSrcBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5234 AddOrgBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5235 okay = TRUE;
5236
5237 } else if (ISA_aa (bsp->mol)) {
5238
5239 /* if protein with no sources, get sources applicable to DNA location of CDS */
5240
5241 cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
5242 if (cds != NULL) {
5243 sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
5244 if (sfp != NULL) {
5245 AddSrcBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5246 AddOrgBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
5247 okay = TRUE;
5248 } else {
5249 dna = BioseqFindFromSeqLoc (cds->location);
5250 if (dna != NULL) {
5251 sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
5252 if (sdp != NULL) {
5253 if (sdp->extended != 0) {
5254 ovp = (ObjValNodePtr) sdp;
5255 AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5256 AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
5257 okay = TRUE;
5258 }
5259 }
5260 }
5261 }
5262 }
5263 }
5264 }
5265
5266 if (! okay) {
5267 AddSrcBlk (awp, 0, 0, 0);
5268 AddOrgBlk (awp, 0, 0, 0);
5269 }
5270
5271 ValNodeFree (sdplist);
5272 }
5273
AddPub(Asn2gbWorkPtr awp,ValNodePtr PNTR head,PubdescPtr pdp)5274 static RefBlockPtr AddPub (
5275 Asn2gbWorkPtr awp,
5276 ValNodePtr PNTR head,
5277 PubdescPtr pdp
5278 )
5279
5280 {
5281 Char buf [521]; /* increased for consortium in citsub */
5282 CitArtPtr cap;
5283 CitBookPtr cbp;
5284 CitGenPtr cgp;
5285 CitJourPtr cjp;
5286 CitPatPtr cpp;
5287 CitSubPtr csp;
5288 DatePtr dp = NULL;
5289 Boolean justuids = TRUE;
5290 ImprintPtr imp = NULL;
5291 IntRefBlockPtr irp;
5292 RefBlockPtr rbp;
5293 ValNodePtr vnp;
5294 ArticleIdPtr aip;
5295
5296 if (awp == NULL || head == NULL || pdp == NULL) return NULL;
5297
5298 if (awp->hideGeneRIFs) {
5299 if (StringISearch (pdp->comment, "GeneRIF") != NULL) return NULL;
5300 } else if (awp->onlyGeneRIFs) {
5301 if (StringISearch (pdp->comment, "GeneRIF") == NULL) return NULL;
5302 } else if (awp->onlyReviewPubs) {
5303 if (StringISearch (pdp->comment, "Review Article") == NULL) return NULL;
5304 }
5305
5306 rbp = (RefBlockPtr) MemNew (sizeof (IntRefBlock));
5307 if (rbp == NULL) return NULL;
5308 rbp->blocktype = REFERENCE_BLOCK;
5309 rbp->section = awp->currsection;
5310
5311 rbp->serial = INT2_MAX;
5312
5313 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
5314 switch (vnp->choice) {
5315 case PUB_Gen :
5316 /* may be unpublished, or may be serial number of swiss-prot reference */
5317 cgp = (CitGenPtr) vnp->data.ptrvalue;
5318 if (cgp != NULL) {
5319 if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
5320 rbp->category = REF_CAT_UNP;
5321 if (dp == NULL) {
5322 dp = cgp->date;
5323 }
5324 if (cgp->serial_number > 0) {
5325 rbp->serial = cgp->serial_number;
5326 }
5327 if (cgp->cit != NULL) {
5328 if (StringNICmp ("unpublished", cgp->cit, 11) != 0 &&
5329 StringNICmp ("submitted", cgp->cit, 8) != 0 &&
5330 StringNICmp ("to be published", cgp->cit, 15) != 0 &&
5331 StringNICmp ("in press", cgp->cit, 8) != 0 &&
5332 StringStr (cgp->cit, "Journal") == NULL) {
5333 if (cgp->serial_number == 0) {
5334 MemFree (rbp);
5335 return NULL;
5336 }
5337 }
5338 } else if (cgp->journal == NULL || cgp->date == NULL) {
5339 if (cgp->serial_number == 0) {
5340 MemFree (rbp);
5341 return NULL;
5342 }
5343 }
5344 }
5345 }
5346 break;
5347 case PUB_Sub :
5348 rbp->category = REF_CAT_SUB;
5349 csp = (CitSubPtr) vnp->data.ptrvalue;
5350 if (csp != NULL) {
5351 imp = csp->imp;
5352 if (imp != NULL) {
5353 if (dp == NULL) {
5354 dp = imp->date;
5355 }
5356 }
5357 if (csp->date != NULL) {
5358 if (dp == NULL) {
5359 dp = csp->date;
5360 }
5361 }
5362 }
5363 break;
5364 case PUB_Article:
5365 cap = (CitArtPtr) vnp->data.ptrvalue;
5366 if (cap != NULL) {
5367 switch (cap->from) {
5368 case 1:
5369 cjp = (CitJourPtr) cap->fromptr;
5370 if (cjp != NULL) {
5371 imp = (ImprintPtr) cjp->imp;
5372 if (imp != NULL) {
5373 if (dp == NULL) {
5374 dp = imp->date;
5375 }
5376 }
5377 }
5378 break;
5379 case 2:
5380 cbp = (CitBookPtr) cap->fromptr;
5381 if (cbp != NULL) {
5382 imp = (ImprintPtr) cbp->imp;
5383 if (imp != NULL) {
5384 if (dp == NULL) {
5385 dp = imp->date;
5386 }
5387 }
5388 }
5389 break;
5390 case 3:
5391 cbp = (CitBookPtr) cap->fromptr;
5392 if (cbp != NULL) {
5393 imp = (ImprintPtr) cbp->imp;
5394 if (imp != NULL) {
5395 if (dp == NULL) {
5396 dp = imp->date;
5397 }
5398 }
5399 }
5400 break;
5401 default:
5402 break;
5403 }
5404 /* look for PMID and MUID in the Cit-art article ids set */
5405 if (cap->ids != NULL) {
5406 for (aip = cap->ids; aip != NULL; aip = aip->next) {
5407 if (aip->choice == ARTICLEID_PUBMED && rbp->pmid == 0) {
5408 rbp->pmid = aip->data.intvalue;
5409 rbp->category = REF_CAT_PUB;
5410 } else if (aip->choice == ARTICLEID_MEDLINE && rbp->muid == 0) {
5411 rbp->muid = aip->data.intvalue;
5412 rbp->category = REF_CAT_PUB;
5413 }
5414 }
5415 }
5416 }
5417 break;
5418 case PUB_Book:
5419 cbp = (CitBookPtr) vnp->data.ptrvalue;
5420 if (cbp != NULL) {
5421 imp = (ImprintPtr) cbp->imp;
5422 if (imp != NULL) {
5423 if (dp == NULL) {
5424 dp = imp->date;
5425 }
5426 }
5427 }
5428 break;
5429 case PUB_Proc:
5430 cbp = (CitBookPtr) vnp->data.ptrvalue;
5431 if (cbp != NULL) {
5432 imp = (ImprintPtr) cbp->imp;
5433 if (imp != NULL) {
5434 if (dp == NULL) {
5435 dp = imp->date;
5436 }
5437 }
5438 }
5439 break;
5440 case PUB_Patent :
5441 rbp->category = REF_CAT_PUB;
5442 cpp = (CitPatPtr) vnp->data.ptrvalue;
5443 if (cpp != NULL) {
5444 if (cpp->date_issue != NULL) {
5445 if (dp == NULL) {
5446 dp = (DatePtr) cpp->date_issue;
5447 }
5448 } else if (cpp->app_date != NULL) {
5449 if (dp == NULL) {
5450 dp = (DatePtr) cpp->app_date;
5451 }
5452 }
5453 }
5454 break;
5455 case PUB_Man:
5456 cbp = (CitBookPtr) vnp->data.ptrvalue;
5457 if (cbp != NULL) {
5458 imp = (ImprintPtr) cbp->imp;
5459 if (imp != NULL) {
5460 if (dp == NULL) {
5461 dp = imp->date;
5462 }
5463 }
5464 }
5465 break;
5466 case PUB_Muid :
5467 if (rbp->muid == 0) {
5468 rbp->muid = vnp->data.intvalue;
5469 rbp->category = REF_CAT_PUB;
5470 }
5471 break;
5472 case PUB_PMid :
5473 if (rbp->pmid == 0) {
5474 rbp->pmid = vnp->data.intvalue;
5475 rbp->category = REF_CAT_PUB;
5476 }
5477 break;
5478 default :
5479 break;
5480 }
5481 if (vnp->choice != PUB_Muid && vnp->choice != PUB_PMid) {
5482 justuids = FALSE;
5483 }
5484 }
5485
5486 /* check for submitted vs. in-press */
5487
5488 if (imp != NULL) {
5489 rbp->category = REF_CAT_PUB;
5490 switch (imp->prepub) {
5491 case 1 :
5492 rbp->category = REF_CAT_UNP;
5493 break;
5494 case 2 :
5495 rbp->category = REF_CAT_PUB;
5496 break;
5497 default :
5498 break;
5499 }
5500 }
5501
5502 /* check for sites reftype */
5503
5504 if (pdp->reftype != 0) {
5505 rbp->sites = pdp->reftype;
5506 }
5507
5508 if (rbp->muid == 0 && rbp->pmid == 0) {
5509 vnp = pdp->pub;
5510
5511 /* skip over just serial number */
5512
5513 if (vnp != NULL && vnp->choice == PUB_Gen && vnp->next != NULL) {
5514 cgp = (CitGenPtr) vnp->data.ptrvalue;
5515 if (cgp != NULL) {
5516 if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
5517 if (cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
5518 vnp = vnp->next;
5519 }
5520 }
5521 }
5522 }
5523
5524 if (PubLabelUnique (vnp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT, TRUE) > 0) {
5525 rbp->uniquestr = StringSaveNoNull (buf);
5526 }
5527 }
5528
5529 irp = (IntRefBlockPtr) rbp;
5530 irp->date = DateDup (dp);
5531 irp->justuids = justuids;
5532 /* if (justuids) { */
5533 irp->fig = StringSaveNoNull (pdp->fig);
5534 irp->maploc = StringSaveNoNull (pdp->maploc);
5535 irp->poly_a = pdp->poly_a;
5536 /* } */
5537
5538 /* if not rejected by now, link in */
5539
5540 ValNodeAddPointer (head, 0, rbp);
5541
5542 return rbp;
5543 }
5544
SortReferences(VoidPtr ptr1,VoidPtr ptr2,Boolean serialFirst,Boolean isRefSeq)5545 static int LIBCALLBACK SortReferences (
5546 VoidPtr ptr1,
5547 VoidPtr ptr2,
5548 Boolean serialFirst,
5549 Boolean isRefSeq
5550 )
5551
5552 {
5553 int compare;
5554 IntRefBlockPtr irp1;
5555 IntRefBlockPtr irp2;
5556 RefBlockPtr rbp1;
5557 RefBlockPtr rbp2;
5558 Int2 status;
5559 RefBlockPtr temp;
5560 ValNodePtr vnp1;
5561 ValNodePtr vnp2;
5562
5563 if (ptr1 == NULL || ptr2 == NULL) return 0;
5564 vnp1 = *((ValNodePtr PNTR) ptr1);
5565 vnp2 = *((ValNodePtr PNTR) ptr2);
5566 if (vnp1 == NULL || vnp2 == NULL) return 0;
5567 rbp1 = (RefBlockPtr) vnp1->data.ptrvalue;
5568 rbp2 = (RefBlockPtr) vnp2->data.ptrvalue;
5569 if (rbp1 == NULL || rbp2 == NULL) return 0;
5570
5571 if (serialFirst) {
5572 if (rbp1->serial > rbp2->serial) {
5573 return 1;
5574 } else if (rbp1->serial < rbp2->serial) {
5575 return -1;
5576 }
5577 }
5578
5579 /* usual first sort by published, unpublished, and cit-subs */
5580
5581 if (rbp1->category > rbp2->category) {
5582 return 1;
5583 } else if (rbp1->category < rbp2->category) {
5584 return -1;
5585 }
5586
5587 /* for RefSeq, newer publications first, so temporarily swap pointers */
5588
5589 if (isRefSeq) {
5590 temp = rbp1;
5591 rbp1 = rbp2;
5592 rbp2 = temp;
5593 }
5594
5595 /* within class, sort by date, older publications first (except RefSeq) */
5596
5597 irp1 = (IntRefBlockPtr) rbp1;
5598 irp2 = (IntRefBlockPtr) rbp2;
5599
5600 if ( irp1->date != 0 && irp2->date == 0 ) {
5601 return 1;
5602 } else if ( irp1->date == 0 && irp2->date != 0 ) {
5603 return -1;
5604 }
5605
5606 status = DateMatch (irp1->date, irp2->date, TRUE);
5607 if (status == 1 || status == -1) return status;
5608 /* if dates incomparable, do other comparisons */
5609 if ( status != 0 ) {
5610 if( (NULL != irp1->date) && (NULL != irp2->date ) ) {
5611 /* std date comes before str date */
5612 return ( irp2->date->data[0] - irp1->date->data[0] );
5613 }
5614 }
5615
5616 /* if dates (e.g., years) match, try to distinguish by uids */
5617
5618 if (rbp1->pmid != 0 && rbp2->pmid != 0) {
5619 if (rbp1->pmid > rbp2->pmid) {
5620 return 1;
5621 } else if (rbp1->pmid < rbp2->pmid) {
5622 return -1;
5623 }
5624 }
5625
5626 if (rbp1->muid != 0 && rbp2->muid != 0) {
5627 if (rbp1->muid > rbp2->muid) {
5628 return 1;
5629 } else if (rbp1->muid < rbp2->muid) {
5630 return -1;
5631 }
5632 }
5633
5634 /* restore sort order after date and pmid/muid matching */
5635
5636 if (isRefSeq) {
5637 temp = rbp1;
5638 rbp1 = rbp2;
5639 rbp2 = temp;
5640
5641 irp1 = (IntRefBlockPtr) rbp1;
5642 irp2 = (IntRefBlockPtr) rbp2;
5643 }
5644
5645 /* if same uid, one with just uids goes last to be excised but remembered */
5646
5647 if ((rbp1->pmid != 0 && rbp2->pmid != 0) || (rbp1->muid != 0 && rbp2->muid != 0)) {
5648 if (irp1->justuids && (! irp2->justuids)) {
5649 return 1;
5650 } else if ((! irp1->justuids) && irp2->justuids) {
5651 return -1;
5652 }
5653 }
5654
5655 /* put sites after pubs that refer to all or a range of bases */
5656
5657 if (rbp1->sites > rbp2->sites) {
5658 return 1;
5659 } else if (rbp2->sites > rbp1->sites) {
5660 return -1;
5661 }
5662
5663 /* next use author string */
5664
5665 if (irp1->authstr != NULL && irp2->authstr != NULL) {
5666 compare = StringICmp (irp1->authstr, irp2->authstr);
5667 if (compare > 0) {
5668 return 1;
5669 } else if (compare < 0) {
5670 return -1;
5671 }
5672 }
5673
5674 /* use unique label string to determine sort order */
5675
5676 if (rbp1->uniquestr != NULL && rbp2->uniquestr != NULL) {
5677 compare = StringICmp (rbp1->uniquestr, rbp2->uniquestr);
5678 if (compare > 0) {
5679 return 1;
5680 } else if (compare < 0) {
5681 return -1;
5682 }
5683 }
5684
5685 /* for publication features, sort in explore index order - probably superset of itemID below */
5686
5687 if (irp1->index > irp2->index) {
5688 return 1;
5689 } else if (irp1->index < irp2->index) {
5690 return -1;
5691 }
5692
5693 /* last resort for equivalent publication descriptors, sort in itemID order */
5694
5695 if (rbp1->itemtype == OBJ_SEQDESC && rbp2->itemtype == OBJ_SEQDESC) {
5696 if (rbp1->itemID > rbp2->itemID) {
5697 return 1;
5698 } else if (rbp1->itemID < rbp2->itemID) {
5699 return -1;
5700 }
5701 }
5702
5703 if (rbp1->itemtype == OBJ_ANNOTDESC && rbp2->itemtype == OBJ_ANNOTDESC) {
5704 if (rbp1->itemID > rbp2->itemID) {
5705 return 1;
5706 } else if (rbp1->itemID < rbp2->itemID) {
5707 return -1;
5708 }
5709 }
5710
5711 if (! serialFirst) {
5712 if (rbp1->serial > rbp2->serial) {
5713 return 1;
5714 } else if (rbp1->serial < rbp2->serial) {
5715 return -1;
5716 }
5717 }
5718
5719 return 0;
5720 }
5721
SortReferencesA(VoidPtr ptr1,VoidPtr ptr2)5722 static int LIBCALLBACK SortReferencesA (
5723 VoidPtr ptr1,
5724 VoidPtr ptr2
5725 )
5726
5727 {
5728 return SortReferences (ptr1, ptr2, FALSE, FALSE);
5729 }
5730
SortReferencesB(VoidPtr ptr1,VoidPtr ptr2)5731 static int LIBCALLBACK SortReferencesB (
5732 VoidPtr ptr1,
5733 VoidPtr ptr2
5734 )
5735
5736 {
5737 return SortReferences (ptr1, ptr2, TRUE, FALSE);
5738 }
5739
SortReferencesAR(VoidPtr ptr1,VoidPtr ptr2)5740 static int LIBCALLBACK SortReferencesAR (
5741 VoidPtr ptr1,
5742 VoidPtr ptr2
5743 )
5744
5745 {
5746 return SortReferences (ptr1, ptr2, FALSE, TRUE);
5747 }
5748
SortReferencesBR(VoidPtr ptr1,VoidPtr ptr2)5749 static int LIBCALLBACK SortReferencesBR (
5750 VoidPtr ptr1,
5751 VoidPtr ptr2
5752 )
5753
5754 {
5755 return SortReferences (ptr1, ptr2, TRUE, TRUE);
5756 }
5757
GetAuthorsPlusConsortium(FmtType format,AuthListPtr alp)5758 static CharPtr GetAuthorsPlusConsortium (
5759 FmtType format,
5760 AuthListPtr alp
5761 )
5762
5763 {
5764 CharPtr consortium;
5765 CharPtr str;
5766 CharPtr tmp;
5767
5768 consortium = NULL;
5769 str = GetAuthorsString (format, alp, &consortium, NULL, NULL);
5770 if (str == NULL) return consortium;
5771 if (consortium == NULL) return str;
5772 tmp = (CharPtr) MemNew (StringLen (str) + StringLen (consortium) + 5);
5773 if (tmp == NULL) return NULL;
5774 StringCpy (tmp, str);
5775 StringCat (tmp, "; ");
5776 StringCat (tmp, consortium);
5777 MemFree (str);
5778 MemFree (consortium);
5779 return tmp;
5780 }
5781
HasNoPmidOrMuid(PubdescPtr pdp)5782 static Boolean HasNoPmidOrMuid (
5783 PubdescPtr pdp
5784 )
5785
5786 {
5787 ValNodePtr vnp;
5788
5789 if (pdp == NULL) return TRUE;
5790 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
5791 if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return FALSE;
5792 }
5793 return TRUE;
5794 }
5795
5796 typedef struct cdspubs {
5797 Asn2gbWorkPtr awp;
5798 BioseqPtr target;
5799 ValNodePtr vnp;
5800 } CdsPubs, PNTR CdsPubsPtr;
5801
GetRefsOnCDS(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5802 static Boolean LIBCALLBACK GetRefsOnCDS (
5803 SeqFeatPtr sfp,
5804 SeqMgrFeatContextPtr context
5805 )
5806
5807 {
5808 AuthListPtr alp;
5809 Asn2gbWorkPtr awp;
5810 CdsPubsPtr cpp;
5811 IntRefBlockPtr irp;
5812 Boolean okay;
5813 PubdescPtr pdp;
5814 RefBlockPtr rbp;
5815 BioseqPtr target;
5816
5817 if (sfp == NULL || context == NULL) return TRUE;
5818 cpp = (CdsPubsPtr) context->userdata;
5819 awp = cpp->awp;
5820 if (awp == NULL) return TRUE;
5821 target = cpp->target;
5822
5823 okay = TRUE;
5824 pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5825 if (awp->format == FTABLE_FMT) {
5826 if (HasNoPmidOrMuid (pdp)) {
5827 okay = FALSE;
5828 }
5829 }
5830
5831 if (okay) {
5832 rbp = AddPub (awp, &(awp->pubhead), pdp);
5833 if (rbp != NULL) {
5834
5835 rbp->entityID = context->entityID;
5836 rbp->itemID = context->itemID;
5837 rbp->itemtype = OBJ_SEQFEAT;
5838
5839 irp = (IntRefBlockPtr) rbp;
5840 irp->loc = SeqLocMerge (cpp->target, cpp->vnp, NULL, FALSE, TRUE, FALSE);
5841 if (target != NULL) {
5842 irp->left = 0;
5843 irp->right = target->length - 1;
5844 }
5845 alp = GetAuthListPtr (pdp, NULL);
5846 if (alp != NULL) {
5847 irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5848 }
5849 irp->index = 0;
5850 }
5851 }
5852
5853 return TRUE;
5854 }
5855
/*
 * Collects publications that apply to a bioseq and adds them, through
 * AddPub, to awp->pubhead.  Four sources are visited in order:
 *
 *   1. publication descriptors found on target,
 *   2. when cdsloc is given, publication features overlapping cdsloc
 *      (via the GetRefsOnCDS callback),
 *   3. publication AnnotDescs found on target,
 *   4. publication features indexed on awp->parent whose intervals
 *      touch the from..to range.
 *
 *   awp     - work structure; awp->ajp must be set
 *   target  - bioseq whose descriptors and annot descriptors are read and
 *             onto which locations are merged
 *   bsp     - bioseq supplying the id and length of the full-length
 *             location used for descriptors
 *   from,to - range used for feature selection and stored as the
 *             left/right extent of descriptor references; replaced by
 *             the start/stop of ajp->ajp.slp when that location is set
 *   cdsloc  - CDS location, or NULL
 *   cdsbsp  - CDS product bioseq, or NULL; when both cdsloc and cdsbsp
 *             are set, descriptor references are placed on the whole of
 *             cdsbsp
 *
 * In FTABLE_FMT, descriptors and features without PMID or MUID are
 * skipped.
 */
static void GetRefsOnBioseq (
  Asn2gbWorkPtr awp,
  BioseqPtr target,
  BioseqPtr bsp,
  Int4 from,
  Int4 to,
  SeqLocPtr cdsloc,
  BioseqPtr cdsbsp
)

{
  SeqMgrAndContext   acontext;
  AnnotDescPtr       adp;
  IntAsn2gbJobPtr    ajp;
  AuthListPtr        alp;
  CdsPubs            cp;
  SeqMgrDescContext  dcontext;
  SeqMgrFeatContext  fcontext;
  Int2               i;
  Int2               idx;
  IntRefBlockPtr     irp;
  Int4Ptr            ivals;
  SeqLocPtr          newloc;
  Int2               numivals;
  Boolean            okay;
  PubdescPtr         pdp;
  RefBlockPtr        rbp;
  SeqDescrPtr        sdp;
  SeqFeatPtr         sfp;
  SeqInt             sint;
  SeqIntPtr          sintp;
  SeqIdPtr           sip;
  Int4               start;
  Int4               stop;
  Boolean            takeIt;
  ValNode            vn;
  ValNodePtr         vnp;

  if (awp == NULL || target == NULL || bsp == NULL) return;
  ajp = awp->ajp;
  if (ajp == NULL) return;

  /* full length loc for descriptors */

  sint.from = 0;
  if (ajp->ajp.slp != NULL) {
    /* other features use awp->slp for from and to */
    from = SeqLocStart (ajp->ajp.slp);
    to = SeqLocStop (ajp->ajp.slp);
    sint.to = SeqLocLen (ajp->ajp.slp) - 1;
  } else {
    sint.to = bsp->length - 1;
  }
  sint.strand = Seq_strand_plus;
  sint.id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
  sint.if_from = NULL;
  sint.if_to = NULL;

  vn.choice = SEQLOC_INT;
  vn.data.ptrvalue = (Pointer) &sint;
  vn.next = NULL;

  /* publication descriptors */

  for (sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_pub, &dcontext);
       sdp != NULL;
       sdp = SeqMgrGetNextDescriptor (target, sdp, Seq_descr_pub, &dcontext)) {

    pdp = (PubdescPtr) sdp->data.ptrvalue;

    /* check if descriptor on part already added on segmented bioseq */

    okay = TRUE;
    for (vnp = awp->pubhead; vnp != NULL && okay; vnp = vnp->next) {
      rbp = (RefBlockPtr) vnp->data.ptrvalue;
      if (rbp == NULL) continue;
      if (rbp->entityID == dcontext.entityID &&
          rbp->itemID == dcontext.itemID &&
          rbp->itemtype == OBJ_SEQDESC) {
        okay = FALSE;
      }
    }
    if (awp->format == FTABLE_FMT && HasNoPmidOrMuid (pdp)) {
      okay = FALSE;
    }
    if (! okay) continue;

    rbp = AddPub (awp, &(awp->pubhead), pdp);
    if (rbp == NULL) continue;

    rbp->entityID = dcontext.entityID;
    rbp->itemID = dcontext.itemID;
    rbp->itemtype = OBJ_SEQDESC;

    irp = (IntRefBlockPtr) rbp;
    if (cdsloc != NULL && cdsbsp != NULL) {

      /* place the reference on the whole CDS product; the location is
         only attached when the interval could be allocated */

      sintp = SeqIntNew ();
      if (sintp != NULL) {
        sintp->from = 0;
        sintp->to = cdsbsp->length - 1;
        sintp->id = SeqIdDup (cdsbsp->id);
        irp->loc = ValNodeAddPointer (NULL, SEQLOC_INT, (Pointer) sintp);
      }
      irp->left = 0;
      irp->right = cdsbsp->length - 1;
    } else {
      irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
      irp->left = from;
      irp->right = to;
    }
    alp = GetAuthListPtr (pdp, NULL);
    if (alp != NULL) {
      irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
    }
    irp->index = 0;
  }

  /* if protein with no pubs, get pubs applicable to DNA location of CDS */

  if (cdsloc != NULL) {
    cp.awp = awp;
    cp.target = cdsbsp;
    cp.vnp = &vn;
    SeqMgrGetAllOverlappingFeatures (cdsloc, FEATDEF_PUB, NULL, 0, LOCATION_SUBSET, (Pointer) &cp, GetRefsOnCDS);
  }

  /* also get publications from AnnotDesc on SeqAnnot */

  for (adp = SeqMgrGetNextAnnotDesc (target, NULL, Annot_descr_pub, &acontext);
       adp != NULL;
       adp = SeqMgrGetNextAnnotDesc (target, adp, Annot_descr_pub, &acontext)) {

    pdp = (PubdescPtr) adp->data.ptrvalue;
    rbp = AddPub (awp, &(awp->pubhead), pdp);
    if (rbp == NULL) continue;

    rbp->entityID = acontext.entityID;
    rbp->itemID = acontext.itemID;
    rbp->itemtype = OBJ_ANNOTDESC;

    irp = (IntRefBlockPtr) rbp;
    irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
    irp->left = from;
    irp->right = to;
    alp = GetAuthListPtr (pdp, NULL);
    if (alp != NULL) {
      irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
    }
    irp->index = 0;
  }

  /* the full-length location is no longer needed past this point */

  SeqIdFree (sint.id);

  /* features are indexed on parent if segmented */

  bsp = awp->parent;

  for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
       sfp != NULL;
       sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_PUB, 0, &fcontext)) {

    ivals = fcontext.ivals;
    numivals = fcontext.numivals;
    if (ivals == NULL || numivals < 1) continue;

    /* take the feature if any interval straddles from, straddles to,
       or lies inside from..to; stop scanning at the first hit */

    takeIt = FALSE;
    for (i = 0, idx = 0; i < numivals && (! takeIt); i++, idx += 2) {
      start = ivals [idx];
      stop = ivals [idx + 1];
      if ((start <= from && stop > from) ||
          (start < to && stop >= to) ||
          (start >= from && stop <= to)) {
        takeIt = TRUE;
      }
    }

    pdp = (PubdescPtr) sfp->data.value.ptrvalue;
    if (awp->format == FTABLE_FMT && HasNoPmidOrMuid (pdp)) {
      takeIt = FALSE;
    }
    if (! takeIt) continue;

    rbp = AddPub (awp, &(awp->pubhead), pdp);
    if (rbp == NULL) continue;

    rbp->entityID = fcontext.entityID;
    rbp->itemID = fcontext.itemID;
    rbp->itemtype = OBJ_SEQFEAT;

    irp = (IntRefBlockPtr) rbp;
    irp->loc = SeqLocMerge (target, sfp->location, NULL, FALSE, TRUE, FALSE);
    irp->left = fcontext.left;
    irp->right = fcontext.right;

    /* when the job is restricted to a sublocation, remap the feature
       location onto it and keep the remapped copy if one is produced */

    if (ajp->ajp.slp != NULL) {
      sip = SeqIdParse ("lcl|dummy");
      newloc = SeqLocReMapEx (sip, ajp->ajp.slp, irp->loc, 0, FALSE, ajp->masterStyle, ajp->relaxedMapping);
      SeqIdFree (sip);
      if (newloc != NULL) {
        A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
        SeqLocFree (irp->loc);
        irp->loc = newloc;
      }
    }

    alp = GetAuthListPtr (pdp, NULL);
    if (alp != NULL) {
      irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
    }
    irp->index = fcontext.index;
  }
}
6103
GetRefsOnSeg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)6104 static Boolean LIBCALLBACK GetRefsOnSeg (
6105 SeqLocPtr slp,
6106 SeqMgrSegmentContextPtr context
6107 )
6108
6109 {
6110 Asn2gbWorkPtr awp;
6111 BioseqPtr bsp;
6112 Int4 from;
6113 SeqLocPtr loc;
6114 SeqEntryPtr oldscope;
6115 SeqEntryPtr sep;
6116 SeqIdPtr sip;
6117 Int4 to;
6118
6119 if (slp == NULL || context == NULL) return FALSE;
6120 awp = (Asn2gbWorkPtr) context->userdata;
6121
6122 from = context->cumOffset;
6123 to = from + context->to - context->from;
6124
6125 sip = SeqLocId (slp);
6126 if (sip == NULL) {
6127 loc = SeqLocFindNext (slp, NULL);
6128 if (loc != NULL) {
6129 sip = SeqLocId (loc);
6130 }
6131 }
6132 if (sip == NULL) return TRUE;
6133
6134 /* reference descriptors only on parts within entity */
6135
6136 sep = GetTopSeqEntryForEntityID (awp->entityID);
6137 oldscope = SeqEntrySetScope (sep);
6138 bsp = BioseqFind (sip);
6139 SeqEntrySetScope (oldscope);
6140
6141 if (bsp != NULL) {
6142 GetRefsOnBioseq (awp, awp->refs, bsp, from, to, NULL, NULL);
6143 return TRUE;
6144 }
6145
6146 /* if we ever want to fetch remote references, code goes here */
6147
6148 return TRUE;
6149 }
6150
AddReferenceBlock(Asn2gbWorkPtr awp,Boolean isRefSeq)6151 NLM_EXTERN Boolean AddReferenceBlock (
6152 Asn2gbWorkPtr awp,
6153 Boolean isRefSeq
6154 )
6155
6156 {
6157 IntAsn2gbJobPtr ajp;
6158 AuthListPtr alp;
6159 Asn2gbSectPtr asp;
6160 BioseqPtr bsp;
6161 SeqFeatPtr cds;
6162 Boolean combine;
6163 SeqMgrFeatContext context;
6164 CitSubPtr csp;
6165 BioseqPtr dna;
6166 Boolean excise;
6167 Int2 firstserial;
6168 ValNodePtr head = NULL;
6169 Int2 i = 0;
6170 IntRefBlockPtr irp;
6171 Boolean is_aa;
6172 Boolean is_ddbj = FALSE;
6173 Boolean is_embl = FALSE;
6174 Boolean is_patent = FALSE;
6175 Int2 j;
6176 IntRefBlockPtr lastirp;
6177 RefBlockPtr lastrbp;
6178 ValNodePtr next;
6179 Int2 numReferences;
6180 ValNodePtr PNTR prev;
6181 RefBlockPtr rbp;
6182 RefBlockPtr PNTR referenceArray;
6183 BioseqPtr refs;
6184 SubmitBlockPtr sbp;
6185 SeqIdPtr sip;
6186 SeqLocPtr slp;
6187 BioseqPtr target;
6188 ValNodePtr vnp;
6189
6190 if (awp == NULL) return FALSE;
6191 ajp = awp->ajp;
6192 if (ajp == NULL) return FALSE;
6193 asp = awp->asp;
6194 if (asp == NULL) return FALSE;
6195 bsp = awp->bsp;
6196 refs = awp->refs;
6197 if (bsp == NULL || refs == NULL) return FALSE;
6198
6199 /* collect publications on bioseq */
6200
6201 awp->pubhead = NULL;
6202 GetRefsOnBioseq (awp, bsp, refs, awp->from, awp->to, NULL, NULL);
6203 target = bsp;
6204
6205 for (sip = bsp->id; sip != NULL; sip = sip->next) {
6206 if (sip->choice == SEQID_EMBL) {
6207 is_embl = TRUE;
6208 } else if (sip->choice == SEQID_DDBJ) {
6209 is_ddbj = TRUE;
6210 } else if (sip->choice == SEQID_PATENT) {
6211 is_patent = TRUE;
6212 }
6213 }
6214
6215 is_aa = (Boolean) ISA_aa (bsp->mol);
6216
6217 if (bsp->repr == Seq_repr_seg) {
6218
6219 /* collect publication descriptors on local parts */
6220
6221 SeqMgrExploreSegments (bsp, (Pointer) awp, GetRefsOnSeg);
6222 target = awp->refs;
6223 }
6224
6225 if (awp->pubhead == NULL && ISA_aa (bsp->mol)) {
6226
6227 /* if protein with no pubs, get pubs applicable to DNA location of CDS */
6228
6229 cds = SeqMgrGetCDSgivenProduct (bsp, &context);
6230 if (cds != NULL) {
6231 dna = BioseqFindFromSeqLoc (cds->location);
6232 if (dna != NULL) {
6233 GetRefsOnBioseq (awp, dna, dna, context.left, context.right, cds->location, bsp);
6234 target = dna;
6235 }
6236 }
6237 }
6238
6239 head = awp->pubhead;
6240 awp->pubhead = NULL;
6241
6242 if (head == NULL && awp->ssp == NULL) return FALSE;
6243
6244 /* sort by pub/unpub/sites/sub, then date, finally existing serial */
6245
6246 if (isRefSeq) {
6247 head = ValNodeSort (head, SortReferencesAR);
6248 } else {
6249 head = ValNodeSort (head, SortReferencesA);
6250 }
6251
6252 if (awp->ssp != NULL && (! awp->onlyGeneRIFs) && (! awp->onlyReviewPubs)) {
6253
6254 /* add seq-submit citation */
6255
6256 rbp = (RefBlockPtr) MemNew (sizeof (IntRefBlock));
6257 if (rbp != NULL) {
6258 irp = (IntRefBlockPtr) rbp;
6259
6260 rbp->blocktype = REFERENCE_BLOCK;
6261 rbp->section = awp->currsection;
6262 rbp->serial = INT2_MAX;
6263 rbp->category = REF_CAT_SUB;
6264
6265 rbp->entityID = ajp->ajp.entityID;
6266 rbp->itemID = 1;
6267 rbp->itemtype = OBJ_SEQSUB_CIT;
6268
6269 sbp = awp->ssp->sub;
6270 if (sbp != NULL) {
6271 csp = sbp->cit;
6272 if (csp != NULL) {
6273 alp = GetAuthListPtr (NULL, csp);
6274 if (alp != NULL) {
6275 irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
6276 }
6277 if (csp->date != NULL) {
6278 irp->date = DateDup (csp->date);
6279 }
6280 }
6281 }
6282
6283 if (awp->citSubsFirst) {
6284
6285 /* for DDBJ, add seq-submit citation to beginning of list */
6286
6287 vnp = ValNodeNew (NULL);
6288 if (vnp != NULL) {
6289 vnp->choice = 0;
6290 vnp->data.ptrvalue = (VoidPtr) rbp;
6291 vnp->next = head;
6292 head = vnp;
6293 }
6294
6295 } else {
6296
6297 /* for GENBANK and EMBL add seq-submit citation to end of list */
6298
6299 ValNodeAddPointer (&head, 0, rbp);
6300 }
6301 }
6302 }
6303
6304 /* unique references, excise duplicates from list */
6305
6306 prev = &(head);
6307 vnp = head;
6308 lastrbp = NULL;
6309 while (vnp != NULL) {
6310 excise = FALSE;
6311 combine = TRUE;
6312 next = vnp->next;
6313 rbp = (RefBlockPtr) vnp->data.ptrvalue;
6314 if (lastrbp != NULL) {
6315 lastirp = (IntRefBlockPtr) lastrbp;
6316 if (rbp != NULL) {
6317 irp = (IntRefBlockPtr) rbp;
6318 if (lastrbp->pmid != 0 && rbp->pmid != 0) {
6319 if (lastrbp->pmid == rbp->pmid) {
6320 if (lastirp->right + 1 >= irp->left) {
6321 excise = TRUE;
6322 }
6323 }
6324 } else if (lastrbp->muid != 0 && rbp->muid != 0) {
6325 if (lastrbp->muid == rbp->muid) {
6326 if (lastirp->right + 1 >= irp->left) {
6327 excise = TRUE;
6328 }
6329 }
6330 } else if (lastrbp->uniquestr != NULL && rbp->uniquestr != NULL) {
6331 if (StringICmp (lastrbp->uniquestr, rbp->uniquestr) == 0) {
6332 if (SeqLocCompare (irp->loc, lastirp->loc) == SLC_A_EQ_B) {
6333 if (StringICmp (irp->authstr, lastirp->authstr) == 0) {
6334
6335 /* L76496.1 - removing duplicate submission pubs */
6336 if (lastirp->right + 1 >= irp->left) {
6337 excise = TRUE;
6338 }
6339 }
6340 }
6341 }
6342 }
6343 if (excise && lastrbp->sites == 0 && rbp->sites > 0) {
6344 /* real range trumps sites */
6345 combine = FALSE;
6346 }
6347 }
6348 }
6349 if (rbp != NULL) {
6350 irp = (IntRefBlockPtr) rbp;
6351 if (irp->justuids) {
6352 if (isRefSeq && is_aa) {
6353 /* if allowing justuid in protein RefSeq, try to look up dynamically */
6354 excise = TRUE; /* Back to old behavior, do not fetch */
6355 } else {
6356 /* do not allow justuids reference to appear by itself - S79174.1 */
6357 excise = TRUE;
6358 /* justuids should still combine, even if no authors - S67070.1 */
6359 }
6360 } else if (is_embl && is_patent) {
6361 /* EMBL patent records do not need author or title - A29528.1 */
6362 } else if (StringHasNoText (irp->authstr)) {
6363 /* do not allow no author reference to appear by itself - U07000.1 */
6364 excise = TRUE;
6365 combine = FALSE;
6366 } else if (isRefSeq && is_aa && rbp->category == REF_CAT_SUB) {
6367 /* GenPept RefSeq suppresses cit-subs */
6368 excise = TRUE;
6369 combine = FALSE;
6370 }
6371 }
6372 if (awp->mode == DUMP_MODE) {
6373 excise = FALSE;
6374 }
6375 /* do not hide duplicate EMBL and DDBJ publications */
6376 if (is_embl || is_ddbj) {
6377 excise = FALSE;
6378 combine = TRUE;
6379 }
6380 /* does not fuse equivalent publication features for local, general, refseq, and 2+6 genbank ids */
6381 if (excise && awp->sourcePubFuse) {
6382 *prev = vnp->next;
6383 vnp->next = NULL;
6384
6385 /* combine locations of duplicate references */
6386
6387 irp = (IntRefBlockPtr) rbp;
6388 lastirp = (IntRefBlockPtr) lastrbp;
6389 if (combine) {
6390 if (lastirp != NULL) {
6391 slp = SeqLocMerge (target, lastirp->loc, irp->loc, FALSE, TRUE, FALSE);
6392 lastirp->loc = SeqLocFree (lastirp->loc);
6393 lastirp->loc = slp;
6394 }
6395 if (irp != NULL && lastirp != NULL) {
6396 if ((rbp->muid == lastrbp->muid && rbp->muid != 0) ||
6397 (rbp->pmid == lastrbp->pmid && rbp->pmid != 0)) {
6398 if (lastirp->fig == NULL) {
6399 lastirp->fig = StringSaveNoNull (irp->fig);
6400 }
6401 if (lastirp->maploc == NULL) {
6402 lastirp->maploc = StringSaveNoNull (irp->maploc);
6403 }
6404 lastirp->poly_a = irp->poly_a;
6405 }
6406 }
6407 }
6408
6409 /* and remove duplicate reference */
6410
6411 MemFree (rbp->uniquestr);
6412 DateFree (irp->date);
6413 SeqLocFree (irp->loc);
6414 MemFree (irp->authstr);
6415 MemFree (irp->fig);
6416 MemFree (irp->maploc);
6417 MemFree (rbp);
6418 ValNodeFree (vnp);
6419
6420 } else {
6421
6422 prev = &(vnp->next);
6423 lastrbp = rbp;
6424 }
6425 vnp = next;
6426 }
6427
6428 /* resort by existing serial, then pub/unpub/sites/sub, then date */
6429
6430 if (isRefSeq) {
6431 head = ValNodeSort (head, SortReferencesBR);
6432 } else {
6433 head = ValNodeSort (head, SortReferencesB);
6434 }
6435
6436 if (head == NULL) return FALSE;
6437
6438 /* if taking newest publications, free remainder */
6439
6440 if (awp->newestPubs) {
6441 for (vnp = head, i = 1; vnp != NULL && i < 5; vnp = vnp->next, i++) continue;
6442 if (vnp != NULL) {
6443 next = vnp->next;
6444 vnp->next = NULL;
6445 for (vnp = next; vnp != NULL; vnp = vnp->next) {
6446 rbp = (RefBlockPtr) vnp->data.ptrvalue;
6447 MemFree (rbp->uniquestr);
6448 irp = (IntRefBlockPtr) rbp;
6449 DateFree (irp->date);
6450 SeqLocFree (irp->loc);
6451 MemFree (irp->authstr);
6452 MemFree (irp->fig);
6453 MemFree (irp->maploc);
6454 MemFree (rbp);
6455 }
6456 }
6457
6458 /* if taking oldest publications, free remainder */
6459
6460 } else if (awp->oldestPubs) {
6461 for (vnp = head, j = 0; vnp != NULL; vnp = vnp->next, j++) continue;
6462 if (j > 5) {
6463 for (vnp = head, i = 0; vnp != NULL && i < j - 6; vnp = vnp->next, i++) continue;
6464 if (vnp != NULL) {
6465 next = vnp->next;
6466 vnp->next = NULL;
6467 for (vnp = head; vnp != NULL; vnp = vnp->next) {
6468 rbp = (RefBlockPtr) vnp->data.ptrvalue;
6469 MemFree (rbp->uniquestr);
6470 irp = (IntRefBlockPtr) rbp;
6471 DateFree (irp->date);
6472 SeqLocFree (irp->loc);
6473 MemFree (irp->authstr);
6474 MemFree (irp->fig);
6475 MemFree (irp->maploc);
6476 MemFree (rbp);
6477 }
6478 head = next;
6479 }
6480 }
6481 }
6482
6483 /* assign serial numbers */
6484
6485 firstserial = 1;
6486
6487 /* first find highest one assigned by EMBL/SWISS-PROT */
6488
6489 for (vnp = head; vnp != NULL; vnp = vnp->next) {
6490 rbp = (RefBlockPtr) vnp->data.ptrvalue;
6491 if (rbp == NULL) continue;
6492 if (rbp->serial > 0 && rbp->serial < INT2_MAX) {
6493 firstserial = rbp->serial + 1;
6494 }
6495 }
6496
6497 /* then give increasing serial numbers to unassigned publications */
6498
6499 for (vnp = head; vnp != NULL; vnp = vnp->next) {
6500 rbp = (RefBlockPtr) vnp->data.ptrvalue;
6501 if (rbp == NULL) continue;
6502 if (rbp->serial > 0 && rbp->serial < INT2_MAX) continue;
6503 rbp->serial = firstserial;
6504 firstserial++;
6505 }
6506
6507 /* allocate reference array for this section */
6508
6509 numReferences = ValNodeLen (head);
6510 asp->numReferences = numReferences;
6511
6512 if (numReferences > 0) {
6513 referenceArray = (RefBlockPtr PNTR) MemNew (sizeof (RefBlockPtr) * (numReferences + 1));
6514 asp->referenceArray = referenceArray;
6515
6516 if (referenceArray != NULL) {
6517
6518 /* fill in reference array */
6519
6520 for (vnp = head, i = 0; vnp != NULL && i < numReferences; vnp = vnp->next, i++) {
6521 referenceArray [i] = (RefBlockPtr) vnp->data.ptrvalue;
6522 }
6523 }
6524 }
6525
6526 /* finally link into blocks for current section */
6527
6528 ValNodeLink (&(awp->lastblock), head);
6529 vnp = awp->lastblock;
6530 if (vnp == NULL) return FALSE;
6531 while (vnp->next != NULL) {
6532 vnp = vnp->next;
6533 }
6534
6535 awp->lastblock = vnp;
6536 if (awp->blockList == NULL) {
6537 awp->blockList = vnp;
6538 }
6539
6540 if (awp->afp != NULL) {
6541 for (vnp = head; vnp != NULL; vnp = vnp->next) {
6542 rbp = (RefBlockPtr) vnp->data.ptrvalue;
6543 if (rbp == NULL) continue;
6544 DoImmediateFormat (awp->afp, (BaseBlockPtr) rbp);
6545 }
6546 }
6547
6548 return TRUE;
6549 }
6550
AddRefStatsBlock(Asn2gbWorkPtr awp)6551 NLM_EXTERN void AddRefStatsBlock (
6552 Asn2gbWorkPtr awp
6553 )
6554
6555 {
6556 IntAsn2gbJobPtr ajp;
6557 BaseBlockPtr bbp;
6558 BioseqPtr bsp;
6559 StringItemPtr ffstring;
6560
6561 if (awp == NULL) return;
6562 ajp = awp->ajp;
6563 if ( ajp == NULL ) return;
6564 bsp = awp->bsp;
6565 if (bsp == NULL) return;
6566
6567 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6568
6569 bbp = Asn2gbAddBlock (awp, REF_STATS_BLOCK, sizeof (BaseBlock));
6570 if (bbp != NULL) {
6571 ffstring = FFGetString (ajp);
6572 if (ffstring != NULL) {
6573 FFStartPrint (ffstring, awp->format, 0, 12, "REFSTATS", 12, 0, 0, NULL, FALSE);
6574
6575 FFAddOneString (ffstring, "placeholder", FALSE, FALSE, TILDE_TO_SPACES);
6576
6577 bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
6578 FFRecycleString(ajp, ffstring);
6579 }
6580
6581 if (awp->afp != NULL) {
6582 DoImmediateFormat (awp->afp, bbp);
6583 }
6584 }
6585 }
6586
AddWGSBlock(Asn2gbWorkPtr awp)6587 NLM_EXTERN void AddWGSBlock (
6588 Asn2gbWorkPtr awp
6589 )
6590
6591 {
6592 IntAsn2gbJobPtr ajp;
6593 Asn2gbSectPtr asp;
6594 BaseBlockPtr bbp;
6595 BioseqPtr bsp;
6596 Char buf [128];
6597 SeqMgrDescContext dcontext;
6598 CharPtr first;
6599 GBAltSeqItemPtr gbaip;
6600 GBAltSeqDataPtr gbasp, asphead = NULL, asplast = NULL;
6601 GBSeqPtr gbseq;
6602 CharPtr last;
6603 ObjectIdPtr oip;
6604 SeqDescrPtr sdp;
6605 UserFieldPtr ufp;
6606 UserObjectPtr uop;
6607 Int2 wgstype;
6608 StringItemPtr ffstring;
6609
6610 if (awp == NULL) return;
6611 ajp = awp->ajp;
6612 if ( ajp == NULL ) return;
6613 bsp = awp->bsp;
6614 if (bsp == NULL) return;
6615 asp = awp->asp;
6616 if (asp == NULL) return;
6617
6618 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6619
6620 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6621 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6622 sprintf (buf, "<a name=\"wgs_%s\"></a>", awp->currAccVerLabel);
6623 DoQuickLinkFormat (awp->afp, buf);
6624 }
6625
6626 if (ajp->gbseq) {
6627 gbseq = &asp->gbseq;
6628 } else {
6629 gbseq = NULL;
6630 }
6631
6632 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
6633 while (sdp != NULL) {
6634 uop = (UserObjectPtr) sdp->data.ptrvalue;
6635 if (uop != NULL) {
6636 oip = uop->type;
6637 first = NULL;
6638 last = NULL;
6639 wgstype = 0;
6640 if (oip != NULL) {
6641 if (StringICmp (oip->str, "WGSProjects") == 0) {
6642 wgstype = 1;
6643 } else if (StringICmp (oip->str, "WGS-Scaffold-List") == 0) {
6644 wgstype = 2;
6645 } else if (StringICmp (oip->str, "WGS-Contig-List") == 0) {
6646 wgstype = 3;
6647 }
6648 if (wgstype != 0) {
6649 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6650 oip = ufp->label;
6651 if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
6652 if (StringICmp (oip->str, "WGS_accession_first") == 0) {
6653 first = (CharPtr) ufp->data.ptrvalue;
6654 } else if (StringICmp (oip->str, "WGS_accession_last") == 0) {
6655 last = (CharPtr) ufp->data.ptrvalue;
6656 } else if (StringICmp (oip->str, "Accession_first") == 0) {
6657 first = (CharPtr) ufp->data.ptrvalue;
6658 } else if (StringICmp (oip->str, "Accession_last") == 0) {
6659 last = (CharPtr) ufp->data.ptrvalue;
6660 }
6661 }
6662 if (first != NULL && last != NULL) {
6663 if (gbseq != NULL) {
6664 gbasp = (GBAltSeqDataPtr) MemNew (sizeof (GBAltSeqData));
6665 if (gbasp != NULL) {
6666 if (asphead == NULL) {
6667 asphead = gbasp;
6668 }
6669 if (asplast != NULL) {
6670 asplast->next = gbasp;
6671 }
6672 asplast = gbasp;
6673 if (wgstype == 1) {
6674 gbasp->name = StringSave ("WGS");
6675 } else if (wgstype == 2) {
6676 gbasp->name = StringSave ("WGS_SCAFLD");
6677 } else if (wgstype == 3) {
6678 gbasp->name = StringSave ("WGS_CONTIG");
6679 }
6680 gbaip = GBAltSeqItemNew ();
6681 if (gbaip != NULL) {
6682 gbaip->first_accn = StringSave (first);
6683 if (StringCmp (first, last) != 0) {
6684 gbaip->last_accn = StringSave (last);
6685 }
6686 gbasp->items = gbaip;
6687 }
6688 }
6689 }
6690 bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
6691 if (bbp != NULL) {
6692 ffstring = FFGetString (ajp);
6693 if (ffstring != NULL) {
6694 if (wgstype == 1) {
6695 FFStartPrint (ffstring, awp->format, 0, 12, "WGS", 12, 0, 0, NULL, FALSE);
6696 } else if (wgstype == 2) {
6697 FFStartPrint (ffstring, awp->format, 0, 12, "WGS_SCAFLD", 12, 0, 0, NULL, FALSE);
6698 } else if (wgstype == 3) {
6699 FFStartPrint (ffstring, awp->format, 0, 12, "WGS_CONTIG", 12, 0, 0, NULL, FALSE);
6700 }
6701
6702 if ( GetWWW(ajp) ) {
6703 if (StringCmp (first, last) != 0) {
6704 if (wgstype == 1) {
6705 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6706 FF_Add_NCBI_Base_URL (ffstring, link_wgs);
6707 StringCpy (buf, first);
6708 if (buf [2] == '_') {
6709 buf [9] = '\0';
6710 } else {
6711 buf [6] = '\0';
6712 }
6713 FFAddTextToString(ffstring, "val=", buf, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
6714 sprintf (buf, "%s-%s", first, last);
6715 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6716 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6717 } else if (wgstype == 2) {
6718 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6719 if (StringLen (first) > 7 && first [6] == 'S') {
6720 FF_Add_NCBI_Base_URL (ffstring, link_wgs);
6721 StringCpy (buf, first);
6722 if (buf [2] == '_') {
6723 buf [9] = '\0';
6724 } else {
6725 buf [6] = '\0';
6726 }
6727 FFAddTextToString(ffstring, "val=", buf, "#scaffolds\">", FALSE, FALSE, TILDE_IGNORE);
6728 sprintf (buf, "%s-%s", first, last);
6729 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6730 } else {
6731 FF_Add_NCBI_Base_URL (ffstring, link_wgsscaf);
6732 sprintf (buf, "%s:%s", first, last);
6733 FFAddTextToString(ffstring, "term=", buf, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
6734 sprintf (buf, "%s-%s", first, last);
6735 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6736 }
6737 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6738 } else if (wgstype == 3) {
6739 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6740 FF_Add_NCBI_Base_URL (ffstring, link_wgsscaf);
6741 sprintf (buf, "%s:%s", first, last);
6742 FFAddTextToString(ffstring, "term=", buf, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
6743 sprintf (buf, "%s-%s", first, last);
6744 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6745 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6746 }
6747 } else {
6748 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6749 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
6750 FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
6751 sprintf (buf, "%s", first);
6752 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6753 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6754 }
6755 } else {
6756 if (StringCmp (first, last) != 0) {
6757 sprintf (buf, "%s-%s", first, last);
6758 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6759 } else {
6760 sprintf (buf, "%s", first);
6761 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6762 }
6763 }
6764
6765 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
6766 FFRecycleString(ajp, ffstring);
6767 }
6768
6769 bbp->entityID = dcontext.entityID;
6770 bbp->itemtype = OBJ_SEQDESC;
6771 bbp->itemID = dcontext.itemID;
6772 if (awp->afp != NULL) {
6773 DoImmediateFormat (awp->afp, bbp);
6774 }
6775 }
6776 }
6777 }
6778 }
6779 }
6780 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
6781 }
6782
6783 if (gbseq != NULL) {
6784 gbseq->alt_seq = asphead;
6785 }
6786 }
6787
AddTLSBlock(Asn2gbWorkPtr awp)6788 NLM_EXTERN void AddTLSBlock (
6789 Asn2gbWorkPtr awp
6790 )
6791
6792 {
6793 IntAsn2gbJobPtr ajp;
6794 Asn2gbSectPtr asp;
6795 BaseBlockPtr bbp;
6796 BioseqPtr bsp;
6797 Char buf [128];
6798 SeqMgrDescContext dcontext;
6799 CharPtr first;
6800 CharPtr last;
6801 ObjectIdPtr oip;
6802 SeqDescrPtr sdp;
6803 Char tls [32];
6804 UserFieldPtr ufp;
6805 UserObjectPtr uop;
6806 StringItemPtr ffstring;
6807
6808 if (awp == NULL) return;
6809 ajp = awp->ajp;
6810 if ( ajp == NULL ) return;
6811 bsp = awp->bsp;
6812 if (bsp == NULL) return;
6813 asp = awp->asp;
6814 if (asp == NULL) return;
6815
6816 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6817
6818 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
6819 while (sdp != NULL) {
6820 uop = (UserObjectPtr) sdp->data.ptrvalue;
6821 if (uop != NULL) {
6822 oip = uop->type;
6823 first = NULL;
6824 last = NULL;
6825 if (oip != NULL) {
6826 if (StringICmp (oip->str, "TLSProjects") == 0) {
6827 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6828 oip = ufp->label;
6829 if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
6830 if (StringICmp (oip->str, "TLS_accession_first") == 0) {
6831 first = (CharPtr) ufp->data.ptrvalue;
6832 } else if (StringICmp (oip->str, "TLS_accession_last") == 0) {
6833 last = (CharPtr) ufp->data.ptrvalue;
6834 }
6835 }
6836 if (first != NULL && last != NULL) {
6837 bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
6838 if (bbp != NULL) {
6839 ffstring = FFGetString (ajp);
6840 if (ffstring != NULL) {
6841 FFStartPrint (ffstring, awp->format, 0, 12, "TLS", 12, 0, 0, NULL, FALSE);
6842
6843 if ( GetWWW(ajp) ) {
6844 StringNCpy_0 (tls, first, sizeof (tls));
6845 tls [6] = '\0';
6846 if (StringCmp (first, last) != 0) {
6847 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6848 FF_Add_NCBI_Base_URL (ffstring, link_tls);
6849 FFAddTextToString(ffstring, "val=", tls, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
6850 sprintf (buf, "%s-%s", first, last);
6851 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6852 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6853 } else {
6854 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6855 FF_Add_NCBI_Base_URL (ffstring, link_tls);
6856 FFAddTextToString(ffstring, "val=", tls, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
6857 sprintf (buf, "%s", first);
6858 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6859 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
6860 }
6861 } else {
6862 if (StringCmp (first, last) != 0) {
6863 sprintf (buf, "%s-%s", first, last);
6864 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6865 } else {
6866 sprintf (buf, "%s", first);
6867 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6868 }
6869 }
6870
6871 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
6872 FFRecycleString(ajp, ffstring);
6873 }
6874
6875 bbp->entityID = dcontext.entityID;
6876 bbp->itemtype = OBJ_SEQDESC;
6877 bbp->itemID = dcontext.itemID;
6878 if (awp->afp != NULL) {
6879 DoImmediateFormat (awp->afp, bbp);
6880 }
6881 }
6882 }
6883 }
6884 }
6885 }
6886 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
6887 }
6888 }
6889
AddTSABlock(Asn2gbWorkPtr awp)6890 NLM_EXTERN void AddTSABlock (
6891 Asn2gbWorkPtr awp
6892 )
6893
6894 {
6895 IntAsn2gbJobPtr ajp;
6896 Asn2gbSectPtr asp;
6897 BaseBlockPtr bbp;
6898 BioseqPtr bsp;
6899 Char buf [128];
6900 SeqMgrDescContext dcontext;
6901 CharPtr first;
6902 GBAltSeqItemPtr gbaip;
6903 GBAltSeqDataPtr gbasp, asphead = NULL, asplast = NULL;
6904 GBSeqPtr gbseq;
6905 CharPtr last;
6906 ObjectIdPtr oip;
6907 SeqDescrPtr sdp;
6908 Int2 tsatype;
6909 UserFieldPtr ufp;
6910 UserObjectPtr uop;
6911 StringItemPtr ffstring;
6912
6913 if (awp == NULL) return;
6914 ajp = awp->ajp;
6915 if ( ajp == NULL ) return;
6916 bsp = awp->bsp;
6917 if (bsp == NULL) return;
6918 asp = awp->asp;
6919 if (asp == NULL) return;
6920
6921 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6922
6923 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6924 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6925 sprintf (buf, "<a name=\"wgs_%s\"></a>", awp->currAccVerLabel);
6926 DoQuickLinkFormat (awp->afp, buf);
6927 }
6928
6929 if (ajp->gbseq) {
6930 gbseq = &asp->gbseq;
6931 } else {
6932 gbseq = NULL;
6933 }
6934
6935 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
6936 while (sdp != NULL) {
6937 uop = (UserObjectPtr) sdp->data.ptrvalue;
6938 if (uop != NULL) {
6939 oip = uop->type;
6940 first = NULL;
6941 last = NULL;
6942 tsatype = 0;
6943 if (oip != NULL) {
6944 if (StringICmp (oip->str, "TSA-mRNA-List") == 0 || StringICmp (oip->str, "TSA-RNA-List") == 0) {
6945 tsatype = 1;
6946 }
6947 if (tsatype != 0) {
6948 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6949 oip = ufp->label;
6950 if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
6951 if (StringICmp (oip->str, "TSA_accession_first") == 0) {
6952 first = (CharPtr) ufp->data.ptrvalue;
6953 } else if (StringICmp (oip->str, "TSA_accession_last") == 0) {
6954 last = (CharPtr) ufp->data.ptrvalue;
6955 }
6956 }
6957 if (first != NULL && last != NULL) {
6958 if (gbseq != NULL) {
6959 gbasp = (GBAltSeqDataPtr) MemNew (sizeof (GBAltSeqData));
6960 if (gbasp != NULL) {
6961 if (asphead == NULL) {
6962 asphead = gbasp;
6963 }
6964 if (asplast != NULL) {
6965 asplast->next = gbasp;
6966 }
6967 asplast = gbasp;
6968 if (tsatype == 1) {
6969 gbasp->name = StringSave ("TSA");
6970 }
6971 gbaip = GBAltSeqItemNew ();
6972 if (gbaip != NULL) {
6973 gbaip->first_accn = StringSave (first);
6974 if (StringCmp (first, last) != 0) {
6975 gbaip->last_accn = StringSave (last);
6976 }
6977 gbasp->items = gbaip;
6978 }
6979 }
6980 }
6981 bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
6982 if (bbp != NULL) {
6983 ffstring = FFGetString (ajp);
6984 if (ffstring != NULL) {
6985 if (tsatype == 1) {
6986 FFStartPrint (ffstring, awp->format, 0, 12, "TSA", 12, 0, 0, NULL, FALSE);
6987 }
6988
6989 if ( GetWWW(ajp) ) {
6990 if (StringCmp (first, last) != 0) {
6991 /*
6992 sprintf (buf, "%s-%s", first, last);
6993 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
6994 */
6995 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
6996 FF_Add_NCBI_Base_URL (ffstring, link_tsa);
6997 StringCpy (buf, first);
6998 if (buf [2] == '_') {
6999 buf [9] = '\0';
7000 } else {
7001 buf [6] = '\0';
7002 }
7003 FFAddTextToString(ffstring, "val=", buf, "\">", FALSE, FALSE, TILDE_IGNORE);
7004 sprintf (buf, "%s-%s", first, last);
7005 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7006 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
7007 } else {
7008 /*
7009 sprintf (buf, "%s", first);
7010 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7011 */
7012 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
7013 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
7014 FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
7015 sprintf (buf, "%s", first);
7016 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7017 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
7018 }
7019 } else {
7020 if (StringCmp (first, last) != 0) {
7021 sprintf (buf, "%s-%s", first, last);
7022 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7023 } else {
7024 sprintf (buf, "%s", first);
7025 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7026 }
7027 }
7028
7029 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
7030 FFRecycleString(ajp, ffstring);
7031 }
7032
7033 bbp->entityID = dcontext.entityID;
7034 bbp->itemtype = OBJ_SEQDESC;
7035 bbp->itemID = dcontext.itemID;
7036 if (awp->afp != NULL) {
7037 DoImmediateFormat (awp->afp, bbp);
7038 }
7039 }
7040 }
7041 }
7042 }
7043 }
7044 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
7045 }
7046
7047 if (gbseq != NULL) {
7048 gbseq->alt_seq = asphead;
7049 }
7050 }
7051
AddCAGEBlock(Asn2gbWorkPtr awp)7052 NLM_EXTERN void AddCAGEBlock (
7053 Asn2gbWorkPtr awp
7054 )
7055
7056 {
7057 IntAsn2gbJobPtr ajp;
7058 Asn2gbSectPtr asp;
7059 BaseBlockPtr bbp;
7060 BioseqPtr bsp;
7061 Char buf [128];
7062 Int2 cagetype;
7063 SeqMgrDescContext dcontext;
7064 CharPtr first;
7065 GBAltSeqItemPtr gbaip;
7066 GBAltSeqDataPtr gbasp, asphead = NULL, asplast = NULL;
7067 GBSeqPtr gbseq;
7068 CharPtr last;
7069 ObjectIdPtr oip;
7070 SeqDescrPtr sdp;
7071 UserFieldPtr ufp;
7072 UserObjectPtr uop;
7073 StringItemPtr ffstring;
7074
7075 if (awp == NULL) return;
7076 ajp = awp->ajp;
7077 if ( ajp == NULL ) return;
7078 bsp = awp->bsp;
7079 if (bsp == NULL) return;
7080 asp = awp->asp;
7081 if (asp == NULL) return;
7082
7083 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
7084
7085 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7086 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7087 sprintf (buf, "<a name=\"wgs_%s\"></a>", awp->currAccVerLabel);
7088 DoQuickLinkFormat (awp->afp, buf);
7089 }
7090
7091 if (ajp->gbseq) {
7092 gbseq = &asp->gbseq;
7093 } else {
7094 gbseq = NULL;
7095 }
7096
7097 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
7098 while (sdp != NULL) {
7099 uop = (UserObjectPtr) sdp->data.ptrvalue;
7100 if (uop != NULL) {
7101 oip = uop->type;
7102 first = NULL;
7103 last = NULL;
7104 cagetype = 0;
7105 if (oip != NULL) {
7106 if (StringICmp (oip->str, "CAGE-Tag-List") == 0) {
7107 cagetype = 1;
7108 }
7109 if (cagetype != 0) {
7110 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
7111 oip = ufp->label;
7112 if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
7113 if (StringICmp (oip->str, "CAGE_accession_first") == 0) {
7114 first = (CharPtr) ufp->data.ptrvalue;
7115 } else if (StringICmp (oip->str, "CAGE_accession_last") == 0) {
7116 last = (CharPtr) ufp->data.ptrvalue;
7117 }
7118 }
7119 if (first != NULL && last != NULL) {
7120 if (gbseq != NULL) {
7121 gbasp = (GBAltSeqDataPtr) MemNew (sizeof (GBAltSeqData));
7122 if (gbasp != NULL) {
7123 if (asphead == NULL) {
7124 asphead = gbasp;
7125 }
7126 if (asplast != NULL) {
7127 asplast->next = gbasp;
7128 }
7129 asplast = gbasp;
7130 if (cagetype == 1) {
7131 gbasp->name = StringSave ("TAG");
7132 }
7133 gbaip = GBAltSeqItemNew ();
7134 if (gbaip != NULL) {
7135 gbaip->first_accn = StringSave (first);
7136 if (StringCmp (first, last) != 0) {
7137 gbaip->last_accn = StringSave (last);
7138 }
7139 gbasp->items = gbaip;
7140 }
7141 }
7142 }
7143 bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
7144 if (bbp != NULL) {
7145 ffstring = FFGetString (ajp);
7146 if (ffstring != NULL) {
7147 if (cagetype == 1) {
7148 FFStartPrint (ffstring, awp->format, 0, 12, "TAG", 12, 0, 0, NULL, FALSE);
7149 }
7150
7151 if ( GetWWW(ajp) ) {
7152 if (StringCmp (first, last) != 0) {
7153 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
7154 FF_Add_NCBI_Base_URL (ffstring, link_cage);
7155 FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", first, NULL, FALSE, FALSE, TILDE_IGNORE);
7156 FFAddTextToString(ffstring, ":", last, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
7157 sprintf (buf, "%s-%s", first, last);
7158 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7159 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
7160 } else {
7161 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
7162 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
7163 FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
7164 sprintf (buf, "%s", first);
7165 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7166 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
7167 }
7168 } else {
7169 if (StringCmp (first, last) != 0) {
7170 sprintf (buf, "%s-%s", first, last);
7171 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7172 } else {
7173 sprintf (buf, "%s", first);
7174 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
7175 }
7176 }
7177
7178 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
7179 FFRecycleString(ajp, ffstring);
7180 }
7181
7182 bbp->entityID = dcontext.entityID;
7183 bbp->itemtype = OBJ_SEQDESC;
7184 bbp->itemID = dcontext.itemID;
7185 if (awp->afp != NULL) {
7186 DoImmediateFormat (awp->afp, bbp);
7187 }
7188 }
7189 }
7190 }
7191 }
7192 }
7193 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
7194 }
7195
7196 if (gbseq != NULL) {
7197 gbseq->alt_seq = asphead;
7198 }
7199 }
7200
AddGenomeBlock(Asn2gbWorkPtr awp)7201 NLM_EXTERN void AddGenomeBlock (
7202 Asn2gbWorkPtr awp
7203 )
7204
7205 {
7206 CharPtr accn;
7207 IntAsn2gbJobPtr ajp;
7208 Asn2gbSectPtr asp;
7209 BaseBlockPtr bbp;
7210 BioseqPtr bsp;
7211 Char buf [128];
7212 SeqMgrDescContext dcontext;
7213 Boolean first = TRUE;
7214 GBAltSeqItemPtr gbaip;
7215 GBAltSeqDataPtr gbasp, asphead = NULL, asplast = NULL;
7216 GBSeqPtr gbseq;
7217 CharPtr moltype;
7218 ObjectIdPtr oip;
7219 SeqDescrPtr sdp;
7220 UserFieldPtr ufp;
7221 UserObjectPtr uop;
7222 UserFieldPtr urf;
7223 StringItemPtr ffstring;
7224
7225 if (awp == NULL) return;
7226 ajp = awp->ajp;
7227 if ( ajp == NULL ) return;
7228 bsp = awp->bsp;
7229 if (bsp == NULL) return;
7230 asp = awp->asp;
7231 if (asp == NULL) return;
7232
7233 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
7234
7235 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7236 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7237 sprintf (buf, "<a name=\"genome_%s\"></a>", awp->currAccVerLabel);
7238 DoQuickLinkFormat (awp->afp, buf);
7239 }
7240
7241 if (ajp->gbseq) {
7242 gbseq = &asp->gbseq;
7243 } else {
7244 gbseq = NULL;
7245 }
7246
7247 bbp = Asn2gbAddBlock (awp, GENOME_BLOCK, sizeof (BaseBlock));
7248 if (bbp == NULL) return;
7249
7250 ffstring = FFGetString(ajp);
7251 if ( ffstring == NULL ) return;
7252
7253 FFStartPrint (ffstring, awp->format, 0, 12, "GENOME", 12, 0, 0, NULL, FALSE);
7254
7255 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
7256 while (sdp != NULL) {
7257 uop = (UserObjectPtr) sdp->data.ptrvalue;
7258 if (uop != NULL) {
7259 oip = uop->type;
7260 if (oip != NULL && StringICmp (oip->str, "GenomeProject") == 0) {
7261 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
7262 oip = ufp->label;
7263 if (oip == NULL || oip->str == NULL || ufp->choice != 11) continue;
7264 if (StringICmp (oip->str, "Chromosome") != 0) continue;
7265 accn = NULL;
7266 moltype = NULL;
7267 for (urf = (UserFieldPtr) ufp->data.ptrvalue; urf != NULL; urf = urf->next) {
7268 oip = urf->label;
7269 if (oip == NULL || oip->str == NULL || urf->choice != 1) continue;
7270 if (StringICmp (oip->str, "accession") == 0) {
7271 accn = (CharPtr) urf->data.ptrvalue;
7272 } else if (StringICmp (oip->str, "Moltype") == 0) {
7273 moltype = (CharPtr) urf->data.ptrvalue;
7274 }
7275 }
7276 if (! StringHasNoText (accn)) {
7277 if (gbseq != NULL) {
7278 gbasp = (GBAltSeqDataPtr) MemNew (sizeof (GBAltSeqData));
7279 if (gbasp != NULL) {
7280 if (asphead == NULL) {
7281 asphead = gbasp;
7282 }
7283 if (asplast != NULL) {
7284 asplast->next = gbasp;
7285 }
7286 asplast = gbasp;
7287 gbasp->name = StringSave ("GENOME");
7288 gbaip = GBAltSeqItemNew ();
7289 if (gbaip != NULL) {
7290 if (! StringHasNoText (moltype)) {
7291 sprintf (buf, "%s (%s)", accn, moltype);
7292 gbaip->value = StringSave (buf);
7293 } else {
7294 sprintf (buf, "%s", accn);
7295 gbaip->value = StringSave (buf);
7296 }
7297 gbasp->items = gbaip;
7298 }
7299 }
7300 }
7301 if (! first) {
7302 FFAddNewLine(ffstring);
7303 }
7304 first = FALSE;
7305 FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
7306 if (! StringHasNoText (moltype)) {
7307 FFAddTextToString (ffstring, " (", moltype, ")", FALSE, FALSE, TILDE_TO_SPACES);
7308 }
7309 }
7310 }
7311 }
7312 }
7313 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
7314 }
7315
7316 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
7317 FFRecycleString(ajp, ffstring);
7318
7319 if (awp->afp != NULL) {
7320 DoImmediateFormat (awp->afp, bbp);
7321 }
7322
7323 if (gbseq != NULL) {
7324 gbseq->alt_seq = asphead;
7325 }
7326 }
7327
AddBasecountBlock(Asn2gbWorkPtr awp)7328 NLM_EXTERN void AddBasecountBlock (
7329 Asn2gbWorkPtr awp
7330 )
7331
7332 {
7333 IntAsn2gbJobPtr ajp;
7334 BaseBlockPtr bbp;
7335 BioseqPtr bsp;
7336
7337 if (awp == NULL) return;
7338 ajp = awp->ajp;
7339 if (ajp == NULL) return;
7340 bsp = awp->bsp;
7341 if (bsp == NULL) return;
7342
7343 bbp = Asn2gbAddBlock (awp, BASECOUNT_BLOCK, sizeof (BaseBlock));
7344 if (bbp == NULL) return;
7345
7346 bbp->entityID = awp->entityID;
7347 bbp->itemtype = bsp->idx.itemtype;
7348 bbp->itemID = bsp->idx.itemID;
7349
7350 if (awp->afp != NULL) {
7351 DoImmediateFormat (awp->afp, bbp);
7352 }
7353 }
7354
AddOriginBlock(Asn2gbWorkPtr awp)7355 NLM_EXTERN void AddOriginBlock (
7356 Asn2gbWorkPtr awp
7357 )
7358
7359 {
7360 IntAsn2gbJobPtr ajp;
7361 BaseBlockPtr bbp;
7362 BioseqPtr bsp;
7363 Char buf [67];
7364 SeqMgrDescContext dcontext;
7365 GBBlockPtr gbp;
7366 SeqDescrPtr sdp;
7367 StringItemPtr ffstring;
7368
7369 if (awp == NULL) return;
7370 ajp = awp->ajp;
7371 if (ajp == NULL) return;
7372 bsp = awp->bsp;
7373 if (bsp == NULL) return;
7374
7375 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
7376
7377 ffstring = FFGetString(ajp);
7378 if ( ffstring == NULL ) return;
7379
7380 bbp = Asn2gbAddBlock (awp, ORIGIN_BLOCK, sizeof (BaseBlock));
7381 if (bbp == NULL) return;
7382
7383 bbp->entityID = awp->entityID;
7384
7385 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
7386
7387 buf [0] = '\0';
7388
7389 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
7390 if (sdp != NULL) {
7391 gbp = (GBBlockPtr) sdp->data.ptrvalue;
7392 if (gbp != NULL && (! StringHasNoText (gbp->origin))) {
7393 StringNCpy_0 (buf, gbp->origin, sizeof (buf));
7394 bbp->entityID = dcontext.entityID;
7395 bbp->itemID = dcontext.itemID;
7396 bbp->itemtype = OBJ_SEQDESC;
7397 }
7398 }
7399
7400 FFStartPrint (ffstring, awp->format, 0, 12, "ORIGIN", 12, 0, 0, NULL, FALSE);
7401
7402 if (! StringHasNoText (buf)) {
7403 FFAddOneString (ffstring, buf, TRUE, FALSE, TILDE_TO_SPACES);
7404 }
7405 }
7406
7407 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 12, 0, 0, NULL);
7408 FFRecycleString(ajp, ffstring);
7409
7410 if (awp->afp != NULL) {
7411 DoImmediateFormat (awp->afp, bbp);
7412 }
7413 }
7414
7415 #define BASES_PER_BLOCK 1200
7416
AddSequenceBlock(Asn2gbWorkPtr awp)7417 NLM_EXTERN void AddSequenceBlock (
7418 Asn2gbWorkPtr awp
7419 )
7420
7421 {
7422 IntAsn2gbJobPtr ajp;
7423 BioseqPtr bsp;
7424 Char buf [128];
7425 Int4 extend;
7426 Int4 len;
7427 SeqBlockPtr sbp;
7428 Int4 start;
7429 Int4 stop;
7430
7431 if (awp == NULL) return;
7432 ajp = awp->ajp;
7433 if (ajp == NULL) return;
7434 bsp = awp->bsp;
7435 if (bsp == NULL) return;
7436
7437 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7438 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7439 sprintf (buf, "<a name=\"sequence_%s\"></a>", awp->currAccVerLabel);
7440 DoQuickLinkFormat (awp->afp, buf);
7441 }
7442
7443 if (awp->slp != NULL) {
7444 len = SeqLocLen (awp->slp);
7445 } else {
7446 len = bsp->length;
7447 }
7448
7449 /* if generating GBSeq XML, populate single sequence block */
7450
7451 if (ajp->gbseq) {
7452 sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
7453 if (sbp == NULL) return;
7454
7455 sbp->entityID = bsp->idx.entityID;
7456 sbp->itemID = bsp->idx.itemID;
7457 sbp->itemtype = OBJ_BIOSEQ;
7458
7459 sbp->start = 0;
7460 sbp->stop = len;
7461
7462 if (awp->afp != NULL) {
7463 DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
7464 }
7465
7466 return;
7467 }
7468
7469 /* otherwise populate individual sequence blocks for given range */
7470
7471 for (start = 0; start < len; start += BASES_PER_BLOCK) {
7472 sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
7473 if (sbp == NULL) continue;
7474
7475 sbp->entityID = bsp->idx.entityID;
7476 sbp->itemID = bsp->idx.itemID;
7477 sbp->itemtype = OBJ_BIOSEQ;
7478
7479 stop = start + BASES_PER_BLOCK;
7480 if (stop >= len) {
7481 stop = len;
7482 }
7483 extend = start + BASES_PER_BLOCK + 60;
7484 if (extend >= len) {
7485 extend = len;
7486 }
7487
7488 sbp->start = start;
7489 sbp->stop = stop;
7490 sbp->extend = extend;
7491
7492 if (awp->afp != NULL) {
7493 DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
7494 }
7495 }
7496 }
7497
AddContigBlock(Asn2gbWorkPtr awp)7498 NLM_EXTERN void AddContigBlock (
7499 Asn2gbWorkPtr awp
7500 )
7501
7502 {
7503 IntAsn2gbJobPtr ajp;
7504 BaseBlockPtr bbp;
7505 Char buf [128];
7506
7507 if (awp == NULL) return;
7508 ajp = awp->ajp;
7509 if ( ajp == NULL ) return;
7510
7511 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7512 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7513 sprintf (buf, "<a name=\"contig_%s\"></a>", awp->currAccVerLabel);
7514 DoQuickLinkFormat (awp->afp, buf);
7515 }
7516 bbp = Asn2gbAddBlock (awp, CONTIG_BLOCK, sizeof (BaseBlock));
7517
7518 if (awp->afp != NULL) {
7519 DoImmediateFormat (awp->afp, bbp);
7520 }
7521 }
7522
AddSlashBlock(Asn2gbWorkPtr awp)7523 NLM_EXTERN void AddSlashBlock (
7524 Asn2gbWorkPtr awp
7525 )
7526
7527 {
7528 IntAsn2gbJobPtr ajp;
7529 BaseBlockPtr bbp;
7530 Char buf [128];
7531 CharPtr str;
7532
7533 if (awp == NULL) return;
7534 ajp = awp->ajp;
7535 if (ajp == NULL) return;
7536
7537 /*
7538 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7539 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7540 sprintf (buf, "<a name=\"slash_%s\"></a>", awp->currAccVerLabel);
7541 DoQuickLinkFormat (awp->afp, buf);
7542 }
7543 */
7544
7545 bbp = Asn2gbAddBlock (awp, SLASH_BLOCK, sizeof (BaseBlock));
7546 if (bbp == NULL) return;
7547
7548 bbp->entityID = awp->entityID;
7549
7550 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
7551 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
7552 sprintf (buf, "//</pre>\n<a name=\"slash_%s\"></a>", awp->currAccVerLabel);
7553 str = StringSave (buf);
7554 } else if (GetWWW (ajp)) {
7555 sprintf (buf, "//</pre>\n");
7556 str = StringSave (buf);
7557 } else {
7558 str = (CharPtr) MemNew(sizeof(Char) * 4);
7559 StringNCpy(str, "//\n", 4);
7560 }
7561
7562 bbp->string = str;
7563
7564 if (awp->afp != NULL) {
7565 DoImmediateFormat (awp->afp, bbp);
7566 }
7567 }
7568
7569