1 /* asn2ff3.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2ff3.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov
29 *
30 * Version Creation Date: 7/15/95
31 *
32 *
33 * File Description:
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 *
38 **************************************/
39 #include <asn2ffp.h>
40 #include <a2ferrdf.h>
41 #include <a2ferr.h>
42 #include <utilpub.h>
43 #include <ffprint.h>
44 #include <parsegb.h>
45 #include <sequtil.h>
46 #include <edutil.h>
47 #include <gather.h>
48 #include <explore.h>
49 #include <sqnutils.h>
50
51 #define METHOD_concept_transl_a 6
52
/*
 * Forward declarations for routines referenced in this file.
 * Prototypes marked NLM_EXTERN are exported; those marked static are
 * local to this file.  CheckForQual carries neither marker.
 */
NLM_EXTERN CharPtr mRNAEvidenceComment PROTO ((UserObjectPtr obj, Boolean add));
NLM_EXTERN Int2 ConvertToNAImpFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfp_out, SortStructPtr p));
NLM_EXTERN Int2 ConvertToAAImpFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfp_out, SortStructPtr p));
NLM_EXTERN Int2 ValidateAAImpFeat PROTO ((SeqFeatPtr sfp, Boolean use_product));
NLM_EXTERN Int2 ValidateNAImpFeat PROTO ((SeqFeatPtr sfp));
NLM_EXTERN void AddProteinQuals PROTO ((SeqFeatPtr sfp, SeqFeatPtr sfp_out, NoteStructPtr nsp));
static void GetGeneticCode PROTO ((CharPtr ptr, SeqFeatPtr sfp));
NLM_EXTERN void ComposeGBQuals PROTO((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, GBEntryPtr gbp, SortStructPtr p, Boolean note_pseudo));
NLM_EXTERN CharPtr ComposeNoteFromNoteStruct PROTO ((NoteStructPtr nsp, GeneStructPtr gsp));
NLM_EXTERN void AddPID PROTO ((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG));
NLM_EXTERN void Add_trid PROTO ((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out));
NLM_EXTERN Int2 MakeGBSelectNote PROTO ((CharPtr ptr, SeqFeatPtr sfp));
static void GetProtRefComment PROTO ((SeqFeatPtr sfp, BioseqPtr bsp, Asn2ffJobPtr ajp, OrganizeProtPtr opp, NoteStructPtr nsp, Uint1 method));
NLM_EXTERN Int2 MiscFeatOrphanGenes PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp, Int2 index));
Int2 CheckForQual PROTO ((GBQualPtr gbqual, CharPtr string_q, CharPtr string_v));
NLM_EXTERN GBQualPtr AddModifsToGBQual PROTO ((GBEntryPtr gbp, GBQualPtr gbqual));
NLM_EXTERN GBQualPtr AddOrgRefModToGBQual PROTO ((OrgRefPtr orp, GBQualPtr gbqual));
NLM_EXTERN Int2 CheckForEqualSign PROTO ((CharPtr qual));
NLM_EXTERN CharPtr GetProductFromCDS PROTO ((ValNodePtr product, ValNodePtr location, Int4 length));
NLM_EXTERN void PrepareSourceFeatQuals PROTO ((SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modif));
static Int2 CheckForExtraChars PROTO ((CharPtr note));
NLM_EXTERN GBQualPtr AddBioSourceToGBQual PROTO((Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release));
NLM_EXTERN Boolean delete_qual PROTO((GBQualPtr PNTR qlist, CharPtr qual));
76
/* One entry of the orgmod_subtype[] table: a name paired with a number. */
typedef struct {
	CharPtr name;	/* textual name; NULL marks the end of the table */
	Uint1 num;	/* number associated with the name */
} ORGMOD;
81
/*
 * Table of num_subtype (25) names.  The code that indexes it is not in this
 * part of the file -- presumably SubSource subtype names; TODO confirm the
 * index convention against the user before reordering.
 */
#define num_subtype 25
CharPtr subtype[num_subtype] = {
"chromosome", "map", "clone", "sub_clone", "haplotype", "genotype", "sex",
"cell_line", "cell_type", "tissue_type", "clone_lib", "dev_stage",
"frequency", "germline", "rearranged", "lab_host", "pop_variant",
"tissue_lib", "plasmid", "transposon", "insertion_seq", "plastid", "country",
"segment", "endogenous_virus"};
89
/*
 * Table of num_genome (15) names, file-local.  The code that indexes it is
 * not in this part of the file -- presumably indexed by the BioSource genome
 * value; TODO confirm before reordering.
 */
#define num_genome 15
static CharPtr genome[num_genome] = {"unknown", "genomic", "chloroplast", "chromoplast", "kinetoplast", "mitochondrion", "plastid", "macronuclear",
"extrachrom", "plasmid", "transposon", "insertion_seq", "cyanelle", "proviral", "virion"};
93
94 /*______________________________________________________________________
95 **
96 ** This code is not currently used.
97 ** I do not remove this piece of code, just comment it out.
98 ** -- Dmitri Lukyanov
99 */
100 #if 0
101
102 #define num_biomol 7
103 static CharPtr biomol[num_biomol] = {"genomic", "RNA", "mRNA", "rRNA",
104 "tRNA", "snRNA", "scRNA"};
105
106 #endif
107 /*______________________________________________________________________
108 */
109
/*
 * Name/number pairs, 33 real entries followed by a { NULL, 0 } sentinel
 * (34 slots in total).  The numbers are not contiguous: they run 2..31 and
 * then jump to 253, 254 and 255.
 */
ORGMOD orgmod_subtype[34] = {
	{ "strain", 2 }, {"sub_strain", 3}, {"type", 4}, {"subtype", 5},
	{"variety", 6}, {"serotype",7}, {"serogroup",8}, {"serovar", 9},
	{"cultivar", 10}, {"pathovar", 11}, {"chemovar", 12}, {"biovar", 13},
	{"biotype", 14}, {"group", 15}, {"subgroup", 16}, {"isolate", 17},
	{"common", 18}, {"acronym", 19}, {"dosage", 20}, {"nat_host", 21},
	{"sub_species", 22}, {"specimen_voucher", 23}, {"authority", 24},
	{"forma", 25}, {"forma_specialis", 26}, {"ecotype", 27},
	{"synonym", 28}, {"anamorph", 29}, {"teleomorph", 30}, {"breed", 31},
	{"old_lineage", 253}, {"old_name", 254}, {"note", 255}, { NULL, 0 }
};
121
122 /*
123 CharPtr dbtag[DBNUM] = {
124 "PIDe", "PIDd", "PIDg", "PID", "FLYBASE",
125 "GDB", "MIM", "SGD", "SWISS-PROT", "CK",
126 "SPTREMBL", "ATCC", "ATCC (inhost)", "ATCC (dna)", "taxon",
127 "BDGP_EST", "dbEST", "dbSTS", "MGD", "PIR",
128 "GI", "RiceGenes", "UniGene", "LocusID", "dbSNP",
129 "RATMAP", "RGD", "CDD", "UniSTS", "InterimID", "COG", "GO", "niaEST",
130 "GeneID", "BDGP_INS", "SoyBase",
131 };
132 */
133
/*
 * Database names accepted for /db_xref.
 *
 * Add_dbxref compares Dbtag.db against these names exactly, and
 * GetNonGeneQuals compares the start of a "db_xref" qualifier value against
 * them; in RELEASE_MODE a cross-reference that matches none is dropped.
 *
 * The first four slots (indexes 0..3) must stay the PID variants: Add_dbxref
 * uses "index < 4" to reject PID tags on features other than coding regions.
 *
 * 58 initializers are listed here; DBNUM is defined outside this part of the
 * file -- keep the two in step when adding entries.
 */
CharPtr dbtag[DBNUM] = {
	"PIDe", "PIDd", "PIDg", "PID",
	"AceView/WormGenes",
	"ATCC",
	"ATCC(in host)",
	"ATCC(dna)",
	"BDGP_EST",
	"BDGP_INS",
	"CDD",
	"CK",
	"COG",
	"dbEST",
	"dbSNP",
	"dbSTS",
	"ENSEMBL",
	"ESTLIB",
	"FANTOM_DB",
	"FLYBASE",
	"GABI",
	"GDB",
	"GeneDB",
	"GeneID",
	"GI",
	"GO",
	"GOA",
	"IFO",
	"IMGT/LIGM",
	"IMGT/HLA",
	"InterimID",
	"Interpro",
	"ISFinder",
	"JCM",
	"LocusID",
	"MaizeDB",
	"MGD",
	"MGI",
	"MIM",
	"NextDB",
	"niaEST",
	"PIR",
	"PSEUDO",
	"RATMAP",
	"RiceGenes",
	"REMTREMBL",
	"RGD",
	"RZPD",
	"SGD",
	"SoyBase",
	"SPTREMBL",
	"SWISS-PROT",
	"taxon",
	"UniGene",
	"UniSTS",
	"WorfDB",
	"WormBase",
	"ZFIN",
};
191
192
/*************************************************************************
*	Add_dbxref (defined below, after the small IsRefSeq helper):
*	sfp_out: synthetic SeqFeatPtr of type ImpFeat for use in printing.
*	This function puts the dbxref qualifier on every SeqFeatPtr.
*************************************************************************/
IsRefSeq(BioseqPtr bsp)197 static Boolean IsRefSeq (BioseqPtr bsp)
198 {
199 SeqIdPtr sip;
200
201 if (bsp == NULL)
202 return FALSE;
203 for (sip = bsp->id; sip != NULL; sip = sip->next) {
204 if (sip->choice == SEQID_OTHER)
205 return TRUE;
206 }
207 return FALSE;
208 }
209
/*
 * Add_dbxref
 *
 *	Copies the database cross-references of sfp (sfp->dbxref) onto
 *	sfp_out as "db_xref" qualifiers of the form "db:tag".
 *
 *	ajp:     job; ajp->mode selects RELEASE_MODE filtering.
 *	sfp_out: feature that receives the qualifiers.
 *	sfp:     feature whose dbxref list is read; nothing happens when it
 *	         is NULL or has no dbxref.
 *	bsp:     Bioseq, only consulted by the WormBase special case.
 *
 *	Filtering rules:
 *	 - in RELEASE_MODE, databases not listed in dbtag[] are dropped;
 *	 - on features other than coding regions, the PID tags (dbtag
 *	   indexes 0..3) are dropped, and so are unknown databases in
 *	   RELEASE_MODE;
 *	 - entries without a Dbtag, without a database name or without a
 *	   tag are skipped.
 *
 *	Integer tags of the PIDe/PIDd/PIDg databases are written as
 *	"PID:e<n>", "PID:d<n>" and "PID:g<n>".
 */
static void Add_dbxref (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, SeqFeatPtr sfp, BioseqPtr bsp)
{
	Int4 id;
	Int2 i;
	ValNodePtr vnp;
	DbtagPtr db;
	CharPtr val;

	if (sfp == NULL || sfp->dbxref == NULL) {
		return;
	}
	for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
		id = -1;
		db = vnp->data.ptrvalue;
		if (db && db->db) {
			for (i = 0; i < DBNUM; i++) {
				if (StringCmp(db->db, dbtag[i]) == 0) {
					id = i;
					break;
				}
			}
			/* NOTE(review): "WormBase" is listed in dbtag[], so id is never
			   -1 here and this branch cannot fire; kept as in the original.
			   The value 18 only needs to be >= 4 (i.e. "not a PID"). */
			if (id == -1 && StringCmp (db->db, "WormBase") == 0 && IsRefSeq (bsp)) {
				id = 18;
			}
			if (ajp->mode == RELEASE_MODE && id == -1) {
				continue; /* drop unknown dbtag */
			}
		}
		if (sfp->data.choice != SEQFEAT_CDREGION) {
			/* unknown databases pass outside RELEASE_MODE; everything
			   else below index 4 is rejected */
			if (!(id == -1 && ajp->mode != RELEASE_MODE) && id < 4) {
				continue; /* PID is illegal on non-CDS features */
			}
		}
		/* A malformed entry must not stop the remaining cross-references
		   from being emitted, and a NULL name must never reach "%s". */
		if (db == NULL || db->db == NULL) {
			continue;
		}
		val = NULL;
		if (db->tag && db->tag->str) {
			/* name + ':' + string tag + NUL */
			val = MemNew(StringLen(db->db) + StringLen(db->tag->str) + 2);
			if (val != NULL) {
				sprintf(val, "%s:%s", db->db, db->tag->str);
			}
		} else if (db->tag) {
			/* 16 spare bytes cover ':' (or "PID:x"), a 32-bit id and NUL */
			val = MemNew(StringLen(db->db) + 16);
			if (val != NULL) {
				if (StringNCmp(db->db, "PIDe", 4) == 0) {
					sprintf(val, "PID:e%ld", (long) db->tag->id);
				} else if (StringNCmp(db->db, "PIDd", 4) == 0) {
					sprintf(val, "PID:d%ld", (long) db->tag->id);
				} else if (StringNCmp(db->db, "PIDg", 4) == 0) {
					sprintf(val, "PID:g%ld", (long) db->tag->id);
				} else {
					sprintf(val, "%s:%ld", db->db, (long) db->tag->id);
				}
			}
		}
		/* val stays NULL when the Dbtag has no tag at all */
		if (val != NULL) {
			if (val[0] != '\0') {
				sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
			}
			MemFree(val);
		}
	}
	return;
}	/* Add_dbxref */
276
CheckSeqIdChoice(SeqIdPtr sip)277 static Boolean CheckSeqIdChoice(SeqIdPtr sip)
278 {
279 Uint1 ch;
280 SeqIdPtr si;
281
282 for (si = sip; si; si=si->next) {
283 ch = si->choice;
284 if (ch == SEQID_GI || ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ ||
285 ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
286 return TRUE;
287 }
288 }
289 return FALSE;
290 }
291
/*
 * GetSeqIdChoice
 *
 *	Returns the first Seq-id in the chain whose choice is GI, GenBank,
 *	EMBL, DDBJ, OTHER, TPG, TPE or TPD, or NULL when there is none.
 *	The returned pointer is a node of the caller's chain, not a copy.
 */
static SeqIdPtr GetSeqIdChoice(SeqIdPtr sip)
{
	SeqIdPtr node = sip;

	while (node != NULL) {
		switch (node->choice) {
		case SEQID_GI:
		case SEQID_GENBANK:
		case SEQID_EMBL:
		case SEQID_DDBJ:
		case SEQID_OTHER:
		case SEQID_TPG:
		case SEQID_TPE:
		case SEQID_TPD:
			return node;
		default:
			break;
		}
		node = node->next;
	}
	return NULL;
}
306
CheckSeqIdAccVer(SeqIdPtr sip)307 static Boolean CheckSeqIdAccVer(SeqIdPtr sip)
308 {
309 Uint1 ch;
310 SeqIdPtr si;
311 TextSeqIdPtr tsip;
312
313 for (si = sip; si; si=si->next) {
314 ch = si->choice;
315 if (ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ || ch == SEQID_OTHER ||
316 ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
317 tsip = si->data.ptrvalue;
318 if (tsip->accession != NULL && tsip->version >= 1) {
319 return TRUE;
320 }
321 }
322 }
323 return FALSE;
324 }
325
/*
 * GetNonGeneQuals
 *
 *	Copies the GBQuals of sfp_in onto sfp_out, then settles the
 *	"evidence" qualifier of sfp_out.
 *
 *	Copy rules, per qualifier name:
 *	 - gene, product, standard_name, map, EC_number, anticodon: not copied;
 *	 - note: its value is pushed onto the note stack nsp instead;
 *	 - db_xref: copied, except that in RELEASE_MODE a value that does not
 *	   start with one of the dbtag[] names is dropped;
 *	 - anything else (transl_table included): copied as is.
 *
 *	Evidence rules:
 *	 - sfp_out->exp_ev set and no evidence qual yet: one is added
 *	   ("experimental" for 1, "not_experimental" for 2);
 *	 - sfp_out->exp_ev set and an evidence qual exists: the value of the
 *	   first one is replaced accordingly;
 *	 - exp_ev clear: an upper-case EXPERIMENTAL / NOT_EXPERIMENTAL value
 *	   on the first evidence qual is lower-cased in place.
 */
static void GetNonGeneQuals (Int2 mode, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, NoteStructPtr nsp)
{
	GBQualPtr q;
	CharPtr qname;
	Boolean have_evidence;
	Int2 idx;

	for (q = sfp_in->qual; q != NULL; q = q->next) {
		qname = q->qual;
		if (StringCmp(qname, "gene") == 0
			|| StringCmp(qname, "product") == 0
			|| StringCmp(qname, "standard_name") == 0
			|| StringCmp(qname, "map") == 0
			|| StringCmp(qname, "EC_number") == 0
			|| StringCmp(qname, "anticodon") == 0) {
			continue;	/* produced elsewhere (anticodon: DotRNAQuals) */
		}
		if (StringCmp(qname, "note") == 0) {
			CpNoteToCharPtrStack(nsp, NULL, q->val);
			continue;
		}
		if (StringCmp(qname, "db_xref") == 0) {
			idx = 0;
			while (idx < DBNUM &&
				StringNCmp(q->val, dbtag[idx], StringLen(dbtag[idx])) != 0) {
				idx++;
			}
			if (mode == RELEASE_MODE && idx == DBNUM) {
				continue;	/* drop unknown dbtag */
			}
		}
		/* transl_table is also picked up by GetGeneticCode */
		sfp_out->qual = AddGBQual(sfp_out->qual, qname, q->val);
	}

	have_evidence = GBQualPresent("evidence", sfp_out->qual);

	if (!sfp_out->exp_ev && have_evidence != TRUE) {
		return;
	}
	if (sfp_out->exp_ev && have_evidence == FALSE) {
		switch (sfp_out->exp_ev) {
		case 1:
			sfp_out->qual = AddGBQual(sfp_out->qual, "evidence", "experimental");
			break;
		case 2:
			sfp_out->qual = AddGBQual(sfp_out->qual, "evidence", "not_experimental");
			break;
		default:
			break;
		}
		return;
	}

	/* only the first evidence qualifier is ever touched */
	for (q = sfp_out->qual; q != NULL; q = q->next) {
		if (StringCmp(q->qual, "evidence") == 0) {
			break;
		}
	}
	if (q == NULL) {
		return;
	}
	if (sfp_out->exp_ev) {
		q->val = MemFree(q->val);
		switch (sfp_out->exp_ev) {
		case 1:
			q->val = StringSave("experimental");
			break;
		case 2:
			q->val = StringSave("not_experimental");
			break;
		default:
			break;
		}
	} else if (StringCmp(q->val, "EXPERIMENTAL") == 0) {
		StringCpy(q->val, "experimental");
	} else if (StringCmp(q->val, "NOT_EXPERIMENTAL") == 0) {
		StringCpy(q->val, "not_experimental");
	}
	return;
}	/* GetNonGeneQuals */
401
402 /*****************************************************************************
403 *LookForPartialImpFeat
404 *
405 * This function first looks for the sfp->qual of type "partial".
406 * If found the qual is deleted and the variable "partial" is
407 * set equal to TRUE. If "partial" is TRUE or if sfp->partial
408 * is TRUE, FlatAnnotPartial is called (modified version of Karl Sirotkin's
409 * program) to see if sfp->partial should really be TRUE.
410 * WARNING: sfp should be an ImpFeatPtr
411 *
412 * written by Tom Madden (12/7/93)
413 *****************************************************************************/
LookForPartialImpFeat(SeqFeatPtr sfp,Boolean use_product)414 static void LookForPartialImpFeat(SeqFeatPtr sfp, Boolean use_product)
415
416 {
417 Boolean partial=FALSE;
418 GBQualPtr curq, gbqual, lastq=NULL, tmpqual;
419
420 gbqual = sfp->qual;
421
422 while (gbqual && (StringCmp(gbqual->qual, "partial")==0))
423 {
424 partial = TRUE;
425 tmpqual = gbqual->next;
426 gbqual->next = NULL;
427 gbqual = GBQualFree(gbqual);
428 gbqual = tmpqual;
429 }
430
431 if (gbqual)
432 {
433 for (lastq=gbqual, curq=gbqual->next; curq; curq=curq->next)
434 {
435 if (StringCmp(curq->qual, "partial") == 0)
436 {
437 partial = TRUE;
438 lastq->next = curq->next;
439 curq->next = NULL;
440 curq = GBQualFree(curq);
441 curq = lastq;
442 }
443 else
444 lastq = curq;
445 }
446 }
447
448 sfp->qual = gbqual;
449
450 if (partial == TRUE || sfp->partial == TRUE)
451 sfp->partial = FlatAnnotPartial(sfp, use_product);
452 } /* LookForPartialImpFeat */
453
SeqCodeNameGet(SeqCodeTablePtr table,Uint1 residue,Boolean error_msgs)454 static CharPtr SeqCodeNameGet (SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
455 {
456 int index=residue - table -> start_at;
457 static CharPtr oops = "?";
458
459 if (index >= 0 && index < (int) table -> num){
460 return (table -> names) [index];
461 }else {
462 if (error_msgs == TRUE)
463 ErrPostEx(SEV_WARNING, CTX_NCBI2GB, 1,
464 "asn2ff: %c(%d) > max in SeqCode table=%d",
465 (char) residue, (int) residue, (int) table -> num);
466 return oops;
467 }
468 }
469
470 /***************************************************************************
471 *CharPtr Get3LetterSymbol (Uint1 seq_code, SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
472 *
473 * if (ASN2FF_IUPACAA_ONLY == TRUE) then
474 * Check if the residue is legal in iupacaa; if not, return 'X', if so,
475 * return the three letter code from iupacaa3.
476 *
477 * if (ASN2FF_IUPACAA_ONLY != TRUE) then
478 * Then do a translation, if necessary, then get th three letter code
479 * from iupacaa3.
480 *
481 ***************************************************************************/
482
Get3LetterSymbol(Uint1 seq_code,SeqCodeTablePtr table,Uint1 residue,Boolean error_msgs)483 static CharPtr Get3LetterSymbol (Uint1 seq_code, SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
484 {
485 static CharPtr bad_symbol= "OTHER";
486 CharPtr ptr, retval=NULL;
487 Int2 index;
488 SeqCodeTablePtr table_3aa;
489 SeqMapTablePtr smtp;
490 Uint1 code, new_residue;
491
492 if (residue == 42) { /* stop codon in NCBIeaa */
493 retval = "TERM";
494 return retval;
495 }
496 if (ASN2FF_IUPACAA_ONLY == TRUE)
497 code = Seq_code_iupacaa;
498 else
499 code = Seq_code_ncbieaa;
500
501 if (code != seq_code)
502 {/* if code and seq_code are identical, then smtp is NULL?? */
503 smtp = SeqMapTableFind(seq_code, code);
504 new_residue = SeqMapTableConvert(smtp, residue);
505 }
506 else
507 new_residue = residue;
508
509 /* The following looks for non-symbols (255) and "Undetermined" (88) */
510 if ((int) new_residue == 255 || (int) new_residue == 88)
511 retval = bad_symbol;
512 else
513 {
514 ptr = SeqCodeNameGet(table, residue, error_msgs);
515
516 table_3aa=SeqCodeTableFind (Seq_code_iupacaa3);
517 if (ptr != NULL && *ptr != '\0' && table_3aa != NULL)
518 {
519 for (index=0; index < (int) table_3aa->num; index++)
520 {
521 if (StringCmp(ptr, (table_3aa->names) [index]) == 0)
522 {
523 retval = (table_3aa->symbols) [index];
524 break;
525 }
526 }
527 }
528 }
529
530 return retval;
531
532 } /* Get3LetterSymbol */
533
GetNameFromOrgName(OrgNamePtr orgname)534 static CharPtr GetNameFromOrgName(OrgNamePtr orgname)
535 {
536 BinomialOrgNamePtr bi;
537 CharPtr name = NULL, virus, newname;
538 Int2 len=0;
539 Boolean first;
540 OrgNamePtr org;
541
542 switch(orgname->choice)
543 {
544 case 1: /*binomial*/
545 bi = (BinomialOrgNamePtr) orgname->data;
546 len = StringLen(bi->genus);
547 if (bi->species) {
548 len += StringLen(bi->species);
549 }
550 name = MemNew(len + 2);
551 StringCpy(name, bi->genus);
552 if (bi->species) {
553 name = StringCat(name, " ");
554 name = StringCat(name, bi->species);
555 } else {
556 name = StringCat(name, " sp.");
557 }
558 break;
559 case 2: /*virus*/
560 virus = (CharPtr) orgname->data;
561 name = MemNew(StringLen(virus));
562 StringCpy(name, virus);
563 break;
564 case 3: /*hybrid*/
565 first = TRUE;
566 for (org = (OrgNamePtr) orgname->data; org; org=org->next) {
567 newname = GetNameFromOrgName(org);
568 len += StringLen(newname) + 3;
569 }
570 name = MemNew(len + 1);
571 for (org = (OrgNamePtr) orgname->data; org; org=org->next) {
572 newname = GetNameFromOrgName(org);
573 if (first == TRUE) {
574 name = StringCat(name, newname);
575 first = FALSE;
576 } else {
577 name = StringCat(name, " x ");
578 name = StringCat(name, newname);
579 }
580 }
581 break;
582 case 4: /*namedhybrid*/
583 bi = (BinomialOrgNamePtr) orgname->data;
584 len = StringLen(bi->genus);
585 if (bi->species) {
586 len += StringLen(bi->species);
587 }
588 name = MemNew(len + 4);
589 StringCpy(name, bi->genus);
590 if (bi->species) {
591 name = StringCat(name, " x ");
592 name = StringCat(name, bi->species);
593 }
594 break;
595 case 5: /*partial*/
596 /* not implemented yet */
597 ErrPostStr(SEV_WARNING, 0, 0, "Partial name in OrgName.name");
598 break;
599 default:
600 break;
601 }
602 return name;
603 }
604
PrintSourceFeat(Asn2ffJobPtr ajp,GBEntryPtr gbp)605 NLM_EXTERN void PrintSourceFeat(Asn2ffJobPtr ajp, GBEntryPtr gbp)
606
607 {
608 BioseqPtr bsp;
609 Char location[40];
610 ImpFeatPtr ifp;
611 Int2 status = -1, /* mol = -1, -- UNUSED */ i, bsize=0;
612 NoteStructPtr nsp = NULL;
613 OrgRefPtr orp=NULL;
614 SeqFeatPtr sfp_in, sfp_out=NULL, sfp;
615 SeqIntPtr sip;
616 SeqLocPtr slp, keep_loc;
617 ValNodePtr vnp=NULL;
618 BioSourcePtr biosp = NULL;
619 OrgModPtr omp;
620 SortStructPtr pss, ps=NULL, bs = NULL, po=NULL;
621 DescrStructPtr ds;
622 CharPtr name;
623
624 if (gbp == NULL) {
625 return;
626 }
627 if (gbp->feat) {
628 nsp=gbp->feat->source_notes;
629 po = gbp->feat->Orglist;
630 ps = gbp->feat->Sourcelist;
631 bs = gbp->feat->Biosrclist;
632 bsize = gbp->feat->biosrcsize;
633 }
634 ds = gbp->source_info;
635 bsp = gbp->bsp;
636 if (ajp->slp) {
637 return;
638 }
639 sprintf(location, "1..%ld", (long) (bsp->length));
640 sfp_out = ajp->sfp_out;
641 ifp = sfp_out->data.value.ptrvalue;
642 ifp->key = StringSave("source");
643 if (ajp->slp) {
644 slp = AsnIoMemCopy(ajp->slp,
645 (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
646 } else {
647 slp = (SeqLocPtr) ValNodeNew(NULL);
648 slp->choice = SEQLOC_INT;
649 sip = SeqIntNew();
650 sip->from = 0;
651 sip->to = (bsp->length)-1;
652 sip->id = SeqIdDup(SeqIdFindBest (bsp->id, 0));
653 slp->data.ptrvalue = sip;
654 }
655 sfp_out->location = slp;
656 if (ds != NULL) {
657 vnp = ds->vnp;
658 keep_loc = AsnIoMemCopy(slp,
659 (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
660 if (vnp && vnp->choice == Seq_descr_source) {
661 biosp = vnp->data.ptrvalue;
662 if (biosp->is_focus == TRUE) {
663 sfp_out->qual = AddGBQual(sfp_out->qual,
664 "focus", NULL);
665 if (StringNCmp(gbp->div, "SYN", 3) != 0) {
666 for (pss=bs, i= 0; pss && i < bsize; i++, pss++) {
667 if (pss->sfp == NULL)
668 continue;
669 sfp_out->location =
670 SeqLocSubtract(sfp_out->location,
671 pss->sfp->location);
672 }
673 }
674 }
675 }
676 if (sfp_out->location == NULL) {
677 sfp_out->location = keep_loc;
678 }
679 else
680 SeqLocFree(keep_loc);
681 }
682 flat2asn_install_feature_user_string("source", ifp->loc);
683 if (gbp->feat && gbp->feat->sfpSourcesize != 0) {
684 if ((sfp_in = ps->sfp) == NULL) {
685 GatherItemWithLock(ps->entityID, ps->itemID, ps->itemtype,
686 &sfp_in, find_item);
687 }
688 if (sfp_out->qual != NULL)
689 sfp_out->qual = GBQualFree(sfp_out->qual);
690 NoteStructReset(nsp);
691 PrepareSourceFeatQuals(sfp_in, sfp_out, gbp, FALSE);
692 Add_dbxref(ajp, sfp_out, sfp_in, bsp);
693 status = ValidateNAImpFeat(sfp_out);
694 if (status < 0) {
695 /* source feat is probably missing organism name, add
696 and try again. Don't delete old quals! */
697 if (ds != NULL) {
698 vnp = ds->vnp;
699 if (vnp->choice == Seq_descr_source) {
700 biosp = vnp->data.ptrvalue;
701 orp = (OrgRefPtr) biosp->org;
702 } else if (vnp->choice == Seq_descr_org) {
703 orp = (OrgRefPtr) vnp->data.ptrvalue;
704 }
705 } else if (gbp->feat && gbp->feat->sfpOrgsize != 0) {
706 if ((sfp = po->sfp) == NULL) {
707 GatherItemWithLock(po->entityID, po->itemID, po->itemtype,
708 &sfp, find_item);
709 }
710 if (sfp != NULL) {
711 orp = (OrgRefPtr) sfp->data.value.ptrvalue;
712 }
713 }
714 if (orp) {
715 if (ajp->orgname && orp->orgname) {
716 name = GetNameFromOrgName(orp->orgname);
717 sfp_out->qual = AddGBQual(sfp_out->qual,
718 "organism", name);
719 MemFree(name);
720 } else if (orp->taxname) {
721 sfp_out->qual = AddGBQual(sfp_out->qual,
722 "organism", orp->taxname);
723 if (orp->common && sfp_in->comment != NULL)
724 CpNoteToCharPtrStack(nsp, NULL, orp->common);
725 } else if (orp->common) {
726 if (StrStr(orp->common, "virus") ||
727 StrStr(orp->common, "Virus") ||
728 StrStr(orp->common, "phage") ||
729 StrStr(orp->common, "Phage") ||
730 StrStr(orp->common, "viroid") ||
731 StrStr(orp->common, "Viroid")) {
732 sfp_out->qual = AddGBQual(sfp_out->qual,
733 "organism", orp->common);
734 }
735 }
736 }
737 status = ValidateNAImpFeat(sfp_out);
738 }
739 }
740 if (status < 0) {
741 if (ds != NULL) {
742 if ((vnp = ds->vnp) != NULL) {
743 if (vnp->choice == Seq_descr_source) {
744 biosp = vnp->data.ptrvalue;
745 orp = (OrgRefPtr) biosp->org;
746 } else if (vnp->choice == Seq_descr_org) {
747 orp = (OrgRefPtr) vnp->data.ptrvalue;
748 }
749 }
750 } else if (gbp->feat && gbp->feat->sfpOrgsize != 0 && po->sfp != NULL) {
751 orp = (OrgRefPtr) (po->sfp)->data.value.ptrvalue;
752 } else {
753 orp = NULL;
754 }
755 if (orp) {
756 if (nsp) {
757 NoteStructReset(nsp);
758 }
759 if (sfp_out->qual != NULL)
760 sfp_out->qual = GBQualFree(sfp_out->qual);
761 if (ajp->orgname && orp->orgname) {
762 name = GetNameFromOrgName(orp->orgname);
763 sfp_out->qual = AddGBQual(sfp_out->qual,
764 "organism", name);
765 MemFree(name);
766 } else if (orp->taxname) {
767 sfp_out->qual = AddGBQual(sfp_out->qual,
768 "organism", orp->taxname);
769 } else if (orp->common) {
770 if (StrStr(orp->common, "virus") ||
771 StrStr(orp->common, "Virus") ||
772 StrStr(orp->common, "phage") ||
773 StrStr(orp->common, "Phage") ||
774 StrStr(orp->common, "viroid") ||
775 StrStr(orp->common, "Viroid")) {
776 sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
777 orp->common);
778 }
779 }
780 if (orp->orgname && orp->orgname->mod) {
781 omp = orp->orgname->mod;
782 if (omp->subtype == 0 && omp->subname != NULL) {
783 CpNoteToCharPtrStack(nsp, NULL, omp->subname);
784 }
785 }
786 sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual, TRUE);
787 sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
788 }
789 if ((vnp=BioseqGetSeqDescr(gbp->bsp, Seq_descr_molinfo, NULL)) != NULL){
790 /*
791 mfp = vnp->data.ptrvalue;
792 if (mfp) {
793 mol = mfp->biomol;
794 }
795 -- NO EFFECT */
796 }
797 PrepareSourceFeatQuals(NULL, sfp_out, gbp, TRUE);
798 status = ValidateNAImpFeat(sfp_out);
799 }
800 /* ----------Organism not found -------------*/
801 if (status < 0) {
802 if (sfp_out->qual)
803 sfp_out->qual = GBQualFree(sfp_out->qual);
804 sfp_out->qual = AddGBQual(sfp_out->qual, "organism", "unknown");
805 NoteStructReset(nsp);
806 if (orp && orp->common)
807 CpNoteToCharPtrStack(nsp, NULL, orp->common);
808 /*try new first */
809 if (biosp) {
810 sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual, TRUE);
811 if (orp)
812 sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
813 }
814 /* try old then */
815 sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
816 PrepareSourceFeatQuals(NULL, sfp_out, gbp, TRUE);
817 status = ValidateNAImpFeat(sfp_out);
818 }
819 flat2asn_delete_feature_user_string();
820
821 if (status >= 0 || ASN2FF_VALIDATE_FEATURES == FALSE) {
822 PrintImpFeat(ajp, gbp->bsp, sfp_out);
823 }
824 sfp_out->comment = NULL;
825 sfp_out->location = SeqLocFree(sfp_out->location);
826 sfp_out->location = NULL;
827 sfp_out->product = NULL;
828 sfp_out->exp_ev = FALSE;
829 sfp_out->partial = FALSE;
830 sfp_out->excpt = FALSE;
831 ifp = sfp_out->data.value.ptrvalue;
832 if (ifp->key) {
833 ifp->key = MemFree(ifp->key);
834 }
835 if (ifp->loc) {
836 ifp->loc = MemFree(ifp->loc);
837 }
838 if (sfp_out->qual)
839 sfp_out->qual = GBQualFree(sfp_out->qual);
840 return;
841 } /* PrintSourceFeat */
842
843 /*****************************************************************************
844 *
845 * Add the quals of the form "/transl_except=(pos: ,aa: )" to the
846 * SeqFeatPtr sfp_out.
847 *
848 *****************************************************************************/
849
ComposeCodeBreakQuals(Asn2ffJobPtr ajp,BioseqPtr bsp,SeqFeatPtr sfp_in,SeqFeatPtr sfp_out,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt,NoteStructPtr nsp)850 static void ComposeCodeBreakQuals (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt, NoteStructPtr nsp)
851
852 {
853 CdRegionPtr crp;
854 CharPtr buffer, ptr, pos;
855 Choice aa;
856 CodeBreakPtr cbp;
857 SeqCodeTablePtr table;
858 SeqLocPtr slp;
859 Uint1 seq_code=0, the_residue;
860 Int2 i, buflen;
861
862 if ((sfp_in == NULL) || (sfp_in->data.choice != 3)) {
863 return;
864 }
865
866 crp = (CdRegionPtr) sfp_in->data.value.ptrvalue;
867
868 if (crp->code_break != NULL) {
869 cbp = crp->code_break;
870 while (cbp != NULL) {
871 aa = cbp->aa;
872 switch (aa.choice) {
873 case 1:
874 seq_code = 8;
875 break;
876 case 2:
877 seq_code = 7;
878 break;
879 case 3:
880 seq_code = 11;
881 break;
882 }
883 table = NULL;
884 if (seq_code != 0)
885 table=SeqCodeTableFind (seq_code);
886 if (table == NULL) {
887 continue;
888 }
889 if (extra_loc_cnt > 0) { /* was converted to new coordinates*/
890 for (i=0; i < extra_loc_cnt; i++) {
891 if (extra_loc[i] == NULL) {
892 continue;
893 }
894 slp = extra_loc[i];
895 pos = FlatLoc(bsp, slp);
896 if (pos) {
897 the_residue = (Uint1) cbp->aa.value.intvalue;
898 if (the_residue == 'U') {
899 CpNoteToCharPtrStack(nsp, NULL, "selenocysteine");
900 }
901 ptr = Get3LetterSymbol(seq_code, table,
902 the_residue, ajp->error_msgs);
903 buflen = StringLen(pos) + StringLen(ptr) + 11;
904 buffer = MemNew(buflen);
905 sprintf(buffer, "(pos:%s,aa:%s)", pos, ptr);
906 sfp_out->qual = AddGBQual(sfp_out->qual,
907 "transl_except", buffer);
908 MemFree(buffer);
909 MemFree(pos);
910 } else if (ajp->error_msgs) {
911 ErrPostEx(SEV_WARNING, ERR_FEATURE_CodeBreakLoc,
912 "Invalid Code-break.location: %s", pos);
913 }
914 }
915 } else {
916 slp = NULL;
917 while ((slp = SeqLocFindNext(cbp->loc, slp)) != NULL) {
918 pos = FlatLoc(bsp, slp);
919 if (pos) {
920 the_residue = (Uint1) cbp->aa.value.intvalue;
921 if (the_residue == 'U') {
922 CpNoteToCharPtrStack(nsp, NULL, "selenocysteine");
923 }
924 ptr = Get3LetterSymbol(seq_code, table,
925 the_residue, ajp->error_msgs);
926 buflen = StringLen(pos) + StringLen(ptr) + 11;
927 buffer = MemNew(buflen);
928 sprintf(buffer, "(pos:%s,aa:%s)", pos, ptr);
929 sfp_out->qual = AddGBQual(sfp_out->qual,
930 "transl_except", buffer);
931 MemFree(buffer);
932 MemFree(pos);
933 } else if (ajp->error_msgs) {
934 ErrPostEx(SEV_WARNING, ERR_FEATURE_CodeBreakLoc,
935 "Invalid Code-break.location: %s", pos);
936 }
937 }
938 }
939 cbp = cbp->next;
940 }
941 }
942
943 return;
944
945 } /* ComposeCodeBreakQuals */
946
947 /***********************************************************************
948 *void GetGeneticCode(CharPtr ptr, SeqFeatPtr sfp)
949 *
950 * returns ONLY non-standard (i.e., id not 0 or 1)
951 * genetic codes.
952 ***********************************************************************/
953
GetGeneticCode(CharPtr ptr,SeqFeatPtr sfp)954 static void GetGeneticCode(CharPtr ptr, SeqFeatPtr sfp)
955
956 {
957 Boolean code_is_one=FALSE;
958 CdRegionPtr cdr;
959 GBQualPtr qual;
960 ValNodePtr gcp, var;
961
962 cdr = sfp->data.value.ptrvalue;
963 gcp = cdr->genetic_code;
964
965 if (gcp != NULL)
966 {
967 for (var=gcp->data.ptrvalue; var != NULL; var=var->next)
968 {
969 if (var->choice == 2)
970 {
971 if (var->data.intvalue != 0 )
972 {
973 if (var->data.intvalue == 1)
974 code_is_one = TRUE;
975 else
976 sprintf(ptr, "%ld", (long) (var->data.intvalue));
977 }
978 break;
979 }
980 }
981 if (*ptr != '\0')
982 {
983 for (qual=sfp->qual; qual; qual=qual->next)
984 {
985 if (StringCmp("transl_table", qual->qual) == 0 &&
986 StringCmp(ptr, qual->val) != 0)
987 {
988 ErrPostStr(SEV_WARNING,
989 ERR_FEATURE_GcodeAndTTableClash, "");
990 break;
991 }
992 }
993 }
994 else if (code_is_one == TRUE)
995 {
996 for (qual=sfp->qual; qual; qual=qual->next)
997 {
998 if (StringCmp("transl_table", qual->qual) == 0 &&
999 StringCmp("1", qual->val) != 0)
1000 {
1001 ErrPostStr(SEV_WARNING,
1002 ERR_FEATURE_GcodeAndTTableClash, "");
1003 break;
1004 }
1005 }
1006 }
1007 }
1008 else
1009 {
1010 for (qual=sfp->qual; qual; qual=qual->next)
1011 if (StringCmp("transl_table", qual->qual) == 0)
1012 {
1013 StringCpy(ptr, qual->val);
1014 break;
1015 }
1016 }
1017
1018 return;
1019 } /* GetGeneticCode */
1020
/*
 * cleanup_sfp
 *
 *	Resets a scratch ImpFeat feature for reuse: comment, location and
 *	product are set to NULL (without freeing them), the exp_ev, partial
 *	and excpt flags are cleared, and the ImpFeat key and loc strings and
 *	the qualifier list are freed.
 *	Returns sfp_out itself (NULL when given NULL).
 */
static SeqFeatPtr cleanup_sfp(SeqFeatPtr sfp_out)
{
	ImpFeatPtr imp;

	if (sfp_out != NULL) {
		sfp_out->comment = NULL;
		sfp_out->location = NULL;
		sfp_out->product = NULL;
		sfp_out->exp_ev = FALSE;
		sfp_out->partial = FALSE;
		sfp_out->excpt = FALSE;

		imp = sfp_out->data.value.ptrvalue;
		if (imp->key != NULL) {
			imp->key = MemFree(imp->key);
		}
		if (imp->loc != NULL) {
			imp->loc = MemFree(imp->loc);
		}
		if (sfp_out->qual != NULL) {
			sfp_out->qual = GBQualFree(sfp_out->qual);
		}
	}
	return sfp_out;
}
1046
/*
 * Unlinks node x from the singly linked qualifier list starting at head.
 * The node itself is not freed; its next pointer is set to NULL.
 *
 * Returns the (possibly new) head of the list. The list is returned
 * unchanged when x is not a member.
 */
static GBQualPtr remove_qual(GBQualPtr head, GBQualPtr x)
{
    GBQualPtr prev, cur;

    if (head == NULL) {
        return NULL;
    }
    if (head == x) {
        head = x->next;
        x->next = NULL;
        return head;
    }

    /* head is known not to be x, so start the scan one node in */
    prev = head;
    cur = head->next;
    while (cur != NULL && cur != x) {
        prev = cur;
        cur = cur->next;
    }
    if (cur != NULL) {
        prev->next = x->next;
        x->next = NULL;
    }
    return head;
}
1068
/*
 * For an ImpFeat (data choice 8) whose key is "gene", moves the first
 * "gene" qualifier to the front of the feature's qualifier list.
 * Does nothing for other features or when no such qualifier exists.
 */
static void PutGeneFirst(SeqFeatPtr sfp)
{
    GBQualPtr gene_qual, rest;
    ImpFeatPtr imp;

    if (sfp == NULL || sfp->data.choice != 8 || sfp->qual == NULL)
        return;

    imp = sfp->data.value.ptrvalue;
    if (StringCmp(imp->key, "gene") != 0)
        return;

    /* locate the first /gene qualifier */
    gene_qual = sfp->qual;
    while (gene_qual != NULL && StringCmp("gene", gene_qual->qual) != 0)
        gene_qual = gene_qual->next;
    if (gene_qual == NULL)
        return;

    /* unlink it, then splice it back in as the new head */
    rest = remove_qual(sfp->qual, gene_qual);
    gene_qual->next = rest;
    sfp->qual = gene_qual;
} /* PutGeneFirst */
1100
/*
 * For an ImpFeat (data choice 8) whose key is "CDS", moves the first
 * "translation" qualifier to the end of the feature's qualifier list.
 * The list is left as it is when that qualifier is absent or is already
 * the last element.
 */
static void PutTranslationLast(SeqFeatPtr sfp)
{
    GBQualPtr head, prev, cur, tail;
    ImpFeatPtr imp;

    if (sfp == NULL || sfp->data.choice != 8 || sfp->qual == NULL)
        return;

    imp = sfp->data.value.ptrvalue;
    if (StringCmp(imp->key, "CDS") != 0)
        return;

    /* Look for the first translation among all nodes except the tail. */
    head = sfp->qual;
    prev = NULL;
    for (cur = head; cur->next != NULL; prev = cur, cur = cur->next) {
        if (StringCmp("translation", cur->qual) == 0)
            break;
    }
    if (cur->next == NULL)
        return;     /* nothing ahead of the tail needs moving */

    /* Detach the node from its current position ... */
    if (prev == NULL)
        head = cur->next;
    else
        prev->next = cur->next;

    /* ... walk to the end of what follows it ... */
    tail = cur->next;
    while (tail->next != NULL)
        tail = tail->next;

    /* ... and hang it on the tail. */
    cur->next = NULL;
    tail->next = cur;
    sfp->qual = head;
} /* PutTranslationLast */
1144
1145 static CharPtr mrnaevtext1 = "Derived by automated computational analysis";
1146 static CharPtr mrnaevtext2 = "using gene prediction method:";
1147 static CharPtr mrnaevtext3 = "Supporting evidence includes similarity to:";
1148
mRNAEvidenceComment(UserObjectPtr uop,Boolean add)1149 NLM_EXTERN CharPtr mRNAEvidenceComment(UserObjectPtr uop, Boolean add)
1150 {
1151 ObjectIdPtr oip;
1152 UserFieldPtr ufp, u, uu;
1153 CharPtr method = NULL, ptr, ne_name;
1154 static Char temp[20];
1155 Int2 ptrlen=0, np=0, nd=0, nm=0, ne=0;
1156 Boolean is_evidence = FALSE;
1157 Int4 Locus_id = 0;
1158
1159 if (uop == NULL) return NULL;
1160 if ((oip = uop->type) == NULL) return NULL;
1161 if (StringCmp(oip->str, "ModelEvidence") != 0) return NULL;
1162 for (ufp=uop->data; ufp; ufp=ufp->next) {
1163 oip = ufp->label;
1164 if (StringCmp(oip->str, "Method") == 0) {
1165 if (ufp->data.ptrvalue) {
1166 method = StringSave((CharPtr) ufp->data.ptrvalue);
1167 }
1168 }
1169 if (StringCmp(oip->str, "mRNA")==0) {
1170 is_evidence = TRUE;
1171 for (u = (UserFieldPtr) ufp->data.ptrvalue;u; u=u->next) {
1172 for (uu = (UserFieldPtr) u->data.ptrvalue; uu; uu=uu->next) {
1173 oip = uu->label;
1174 if (StringCmp(oip->str, "accession") == 0) {
1175 nm++;
1176 }
1177 }
1178 }
1179 }
1180 if (StringCmp(oip->str, "EST")==0) {
1181 is_evidence = TRUE;
1182 for (u = (UserFieldPtr) ufp->data.ptrvalue;u; u=u->next) {
1183 for (uu = (UserFieldPtr) u->data.ptrvalue;uu; uu=uu->next) {
1184 oip = uu->label;
1185 if (StringCmp(oip->str, "count") == 0) {
1186 ne = uu->data.intvalue;
1187 }
1188 if (StringCmp(oip->str, "organism") == 0) {
1189 ne_name = StringSave(( CharPtr) uu->data.ptrvalue);
1190 }
1191 }
1192 }
1193 }
1194 }
1195 ptrlen = StringLen (mrnaevtext1) + StringLen (mrnaevtext2) + StringLen (mrnaevtext3) + StringLen (method) + 25;
1196 if (np > 0) {
1197 ptrlen += StringLen("proteins") + 5;
1198 }
1199 if (nd > 0) {
1200 ptrlen += StringLen("domains") + 5;
1201 }
1202 if (nm > 0) {
1203 ptrlen += StringLen("mRNAs") + 5;
1204 }
1205 if (ne > 0) {
1206 ptrlen += StringLen("ESTs") + StringLen(ne_name) + 10;
1207 }
1208 ptr = (CharPtr) MemNew(ptrlen) + 1;
1209 if (add) {
1210 if (method != NULL) {
1211 sprintf (ptr, "%s %s %s.", mrnaevtext1, mrnaevtext2, method);
1212 } else {
1213 sprintf (ptr, "%s.", mrnaevtext1);
1214 }
1215 }
1216 if (is_evidence) {
1217 if (add) StringCat(ptr, " ");
1218 StringCat(ptr, "Supporting evidence includes similarity to:");
1219 }
1220 if (np > 0) {
1221 sprintf(temp, " %d proteins", np);
1222 StringCat(ptr, temp);
1223 }
1224 if (nd > 0) {
1225 if (np > 0)
1226 StringCat(ptr, ",");
1227 sprintf(temp, " %d domains", np);
1228 StringCat(ptr, temp);
1229 }
1230 if (nm > 0) {
1231 if (np > 0 || nd > 0)
1232 StringCat(ptr, ",");
1233 if (nm > 1) {
1234 sprintf(temp, " %d mRNAs", nm);
1235 } else {
1236 sprintf(temp, " %d mRNA", nm);
1237 }
1238 StringCat(ptr, temp);
1239 }
1240 if (ne > 0) {
1241 if ( np > 0 || nm > 0 || nd > 0)
1242 StringCat(ptr, ",");
1243 sprintf(temp, " %d %s ESTs", ne, ne_name);
1244 StringCat(ptr, temp);
1245 }
1246 return ptr;
1247 }
1248
mRNAFeatEvidenceComment(SeqFeatPtr sfp_in)1249 static CharPtr mRNAFeatEvidenceComment(SeqFeatPtr sfp_in)
1250 {
1251 RnaRefPtr rfp;
1252 UserObjectPtr uop, obj;
1253 ObjectIdPtr oip;
1254 UserFieldPtr uf;
1255
1256 rfp = (RnaRefPtr) sfp_in->data.value.ptrvalue;
1257 if (rfp->type != 2) { /* mRNA */
1258 return NULL;
1259 }
1260 if ((uop = sfp_in->ext) == NULL)
1261 return NULL;
1262 if ((oip = uop->type) == NULL) return NULL;
1263 if (StringCmp(oip->str, "CombinedFeatureUserObjects") != 0) return NULL;
1264 for (uf=uop->data; uf; uf=uf->next) {
1265 obj = (UserObjectPtr) uf->data.ptrvalue;
1266 return( mRNAEvidenceComment(obj, TRUE));
1267 }
1268 return NULL;
1269 }
1270
PrintNAFeatByNumber(Asn2ffJobPtr ajp,GBEntryPtr gbp)1271 NLM_EXTERN void PrintNAFeatByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1272 {
1273
1274 Boolean loc_ok;
1275 Char genetic_code[3];
1276 CharPtr ptr=NULL, sptr;
1277 ImpFeatPtr ifp;
1278 SeqFeatPtr sfp_in, sfp_out=NULL;
1279 Int4 status, total_feats, feat_index;
1280 SortStructPtr p;
1281
1282 if (gbp == NULL || gbp->feat == NULL) {
1283 return;
1284 }
1285 feat_index = ajp->pap_index;
1286 total_feats=gbp->feat->sfpListsize;
1287 if (total_feats == 0) {
1288 return;
1289 }
1290 sfp_out=ajp->sfp_out;
1291 if (sfp_out->qual)
1292 sfp_out->qual = GBQualFree(sfp_out->qual);
1293 ifp = sfp_out->data.value.ptrvalue;
1294 if (ifp->loc)
1295 ifp->loc = MemFree(ifp->loc);
1296 if (feat_index < total_feats) {
1297 p = gbp->feat->List + feat_index;
1298 if (p == NULL)
1299 return;
1300 if (p->tempload == TRUE) {
1301 GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
1302 &sfp_in, find_item);
1303 } else {
1304 sfp_in = p->sfp;
1305 }
1306 if (sfp_in == NULL) {
1307 return;
1308 }
1309 if (ajp->mode == PARTIAL_MODE &&
1310 sfp_in->data.choice != SEQFEAT_CDREGION) {
1311 sfp_out = cleanup_sfp(sfp_out);
1312 return;
1313 }
1314 status = ConvertToNAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1315 if (status < 1) {
1316 sfp_out = cleanup_sfp(sfp_out);
1317 return;
1318 }
1319 if (p->slp != NULL) {
1320 sfp_out->location = p->slp;
1321 }
1322 ifp = sfp_out->data.value.ptrvalue;
1323 flat2asn_install_feature_user_string(ifp->key, NULL);
1324 loc_ok=CheckAndGetNAFeatLoc(gbp->bsp, &ptr, sfp_out, TRUE);
1325 if (loc_ok == TRUE || ASN2FF_VALIDATE_FEATURES == FALSE) {
1326 ifp->loc = ptr;
1327 } else {
1328 flat2asn_delete_feature_user_string();
1329 flat2asn_install_feature_user_string(ifp->key, ptr);
1330 MemFree(ptr);
1331 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
1332 ErrPostEx(SEV_WARNING, ERR_FEATURE_Dropped, "Unparsable location");
1333 }
1334 sfp_out = cleanup_sfp(sfp_out);
1335 flat2asn_delete_feature_user_string();
1336 return;
1337 }
1338 flat2asn_delete_feature_user_string();
1339 flat2asn_install_feature_user_string(ifp->key, ptr);
1340 if (p->dup == TRUE) {
1341 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
1342 ErrPostEx(SEV_WARNING, ERR_FEATURE_Duplicated,
1343 "Duplicated feature dropped");
1344 }
1345 sfp_out = cleanup_sfp(sfp_out);
1346 flat2asn_delete_feature_user_string();
1347 return;
1348 }
1349 if (sfp_in->data.choice == SEQFEAT_CDREGION) {
1350 ComposeCodeBreakQuals(ajp, gbp->bsp, sfp_in, sfp_out,
1351 p->extra_loc, p->extra_loc_cnt, p->nsp);
1352 genetic_code[0]='\0';
1353 if (ASN2FF_TRANSL_TABLE == TRUE) {
1354 GetGeneticCode(genetic_code, sfp_in);
1355 if (genetic_code[0] != '\0') {
1356 sfp_out->qual = AddGBQual(sfp_out->qual,
1357 "transl_table", genetic_code);
1358 }
1359 }
1360 }
1361 if (sfp_in->data.choice == SEQFEAT_GENE) {
1362 if (ajp->show_gene == FALSE) {
1363 sfp_out = cleanup_sfp(sfp_out);
1364 flat2asn_delete_feature_user_string();
1365 return;
1366 }
1367 }
1368 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, p->nsp);
1369 LookForPartialImpFeat(sfp_out, FALSE);
1370 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1371 status = ValidateNAImpFeat(sfp_out);
1372 if (sfp_in->data.choice == SEQFEAT_CDREGION) {
1373 PutTranslationLast(sfp_out);
1374 } else if (sfp_in->data.choice == SEQFEAT_GENE) {
1375 PutGeneFirst(sfp_out);
1376 } else if (sfp_in->data.choice == SEQFEAT_RNA) {
1377 if ((sptr = mRNAFeatEvidenceComment(sfp_in)) != NULL) {
1378 sfp_out->qual =
1379 AddGBQual(sfp_out->qual, "note", sptr);
1380 }
1381 }
1382 if (status >= 0 || ASN2FF_VALIDATE_FEATURES == FALSE) {
1383 PrintImpFeatEx(ajp, gbp->bsp, sfp_out, gbp->gi, p->entityID, p->itemID);
1384 }
1385 flat2asn_delete_feature_user_string();
1386 }
1387 sfp_out = cleanup_sfp(sfp_out);
1388 return;
1389 } /* PrintNAFeatByNumber */
1390
1391 /***************************************************************************
1392 *PrintAAFeatByNumber
1393 *
1394 * This function prints out the genpept SeqFeats.
1395 *
1396 **************************************************************************/
1397
PrintAAFeatByNumber(Asn2ffJobPtr ajp,GBEntryPtr gbp)1398 NLM_EXTERN void PrintAAFeatByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1399 {
1400 CharPtr ptr=NULL;
1401 Char genetic_code[3];
1402 ImpFeatPtr ifp;
1403 Int2 status;
1404 Int4 feat_index, total_feats;
1405 NoteStructPtr nsp;
1406 SeqFeatPtr sfp_in, sfp_out=NULL;
1407 SortStructPtr p;
1408
1409 if (gbp == NULL || gbp->feat == NULL) {
1410 return;
1411 }
1412 feat_index = ajp->pap_index;
1413 total_feats=gbp->feat->sfpListsize;
1414 if (total_feats == 0) {
1415 return;
1416 }
1417 sfp_out=ajp->sfp_out;
1418 if (sfp_out->qual) {
1419 sfp_out->qual = GBQualFree(sfp_out->qual);
1420 }
1421 ifp = sfp_out->data.value.ptrvalue;
1422 if (ifp->loc) {
1423 ifp->loc = MemFree(ifp->loc);
1424 }
1425 if (feat_index < total_feats) {
1426 p = gbp->feat->List + feat_index;
1427 if (p == NULL || p->dup == TRUE) {
1428 return;
1429 }
1430 if ((sfp_in = p->sfp) == NULL) {
1431 GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
1432 &sfp_in, find_item);
1433 }
1434 if (sfp_in == NULL) {
1435 return;
1436 }
1437 nsp = p->nsp;
1438 switch (sfp_in->data.choice) {
1439 /* Note: the functions that CheckAndGetFeatLoc use for
1440 checking fails on protein locations sometimes. */
1441 case SEQFEAT_CDREGION:
1442 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
1443 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1444 if (status < 0)
1445 break;
1446 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1447 GetAAFeatLoc(gbp->bsp, &ptr, sfp_in, TRUE);
1448 ifp->loc = ptr;
1449 ptr = FlatLoc(gbp->bsp, sfp_in->location);
1450 sfp_out->qual =
1451 AddGBQual(sfp_out->qual, "coded_by", ptr);
1452 ptr = MemFree(ptr);
1453 genetic_code[0]='\0';
1454 if (ASN2FF_TRANSL_TABLE == TRUE) {
1455 GetGeneticCode(genetic_code, sfp_in);
1456 if (genetic_code[0] != '\0')
1457 sfp_out->qual =
1458 AddGBQual(sfp_out->qual, "transl_table", genetic_code);
1459 }
1460 status = ValidateAAImpFeat(sfp_out, TRUE);
1461 if (status >= 0)
1462 PrintImpFeat(ajp, gbp->bsp, sfp_out);
1463 break;
1464 case SEQFEAT_PROT:
1465 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
1466 AddProteinQuals(sfp_in, sfp_out, nsp);
1467 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1468 if (status < 0)
1469 break;
1470 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1471 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
1472 ifp->loc = ptr;
1473 status = ValidateAAImpFeat(sfp_out, FALSE);
1474 if (status >= 0)
1475 PrintImpFeat(ajp, gbp->bsp, sfp_out);
1476 break;
1477 case SEQFEAT_SEQ:
1478 case SEQFEAT_IMP:
1479 case SEQFEAT_REGION:
1480 case SEQFEAT_COMMENT:
1481 case SEQFEAT_BOND:
1482 case SEQFEAT_SITE:
1483 case SEQFEAT_PSEC_STR:
1484 case SEQFEAT_NON_STD_RESIDUE:
1485 case SEQFEAT_HET:
1486 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
1487 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1488 if (status < 0)
1489 break;
1490 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1491 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
1492 ifp->loc = ptr;
1493 status = ValidateAAImpFeat(sfp_out, FALSE);
1494 if (status >= 0)
1495 PrintImpFeat(ajp, gbp->bsp, sfp_out);
1496 break;
1497 case SEQFEAT_GENE:
1498 if (ajp->show_gene == FALSE) {
1499 break;
1500 }
1501 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
1502 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1503 if (status < 0)
1504 break;
1505 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1506 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
1507 ifp->loc = ptr;
1508 status = ValidateAAImpFeat(sfp_out, FALSE);
1509 if (status >= 0)
1510 PrintImpFeat(ajp, gbp->bsp, sfp_out);
1511 break;
1512 default:
1513 break;
1514 }
1515 }
1516 sfp_out = cleanup_sfp(sfp_out);
1517 } /* PrintAAFeatByNumber */
1518
1519
1520 /************************************************************************
1521 *GetProductFromCDS(ValNodePtr product, ValNodePtr location, Int4 length)
1522 *
1523 * Gets the CDS product, using SeqPortNewByLoc
1524 * The bsp is that of the protein, and comes from the location. The bsp
1525 * is found in the calling program anyway, as it's used to get
1526 * the EC_NUM.
1527 * The protein sequence comes back in allocated memory. The user
1528 * is responsible for deallocating that.
1529 *
1530 * A check is made (BioseqFind()) that the protein Bioseq is in memory.
1531 * This guarantees that a fetch is NOT made if it is not memory, to accomodate
1532 * the splitting of DNA and protein in Entrez. In this case, it's just
1533 * translated.
1534 *
1535 *************************************************************************/
1536
GetProductFromCDS(ValNodePtr product,ValNodePtr location,Int4 bsp_length)1537 NLM_EXTERN CharPtr GetProductFromCDS(ValNodePtr product, ValNodePtr location, Int4 bsp_length)
1538
1539 {
1540 Boolean at_end=FALSE;
1541 CharPtr protein_seq=NULL, start_ptr=NULL;
1542 Int4 length;
1543 SeqPortPtr spp;
1544 Uint1 residue, code;
1545 BioseqPtr bsp;
1546 SeqIdPtr sip;
1547
1548 if (ASN2FF_IUPACAA_ONLY == TRUE)
1549 code = Seq_code_iupacaa;
1550 else
1551 code = Seq_code_ncbieaa;
1552
1553 if (product) {
1554 sip = SeqLocId(product);
1555 bsp = BioseqFindCore(sip);
1556 if (bsp != NULL) /* Bioseq is (or has been) in memory */ {
1557 length = SeqLocLen(product);
1558 if (length > 0) {
1559 if (SeqLocStart(location) == 0 ||
1560 SeqLocStop(location) == bsp_length-1)
1561 at_end = TRUE;
1562 start_ptr = protein_seq =
1563 (CharPtr) MemNew((size_t) (length*sizeof(CharPtr)));
1564 spp = SeqPortNewByLoc(product, code);
1565 spp->do_virtual = TRUE;
1566 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
1567 if ( !IS_residue(residue) && residue != INVALID_RESIDUE )
1568 continue;
1569 if (residue == INVALID_RESIDUE)
1570 residue = (Uint1) 'X';
1571 *protein_seq = residue;
1572 protein_seq++;
1573 }
1574 SeqPortFree(spp);
1575 if (at_end) {
1576 if (StringLen(start_ptr) < GENPEPT_MIN)
1577 start_ptr = MemFree(start_ptr);
1578 }
1579 }
1580 }
1581 }
1582 return start_ptr;
1583 }
1584
1585 /**************************************************************************
1586 *CharPtr GettRNAaa (tRNAPtr trna, Boolean error_messages)
1587 *
1588 * Return a pointer containing the amino acid type.
1589 **************************************************************************/
1590
GettRNAaa(tRNAPtr trna,Boolean error_msgs)1591 static CharPtr GettRNAaa (tRNAPtr trna, Boolean error_msgs)
1592
1593 {
1594 CharPtr ptr=NULL;
1595 SeqCodeTablePtr table;
1596 Uint1 seq_code;
1597 /*
1598 The choice values used in the tRNA structure do NOT corresond to
1599 the choice(==ENUMs) of Seq-code_type, and the latter are used
1600 by all the utility functions, so we map them...
1601 */
1602 if ( trna && trna -> aatype) {
1603 switch (trna -> aatype) {
1604 case 1:
1605 seq_code = 2;
1606 break;
1607 case 2:
1608 seq_code = 8;
1609 break;
1610 case 3:
1611 seq_code = 7;
1612 break;
1613 case 4:
1614 seq_code = 11;
1615 break;
1616 }
1617
1618 if ((table=SeqCodeTableFind (seq_code)) != NULL)
1619 ptr = Get3LetterSymbol(seq_code, table, trna->aa, error_msgs);
1620 }
1621
1622 return ptr;
1623 } /* GettRNAaa */
1624
1625 /*************************************************************************
1626 *ComposetRNANote (Asn2ffJobPtr ajp, NoteStructPtr nsp, tRNAPtr trna, )
1627 *
1628 * Add info from Trna-ext to Note stack in the GeneStructPtr.
1629 **************************************************************************/
1630
ComposetRNANote(Asn2ffJobPtr ajp,NoteStructPtr nsp,tRNAPtr trna)1631 static void ComposetRNANote(Asn2ffJobPtr ajp, NoteStructPtr nsp, tRNAPtr trna)
1632 {
1633 /*
1634 Char buffer[25];
1635 CharPtr ptr = &(buffer[0]);
1636 Int2 index;
1637 Uint1 codon[4];
1638
1639 if (! trna)
1640 return;
1641
1642 if ((trna->codon)[0] != 255)
1643 {
1644 codon[3] = '\0';
1645 for (index=0; index<6; index++)
1646 {
1647 if ((trna->codon)[index] == 255)
1648 break;
1649 if (CodonForIndex((trna->codon)[index], Seq_code_iupacna, codon))
1650 {
1651 StringCpy(ptr, (CharPtr) codon);
1652 ptr += 3;
1653 }
1654 else
1655 {
1656 *ptr = '?'; ptr++;
1657 }
1658 if (index<5 && (trna->codon)[index+1] != 255)
1659 {
1660 *ptr = ','; ptr++;
1661 *ptr = ' '; ptr++;
1662 }
1663 }
1664 if ((trna->codon)[1] == 255)
1665 {
1666 ptr = &buffer[0];
1667 SaveNoteToCharPtrStack(nsp, "codon recognized:", ptr);
1668 }
1669 else
1670 {
1671 ptr = &buffer[0];
1672 SaveNoteToCharPtrStack(nsp, "codons recognized:", ptr);
1673 }
1674 }
1675 return;
1676 */
1677
1678 Char buffer [25];
1679 Int2 num;
1680
1681 num = ComposeCodonsRecognizedString (trna, buffer, sizeof (buffer));
1682 if (num < 1 || StringHasNoText (buffer)) return;
1683 if (num == 1) {
1684 SaveNoteToCharPtrStack(nsp, "codon recognized:", buffer);
1685 } else {
1686 SaveNoteToCharPtrStack(nsp, "codons recognized:", buffer);
1687 }
1688
1689 } /* ComposetRNANote */
1690
1691
1692 /************************************************************************
1693 * Make the anticodon qualifier and (possible) note to the tRNA
1694 * with the following paradigm:
1695 * 0.) First look at the new anticodon slot on tRNAPtr
1696 * if not found do the rest:
1697 ** 1.) Look at SeqFeat.ext for a UserObject using the fct. QualLocWrite,
1698 * if result is not NULL, use this location in anticodon qualifier;
1699 *
1700 * 2.) Look for an anticodon qualifier, use if no QualLocWrite's
1701 * result was not zero;
1702 * 3.) Make note if neither 1.) or 2.) was true, or there are
1703 * multiple codons.
1704 *****************************
1705 * NEW ALGORITHM 07-15-96
1706 *****************************
1707 * 1) aa present?
1708 * print /product = tRNA-aa
1709 * 2) codon recognized present?
1710 * print /note="codon recognized: codon"
1711 * 3) anticodon and aa present?
1712 * print /anticodon=...
1713 *************************************************************************/
DotRNAQuals(Asn2ffJobPtr ajp,GBEntryPtr gbp,SeqFeatPtr sfp_in,SeqFeatPtr sfp_out,NoteStructPtr nsp,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt)1714 static void DotRNAQuals (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, NoteStructPtr nsp, SeqLocPtr PNTR extra_loc,
1715 Int2 extra_loc_cnt)
1716 {
1717 Boolean found_anticodon=FALSE /*, found_qual=FALSE -- UNUSED */;
1718 Char buffer[40];
1719 CharPtr aa_ptr, newptr=NULL, ptr = &(buffer[0]), tmp=NULL;
1720 GBQualPtr curq;
1721 RnaRefPtr rrp;
1722 tRNAPtr trna=NULL;
1723 SeqLocPtr slp=NULL;
1724 SeqIntPtr sip;
1725
1726 if (sfp_in == NULL) {
1727 return;
1728 }
1729 if (sfp_in->data.choice != SEQFEAT_RNA) {
1730 return;
1731 }
1732 rrp = sfp_in->data.value.ptrvalue;
1733 /* Look for anticodon struct */
1734 if (rrp->ext.choice == 2) {
1735 newptr = MemNew(50*sizeof(Char));
1736 trna = rrp->ext.value.ptrvalue;
1737 if ((aa_ptr = GettRNAaa(trna, ajp->error_msgs)) != NULL) {
1738 if (GBQualPresent("product", sfp_out->qual) == FALSE) {
1739 sprintf(newptr, "tRNA-%s", aa_ptr);
1740 sfp_out->qual = AddGBQual(sfp_out->qual, "product", newptr);
1741 }
1742 }
1743 if (trna && (slp = trna->anticodon) != NULL && aa_ptr) {
1744 if (extra_loc_cnt > 0) {
1745 slp = extra_loc[0];
1746 }
1747 if (slp && slp->choice == SEQLOC_INT) {
1748 sip = slp->data.ptrvalue;
1749 sprintf(ptr, "%ld..%ld", (long) sip->from+1, (long) sip->to+1);
1750 sprintf(newptr, "(pos:%s,aa:%s)", ptr, aa_ptr);
1751 sfp_out->qual = AddGBQual(sfp_out->qual, "anticodon", newptr);
1752 found_anticodon=TRUE;
1753 }
1754 }
1755 }
1756 if (! found_anticodon) {
1757 if (sfp_in->ext) { /* Look for UserObject */
1758 tmp = QualLocWrite(sfp_in->ext, ptr);
1759 if (tmp) {
1760 newptr = MemNew(50*sizeof(Char));
1761 rrp = sfp_in->data.value.ptrvalue;
1762 trna = rrp->ext.value.ptrvalue;
1763 aa_ptr = GettRNAaa(trna, ajp->error_msgs);
1764 if (aa_ptr) {
1765 sprintf(newptr, "(pos:%s,aa:%s)", ptr, aa_ptr);
1766 sfp_out->qual =
1767 AddGBQual(sfp_out->qual, "anticodon", newptr);
1768 found_anticodon=TRUE;
1769 }
1770 }
1771 }
1772 }
1773 if (! found_anticodon) {
1774 /* Look for anticodon qual if no UserObject found */
1775 for (curq=sfp_in->qual; curq; curq=curq->next)
1776 if (StringCmp("anticodon", curq->qual) == 0) {
1777 sfp_out->qual =
1778 AddGBQual(sfp_out->qual, "anticodon", curq->val);
1779 /* found_qual=TRUE; -- NO EFFECT */
1780 break;
1781 }
1782 }
1783
1784 /* make note "codon recognized*/
1785 ComposetRNANote(ajp, nsp, trna);
1786 MemFree(newptr);
1787
1788 } /* DotRNAQuals */
1789
1790 /**************************************************************************
1791 *ConvertToAAImpFeat
1792 *
1793 * This code copies a SeqFeat into an ImpFeat format for use in
1794 * producing GenBank format. Two SeqFeatPtr's should be passed
1795 * in as arguments (sfp_in, sfp_out). On the first call, of a
1796 * number of calls, sfp_out should be NULL so that memory for
1797 * ImpFeat can be allocated. On subsequent calls, sfp_out->data.choice
1798 * should be "8" (for ImpFeats).
1799 *
1800 * Written by Tom Madden
1801 *
1802 **************************************************************************/
1803
ConvertToAAImpFeat(Asn2ffJobPtr ajp,GBEntryPtr gbp,SeqFeatPtr sfp_in,SeqFeatPtr PNTR sfpp_out,SortStructPtr p)1804 NLM_EXTERN Int2 ConvertToAAImpFeat (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfpp_out, SortStructPtr p)
1805 {
1806 BioseqPtr bsp=NULL;
1807 Char printbuf[41], temp[65];
1808 CharPtr ptr;
1809 ImpFeatPtr ifp, ifp_in;
1810 Int2 retval=1;
1811 NoteStructPtr nsp;
1812 GeneStructPtr gsp;
1813 ProtRefPtr prot;
1814 SeqFeatPtr sfp_out;
1815 SeqIdPtr sip=NULL, xid;
1816 ValNodePtr vnp, vnp1;
1817
1818 sfp_out = *sfpp_out;
1819
1820 if (sfp_out->data.choice != SEQFEAT_IMP)
1821 return -1;
1822
1823 ifp = (ImpFeatPtr) sfp_out->data.value.ptrvalue;
1824
1825 sfp_out->partial = sfp_in->partial;
1826 sfp_out->comment = sfp_in->comment;
1827 sfp_out->exp_ev = sfp_in->exp_ev;
1828 sfp_out->location = sfp_in->location;
1829
1830 nsp = p->nsp;
1831 gsp = p->gsp;
1832 if (sfp_out->comment) {
1833 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_out->comment);
1834 }
1835
1836 switch (sfp_in->data.choice) {
1837 case SEQFEAT_CDREGION:
1838 ifp->key = StringSave("CDS");
1839 break;
1840 case SEQFEAT_PROT:
1841 prot = sfp_in->data.value.ptrvalue;
1842 if (prot->processed == 0 || prot->processed == 1) {
1843 GetProtRefInfo(ajp->format, gsp, nsp, prot);
1844 ifp->key = StringSave("Protein");
1845 } else if (prot->processed == 2) {
1846 ifp->key = StringSave("mat_peptide");
1847 } else if (prot->processed == 3) {
1848 ifp->key = StringSave("sig_peptide");
1849 } else if (prot->processed == 4) {
1850 ifp->key = StringSave("transit_peptide");
1851 }
1852 if (sfp_in->location) {
1853 sip = SeqLocId(sfp_in->location);
1854 if (sip)
1855 bsp = BioseqFind(sip);
1856 if (bsp) {
1857 vnp = bsp->descr;
1858 for (vnp = bsp->descr; vnp; vnp = vnp->next) {
1859 if (vnp->choice != Seq_descr_modif) {
1860 continue;
1861 }
1862 for (vnp1 = vnp->data.ptrvalue; vnp1; vnp1=vnp1->next) {
1863 if (vnp1->data.intvalue == 1) {
1864 sfp_out->partial = TRUE;
1865 break;
1866 }
1867 }
1868 }
1869 }
1870 }
1871 break;
1872 case SEQFEAT_SEQ:
1873 ifp->key = StringSave("misc_feature");
1874 if ((xid=CheckXrefFeat(gbp->bsp, sfp_in)) != NULL)
1875 {
1876 SeqIdWrite(xid, printbuf, PRINTID_FASTA_SHORT, 40);
1877 ptr = &(temp[0]);
1878 sprintf(ptr, "Cross-reference: %s", printbuf);
1879 SaveNoteToCharPtrStack(nsp, NULL, ptr);
1880 }
1881 else
1882 retval = 0;
1883 break;
1884 case SEQFEAT_IMP:
1885 ifp_in = (ImpFeatPtr) sfp_in->data.value.ptrvalue;
1886 ifp->key = StringSave(ifp_in->key);
1887 break;
1888 case SEQFEAT_REGION:
1889 sfp_out->qual =
1890 AddGBQual(sfp_out->qual, "region_name", sfp_in->data.value.ptrvalue);
1891 ifp->key = StringSave("Region");
1892 break;
1893 case SEQFEAT_COMMENT:
1894 ifp->key = StringSave("misc_feature");
1895 break;
1896 case SEQFEAT_BOND:
1897 ptr = AsnEnumStr("SeqFeatData.bond",
1898 (Int2) (sfp_in->data.value.intvalue));
1899 sfp_out->qual = AddGBQual(sfp_in->qual, "bond_type", ptr);
1900 ifp->key = StringSave("Bond");
1901 break;
1902 case SEQFEAT_SITE:
1903 ptr = AsnEnumStr("SeqFeatData.site",
1904 (Int2) (sfp_in->data.value.intvalue));
1905 sfp_out->qual = AddGBQual(sfp_out->qual, "site_type", ptr);
1906 ifp->key = StringSave("Site");
1907 break;
1908 case SEQFEAT_PSEC_STR:
1909 ptr = AsnEnumStr("SeqFeatData.psec-str",
1910 (Int2) (sfp_in->data.value.intvalue));
1911 sfp_out->qual = AddGBQual(sfp_out->qual, "sec_str_type", ptr);
1912 ifp->key = StringSave("SecStr");
1913 break;
1914 case SEQFEAT_NON_STD_RESIDUE:
1915 sfp_out->qual =
1916 AddGBQual(sfp_out->qual, "non-std-residue",
1917 sfp_in->data.value.ptrvalue);
1918 ifp->key = StringSave("NonStdResidue");
1919 break;
1920 case SEQFEAT_HET:
1921 sfp_out->qual =
1922 AddGBQual(sfp_out->qual, "heterogen", sfp_in->data.value.ptrvalue);
1923 ifp->key = StringSave("Het");
1924 break;
1925 default:
1926 if (ajp->error_msgs == TRUE)
1927 ErrPostStr(SEV_WARNING, ERR_FEATURE_UnknownFeatureKey,
1928 "Unimplemented type of feat in ConvertToAAImpFeat");
1929 retval = 1;
1930 break;
1931 }
1932
1933 return retval;
1934
1935 } /* ConvertToAAImpFeat */
1936
1937 /*****************************************************************************
1938 * CompareTranslation:
1939 * -- if bsp != translation's value return FALSE
1940 *****************************************************************************/
CompareTranslation(ByteStorePtr bsp,CharPtr qval)1941 static Boolean CompareTranslation(ByteStorePtr bsp, CharPtr qval)
1942 {
1943 CharPtr ptr;
1944 Int2 residue, residue1, residue2;
1945 Int4 len, blen;
1946 Boolean done;
1947
1948 if (qval == NULL || bsp == NULL) {
1949 return FALSE; /* no comparison */
1950 }
1951 len = StringLen(qval);
1952 BSSeek(bsp, 0, SEEK_SET);
1953
1954 blen = BSLen(bsp);
1955 done = FALSE;
1956 while ((! done) && (len)) {
1957 residue1 = qval[(len-1)];
1958 if (residue1 == 'X') /* remove terminal X */
1959 len--;
1960 else
1961 done = TRUE;
1962 }
1963 done = FALSE;
1964 while ((! done) && (blen)) {
1965 BSSeek(bsp, (blen-1), SEEK_SET);
1966 residue2 = BSGetByte(bsp);
1967 if (residue2 == 'X')
1968 blen--;
1969 else
1970 done = TRUE;
1971 }
1972 BSSeek(bsp, 0, SEEK_SET);
1973 if (blen != len) {
1974 return FALSE;
1975 } else {
1976 for (ptr = qval; *ptr != '\0' &&
1977 (residue = BSGetByte(bsp)) != EOF; ptr++) {
1978
1979 if (residue != *ptr) {
1980 return FALSE;
1981 }
1982
1983 } /* for */
1984
1985 } /* compare two sequences */
1986 return TRUE;
1987 } /* check */
1988
/*
 * Collects protein information for the product of a feature and stores
 * it in the gene/note structures of gp.
 *
 * Nothing happens when sfp_in has no product, when gp is NULL, or when
 * BioseqFindCore() does not find the product Bioseq.
 *
 * With ajp->useSeqMgrIndexes set, the feature returned by
 * SeqMgrGetBestProteinFeature() is used when it is a protein feature
 * with data.  Otherwise the protein features on the product are
 * gathered and the ProtRef of the one with the greatest location
 * length is passed to GetProtRefInfo().  GetProtRefComment() is called
 * on both paths.
 */
static void GatherProductGeneInfo (Asn2ffJobPtr ajp, SeqFeatPtr sfp_in, GBEntryPtr gbp, SortStructPtr gp, Uint1 method)
{
    BioseqPtr prot_bsp;
    GatherScope scope;
    GeneStructPtr gsp;
    NoteStructPtr nsp;
    Int2 i;
    Int4 len, best_len = 0;
    ProtRefPtr best_prot = NULL;
    SeqFeatPtr feat = NULL;
    SeqIdPtr prot_id;
    ValNodePtr product;
    OrganizeProtPtr opp;
    SortStructPtr entry;
    Uint2 entityID;

    product = sfp_in->product;
    if (product == NULL) {
        return;
    }
    if (gp == NULL) {
        return;
    }
    gsp = gp->gsp;
    nsp = gp->nsp;

    prot_id = SeqLocId(product);
    prot_bsp = BioseqFindCore(prot_id);
    if (prot_bsp == NULL) {
        return;     /* the product Bioseq was not found */
    }

    /* Indexed shortcut: take the feature picked by the sequence manager. */
    if (ajp->useSeqMgrIndexes) {
        feat = SeqMgrGetBestProteinFeature (prot_bsp, NULL);
        if (feat != NULL && feat->data.choice == SEQFEAT_PROT) {
            best_prot = (ProtRefPtr) feat->data.value.ptrvalue;
            if (best_prot != NULL) {
                GetProtRefInfo(ajp->format, gsp, nsp, best_prot);
                GetProtRefComment(sfp_in, prot_bsp, ajp, NULL, nsp, method);
                return;
            }
        }
    }

    /* Gather path: collect the features located on the product. */
    entityID = ObjMgrGetEntityIDForPointer(prot_bsp);
    opp = (OrganizeProtPtr) MemNew(sizeof(OrganizeProt));
    opp->size = 0;
    MemSet ((Pointer) (&scope), 0, sizeof (GatherScope));
    MemSet ((Pointer) (scope.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
    scope.ignore[OBJ_SEQANNOT] = FALSE;
    scope.ignore[OBJ_SEQFEAT] = FALSE;
    scope.get_feats_location = TRUE;
    scope.target = product;
    scope.seglevels = 1;
    GatherEntity(entityID, opp, get_prot_feats, &scope);

    if (opp->size > 0) {
        best_prot = NULL;
        entry = opp->list;
        i = 0;
        while (i < opp->size) {
            feat = entry->sfp;
            if (feat == NULL) {
                GatherItemWithLock(entry->entityID, entry->itemID, entry->itemtype,
                                   &feat, find_item);
            }
            /* keep the ProtRef of the longest usable protein feature */
            if (feat != NULL && feat->data.choice == SEQFEAT_PROT) {
                len = SeqLocLen(feat->location);
                if (len != -1 && len > best_len) {
                    best_prot = feat->data.value.ptrvalue;
                    best_len = len;
                }
            }
            i++;
            entry++;
        }
        GetProtRefInfo(ajp->format, gsp, nsp, best_prot);
    }
    GetProtRefComment(sfp_in, prot_bsp, ajp, opp, nsp, method);

    /* Release the per-entry structures, then the list and its holder. */
    entry = opp->list;
    for (i = 0; i < opp->size; i++, entry++) {
        if (entry && entry->gsp) {
            GeneStructFree(entry->gsp);
        }
        if (entry && entry->nsp) {
            NoteStructFree(entry->nsp);
        }
    }
    MemFree(opp->list);
    MemFree(opp);

    return;
}
2075
2076 /**************************************************************************
2077 *ConvertToNAImpFeat
2078 *
2079 * This code copies a SeqFeat into an ImpFeat format for use in
2080 * producing GenBank format. Two SeqFeatPtr's should be passed
2081 * in as arguments (sfp_in, sfp_out).
2082 * return status:
2083 * 1: conversion successful
2084 * 0: no conversion, but also no error (the data in the ASN.1 is lost
2085 * or is put out some other way)
2086 * -1: an error
2087 **************************************************************************/
2088
ProductIsLocal(Uint2 entityID,SeqLocPtr product)2089 static Boolean ProductIsLocal (Uint2 entityID, SeqLocPtr product)
2090
2091 {
2092 BioseqPtr bsp;
2093 SeqEntryPtr sep, oldscope;
2094 SeqIdPtr sip = NULL;
2095 SeqLocPtr slp;
2096
2097 slp = SeqLocFindNext (product, NULL);
2098 while (slp != NULL && sip == NULL) {
2099 sip = SeqLocId (slp);
2100 slp = SeqLocFindNext (product, slp);
2101 }
2102 if (sip == NULL) return FALSE;
2103 sep = GetTopSeqEntryForEntityID (entityID);
2104 if (sep == NULL) return FALSE;
2105 oldscope = SeqEntrySetScope (sep);
2106 bsp = BioseqFind (sip);
2107 SeqEntrySetScope (oldscope);
2108 if (bsp != NULL) return TRUE;
2109 return FALSE;
2110 }
2111
ConvertToNAImpFeat(Asn2ffJobPtr ajp,GBEntryPtr gbp,SeqFeatPtr sfp_in,SeqFeatPtr PNTR sfpp_out,SortStructPtr gp)2112 NLM_EXTERN Int2 ConvertToNAImpFeat (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfpp_out, SortStructPtr gp)
2113 {
2114 BioseqPtr bsp=gbp->bsp, pbsp=NULL;
2115 Boolean found_key, non_pseudo = FALSE;
2116 CdRegionPtr cdr;
2117 Char buffer[2], printbuf[41], temp[65];
2118 CharPtr buf_ptr = &(buffer[0]), protein_seq=NULL, ptr = &(temp[0]);
2119 NoteStructPtr nsp;
2120 ImpFeatPtr ifp, ifp_in;
2121 Int2 retval=1;
2122 Int4 length=0;
2123 SeqFeatPtr sfp_out;
2124 SeqIdPtr xid;
2125 ValNodePtr product;
2126 ValNodePtr mod, syn;
2127 BioSourcePtr biosp;
2128 OrgRefPtr orp;
2129 RnaRefPtr rrp;
2130 ByteStorePtr byte_sp;
2131 /* Int4 len_cds, len_prot; -- UNUSED */
2132 Uint1 method = 0;
2133 GeneRefPtr grp;
2134 Boolean was_gene = FALSE;
2135 CharPtr key=NULL, tmp;
2136 GeneStructPtr gsp;
2137 CharPtr except_msg="No explanation supplied", loc;
2138
2139 sfp_out = *sfpp_out;
2140
2141 if (sfp_out->data.choice != SEQFEAT_IMP)
2142 return -1;
2143
2144 ifp = (ImpFeatPtr) sfp_out->data.value.ptrvalue;
2145
2146 sfp_out->partial = sfp_in->partial;
2147 sfp_out->comment = sfp_in->comment;
2148 sfp_out->exp_ev = sfp_in->exp_ev;
2149 sfp_out->location = sfp_in->location;
2150 sfp_out->product = sfp_in->product;
2151 sfp_out->pseudo = sfp_in->pseudo;
2152
2153 found_key = GetNAFeatKey(ajp->show_gene, &(key), sfp_in, sfp_out);
2154 if (!found_key)
2155 return -1;
2156 ifp->key = key;
2157 nsp = gp->nsp;
2158 gsp=gp->gsp;
2159 switch (sfp_in->data.choice)
2160 {
2161 case SEQFEAT_BIOSRC:
2162 biosp = sfp_in->data.value.ptrvalue;
2163 orp = (OrgRefPtr) biosp->org;
2164 if (orp) {
2165 if (orp->taxname) {
2166 sfp_out->qual = AddGBQual(sfp_out->qual,
2167 "organism", orp->taxname);
2168 } else if (orp->common) {
2169 if (StrStr(orp->common, "virus") ||
2170 StrStr(orp->common, "Virus") ||
2171 StrStr(orp->common, "phage") ||
2172 StrStr(orp->common, "Phage") ||
2173 StrStr(orp->common, "viroid") ||
2174 StrStr(orp->common, "Viroid"))
2175 {
2176 sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
2177 orp->common);
2178 }
2179 }
2180 /* added from OrgRef.mod 03.20.96 */
2181 for (mod = orp->mod; mod; mod = mod->next) {
2182 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) mod->data.ptrvalue);
2183 }
2184 } else {
2185 sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
2186 "unknown");
2187 }
2188 sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual,
2189 TRUE);
2190 break;
2191 case SEQFEAT_CDREGION:
2192 product = sfp_in->product;
2193 if (ajp->mode == RELEASE_MODE) {
2194 if (GBQualPresent("pseudo", sfp_in->qual) == FALSE &&
2195 gsp->pseudo == FALSE && sfp_in->pseudo == FALSE) {
2196 non_pseudo = TRUE;
2197 }
2198 if (non_pseudo) {
2199 if (product == NULL) {
2200 if (ajp->error_msgs == TRUE) {
2201 loc = SeqLocPrint(sfp_in->location);
2202 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2203 "Dropping CDS due to missing product: %s", loc);
2204 MemFree(loc);
2205 }
2206 return -1;
2207 }
2208 if (ajp->forgbrel && CheckSeqIdChoice(SeqLocId(product)) == FALSE) {
2209 if (ajp->error_msgs == TRUE) {
2210 loc = SeqLocPrint(sfp_in->location);
2211 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2212 "Dropping CDS due to missing EMBL/DDBJ/GB protein accession: %s", loc);
2213 MemFree(loc);
2214 }
2215 return -1;
2216 }
2217 if (ajp->forgbrel && (pbsp = BioseqFindCore(SeqLocId(product))) == NULL) {
2218 if (ajp->error_msgs == TRUE) {
2219 loc = SeqLocPrint(sfp_in->location);
2220 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2221 "Dropping CDS due to missing protein: %s", loc);
2222 MemFree(loc);
2223 }
2224 return -1;
2225 }
2226 if (pbsp != NULL) {
2227 if (ajp->forgbrel && CheckSeqIdChoice(pbsp->id) == FALSE) {
2228 if (ajp->error_msgs == TRUE) {
2229 loc = SeqLocPrint(sfp_in->location);
2230 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2231 "Dropping CDS due to missing EMBL/DDBJ/GB protein accession: %s", loc);
2232 MemFree(loc);
2233 }
2234 return -1;
2235 }
2236 if (ajp->show_version == TRUE) {
2237 if (CheckSeqIdAccVer(pbsp->id) == FALSE) {
2238 if (ajp->error_msgs == TRUE) {
2239 loc = SeqLocPrint(sfp_in->location);
2240 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2241 "Dropping CDS due to missing protein accession.version: %s", loc);
2242 MemFree(loc);
2243 }
2244 return -1;
2245 }
2246 }
2247 }
2248 }
2249 }
2250 cdr = (CdRegionPtr) sfp_in->data.value.ptrvalue;
2251 if ((GBQualPresent("codon_start", sfp_in->qual)) == FALSE)
2252 { /* Above checks if codon_start is already present. */
2253 if (cdr->frame)
2254 sprintf(buf_ptr, "%ld", (long) (cdr->frame));
2255 else
2256 sprintf(buf_ptr, "1");
2257 sfp_out->qual = AddGBQual(sfp_out->qual, "codon_start", buf_ptr);
2258 }
2259 if (product && (! ajp->genome_view) && (ProductIsLocal (ajp->entityID, product))) {
2260 byte_sp = ProteinFromCdRegion(sfp_in, FALSE);
2261
2262 if (product) {
2263 length = bsp->length;
2264 protein_seq = GetProductFromCDS(product, sfp_in->location, length);
2265 /* check conflict flag and fix it */
2266 if (cdr->conflict == TRUE) {
2267 if (CompareTranslation(byte_sp, protein_seq)) {
2268 cdr->conflict = FALSE;
2269 } else {
2270 method = METHOD_concept_transl_a;
2271 }
2272 }
2273 if (protein_seq) {
2274 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2275 gsp->pseudo == FALSE && sfp_in->pseudo == FALSE) {
2276 sfp_out->qual = AddGBQual(sfp_out->qual,
2277 "translation", protein_seq);
2278 }
2279 MemFree(protein_seq);
2280 }
2281 }
2282 BSFree(byte_sp);
2283 }
2284 if (sfp_in->pseudo) {
2285 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2286 }
2287 if (sfp_in->excpt) {
2288 if (StringCmp("ribosomal slippage", sfp_in->except_text) == 0 ||
2289 StringCmp("ribosome slippage", sfp_in->except_text) == 0) {
2290 sfp_out->qual = AddGBQual(sfp_out->qual,
2291 "note", sfp_in->except_text);
2292 sfp_out->excpt = FALSE;
2293 } else if (StringCmp("trans splicing", sfp_in->except_text) == 0 ||
2294 StringCmp("trans-splicing", sfp_in->except_text) == 0) {
2295 sfp_out->excpt = FALSE;
2296 } else if (sfp_in->except_text) {
2297 sfp_out->qual = AddGBQual(sfp_out->qual,
2298 "exception", sfp_in->except_text);
2299 } else if (GBQualPresent("exception", sfp_in->qual) == TRUE) {
2300 sfp_out->qual = AddGBQual(sfp_out->qual,
2301 "exception", sfp_in->qual->val);
2302 } else if (sfp_out->comment != NULL) {
2303 sfp_out->qual = AddGBQual(sfp_out->qual,
2304 "exception", sfp_in->comment);
2305 sfp_out->comment = NULL;
2306 } else {
2307 sfp_out->qual = AddGBQual(sfp_out->qual,
2308 "exception", except_msg);
2309 }
2310 } else {
2311 if (GBQualPresent("exception", sfp_in->qual) == TRUE) {
2312 sfp_out->qual = AddGBQual(sfp_out->qual,
2313 "exception", sfp_in->qual->val);
2314 }
2315 }
2316
2317 GatherProductGeneInfo(ajp, sfp_in, gbp, gp, method);
2318
2319 /******************************************************************************
2320 - asn2ff shouldn't generate a de-novo /translation for any
2321 cdregion that lacks a product, regardless of mode or -V setting 2/15/99
2322 ******************************************************************************
2323 if (protein_seq == NULL && ajp->mode != RELEASE_MODE) {
2324 protein_seq = BSMerge(byte_sp, NULL);
2325 if ( protein_seq && protein_seq[0] != '-') {
2326 len_prot = StringLen(protein_seq);
2327 SeqLocLen(sfp_in->location) - (cdr->frame - 1);
2328 if (len_prot >= 6) {
2329 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2330 gsp->pseudo == FALSE) {
2331 sfp_out->qual =
2332 AddGBQual(sfp_out->qual,
2333 "translation", protein_seq);
2334 }
2335 }
2336 }
2337 MemFree(protein_seq);
2338 }
2339 BSFree(byte_sp);
2340 */
2341 break;
2342 case SEQFEAT_RNA:
2343 rrp = sfp_in->data.value.ptrvalue;
2344 /* the following code was taken (almost) directly
2345 from Karl Sirotkin's code. */
2346 switch ( rrp -> type){ /* order of case n: matches tests in
2347 is_RNA_type() of genasn.c in
2348 GenBankConversion directory */
2349 case 2:
2350 break;
2351 case 255:
2352 break;
2353 case 3:
2354 if (rrp->ext.choice == 1) {
2355 if ((GBQualPresent("product", sfp_in->qual)) == FALSE) {
2356 sfp_out->qual = AddGBQual(sfp_out->qual,
2357 "product", (CharPtr) rrp->ext.value.ptrvalue);
2358 }
2359 } else if (rrp->ext.choice == 0 ||
2360 rrp->ext.choice == 2) {
2361 DotRNAQuals(ajp, gbp, sfp_in, sfp_out,
2362 gp->nsp, gp->extra_loc, gp->extra_loc_cnt);
2363 }
2364 break;
2365 case 4:
2366 break;
2367 case 1:
2368 if (rrp->ext.choice == 1) {
2369 if ((GBQualPresent("product", sfp_in->qual)) == FALSE) {
2370 sfp_out->qual = AddGBQual(sfp_out->qual,
2371 "product", (CharPtr) rrp->ext.value.ptrvalue);
2372 }
2373 } else if (rrp->ext.choice == 0 ||
2374 rrp->ext.choice == 2) {
2375 DotRNAQuals(ajp, gbp, sfp_in, sfp_out,
2376 gp->nsp, gp->extra_loc, gp->extra_loc_cnt);
2377 }
2378 break;
2379 case 5:
2380 break;
2381 case 6:
2382 break;
2383 }
2384
2385 if (rrp && rrp->pseudo == TRUE) {
2386 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE)
2387 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2388 }
2389
2390
2391 break;
2392 case SEQFEAT_SEQ:
2393 if ((xid=CheckXrefFeat(bsp, sfp_in)) != NULL) {
2394 ptr = &(temp[0]);
2395 SeqIdWrite(xid, printbuf, PRINTID_FASTA_SHORT, 40);
2396 sprintf(ptr, "Cross-reference: %s", printbuf);
2397 SaveNoteToCharPtrStack(nsp, NULL, ptr);
2398 }
2399 else
2400 retval = 0;
2401 break;
2402 case SEQFEAT_IMP:
2403 ifp_in = (ImpFeatPtr) sfp_in->data.value.ptrvalue;
2404 if (ifp_in->loc != NULL)
2405 ifp->loc = ifp_in->loc;
2406 if (StringCmp(ifp_in->key, "CDS") == 0) {
2407 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2408 ajp->error_msgs == TRUE)
2409 ErrPostStr(SEV_INFO, ERR_FEATURE_non_pseudo,
2410 "ConvertToNAImpFeat: Non-pseudo ImpFeat CDS found");
2411 if ((GBQualPresent("translation", sfp_in->qual)) == TRUE &&
2412 ajp->mode == RELEASE_MODE) {
2413 if (ajp->error_msgs == TRUE) {
2414 ErrPostStr(SEV_ERROR, ERR_FEATURE_Dropped,
2415 "ImpFeat CDS with /translation found");
2416 }
2417 retval = -1;
2418 }
2419 }
2420 break;
2421 case SEQFEAT_REGION:
2422 tmp = MemNew(StringLen(sfp_in->data.value.ptrvalue) + 9);
2423 sprintf(tmp, "Region: %s", (CharPtr ) sfp_in->data.value.ptrvalue);
2424 sfp_out->qual = AddGBQual(sfp_out->qual, "note", tmp);
2425 tmp = MemFree(tmp);
2426 break;
2427 case SEQFEAT_SITE:
2428 AddSiteNoteQual(sfp_in, sfp_out);
2429 break;
2430 case SEQFEAT_RSITE:
2431 break;
2432 case SEQFEAT_COMMENT:
2433 if(ifp->key != NULL)
2434 MemFree(ifp->key);
2435 ifp->key = StringSave("misc_feature");
2436 break;
2437 case SEQFEAT_GENE:
2438 grp = (GeneRefPtr) sfp_in->data.value.ptrvalue;
2439 if (grp == NULL)
2440 break;
2441 syn=grp->syn;
2442 if (grp->locus ) {
2443 sfp_out->qual = AddGBQual(sfp_out->qual, "gene", grp->locus);
2444 was_gene = TRUE;
2445 } else if (syn != NULL) {
2446 sfp_out->qual = AddGBQual(sfp_out->qual, "gene",
2447 syn->data.ptrvalue);
2448 syn=syn->next;
2449 was_gene = TRUE;
2450 }
2451 if (grp->desc ) {
2452 if (was_gene) {
2453 CpNoteToCharPtrStack(nsp, NULL, grp->desc);
2454 } else {
2455 /* s = MemNew(StringLen(grp->desc) + 15);
2456 sprintf(s, "Description: %s", grp->desc);
2457 sfp_out->qual = AddGBQual(sfp_out->qual, "gene", s);*/
2458 sfp_out->qual = AddGBQual(sfp_out->qual, "gene", grp->desc);
2459 }
2460 }
2461 if (grp->allele ) {
2462 if ((GBQualPresent("allele", sfp_in->qual)) == FALSE)
2463 sfp_out->qual = AddGBQual(sfp_out->qual, "allele", grp->allele);
2464 }
2465 if (grp->maploc ) {
2466 if ((GBQualPresent("map", sfp_in->qual)) == FALSE)
2467 sfp_out->qual = AddGBQual(sfp_out->qual, "map", grp->maploc);
2468 }
2469 for (; syn; syn=syn->next) {
2470 CpNoteToCharPtrStack(nsp, NULL, syn->data.ptrvalue);
2471 }
2472 if (grp->pseudo == TRUE || sfp_in->pseudo) {
2473 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE)
2474 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2475 }
2476 GetDBXrefFromGene(grp, sfp_out);
2477 break;
2478 default:
2479 if (ajp->error_msgs == TRUE)
2480 ErrPostStr(SEV_WARNING, ERR_FEATURE_UnknownFeatureKey,
2481 "Unimplemented type of gbqual in ConvertToNAImpFeat");
2482 retval = 0;
2483 break;
2484 }
2485 if (gsp->grp) {
2486 GetDBXrefFromGene(gsp->grp, sfp_out);
2487 }
2488 if (sfp_in->pseudo) {
2489 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2490 }
2491 if (sfp_out->comment) {
2492 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_out->comment);
2493 sfp_out->comment = NULL;
2494 }
2495 return retval;
2496
2497 } /* ConvertToNAImpFeat */
2498
2499 /*****************************************************************************
2500 *ValidateNAImpFeat
2501 *
2502 * This code validates an ImpFeat using some functions from
2503 * the flat2asn parser.
2504 *
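*	Returns 1 if the key and qualifiers are valid, 0 if the qualifiers
*	had repairable problems, -1 if the feature is not an ImpFeat or has
*	to be dropped, and -2 if GBFeatKeyNameValid does not accept the
*	feature key.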
2506 *
2507 *****************************************************************************/
2508
ValidateNAImpFeat(SeqFeatPtr sfp)2509 NLM_EXTERN Int2 ValidateNAImpFeat (SeqFeatPtr sfp)
2510
2511 {
2512 CharPtr key;
2513 ImpFeatPtr ifp;
2514 Int2 index, retval=0, status=0;
2515
2516 if (sfp->data.choice != SEQFEAT_IMP) {
2517 return -1;
2518 } else {
2519
2520 ifp = sfp->data.value.ptrvalue;
2521 key = StringSave(ifp->key);
2522 index = GBFeatKeyNameValid(&key, ASN2FF_SHOW_ERROR_MSG);
2523 if (StringCmp(key, ifp->key) != 0) {
2524 ifp->key = key;
2525 } else {
2526 MemFree(key);
2527 }
2528
2529 if (index == -1) {
2530 retval = -2;
2531 } else {
2532 status = GBFeatKeyQualValid(sfp->cit, index, &sfp->qual,
2533 ASN2FF_SHOW_ERROR_MSG, ASN2FF_VALIDATE_FEATURES);
2534 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
2535 if (index == 46 && status == GB_FEAT_ERR_NONE) {
2536 status = GBFeatKeyQualValid(sfp->cit, index, &sfp->qual,
2537 ASN2FF_SHOW_ERROR_MSG, TRUE);
2538 }
2539 #endif
2540 if (status == GB_FEAT_ERR_NONE) {
2541 retval = 1;
2542 } else if (status == GB_FEAT_ERR_REPAIRABLE) {
2543 retval = 0;
2544 } else if (status == GB_FEAT_ERR_DROP) {
2545 retval = -1;
2546 }
2547 }
2548
2549 }
2550
2551 return retval;
2552 } /* ValidateNAImpFeat */
2553
2554 /*****************************************************************************
2555 *ValidateAAImpFeat
2556 *
2557 * This code will validate an ImpFeat using some functions from
2558 * the flat2asn parser. Right now it just checks to see that the
2559 * sfp is an ImpFeat and checks for a partial qualifier.
2560 *
2561 * If a feat is bad and can't be corrected, -1 is returned.
2562 *
2563 *****************************************************************************/
2564
ValidateAAImpFeat(SeqFeatPtr sfp,Boolean use_product)2565 NLM_EXTERN Int2 ValidateAAImpFeat (SeqFeatPtr sfp, Boolean use_product)
2566
2567 {
2568
2569 if (sfp->data.choice != SEQFEAT_IMP)
2570 return -1;
2571
2572 LookForPartialImpFeat(sfp, use_product);
2573
2574 return 0;
2575 } /* ValidateAAImpFeat */
2576
2577
2578 /*****************************************************************************
2579 *void PrepareSourceFeatQuals(SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modifs)
2580 *
2581 *Normally called from PrintSourceFeat, collects all notes etc. together.
2582 *Note: sfp_out may already have quals when it comes here, they should not
2583 *be deleted!
2584 * add_modifs: allows the addition of modifs to be specified, don't add
2585 * modifs if the source feature is a ImpFeat.
2586 *
2587 *For many cases there is no sfp_in, so that must be checked for.
2588 *****************************************************************************/
2589
PrepareSourceFeatQuals(SeqFeatPtr sfp_in,SeqFeatPtr sfp_out,GBEntryPtr gbp,Boolean add_modif)2590 NLM_EXTERN void PrepareSourceFeatQuals(SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modif)
2591
2592 {
2593 CharPtr note=NULL;
2594 GBQualPtr qual1;
2595 NoteStructPtr nsp=NULL;
2596
2597 if (gbp->feat) {
2598 nsp=gbp->feat->source_notes;
2599 }
2600 if (sfp_in) {
2601 for (qual1=sfp_in->qual; qual1; qual1=qual1->next) {
2602 if (StringCmp(qual1->qual, "note") == 0)
2603 CpNoteToCharPtrStack(nsp, NULL, qual1->val);
2604 else
2605 sfp_out->qual =
2606 AddGBQual(sfp_out->qual, qual1->qual, qual1->val);
2607 }
2608 }
2609 /* not used in new style */
2610 if (add_modif == TRUE)
2611 sfp_out->qual = AddModifsToGBQual(gbp, sfp_out->qual);
2612 /*--------------------- tatiana */
2613 if (sfp_in && sfp_in->comment) {
2614 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_in->comment);
2615 }
2616
2617 if (nsp && nsp->note[0]) {
2618 note = ComposeNoteFromNoteStruct(nsp, NULL);
2619 if (note) {
2620 sfp_out->qual = AddGBQual(sfp_out->qual, "note", note);
2621 note = MemFree(note);
2622 }
2623 }
2624 if (sfp_in && sfp_in->cit) {
2625 if (ASN2FF_SHOW_ERROR_MSG)
2626 ErrPostStr(SEV_WARNING, 0, 0,
2627 "Unwanted /citation on 'source' feature will be dropped");
2628 }
2629
2630 return;
2631 }
2632
2633
2634 /*************************************************************************
2635 *AddProteinQuals
2636 *
2637 *************************************************************************/
2638
AddProteinQuals(SeqFeatPtr sfp,SeqFeatPtr sfp_out,NoteStructPtr nsp)2639 NLM_EXTERN void AddProteinQuals (SeqFeatPtr sfp, SeqFeatPtr sfp_out, NoteStructPtr nsp)
2640
2641 {
2642 ProtRefPtr prp=sfp->data.value.ptrvalue;
2643 ValNodePtr vnp;
2644
2645 if (prp->name != NULL) {
2646 for (vnp=prp->name; vnp; vnp=vnp->next)
2647 if (GBQualPresent("product", sfp_out->qual) == FALSE)
2648 sfp_out->qual =
2649 AddGBQual(sfp_out->qual, "product", vnp->data.ptrvalue);
2650 else
2651 CpNoteToCharPtrStack(nsp, NULL, vnp->data.ptrvalue);
2652 }
2653 if (prp->desc) {
2654 sfp_out->qual =
2655 AddGBQual(sfp_out->qual, "name", prp->desc);
2656 }
2657
2658 for (vnp=prp->ec; vnp; vnp=vnp->next)
2659 if ((CheckForQual(sfp_out->qual, "EC_number", vnp->data.ptrvalue)) == 0)
2660 sfp_out->qual =
2661 AddGBQual(sfp_out->qual, "EC_number", vnp->data.ptrvalue);
2662
2663 return;
2664 }
2665
2666 /*______________________________________________________________________
2667 **
2668 ** This code is not currently used.
2669 ** I do not remove this piece of code, just comment it out.
2670 ** -- Dmitri Lukyanov
2671 */
2672 #if 0
2673
2674 static GBQualPtr RemoveQual(GBQualPtr head, GBQualPtr x)
2675 {
2676 GBQualPtr v, p;
2677
2678 if (head == NULL) {
2679 return NULL;
2680 }
2681 if (x == head) {
2682 head = x->next;
2683 x->next = NULL;
2684 GBQualFree(x);
2685 return head;
2686 }
2687 for (v = head; v != NULL && v != x; v = v->next) {
2688 p = v;
2689 }
2690 if (v != NULL) {
2691 p->next = x->next;
2692 x->next = NULL;
2693 GBQualFree(x);
2694 }
2695 return head;
2696 }
2697
2698 #endif
2699 /*______________________________________________________________________
2700 */
2701
/* Adds /db_xref="GeneID:<first gene synonym>" to a CDS feature.
 *
 * Nothing is added when gsp carries no GeneRef, when sfp_out is not keyed
 * "CDS", or when the GeneRef has no (or an empty-pointer) first synonym.
 * The qualifier text is built in a heap buffer sized from the synonym, so
 * a long synonym cannot overrun a fixed-size array. */
static void Add_gene_id (GeneStructPtr gsp, SeqFeatPtr sfp_out)
{
	ImpFeatPtr ifp;
	GeneRefPtr grp;
	ValNodePtr vnp;
	CharPtr    syn, val;

	if ((grp = gsp->grp) == NULL)
		return;
	ifp = sfp_out->data.value.ptrvalue;
	if (StringCmp(ifp->key, "CDS") != 0) {
		return;
	}
	if ((vnp = grp->syn) == NULL)  /* no synonyms */
		return;
	syn = (CharPtr) vnp->data.ptrvalue;
	if (syn == NULL)
		return;

	/* "GeneID:" is 7 characters, plus the terminating NUL */
	val = MemNew(StringLen(syn) + 8);
	if (val == NULL)
		return;
	sprintf(val, "GeneID:%s", syn);
	sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
	MemFree(val);
}
2720
2721 /****************************************************************************
2722 * Composes the GBQuals for sfp_out using the information in the
2723 * GeneStructPtr (gsp), and then the quals already on sfp_out.
2724 *
2725 * use only info from GeneStruct throw away the quals gene and map if they
2726 * different /tatiana 07.11.95/
2727 * do not add /map to the features other than 'gene' /08-29-97/
2728 * sfp_out: SEQFEAT_IMP
2729 * map /citation added by Tatiana
2730 **************************************************************************/
ComposeGBQuals(Asn2ffJobPtr ajp,SeqFeatPtr sfp_out,GBEntryPtr gbp,SortStructPtr p,Boolean note_pseudo)2731 NLM_EXTERN void ComposeGBQuals (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, GBEntryPtr gbp, SortStructPtr p, Boolean note_pseudo)
2732 {
2733 Char temp[65];
2734 Char buffer[10];
2735 CharPtr ascii, start, note=NULL, ptr=NULL, tmp;
2736 GBQualPtr gbqp=NULL, qual1, qnext;
2737 GeneStructPtr gsp;
2738 Int2 int_index, status;
2739 NoteStructPtr nsp;
2740 PubStructPtr psp;
2741 SeqFeatPtr sfp = NULL;
2742 Int2 ascii_len, l;
2743 ValNodePtr vnp, vnp1;
2744 ValNodePtr pub, pubq, pubset;
2745 ImpFeatPtr ifp;
2746 BioseqPtr bsp;
2747 Boolean is_contig = FALSE, is_NC = FALSE, is_NG = FALSE;
2748 SeqIdPtr sid;
2749 TextSeqIdPtr tsip;
2750
2751 if (gbp == NULL || gbp->feat == NULL || p == NULL) {
2752 return;
2753 }
2754 bsp = gbp->bsp;
2755 for (sid=bsp->id; sid; sid=sid->next) {
2756 if (sid->choice == SEQID_OTHER) {
2757 tsip = (TextSeqIdPtr) sid->data.ptrvalue;
2758 if (StringNCmp(tsip->accession, "NT", 2) == 0) {
2759 is_contig = TRUE;
2760 }
2761 if (StringNCmp(tsip->accession, "NC", 2) == 0
2762 || StringNCmp(tsip->accession, "NP", 2) == 0) {
2763 is_NC = TRUE;
2764 }
2765 if (StringNCmp(tsip->accession, "NG", 2) == 0) {
2766 is_NG = TRUE;
2767 }
2768 }
2769 }
2770 gsp=p->gsp;
2771 nsp = p->nsp;
2772 if ((sfp=p->sfp) == NULL) {
2773 GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
2774 &sfp, find_item);
2775 }
2776 if (gsp) {
2777 if (gsp->gene) {
2778 /* delete_qual(&(sfp_out->qual), "gene"); */
2779 for (vnp=gsp->gene; vnp; vnp=vnp->next)
2780 {
2781 ascii_len = Sgml2AsciiLen(vnp->data.ptrvalue);
2782 start = ascii = MemNew((size_t) (10+ascii_len));
2783 ascii = Sgml2Ascii(vnp->data.ptrvalue, ascii, ascii_len+1);
2784 if ((GBQualPresent("gene", gbqp)) == FALSE) {
2785 if ((GBQualPresent("gene", sfp_out->qual)) == FALSE) {
2786 gbqp=AddGBQual(gbqp, "gene", start);
2787 }
2788 }
2789 start = MemFree(start);
2790 }
2791 }
2792 if (gsp->product) {
2793 for (vnp=gsp->product; vnp; vnp=vnp->next)
2794 {
2795 if (GBQualPresent("product", gbqp) == FALSE &&
2796 GBQualPresent("product", sfp_out->qual) == FALSE)
2797 sfp_out->qual = AddGBQual(sfp_out->qual, "product",
2798 vnp->data.ptrvalue);
2799 else
2800 CpNoteToCharPtrStack(nsp, NULL, vnp->data.ptrvalue);
2801 }
2802 }
2803 if (gsp->standard_name) {
2804 for (vnp=gsp->standard_name; vnp; vnp=vnp->next)
2805 {
2806 if ((CheckForQual(sfp_out->qual, "standard_name",
2807 vnp->data.ptrvalue)) == 0) {
2808 gbqp=AddGBQual(gbqp, "standard_name", vnp->data.ptrvalue);
2809 }
2810 }
2811 }
2812 if (ajp->show_gene == TRUE) {
2813 ifp = sfp_out->data.value.ptrvalue;
2814 if (StringCmp(ifp->key, "gene") == 0) {
2815 if (gsp->map[0]) {
2816 gbqp = AddGBQual(gbqp, "map", gsp->map[0]);
2817 }
2818 }
2819 } else {
2820 if (gsp->map[0]) {
2821 gbqp = AddGBQual(gbqp, "map", gsp->map[0]);
2822 }
2823 }
2824 for (vnp=gsp->ECNum; vnp; vnp=vnp->next) {
2825 if ((CheckForQual(sfp_out->qual, "EC_number",
2826 vnp->data.ptrvalue)) == 0) {
2827 gbqp=AddGBQual(gbqp, "EC_number", vnp->data.ptrvalue);
2828 }
2829 }
2830 for (vnp=gsp->activity; vnp; vnp=vnp->next) {
2831 if ((CheckForQual(sfp_out->qual, "function",
2832 vnp->data.ptrvalue)) == 0) {
2833 gbqp=AddGBQual(gbqp, "function", vnp->data.ptrvalue);
2834 }
2835 }
2836 if (gsp->pseudo == TRUE) {
2837 if (note_pseudo == TRUE) {
2838 CpNoteToCharPtrStack(nsp, NULL, "pseudogene");
2839 } else if (GBQualPresent("pseudo", gbqp) == FALSE &&
2840 GBQualPresent("pseudo", sfp_out->qual) == FALSE) {
2841 gbqp = AddGBQual(gbqp, "pseudo", NULL);
2842 }
2843 }
2844 }
2845 /* Add Experimental note */
2846 if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION)
2847 {
2848 ptr = &(temp[0]);
2849 status = MakeGBSelectNote(ptr, sfp);
2850 if (status > 0)
2851 SaveNoteToCharPtrStack(nsp, NULL, ptr);
2852 ptr=NULL;
2853 /* gene synonym appears as db-xref
2854 if (is_NC) {
2855 Add_gene_id(gsp, sfp_out);
2856 }
2857 */
2858 }
2859 if (nsp && nsp->note[0])
2860 {
2861 note = ComposeNoteFromNoteStruct(nsp, gsp);
2862 if (note)
2863 {
2864 gbqp = AddGBQual(gbqp, "note", note);
2865 note = MemFree(note);
2866 }
2867 }
2868 if (ajp->mode != DIRSUB_MODE) {
2869 AddPID(ajp, sfp_out, (Boolean) (is_contig || is_NG || is_NC));
2870 }
2871 if (is_contig || is_NG || is_NC) {
2872 if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA) {
2873 Add_trid(ajp, sfp_out);
2874 }
2875 }
2876 Add_dbxref(ajp, sfp_out, sfp, bsp);
2877 vnp = gbp->Pub;
2878 if (sfp && sfp->cit) {
2879 buffer[0] = '\0';
2880 pubset = sfp->cit;
2881 for (pubq = pubset->data.ptrvalue; pubq; pubq = pubq->next) {
2882 if (pubq->choice == PUB_Equiv) {
2883 pub = pubq->data.ptrvalue;
2884 for (; pub != NULL; pub = pub->next) {
2885 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
2886 psp = vnp1->data.ptrvalue;
2887 if (PubLabelMatch(psp->pub, pub) == 0) {
2888 sprintf(buffer, "[%ld]", (long) (psp->number));
2889 gbqp = AddGBQual(gbqp, "citation", buffer);
2890 break;
2891 }
2892 }
2893 }
2894 } else {
2895 pub = pubq;
2896 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
2897 psp = vnp1->data.ptrvalue;
2898 if (PubLabelMatch(psp->pub, pub) == 0) {
2899 sprintf(buffer, "[%ld]", (long) (psp->number));
2900 gbqp = AddGBQual(gbqp, "citation", buffer);
2901 break;
2902 }
2903 }
2904 }
2905 }
2906 /************** old algorithm for pub matching ****************/
2907 if (buffer[0] == '\0') {
2908 for (vnp1=vnp; vnp1; vnp1=vnp1->next)
2909 {
2910 psp = vnp1->data.ptrvalue;
2911 for (int_index=0; int_index<psp->pubcount; int_index++)
2912 if (sfp == psp->pubfeat[int_index])
2913 {
2914 sprintf(buffer, "[%ld]", (long) (psp->number));
2915 gbqp = AddGBQual(gbqp, "citation", buffer);
2916 }
2917 }
2918 }
2919 }
2920 if (gbqp) /* any gene or note related quals added above? */
2921 {
2922 for (qual1=gbqp; qual1->next; qual1=qual1->next)
2923 ;
2924 qual1->next = sfp_out->qual;
2925 sfp_out->qual = gbqp;
2926 }
2927 /* check for the qual gdb_xref */
2928 for (qual1 = sfp_out->qual; qual1; qual1 = qnext) {
2929 qnext = qual1->next;
2930 if (StringCmp(qual1->qual, "gdb_xref") == 0) {
2931 qual1->qual = StringSave("db_xref");
2932 l = StringLen(qual1->val);
2933 tmp = MemNew(l + 5);
2934 sprintf(tmp, "GDB:%s", qual1->val);
2935 qual1->val = StringSave(tmp);
2936 MemFree(tmp);
2937 }
2938 if (ajp->show_gene == FALSE) {
2939 /* change qual 'replace' to the old style location operator */
2940 /* changed December 1996 release 100.0 */
2941 /* if (StringCmp(qual1->qual, "replace") == 0) {
2942 ifp = sfp_out->data.value.ptrvalue;
2943 loc = FlatLoc(gbp->bsp, sfp->location);
2944 l = StringLen(qual1->val) + StringLen(loc);
2945 tmp = MemNew(l + 15);
2946 sprintf(tmp, "replace(%s,\"%s\")", loc, qual1->val);
2947 MemFree(loc);
2948 ifp->loc = tmp;
2949 sfp_out->qual = RemoveQual(sfp_out->qual, qual1);
2950 }
2951 */
2952 }
2953 }
2954 return;
2955 } /* ComposeGBQuals */
2956
/* Cleans up a note string and returns a newly allocated copy of it.
 *
 *	- leading white space and commas are skipped
 *	- every newline, together with the white space that follows it, is
 *	  replaced by a single blank
 *	- trailing blanks, tabs and the characters ; , . " are removed
 *
 * NOTE: the newline folding and the trailing trim are carried out IN
 * PLACE, so the caller's buffer is modified as well.
 * Returns NULL when str is NULL or nothing is left after cleaning;
 * otherwise the result of Nlm_StringSave, which the caller must free. */
static CharPtr tmp_save(CharPtr str)
{
	CharPtr rd, wr;
	size_t  len;

	if (str == NULL) {
		return NULL;
	}
	/* <ctype.h> functions need an unsigned char value */
	while (isspace((unsigned char) *str) || *str == ',') {
		str++;
	}

	/* compact in a single pass instead of strcpy() on overlapping
	   regions, which is undefined behaviour */
	for (rd = wr = str; *rd != '\0'; ) {
		if (*rd == '\n') {
			*wr++ = ' ';
			rd++;
			while (isspace((unsigned char) *rd)) {
				rd++;
			}
		} else {
			*wr++ = *rd++;
		}
	}
	*wr = '\0';

	/* trim from the end by index so no pointer before str is formed */
	len = (size_t) (wr - str);
	while (len > 0 && (str[len-1] == ' ' || str[len-1] == ';' ||
			str[len-1] == ',' || str[len-1] == '.' ||
			str[len-1] == '\"' || str[len-1] == '\t')) {
		str[--len] = '\0';
	}

	if (*str == '\0') {
		return NULL;
	}
	return Nlm_StringSave(str);
}
NoteCmp(CharPtr n1,CharPtr n2)2983 static Int2 NoteCmp(CharPtr n1, CharPtr n2)
2984 {
2985 CharPtr s1, s2;
2986 Int2 ret = 1;
2987
2988 if (n1 == NULL || n2 == NULL)
2989 return ret;
2990 s1 = tmp_save(n1);
2991 s2 = tmp_save(n2);
2992 if (StringStr(s1, s2) != NULL)
2993 ret = 0; /*duplicated */
2994 MemFree(s1);
2995 MemFree(s2);
2996
2997 return ret;
2998 }
2999
3000 /****************************************************************************
* CharPtr ComposeNoteFromNoteStruct (NoteStructPtr nsp, GeneStructPtr gsp)
3002 *
3003 * This function composes a "/note" for a SeqFeatPtr from the information
3004 * in the GeneStructPtr (gsp).
3005 * The first "for" loop initializes the first CharPtr and a check
3006 * is done that the information in gsp->note is *not* redundant. If
3007 * it is not, first gsp->note_annot is copied onto a CharPtr (this
3008 * field contains words describing the origin of the info in note, i.e.,
3009 * "Description"); then the actual note is copied onto the CharPtr.
3010 * The second "for" loop does the same checking as the first and the
3011 * concatenation of more "note" strings is performed.
3012 *
3013 *n.b.: the caller is responsible for deallocating the final returned "note".
3014 ***************************************************************************/
ComposeNoteFromNoteStruct(NoteStructPtr nsp,GeneStructPtr gsp)3015 NLM_EXTERN CharPtr ComposeNoteFromNoteStruct (NoteStructPtr nsp, GeneStructPtr gsp)
3016
3017 {
3018 Boolean status;
3019 CharPtr note1=NULL, note2, note3;
3020 Int2 index, index1, index2, len;
3021
3022 for (index=0; index<nsp->note_index; index++) {
3023 if (gsp) {
3024 if (CompareStringWithGsp(gsp, nsp->note[index]) != 0) {
3025 if (nsp->note_annot[index])
3026 note1 = Cat2Strings(nsp->note_annot[index], nsp->note[index], " ", 0);
3027 else
3028 note1 = StringSave(nsp->note[index]);
3029 len = CheckForExtraChars(note1);
3030 if (len == 0)
3031 note1 = MemFree(note1);
3032 else
3033 break;
3034 }
3035 } else {
3036 if (nsp->note_annot[index])
3037 note1 = Cat2Strings(nsp->note_annot[index], nsp->note[index], " ", 0);
3038 else
3039 note1 = StringSave(nsp->note[index]);
3040 len = CheckForExtraChars(note1);
3041 if (len == 0)
3042 note1 = MemFree(note1);
3043 else
3044 break;
3045 }
3046 }
3047 index++;
3048
3049 for (index1=index; index1<nsp->note_index; index1++)
3050 {
3051 status = TRUE;
3052 note2 = nsp->note[index1];
3053 if (gsp && CompareStringWithGsp(gsp, note2) == 0)
3054 continue;
3055
3056 for (index2=0; index2<index1; index2++) {
3057 if (gsp)
3058 if (GeneStringCmp(note2, nsp->note[index2]) == 0)
3059 status = FALSE;
3060 }
3061 if (status == TRUE) {
3062 if (nsp->note_annot[index1])
3063 note2 = Cat2Strings(nsp->note_annot[index1], nsp->note[index1], " ", 0);
3064 else /* rewrite to not always allocate note2 if no annot?????*/
3065 note2 = StringSave(nsp->note[index1]);
3066 len = CheckForExtraChars(note1);
3067 if (NoteCmp(note1, note2) == 0) {
3068 len = 0;
3069 }
3070 if (len > 0) {
3071 if (note1[len-1] == '.') {
3072 note3 = Cat2Strings(note1, note2, " ", -1);
3073 } else {
3074 note3 = Cat2Strings(note1, note2, "; ", -1);
3075 }
3076 note1 = MemFree(note1);
3077 note2 = MemFree(note2);
3078 note1 = note3;
3079 note3 = NULL;
3080 } else {
3081 if (note2) {
3082 note2 = MemFree(note2);
3083 }
3084 }
3085 }
3086 }
3087
3088 return note1;
3089 } /* ComposeNoteFromNoteStruct */
3090
3091 /*************************************************************************
3092 *static Int2 CheckForExtraChars(CharPtr note)
3093 *
3094 * Check for spaces or semi-colons on the ends of notes.
3095 ************************************************************************/
3096
CheckForExtraChars(CharPtr note)3097 static Int2 CheckForExtraChars(CharPtr note)
3098
3099 {
3100 Int2 len=0;
3101
3102 if (note != NULL)
3103 {
3104 len = StringLen(note);
3105 while (len > 0)
3106 {
3107 if (note[len-1] == ' ' || note[len-1] == ';')
3108 note[len-1] = '\0';
3109 else
3110 break;
3111 len--;
3112 }
3113 }
3114
3115 return len;
3116
3117 } /* CheckForExtraChars */
3118
Add_trid(Asn2ffJobPtr ajp,SeqFeatPtr sfp_out)3119 NLM_EXTERN void Add_trid (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out)
3120
3121 {
3122 ImpFeatPtr ifp;
3123 BIG_ID gi = -1;
3124 SeqIdPtr sip, newid=NULL;
3125 ValNodePtr product;
3126 Char buf[MAX_ACCESSION_LEN+5];
3127
3128 ifp = sfp_out->data.value.ptrvalue;
3129 if (StringCmp(ifp->key, "mRNA") != 0) {
3130 return;
3131 }
3132 product = sfp_out->product;
3133 if (product == NULL) {
3134 return;
3135 }
3136 sip = GetProductSeqId(product);
3137 if (sip == NULL) return;
3138 if (sip->choice == SEQID_GI) {
3139 if ((newid = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
3140 SeqIdWrite(newid, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3141 } else {
3142 sprintf(buf, "%ld", sip->data.intvalue);
3143 }
3144 } else {
3145 SeqIdWrite(sip, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3146 }
3147 sfp_out->qual = AddGBQual(sfp_out->qual, "transcript_id", buf);
3148 }
3149
3150 /*************************************************************************
3151 * sfp_out: synthetic SeqFeatPtr of type ImpFeat for use in printing.
3152 *
3153 * This function puts the GI number on a SeqFeatPtr /db_xref of type CDS.
3154 * Checking is first done to see if this sfp is indeed a CDS, then
3155 * the PID number is gotten from the product SeqId
3156 *****************************************************************************/
3157
AddPID(Asn2ffJobPtr ajp,SeqFeatPtr sfp_out,Boolean is_NTorNG)3158 NLM_EXTERN void AddPID (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG)
3159
3160 {
3161 ImpFeatPtr ifp;
3162 BIG_ID gi = -1;
3163 SeqIdPtr sip, new_id=NULL;
3164 ValNodePtr product, vnp;
3165 BioseqPtr p_bsp = NULL;
3166 DbtagPtr db;
3167 Char val[20];
3168 Char buf[MAX_ACCESSION_LEN+1];
3169
3170 ifp = sfp_out->data.value.ptrvalue;
3171 if (StringCmp(ifp->key, "CDS") != 0) {
3172 return;
3173 }
3174 product = sfp_out->product;
3175 if (product == NULL) {
3176 return;
3177 }
3178 sip = GetProductSeqId(product);
3179 if (sip) { /* Get protein bsp */
3180 if (sip->choice == SEQID_GI && is_NTorNG) {
3181 if ((new_id = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
3182 SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3183 SeqIdFree(new_id); /*** need to free it !!! (EY) ***/
3184 } else {
3185 sprintf(buf, "%ld", sip->data.intvalue);
3186 }
3187 sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
3188 } else if ((p_bsp = BioseqFind(sip)) != NULL) {
3189 new_id = GetSeqIdChoice(p_bsp->id);
3190 if (ajp->forgbrel && new_id == NULL) {
3191 ErrPostStr(SEV_ERROR, ERR_ACCESSION_NoAccessNum, "");
3192 } else if (new_id) {
3193 SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER,
3194 MAX_ACCESSION_LEN+1);
3195 sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
3196 }
3197 }
3198 }
3199 if (p_bsp == NULL) {
3200 gi = GetGINumFromSip(sip);
3201 if (gi != -1) {
3202 if (ajp->show_gi) {
3203 val[0] = '\0';
3204 sprintf(val, "PID:g%ld", (long) gi);
3205 if (val[0] != '\0') {
3206 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3207 }
3208 }
3209 if (ajp->show_version) {
3210 val[0] = '\0';
3211 sprintf(val, "GI:%ld", (long) gi);
3212 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3213 }
3214 }
3215 return;
3216 }
3217 for (vnp=p_bsp->id; vnp; vnp=vnp->next) {
3218 if (vnp->choice == SEQID_GENERAL) {
3219 db = vnp->data.ptrvalue;
3220 if (db == NULL) {
3221 continue;
3222 }
3223 val[0] = '\0';
3224 if (StringNCmp(db->db, "PIDe", 4) == 0) {
3225 sprintf(val, "PID:e%ld", (long) db->tag->id);
3226 gi = db->tag->id;
3227 } else if (StringNCmp(db->db, "PIDd", 4) == 0) {
3228 sprintf(val, "PID:d%ld", (long) db->tag->id);
3229 gi = db->tag->id;
3230 } else if (StringNCmp(db->db, "PID", 3) == 0) {
3231 if (db->tag && db->tag->str) {
3232 sprintf(val, "%s:%s", db->db, db->tag->str);
3233 gi = atoi((db->tag->str)+1);
3234 }
3235 }
3236 if (ajp->show_gi && val[0] != '\0') {
3237 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3238 }
3239 /*if (ajp->show_version) {
3240 val[0] = '\0';
3241 sprintf(val, "GI:%ld", (long) gi);
3242 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3243 }*/
3244 }
3245 if (vnp->choice == SEQID_GI) {
3246 if (ajp->show_gi) {
3247 val[0] = '\0';
3248 sprintf(val, "PID:g%ld", (long) vnp->data.intvalue);
3249 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3250 }
3251 if (ajp->show_version) {
3252 val[0] = '\0';
3253 sprintf(val, "GI:%ld", (long) vnp->data.intvalue);
3254 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3255 }
3256 }
3257 }
3258 return;
3259 } /* AddPID */
3260
3261 /***************************************************************************
3262 *Int2 MakeGBSelectNote (CharPtr ptr, SeqFeatPtr sfp)
3263 *
3264 *Adds note to CDS GenBankSelect
3265 ***************************************************************************/
MakeGBSelectNote(CharPtr ptr,SeqFeatPtr sfp)3266 NLM_EXTERN Int2 MakeGBSelectNote (CharPtr ptr, SeqFeatPtr sfp)
3267
3268 {
3269 Boolean found_select=FALSE, found_match=FALSE;
3270 CharPtr acc=NULL;
3271 Int2 number = -1;
3272 ObjectIdPtr oip=NULL, type;
3273 UserFieldPtr ufp;
3274 UserObjectPtr uop=NULL;
3275
3276 if (sfp && (uop=sfp->ext) != NULL)
3277 {
3278 if (uop->_class && (type=uop->type) != NULL)
3279 {
3280 if (StringCmp(uop->_class, "GB-Select") == 0)
3281 found_select = TRUE;
3282 if (type->str)
3283 if (StringCmp(type->str, "SPmatch") == 0)
3284 found_match = TRUE;
3285 if (found_match && found_select)
3286 {
3287 for (ufp=uop->data; ufp; ufp=ufp->next)
3288 {
3289 oip = ufp->label;
3290 if (oip->id == 2)
3291 {
3292 if (ufp->choice == 1)
3293 acc = ufp->data.ptrvalue;
3294 }
3295 else if (oip->id == 3)
3296 {
3297 if (ufp->choice == 2)
3298 {
3299 number = (Int2) (ufp->data.intvalue);
3300 }
3301 }
3302
3303 }
3304 if (number == 1)
3305 sprintf(ptr,
3306 "Identical to Swiss-Prot Accession Number %s", acc);
3307 else if (number == 2 || number == 3)
3308 sprintf(ptr,
3309 "Similar to Swiss-Prot Accession Number %s", acc);
3310 }
3311 }
3312 }
3313 return number;
3314 }
3315
get_prot_feats(GatherContextPtr gcp)3316 NLM_EXTERN Boolean get_prot_feats (GatherContextPtr gcp)
3317 {
3318 BioseqPtr bsp;
3319 OrganizeProtPtr opp;
3320 SeqFeatPtr sfp;
3321 Boolean temp = FALSE;
3322
3323 opp = gcp->userdata;
3324
3325 switch (gcp->thistype)
3326 {
3327 case OBJ_SEQFEAT:
3328 sfp = (SeqFeatPtr) (gcp->thisitem);
3329 if (sfp->data.choice == SEQFEAT_PROT ||
3330 sfp->data.choice == SEQFEAT_REGION ||
3331 sfp->data.choice == SEQFEAT_BOND ||
3332 sfp->data.choice == SEQFEAT_SITE) {
3333 bsp = BioseqFindCore(SeqLocId(sfp->location));
3334 if (gcp->tempload == TRUE) {
3335 temp = TRUE;
3336 }
3337 opp->list = EnlargeSortList(opp->list, opp->size);
3338 opp->size = StoreFeatTemp(opp->list, sfp, opp->size, bsp, NULL,
3339 gcp->entityID, gcp->itemID, gcp->thistype,
3340 gcp->new_loc, NULL, 0, temp);
3341 }
3342 break;
3343 default:
3344 break;
3345 }
3346 return TRUE;
3347 }
3348
3349 /********************************************************************
3350 * Int2 CompareStringWithGsp (GeneStructPtr gsp, CharPtr string)
3351 *
3352 * gsp: GeneStructPtr containing the gene information,
3353 * gene->synonym in is store in gsp->gene with choice 1 (GetGeneRefInfo)
3354 * it is not compared to note string
3355 *
3356 * string: a CharPtr with (possibly) relevant gene information
3357 * (i.e., gene name, allele, product etc.).
3358 *
3359 * A comparison is made between string and the information already
3360 * stored in the gsp. Following the convention for StringCmp,
3361 * "0" is returned if a match is found, otherwise "1" is returned.
3362 * At present (2/7/94) GeneStringCmp is a #define for StringCmp.
3363 ************************************************************************/
3364
CompareStringWithGsp(GeneStructPtr gsp,CharPtr string)3365 NLM_EXTERN Int2 CompareStringWithGsp (GeneStructPtr gsp, CharPtr string)
3366
3367 {
3368 CharPtr ascii, start;
3369 Int2 ascii_len;
3370 ValNodePtr vnp;
3371
3372 for (vnp=gsp->gene; vnp; vnp=vnp->next)
3373 {
3374 if (vnp->choice == 1) {
3375 continue;
3376 }
3377 ascii_len = Sgml2AsciiLen(vnp->data.ptrvalue);
3378 start = ascii = MemNew((size_t) (10+ascii_len));
3379 ascii = Sgml2Ascii(vnp->data.ptrvalue, ascii, ascii_len+1);
3380 if (GeneStringCmp(start, string) == 0)
3381 {
3382 start = MemFree(start);
3383 return 0;
3384 }
3385 start = MemFree(start);
3386 }
3387 vnp=gsp->product;
3388 if (vnp != NULL)
3389 {
3390 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3391 return 0;
3392 }
3393 for (vnp=gsp->standard_name; vnp; vnp=vnp->next)
3394 {
3395 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3396 return 0;
3397 }
3398 if (gsp->map[0] && GeneStringCmp(gsp->map[0], string) == 0)
3399 return 0;
3400 if (gsp->ECNum)
3401 for (vnp=gsp->ECNum; vnp; vnp=vnp->next)
3402 {
3403 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3404 return 0;
3405 }
3406
3407 return 1;
3408 } /* CompareStringWithGsp */
3409
GetDBXrefFromGene(GeneRefPtr grp,SeqFeatPtr sfp)3410 NLM_EXTERN void GetDBXrefFromGene (GeneRefPtr grp, SeqFeatPtr sfp)
3411
3412 {
3413 CharPtr dbase;
3414 DbtagPtr dbtp;
3415 ValNodePtr tmp;
3416 Char buffer[50];
3417
3418 if (grp == NULL) {
3419 return;
3420 }
3421 for (tmp = grp->db; tmp != NULL; tmp=tmp->next) {
3422 dbtp = tmp->data.ptrvalue;
3423 if (dbtp && dbtp->db && dbtp->tag) {
3424 dbase = MemNew(StringLen(dbtp->db) + 3);
3425 sprintf(dbase, "%s:", dbtp->db);
3426 if (dbtp->tag->str) {
3427 sprintf(buffer, "%s%s", dbase, dbtp->tag->str);
3428 sfp->qual = AddGBQual(sfp->qual, "db_xref", buffer);
3429 } else if (dbtp->tag->id) {
3430 sprintf(buffer, "%s%ld", dbase, (long) dbtp->tag->id);
3431 sfp->qual = AddGBQual(sfp->qual, "db_xref", buffer);
3432 }
3433 MemFree(dbase);
3434 }
3435 }
3436
3437 return;
3438 }
3439
3440 /****************************************************************************
3441 * void GetProtRefInfo (GeneStructPtr gsp, NoteStructPtr nsp, ProtRefPtr prp)
3442 *
3443 * gsp: GeneStructPtr containing gene information
3444 * prp: ProtRefPtr from a sfp of type protein or a sfp xref.
3445 *
3446 * If fields are empty on the gsp, and the relevant information
3447 * is given by the prp, that field is filled on the gsp
3448 ****************************************************************************/
GetProtRefInfo(Uint1 format,GeneStructPtr gsp,NoteStructPtr nsp,ProtRefPtr prp)3449 NLM_EXTERN void GetProtRefInfo (Uint1 format, GeneStructPtr gsp, NoteStructPtr nsp, ProtRefPtr prp)
3450 {
3451 ValNodePtr tmp, vnp;
3452
3453 if (prp == NULL) {
3454 return;
3455 }
3456 for (vnp=prp->name; vnp; vnp=vnp->next) {
3457 tmp = ValNodeNew(NULL);
3458 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
3459 gsp->product = tie_next(gsp->product, tmp);
3460 }
3461 for (vnp=prp->ec; vnp; vnp=vnp->next) {
3462 tmp = ValNodeNew(NULL);
3463 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
3464 gsp->ECNum = tie_next(gsp->ECNum, tmp);
3465 }
3466 for (vnp=prp->activity; vnp; vnp=vnp->next) {
3467 tmp = ValNodeNew(NULL);
3468 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
3469 gsp->activity = tie_next(gsp->activity, tmp);
3470 }
3471 if (format != GENPEPT_FMT) {
3472 if (prp->desc) {
3473 SaveNoteToCharPtrStack(nsp, NULL, prp->desc);
3474 }
3475 }
3476 return;
3477 }
3478
3479 /****************************************************************************
3480 *
3481 * sfp: SeqFeatPtr for CDS
3482 * nsp: NoteStructPtr
3483 *
3484 * Used to get comments from the Protein for use in a CDS /note.
3485 *
3486 * Take the main protein ONLY (not sig_peptide mat_peptide)
3487 *
3488 * Will find the Protein Pubs, as they are needed and (presumably) haven't
3489 * been found yet, so as to save "upfront" time when the formatter is
3490 * running in Entrez.
3491 ****************************************************************************/
GetProtRefComment(SeqFeatPtr sfp,BioseqPtr bsp,Asn2ffJobPtr ajp,OrganizeProtPtr opp,NoteStructPtr nsp,Uint1 method)3492 static void GetProtRefComment (SeqFeatPtr sfp, BioseqPtr bsp, Asn2ffJobPtr ajp, OrganizeProtPtr opp, NoteStructPtr nsp, Uint1 method)
3493 {
3494 Boolean first_done=FALSE, protein=FALSE;
3495 CharPtr ptr = NULL, string=NULL, string1=NULL, newstring=NULL, temp, s;
3496 CharPtr conflict_msg_no_protein="Coding region translates with internal stops";
3497 /* CharPtr except_msg_no_protein="Coding region translates with internal stops for reasons explained in citation. "; -- except_msg_no_protein UNUSED */
3498 CharPtr conflict_msg="Protein sequence is in conflict with the conceptual translation";
3499 /* CharPtr except_msg="Protein sequence differs from the conceptual translation for reasons explained in citation. "; -- except_msg UNUSED */
3500 CdRegionPtr cdr=NULL;
3501 Int2 total=0, i;
3502 PubdescPtr pdp;
3503 PubStructPtr psp;
3504 SeqFeatPtr sfp_local=NULL;
3505 ValNodePtr descr, vnp, vnp1, vnp1next, product;
3506 MolInfoPtr mfp;
3507 GatherScope gs;
3508 SeqLocPtr slp;
3509 ProtRefPtr prot_local;
3510 SeqMgrFeatContext fcontext;
3511 SeqMgrDescContext dcontext;
3512 GatherContext gc;
3513 SeqFeatPtr psfp;
3514 ValNodePtr psdp;
3515 ObjMgrDataPtr omdp;
3516 SeqSubmitPtr ssp;
3517 SubmitBlockPtr sbp;
3518 CharPtr prefix = "";
3519
3520 if (ajp->useSeqMgrIndexes) {
3521 sfp_local = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PROT, 0, &fcontext);
3522 while (sfp_local != NULL) {
3523 prot_local = sfp_local->data.value.ptrvalue;
3524 if (prot_local->processed <= 1) {
3525 if (first_done) {
3526 if (StringLen(sfp_local->comment)) {
3527 string1 = CheckEndPunctuation(sfp_local->comment, '\0');
3528 if (StringCmp(string, string1) != 0) {
3529 newstring = Cat2Strings(string, string1, "; ", 0);
3530 string = MemFree(string);
3531 string = newstring;
3532 }
3533 string1 = MemFree(string1);
3534 }
3535 } else {
3536 if (StringLen(sfp_local->comment)) {
3537 string = CheckEndPunctuation(sfp_local->comment, '\0');
3538 first_done = TRUE;
3539 }
3540 }
3541 }
3542 sfp_local = SeqMgrGetNextFeature (bsp, sfp_local, SEQFEAT_PROT, 0, &fcontext);
3543 }
3544 } else if (opp != NULL) {
3545 for (i = 0; i < opp->size; i++) {
3546 if ((sfp_local = opp->list[i].sfp) == NULL) {
3547 continue;
3548 }
3549 if (sfp_local->data.choice != SEQFEAT_PROT) {
3550 continue;
3551 }
3552 prot_local = sfp_local->data.value.ptrvalue;
3553 if (prot_local->processed > 1) {
3554 continue;
3555 }
3556 if (first_done) {
3557 if (StringLen(sfp_local->comment)) {
3558 string1 = CheckEndPunctuation(sfp_local->comment, '\0');
3559 if (StringCmp(string, string1) != 0) {
3560 newstring = Cat2Strings(string, string1, "; ", 0);
3561 string = MemFree(string);
3562 string = newstring;
3563 }
3564 string1 = MemFree(string1);
3565 }
3566 } else {
3567 if (StringLen(sfp_local->comment)) {
3568 string = CheckEndPunctuation(sfp_local->comment, '\0');
3569 first_done = TRUE;
3570 }
3571 }
3572 }
3573 }
3574
3575 if (bsp && (descr=bsp->descr) != NULL) {
3576 for (vnp=descr; vnp; vnp=vnp->next) {
3577 if (vnp->choice == Seq_descr_comment) {
3578 if (first_done) {
3579 if (StringLen(vnp->data.ptrvalue)) {
3580 string1 = CheckEndPunctuation(vnp->data.ptrvalue, '\0');
3581 if (StringCmp(string, string1) != 0) {
3582 newstring = Cat2Strings(string, string1, "; ", 0);
3583 string = MemFree(string);
3584 string = newstring;
3585 }
3586 string1 = MemFree(string1);
3587 }
3588 } else {
3589 if (StringLen(vnp->data.ptrvalue)) {
3590 string = CheckEndPunctuation(vnp->data.ptrvalue, '\0');
3591 first_done = TRUE;
3592 }
3593 }
3594 } else if (vnp->choice == Seq_descr_molinfo) {
3595 mfp = vnp->data.ptrvalue;
3596 if (mfp && mfp->tech > 1 && mfp->tech != 8) {
3597 if (mfp->tech == MI_TECH_concept_trans_a) {
3598 /* s = StringForSeqMethod(method); */
3599 s = NULL;
3600 } else {
3601 s = StringForSeqTech(mfp->tech);
3602 }
3603 if (s!= NULL && *s != '\0') {
3604 ptr = MemNew(StringLen(s) + 10);
3605 sprintf(ptr, "Method: %s", s);
3606 }
3607 if (first_done) {
3608 newstring = Cat2Strings(string, ptr, "; ", 0);
3609 string = MemFree(string);
3610 string = newstring;
3611 } else {
3612 string = StringSave(ptr);
3613 first_done = TRUE;
3614 }
3615 MemFree(ptr);
3616 }
3617 } else if (vnp->choice == Seq_descr_method) {
3618 if (vnp->data.intvalue > 1) {
3619 if (method == METHOD_concept_transl_a) {
3620 /* s = StringForSeqMethod(method);*/
3621 s = NULL;
3622 } else {
3623 s = StringForSeqMethod((Uint1)(vnp->data.intvalue));
3624 }
3625 if (s!= NULL && *s != '\0') {
3626 ptr = MemNew(StringLen(s) + 10);
3627 sprintf(ptr, "Method: %s", s);
3628 }
3629
3630 if (first_done) {
3631 newstring = Cat2Strings(string, ptr, "; ", 0);
3632 string = MemFree(string);
3633 string = newstring;
3634 } else {
3635 string = StringSave(ptr);
3636 first_done = TRUE;
3637 }
3638 MemFree(ptr);
3639 }
3640 }
3641 }
3642 }
3643 /* gather pubs on protein bioseq do not do checking or sorting*/
3644 vnp = NULL;
3645 if (ajp->useSeqMgrIndexes) {
3646 /* finess calls to get_pubs */
3647 MemSet ((Pointer) (&gc), 0, sizeof (GatherContext));
3648 gc.userdata = (Pointer) (&vnp);
3649 gc.entityID = ajp->entityID;
3650 psdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
3651 while (psdp != NULL) {
3652 gc.thistype = OBJ_SEQDESC;
3653 gc.itemID = dcontext.itemID;
3654 gc.thisitem = (Pointer) psdp;
3655 omdp = dcontext.omdp;
3656 if (omdp != NULL) {
3657 gc.parenttype = omdp->datatype;
3658 gc.parentitem = omdp->dataptr;
3659 } else {
3660 gc.parenttype = 0;
3661 gc.parentitem = NULL;
3662 }
3663 get_pubs (&gc);
3664 psdp = SeqMgrGetNextDescriptor (bsp, psdp, Seq_descr_pub, &dcontext);
3665 }
3666 psfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
3667 while (psfp != NULL) {
3668 gc.thistype = OBJ_SEQFEAT;
3669 gc.itemID = dcontext.itemID;
3670 gc.thisitem = (Pointer) psfp;
3671 get_pubs (&gc);
3672 psfp = SeqMgrGetNextFeature (bsp, psfp, SEQFEAT_PUB, 0, &fcontext);
3673 }
3674 omdp = ObjMgrGetData (ajp->entityID);
3675 if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
3676 ssp = (SeqSubmitPtr) omdp->dataptr;
3677 if (ssp != NULL) {
3678 sbp = ssp->sub;
3679 if (sbp != NULL) {
3680 gc.thistype = OBJ_SUBMIT_BLOCK;
3681 gc.itemID = 1;
3682 gc.thisitem = (Pointer) sbp;
3683 get_pubs (&gc);
3684 }
3685 }
3686 }
3687 /* also submit block */
3688 } else {
3689 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
3690 /* MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
3691 gs.ignore[OBJ_SEQENTRY] = FALSE;
3692 gs.ignore[OBJ_BIOSEQ] = FALSE;
3693 gs.ignore[OBJ_SEQDESC] = FALSE;*/
3694 gs.ignore[OBJ_SEQSUB] = TRUE;
3695 gs.ignore[OBJ_SEQSUB_CIT] = TRUE;
3696 slp = ValNodeNew(NULL);
3697 slp->choice = SEQLOC_WHOLE;
3698 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
3699 gs.target = slp;
3700 gs.seglevels = 4;
3701
3702 GatherEntity(ajp->entityID, &vnp, get_pubs, &gs);
3703 if (slp)
3704 SeqLocFree(slp);
3705 }
3706 /* if ((status = CheckPubs(ajp, bsp, &vnp)) < 0) {
3707 ValNodeFree(vnp);
3708 vnp = NULL;
3709 }
3710 vnp = OrganizePubList(vnp); */
3711 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
3712 psp = vnp1->data.ptrvalue;
3713 if ((pdp=psp->descr) != NULL) {
3714 if (pdp->fig) {
3715 total += 32;
3716 total += StringLen(pdp->fig);
3717 }
3718 if (pdp->maploc) {
3719 total += 22;
3720 total += StringLen(pdp->maploc);
3721 }
3722 }
3723 }
3724
3725 if (sfp) {
3726 cdr = (CdRegionPtr) sfp->data.value.ptrvalue;
3727 product = sfp->product;
3728 if (product && SeqLocLen(product))
3729 protein = TRUE;
3730 if (sfp->excpt)
3731 total += 112;
3732 if (cdr && cdr->conflict && (protein || ! sfp->excpt))
3733 total += 112;
3734 }
3735
3736 string1 = (CharPtr) MemNew(total*sizeof(Char));
3737
3738 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
3739 psp = vnp1->data.ptrvalue;
3740 if ((pdp=psp->descr) != NULL) {
3741 if (pdp->fig) {
3742
3743 temp = CheckEndPunctuation(pdp->fig, '\0');
3744 total = StringLen(string1);
3745
3746 sprintf(string1+total, "This sequence comes from %s", temp);
3747 prefix = "; ";
3748 temp = MemFree(temp);
3749 }
3750 if (pdp->maploc) {
3751 total = StringLen(string1);
3752 sprintf(string1+total, "%sMap location %s", prefix, pdp->maploc);
3753 prefix = "; ";
3754 }
3755 }
3756 }
3757
3758 if (sfp) {
3759 if (cdr && cdr->conflict && (protein || ! sfp->excpt)) {
3760 total = StringLen(string1);
3761 sprintf(string1+total, "%s%s", prefix,
3762 protein?conflict_msg:conflict_msg_no_protein);
3763 }
3764 }
3765 if (string && string1) {
3766 newstring = Cat2Strings(string, string1, "; ", 0);
3767 string = MemFree(string);
3768 string1 = MemFree(string1);
3769 } else if (string) {
3770 newstring = string;
3771 } else if (string1) {
3772 newstring = string1;
3773 }
3774
3775 if (newstring) {
3776 SaveNoteToCharPtrStack(nsp, NULL, newstring);
3777 newstring = MemFree(newstring);
3778 }
3779 for (vnp1=vnp; vnp1; vnp1=vnp1next) {
3780 vnp1next = vnp1->next;
3781 psp = vnp1->data.ptrvalue;
3782 FreePubStruct(psp);
3783 MemFree(vnp1);
3784 }
3785 return;
3786 } /* GetProtRefComment */
3787
AddModifsToGBQual(GBEntryPtr gbp,GBQualPtr gbqual)3788 NLM_EXTERN GBQualPtr AddModifsToGBQual (GBEntryPtr gbp, GBQualPtr gbqual)
3789 {
3790 CharPtr ptr;
3791 ValNodePtr descr, man;
3792
3793 descr=BioseqGetSeqDescr(gbp->bsp, Seq_descr_modif, NULL);
3794 if (descr) {
3795 for (man = (ValNodePtr) descr-> data.ptrvalue; man != NULL; man = man -> next){
3796 switch (man -> data.intvalue){
3797 case 3: case 14:
3798 ptr = AsnEnumStr("GIBB-mod",
3799 (Int2) man->data.intvalue);
3800 if (GBQualPresent(ptr, gbqual) == FALSE)
3801 gbqual = AddGBQual(gbqual, ptr, " ");
3802 break;
3803 case 4:
3804 if (GBQualPresent("mitochondrion", gbqual) == FALSE)
3805 gbqual = AddGBQual(gbqual, "mitochondrion", NULL);
3806 break;
3807 case 15:
3808 if (GBQualPresent("insertion_seq", gbqual) == FALSE)
3809 gbqual = AddGBQual(gbqual, "insertion_seq", " ");
3810 break;
3811 case 5: case 6: case 7: case 18: case 19:
3812 ptr = AsnEnumStr("GIBB-mod",
3813 (Int2) man->data.intvalue);
3814 if (GBQualPresent(ptr, gbqual) == FALSE)
3815 gbqual = AddGBQual(gbqual, ptr, NULL);
3816 break;
3817 default:
3818 break;
3819 }
3820 }
3821 }
3822 return gbqual;
3823 } /* AddModifsToGBQual */
3824
3825 /*************************************************************************
3826 *GBQualPtr AddOrgRefModToGBQual (OrgRefPtr orp, GBQualPtr gbqual);
3827 *
3828 *Add the OrgRef.mod to a source feat. Note: a few of the quals added
3829 *may be illegal for a source feature, but the validator will catch them
3830 *in the end.
3831 ***************************************************************************/
AddOrgRefModToGBQual(OrgRefPtr orp,GBQualPtr gbqual)3832 NLM_EXTERN GBQualPtr AddOrgRefModToGBQual (OrgRefPtr orp, GBQualPtr gbqual)
3833
3834 {
3835 CharPtr mod, ptr, temp_ptr;
3836 Char temp[ASN2FF_STD_BUF]; /* ASN2FF_STD_BUF (now 35) is longer than
3837 any qual. */
3838 Int2 index;
3839 ValNodePtr vnp;
3840
3841 if (orp && orp->mod)
3842 {
3843 for (vnp=orp->mod; vnp; vnp=vnp->next)
3844 {
3845 mod = vnp->data.ptrvalue;
3846 if (StringNCmp(mod, "citation", 8) == 0)
3847 continue;
3848 index=0;
3849 for (ptr=mod; *ptr != '\0'; ptr++)
3850 {
3851 index++;
3852 if (*ptr == ' ' || *ptr == '=')
3853 {
3854 ptr++;
3855 index--;
3856 break;
3857 }
3858 }
3859 if (index > ASN2FF_STD_BUF-1)
3860 continue;
3861
3862 temp_ptr = &(temp[0]);
3863 StringNCpy(temp_ptr, mod, index);
3864 temp[index] = '\0';
3865 if ((GBQualNameValid(temp_ptr)) == -1)
3866 continue;
3867 if (ptr)
3868 gbqual = AddGBQual(gbqual, temp_ptr, ptr);
3869 else
3870 gbqual = AddGBQual(gbqual, temp_ptr, NULL);
3871 }
3872 }
3873 return gbqual;
3874 } /* AddOrgRefModToGBQual */
3875
3876 /*************************************************************************
3877 *GBQualPtr AddBioSourceToGBQual (BioSourcePtr biosp, GBQualPtr gbqual);
3878 *
3879 *Add the OrgMod.subtypes and SubSource.subtypes to a source feat.
3880 *Add BioSource.genome to a source feat.
3881 *Note: a few of the quals added may be illegal for a source feature,
3882 *but the validator will catch them in the end.
3883 ***************************************************************************/
3884
3885 static CharPtr organelleQual [] = {
3886 NULL,
3887 NULL,
3888 "plastid:chloroplast",
3889 "plastid:chromoplast",
3890 "mitochondrion:kinetoplast",
3891 "mitochondrion",
3892 "plastid",
3893 NULL,
3894 NULL,
3895 NULL,
3896 NULL,
3897 NULL,
3898 "plastid:cyanelle",
3899 NULL,
3900 NULL,
3901 "nucleomorph",
3902 "plastid:apicoplast",
3903 "plastid:leucoplast",
3904 "plastid:proplastid",
3905 NULL
3906 };
3907
AddBioSourceToGBQual(Asn2ffJobPtr ajp,NoteStructPtr nsp,BioSourcePtr biosp,GBQualPtr gbqual,Boolean new_release)3908 NLM_EXTERN GBQualPtr AddBioSourceToGBQual (Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release)
3909 {
3910 CharPtr qual, val = NULL;
3911 OrgModPtr omp;
3912 OrgNamePtr onp;
3913 SubSourcePtr ssp;
3914 Int2 i;
3915 Int4 id = -1;
3916 DbtagPtr db = NULL;
3917 OrgRefPtr org;
3918 ValNodePtr vnp;
3919 CharPtr s;
3920
3921 if (biosp == NULL)
3922 return gbqual;
3923 if (biosp->genome) {
3924 i = biosp->genome;
3925 if (i > 1 && i < 20) {
3926 val = organelleQual [i];
3927 if (val != NULL) {
3928 gbqual = AddGBQual (gbqual, "organelle", val);
3929 } else if (i < num_genome) {
3930 qual = genome[i];
3931 if (qual && (GBQualNameValid(qual)) != -1) {
3932 if (i == 8) { /*extrachrom*/
3933 gbqual = AddGBQual(gbqual, "note", "extrachromosomal");
3934 } else {
3935 gbqual = AddGBQual(gbqual, qual, val);
3936 }
3937 } else if (qual && i == 8) {
3938 gbqual = AddGBQual(gbqual, "note", "extrachromosomal");
3939 }
3940 }
3941 }
3942 }
3943 org = (OrgRefPtr) biosp->org;
3944 if (org) {
3945 if ((onp = (OrgNamePtr) org->orgname) != NULL) {
3946 for (omp=onp->mod; omp != NULL; omp=omp->next) {
3947 for (i=0; orgmod_subtype[i].name != NULL; i++) {
3948 if (omp->subtype == orgmod_subtype[i].num)
3949 break;
3950 }
3951 if (orgmod_subtype[i].name == NULL) {
3952 continue;
3953 }
3954 if (orgmod_subtype[i].num == 253) { /* old_lineage */
3955 continue;
3956 }
3957 if (orgmod_subtype[i].num == 254) { /* old_name */
3958 continue;
3959 }
3960 qual = orgmod_subtype[i].name;
3961 if (orgmod_subtype[i].num == 21) { /* nat_hos */
3962 qual = "specific_host";
3963 }
3964 if ((val = omp->subname) == NULL)
3965 val = "";
3966 if ((GBQualNameValid(qual)) != -1) {
3967 gbqual = AddGBQual(gbqual, qual, val);
3968 } else {
3969 s = MemNew(StringLen(val) +
3970 StringLen(qual) + 3);
3971 sprintf(s, "%s: %s", qual, val);
3972 CpNoteToCharPtrStack(nsp, NULL, s);
3973 }
3974 }
3975 }
3976 /* add db_xref */
3977 val = NULL;
3978 for (vnp=org->db; vnp; vnp=vnp->next) {
3979 id = -1;
3980 db = (DbtagPtr) vnp->data.ptrvalue;
3981 if (db && db->db) {
3982 for (i =0; i < DBNUM; i++) {
3983 if (StringCmp(db->db, dbtag[i]) == 0) {
3984 id = i;
3985 break;
3986 }
3987 }
3988 if (id == -1) {
3989 continue; /* unknown dbtag */
3990 }
3991 }
3992 if (db->tag && db->tag->str) {
3993 val = MemNew(StringLen(db->db)+StringLen(db->tag->str)+2);
3994 sprintf(val, "%s:%s", db->db, db->tag->str);
3995 } else if (db->tag) {
3996 val = MemNew(StringLen(db->db)+16);
3997 sprintf(val, "%s:%ld", db->db, (long) db->tag->id);
3998 }
3999 if (val[0] != '\0') {
4000 gbqual = AddGBQual(gbqual, "db_xref", val);
4001 MemFree(val);
4002 }
4003 }
4004 }
4005 for (ssp = biosp->subtype; ssp != NULL; ssp=ssp->next) {
4006 qual = NULL;
4007 if (ssp->subtype == 255) {
4008 qual = "note";
4009 } else if (ssp->subtype > num_subtype) {
4010 qual = NULL;
4011 } else if (ssp->subtype > 0) {
4012 qual = subtype[ssp->subtype - 1];
4013 } else {
4014 qual = "?";
4015 }
4016 val = ssp->name;
4017 if (ssp->subtype != 14 && ssp->subtype != 15) {
4018 if (val == NULL)
4019 val = "";
4020 }
4021 if ((GBQualNameValid(qual)) == -1) {
4022 if (qual == NULL) {
4023 qual = "?";
4024 }
4025 s = MemNew(StringLen(val) + StringLen(qual) + 3);
4026 sprintf(s, "%s: %s", qual, val);
4027 CpNoteToCharPtrStack(nsp, NULL, s);
4028 } else {
4029 gbqual = AddGBQual(gbqual, qual, val);
4030 }
4031 }
4032 if (biosp->is_focus == TRUE) {
4033 gbqual = AddGBQual(gbqual, "focus", NULL);
4034 }
4035 return gbqual;
4036 } /* AddBioSourceToGBQual */
4037
4038 /****************************************************************************
4039 *PrintImpFeatEx
4040 *
4041 * This code prints out an ImpFeat in GenBank and HTML format.
4042 *
4043 ****************************************************************************/
PrintImpFeatEx(Asn2ffJobPtr ajp,BioseqPtr bsp,SeqFeatPtr sfp,BIG_ID gi,Int2 entityID,Uint4 itemID)4044 NLM_EXTERN Int2 PrintImpFeatEx (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp, BIG_ID gi, Int2 entityID, Uint4 itemID)
4045 {
4046 CharPtr flatloc_ptr, key, loc;
4047 GBQualPtr gbqp;
4048 ImpFeatPtr ifp;
4049 Uint1 class_qual, format=ajp->format;
4050 Int2 class_equal, gbqual_index;
4051 static CharPtr buf = NULL;
4052 Uint2 retval;
4053 ValNodePtr seqid;
4054 CharPtr p, q;
4055
4056 if (sfp == NULL)
4057 return -1;
4058 if (sfp->data.choice != SEQFEAT_IMP)
4059 return -1;
4060 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4061 key = ifp->key;
4062 loc = ifp->loc;
4063
4064 for (seqid = ajp->id_print; seqid; seqid=seqid->next) {
4065 if (seqid->choice == SEQID_GI) {
4066 }
4067 }
4068 if (format == EMBL_FMT || format == PSEUDOEMBL_FMT ||
4069 format == EMBLPEPT_FMT)
4070 ff_StartPrint(5, 21, ASN2FF_EMBL_MAX, "FT");
4071 else
4072 ff_StartPrint(5, 21, ASN2FF_GB_MAX, NULL);
4073
4074 if (ajp->slp) {
4075 ff_AddString(key);
4076 } else {
4077 www_featkey(key, gi, entityID, itemID);
4078 }
4079 TabToColumn(22);
4080 if (loc == NULL) {
4081 flatloc_ptr = FlatLoc(bsp, sfp->location);
4082 if (get_www()) {
4083 buf = www_featloc(flatloc_ptr);
4084 ff_AddString(buf);
4085 MemFree(buf);
4086 } else {
4087 ff_AddString(flatloc_ptr);
4088 }
4089 MemFree(flatloc_ptr);
4090 } else {
4091 if (get_www()) {
4092 buf = www_featloc(loc);
4093 ff_AddString(buf);
4094 MemFree(buf);
4095 } else {
4096 ff_AddString(loc);
4097 }
4098 }
4099 if (sfp->partial == TRUE) {
4100 retval = SeqLocPartialCheck(sfp->location);
4101 if (retval == SLP_COMPLETE || retval > SLP_OTHER) {
4102 NewContLine();
4103 ff_AddString("/partial");
4104 }
4105 }
4106 for (gbqp=sfp->qual; gbqp; gbqp=gbqp->next) {
4107 gbqual_index = GBQualNameValid(gbqp->qual);
4108 if (gbqual_index != -1) {
4109 NewContLine();
4110 ff_AddChar( '/');
4111 ff_AddString(gbqp->qual);
4112 class_qual = ParFlat_GBQual_names[gbqual_index].gbclass;
4113 if (class_qual == Class_none) {
4114 class_equal=CheckForEqualSign(gbqp->qual);
4115 if (class_equal == 1)
4116 continue;
4117 }
4118 ff_AddChar('=');
4119 if (class_qual == Class_text &&
4120 StringCmp(gbqp->val, "\"\"") == 0) {
4121 ff_AddString(gbqp->val);
4122 continue;
4123 }
4124 if (get_www() && (class_qual == Class_text
4125 || class_qual == Class_note)) {
4126 buf = www_featloc(gbqp->val);
4127 } else {
4128 buf = StringSave(gbqp->val);
4129 }
4130 if (class_qual == Class_text || class_qual == Class_none
4131 || class_qual == Class_ecnum || class_qual == Class_note)
4132 ff_AddString("\"");
4133 if (class_qual == Class_note) {
4134 /* start of process tildes */
4135 if (StringCmp (gbqp->qual, "note") == 0) {
4136 for (p = buf, q = buf; *p != '\0'; *q++ = *p++) {
4137 if (*p != '~')
4138 continue;
4139 if (p [1] != '~')
4140 *p = '\n';
4141 else
4142 p++;
4143 }
4144 *q = '\0';
4145 }
4146 /* end of process tildes */
4147 www_note_gi(buf);
4148 } else if (class_qual != Class_none) {
4149 if (StringCmp(gbqp->qual, "transl_table") == 0) {
4150 www_gcode(buf);
4151 } else if (StringCmp(gbqp->qual, "db_xref") == 0) {
4152 www_db_xref(buf);
4153 } else if (StringCmp(gbqp->qual, "protein_id") == 0 ||
4154 StringCmp(gbqp->qual, "transcript_id") == 0) {
4155 www_protein_id(buf);
4156 } else {
4157 ff_AddString(buf);
4158 }
4159 }
4160 if (class_qual == Class_text || class_qual == Class_none
4161 || class_qual == Class_ecnum || class_qual == Class_note)
4162 ff_AddString("\"");
4163 if (buf) {
4164 MemFree(buf);
4165 }
4166 } else if (format == GENPEPT_FMT) {
4167 if (StringCmp(gbqp->qual, "site_type") == 0) {
4168 NewContLine();
4169 ff_AddChar('/');
4170 ff_AddString(gbqp->qual);
4171 ff_AddChar('=');
4172 ff_AddString("\"");
4173 ff_AddString(gbqp->val);
4174 ff_AddString("\"");
4175 } else if (StringCmp(gbqp->qual, "bond_type") == 0) {
4176 NewContLine();
4177 ff_AddChar('/');
4178 ff_AddString(gbqp->qual);
4179 ff_AddChar('=');
4180 ff_AddString("\"");
4181 ff_AddString(gbqp->val);
4182 ff_AddString("\"");
4183 } else if (StringCmp(gbqp->qual, "region_name") == 0) {
4184 NewContLine();
4185 ff_AddChar('/');
4186 ff_AddString(gbqp->qual);
4187 ff_AddChar('=');
4188 ff_AddString("\"");
4189 ff_AddString(gbqp->val);
4190 ff_AddString("\"");
4191 } else if (StringCmp(gbqp->qual, "sec_str_type") == 0) {
4192 NewContLine();
4193 ff_AddChar('/');
4194 ff_AddString(gbqp->qual);
4195 ff_AddChar('=');
4196 ff_AddString("\"");
4197 ff_AddString(gbqp->val);
4198 ff_AddString("\"");
4199 } else if (StringCmp(gbqp->qual, "non-std-residue") == 0) {
4200 NewContLine();
4201 ff_AddChar('/');
4202 ff_AddString(gbqp->qual);
4203 ff_AddChar('=');
4204 ff_AddString("\"");
4205 ff_AddString(gbqp->val);
4206 ff_AddString("\"");
4207 } else if (StringCmp(gbqp->qual, "heterogen") == 0) {
4208 NewContLine();
4209 ff_AddChar('/');
4210 ff_AddString(gbqp->qual);
4211 ff_AddChar('=');
4212 ff_AddString("\"");
4213 ff_AddString(gbqp->val);
4214 ff_AddString("\"");
4215 } else if (StringCmp(gbqp->qual, "name") == 0) {
4216 NewContLine();
4217 ff_AddChar('/');
4218 ff_AddString(gbqp->qual);
4219 ff_AddChar('=');
4220 ff_AddString("\"");
4221 ff_AddString(gbqp->val);
4222 ff_AddString("\"");
4223 } else if (StringCmp(gbqp->qual, "coded_by") == 0) {
4224 NewContLine();
4225 ff_AddChar('/');
4226 ff_AddString(gbqp->qual);
4227 ff_AddChar('=');
4228 ff_AddString("\"");
4229 ff_AddString(gbqp->val);
4230 ff_AddString("\"");
4231 }
4232 } else if (ASN2FF_VALIDATE_FEATURES == FALSE) {
4233 NewContLine();
4234 ff_AddChar('/');
4235 ff_AddString(gbqp->qual);
4236 if (gbqp->val != NULL && StringLen(gbqp->val) != 0) {
4237 ff_AddChar('=');
4238 ff_AddString("\"");
4239 ff_AddString(gbqp->val);
4240 ff_AddString("\"");
4241 }
4242 }
4243 }
4244
4245 ff_EndPrint();
4246
4247 return 1;
4248 } /*PrintImpFeatEx */
4249
/*
 * extract_qual
 *
 * Unlinks the node `x` from the singly linked qualifier list whose first
 * node is `*head`.
 *
 * On success the node's `next` field is cleared and the node itself is
 * returned; `*head` is updated when the removed node was the first one.
 * Returns NULL, leaving the list untouched, when the list is empty or
 * `x` is not one of its nodes.  The node is only detached, never freed.
 */
static GBQualPtr extract_qual(GBQualPtr PNTR head, GBQualPtr x)
{
	GBQualPtr PNTR link;

	/* `link` always addresses the pointer that leads to the current node,
	   so removing the first node and removing an interior node are the
	   same operation. */
	for (link = head; *link != NULL; link = &((*link)->next)) {
		if (*link == x) {
			*link = x->next;
			x->next = NULL;
			return x;
		}
	}
	return NULL;
}
/*
 * tie_next_qual
 *
 * Appends the chain starting at `next` to the end of the list starting
 * at `head` and returns the first node of the combined list.  When
 * `head` is NULL the result is simply `next`.
 */
static GBQualPtr tie_next_qual(GBQualPtr head, GBQualPtr next)
{
	GBQualPtr tail;

	if (head == NULL) {
		return next;
	}

	/* walk to the last node of the existing list */
	tail = head;
	while (tail->next != NULL) {
		tail = tail->next;
	}
	tail->next = next;

	return head;
}
4285
4286 /****************************************************************************
4287 *PrintImpFeat
4288 *
4289 * This code prints out an ImpFeat in GenBank and HTML format.
4290 *
4291 ****************************************************************************/
PrintImpFeat(Asn2ffJobPtr ajp,BioseqPtr bsp,SeqFeatPtr sfp)4292 NLM_EXTERN Int2 PrintImpFeat (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp)
4293 {
4294 CharPtr flatloc_ptr, key, loc;
4295 GBQualPtr gbqp;
4296 ImpFeatPtr ifp;
4297 Uint1 class_qual, format=ajp->format;
4298 Int2 class_equal, gbqual_index;
4299 static CharPtr buf = NULL;
4300 Uint2 retval;
4301 Boolean first=TRUE;
4302 GBQualPtr tmp, gbqpnext, head=NULL;
4303
4304 if (sfp == NULL)
4305 return -1;
4306 if (sfp->data.choice != SEQFEAT_IMP)
4307 return -1;
4308 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4309 key = ifp->key;
4310 loc = ifp->loc;
4311
4312 if (format == EMBL_FMT || format == PSEUDOEMBL_FMT ||
4313 format == EMBLPEPT_FMT)
4314 ff_StartPrint(5, 21, ASN2FF_EMBL_MAX, "FT");
4315 else
4316 ff_StartPrint(5, 21, ASN2FF_GB_MAX, NULL);
4317 ff_AddString(key);
4318 TabToColumn(22);
4319 if (loc == NULL) {
4320 flatloc_ptr = FlatLoc(bsp, sfp->location);
4321 if (get_www()) {
4322 buf = www_featloc(flatloc_ptr);
4323 ff_AddString(buf);
4324 MemFree(buf);
4325 } else {
4326 ff_AddString(flatloc_ptr);
4327 }
4328 MemFree(flatloc_ptr);
4329 } else {
4330 if (get_www()) {
4331 buf = www_featloc(loc);
4332 ff_AddString(buf);
4333 MemFree(buf);
4334 } else {
4335 ff_AddString(loc);
4336 }
4337 }
4338 if (sfp->partial == TRUE) {
4339 retval = SeqLocPartialCheck(sfp->location);
4340 if (retval == SLP_COMPLETE || retval > SLP_OTHER) {
4341 NewContLine();
4342 ff_AddString("/partial");
4343 }
4344 }
4345 /* put all /note last */
4346 for (gbqp=sfp->qual; gbqp; gbqp=gbqpnext) {
4347 gbqpnext=gbqp->next;
4348 if (StringCmp(gbqp->qual, "note") == 0) {
4349 tmp = extract_qual(&(sfp->qual), gbqp);
4350 head = tie_next_qual(head, tmp);
4351 }
4352 }
4353 if (head) {
4354 sfp->qual = tie_next_qual(sfp->qual, head);
4355 }
4356 for (gbqp=sfp->qual; gbqp; gbqp=gbqp->next) {
4357 gbqual_index = GBQualNameValid(gbqp->qual);
4358 if (gbqual_index != -1) {
4359 NewContLine();
4360 if (first) {
4361 ff_AddChar( '/');
4362 ff_AddString(gbqp->qual);
4363 }
4364 class_qual = ParFlat_GBQual_names[gbqual_index].gbclass;
4365 if (class_qual == Class_none) {
4366 class_equal=CheckForEqualSign(gbqp->qual);
4367 if (class_equal == 1)
4368 continue;
4369 }
4370 if (first) {
4371 ff_AddChar('=');
4372 }
4373 if (class_qual == Class_text &&
4374 StringCmp(gbqp->val, "\"\"") == 0) {
4375 /* an empty string is considered legal */
4376 ff_AddString(gbqp->val);
4377 continue;
4378 }
4379 if (get_www() && (class_qual == Class_text
4380 || class_qual == Class_note)) {
4381 buf = www_featloc(gbqp->val);
4382 } else {
4383 buf = StringSave(gbqp->val);
4384 }
4385 if (class_qual == Class_text || class_qual == Class_none
4386 || class_qual == Class_ecnum)
4387 ff_AddString("\"");
4388 if (first && class_qual == Class_note)
4389 ff_AddString("\"");
4390 if (class_qual == Class_note) {
4391 www_note_gi(buf);
4392 } else if (class_qual != Class_none) {
4393 if (StringCmp(gbqp->qual, "transl_table") == 0) {
4394 www_gcode(buf);
4395 } else if (StringCmp(gbqp->qual, "db_xref") == 0) {
4396 www_db_xref(buf);
4397 } else {
4398 ff_AddString(buf);
4399 }
4400 }
4401 if (class_qual == Class_text || class_qual == Class_none
4402 || class_qual == Class_ecnum)
4403 ff_AddString("\"");
4404 if (gbqp->next == NULL && class_qual == Class_note)
4405 ff_AddString("\"");
4406 if (buf) {
4407 MemFree(buf);
4408 }
4409 if (class_qual == Class_note) {
4410 if (first == TRUE)
4411 first = FALSE;
4412 }
4413 } else if (format == GENPEPT_FMT) {
4414 if (StringCmp(gbqp->qual, "site_type") == 0) {
4415 NewContLine();
4416 ff_AddChar('/');
4417 ff_AddString(gbqp->qual);
4418 ff_AddChar('=');
4419 ff_AddString("\"");
4420 ff_AddString(gbqp->val);
4421 ff_AddString("\"");
4422 } else if (StringCmp(gbqp->qual, "bond_type") == 0) {
4423 NewContLine();
4424 ff_AddChar('/');
4425 ff_AddString(gbqp->qual);
4426 ff_AddChar('=');
4427 ff_AddString("\"");
4428 ff_AddString(gbqp->val);
4429 ff_AddString("\"");
4430 } else if (StringCmp(gbqp->qual, "region_name") == 0) {
4431 NewContLine();
4432 ff_AddChar('/');
4433 ff_AddString(gbqp->qual);
4434 ff_AddChar('=');
4435 ff_AddString("\"");
4436 ff_AddString(gbqp->val);
4437 ff_AddString("\"");
4438 } else if (StringCmp(gbqp->qual, "sec_str_type") == 0) {
4439 NewContLine();
4440 ff_AddChar('/');
4441 ff_AddString(gbqp->qual);
4442 ff_AddChar('=');
4443 ff_AddString("\"");
4444 ff_AddString(gbqp->val);
4445 ff_AddString("\"");
4446 } else if (StringCmp(gbqp->qual, "non-std-residue") == 0) {
4447 NewContLine();
4448 ff_AddChar('/');
4449 ff_AddString(gbqp->qual);
4450 ff_AddChar('=');
4451 ff_AddString("\"");
4452 ff_AddString(gbqp->val);
4453 ff_AddString("\"");
4454 } else if (StringCmp(gbqp->qual, "heterogen") == 0) {
4455 NewContLine();
4456 ff_AddChar('/');
4457 ff_AddString(gbqp->qual);
4458 ff_AddChar('=');
4459 ff_AddString("\"");
4460 ff_AddString(gbqp->val);
4461 ff_AddString("\"");
4462 } else if (StringCmp(gbqp->qual, "name") == 0) {
4463 NewContLine();
4464 ff_AddChar('/');
4465 ff_AddString(gbqp->qual);
4466 ff_AddChar('=');
4467 ff_AddString("\"");
4468 ff_AddString(gbqp->val);
4469 ff_AddString("\"");
4470 } else if (StringCmp(gbqp->qual, "coded_by") == 0) {
4471 NewContLine();
4472 ff_AddChar('/');
4473 ff_AddString(gbqp->qual);
4474 ff_AddChar('=');
4475 ff_AddString("\"");
4476 ff_AddString(gbqp->val);
4477 ff_AddString("\"");
4478 }
4479 } else if (ASN2FF_VALIDATE_FEATURES == FALSE) {
4480 NewContLine();
4481 ff_AddChar('/');
4482 ff_AddString(gbqp->qual);
4483 if (gbqp->val != NULL && StringLen(gbqp->val) != 0) {
4484 ff_AddChar('=');
4485 ff_AddString("\"");
4486 ff_AddString(gbqp->val);
4487 ff_AddString("\"");
4488 }
4489 }
4490 }
4491
4492 ff_EndPrint();
4493
4494 return 1;
4495 } /*PrintImpFeat */
4496
4497 #define NOEQUALTOTAL 13
CheckForEqualSign(CharPtr qual)4498 NLM_EXTERN Int2 CheckForEqualSign(CharPtr qual)
4499 /* this have to be changed. Tatiana 02.28.95 */
4500 {
4501 Int2 i;
4502 static CharPtr NoEqualSign[NOEQUALTOTAL] = {
4503 "chloroplast",
4504 "chromoplast",
4505 "cyanelle",
4506 "germline",
4507 "kinetoplast",
4508 "macronuclear",
4509 "mitochondrion",
4510 "partial",
4511 "proviral",
4512 "pseudo",
4513 "rearranged",
4514 "virion",
4515 "focus"
4516 };
4517
4518 if (qual == NULL)
4519 return -1;
4520
4521 for (i=0; i < NOEQUALTOTAL; i++)
4522 if (StringICmp(qual, NoEqualSign[i]) == 0)
4523 return 1;
4524
4525 return 0;
4526
4527 }
4528
4529 /*-------------------------- delete_qual() ----------------------------*/
4530 /*************************************************************************
4531 * delete_qual:
4532 * -- return TRUE if found the "qual" in the "qlist", also remove
4533 * the "qual" from list
4534 * 7-8-93
4535 **************************************************************************/
delete_qual(GBQualPtr PNTR qlist,CharPtr qual)4536 NLM_EXTERN Boolean delete_qual(GBQualPtr PNTR qlist, CharPtr qual)
4537 {
4538 GBQualPtr curq, preq;
4539
4540 for (preq = NULL, curq = *qlist; curq != NULL; curq = curq->next) {
4541 if (StringCmp(curq->qual, qual) == 0) {
4542 if (preq == NULL)
4543 preq = *qlist = curq->next;
4544 else
4545 preq->next = curq->next;
4546
4547 curq->next = NULL;
4548 GBQualFree(curq);
4549 curq = NULL;
4550
4551 return (TRUE);
4552 }
4553
4554 preq = curq;
4555 }
4556
4557 return (FALSE);
4558
4559 }
4560