1 /*   asn2ff3.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2ff3.c
27 *
28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov
29 *
30 * Version Creation Date:   7/15/95
31 *
32 *
33 * File Description:
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 *
38 **************************************/
39 #include <asn2ffp.h>
40 #include <a2ferrdf.h>
41 #include <a2ferr.h>
42 #include <utilpub.h>
43 #include <ffprint.h>
44 #include <parsegb.h>
45 #include <sequtil.h>
46 #include <edutil.h>
47 #include <gather.h>
48 #include <explore.h>
49 #include <sqnutils.h>
50 
51 #define METHOD_concept_transl_a 6
52 
53 NLM_EXTERN CharPtr mRNAEvidenceComment PROTO ((UserObjectPtr obj, Boolean add));
54 NLM_EXTERN Int2 ConvertToNAImpFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfp_out, SortStructPtr p));
55 NLM_EXTERN Int2 ConvertToAAImpFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfp_out, SortStructPtr p));
56 NLM_EXTERN Int2 ValidateAAImpFeat PROTO ((SeqFeatPtr sfp, Boolean use_product));
57 NLM_EXTERN Int2 ValidateNAImpFeat PROTO ((SeqFeatPtr sfp));
58 NLM_EXTERN void AddProteinQuals PROTO ((SeqFeatPtr sfp, SeqFeatPtr sfp_out, NoteStructPtr nsp));
59 static void GetGeneticCode PROTO ((CharPtr ptr, SeqFeatPtr sfp));
60 NLM_EXTERN void ComposeGBQuals PROTO((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, GBEntryPtr gbp, SortStructPtr p, Boolean note_pseudo));
61 NLM_EXTERN CharPtr ComposeNoteFromNoteStruct PROTO ((NoteStructPtr nsp, GeneStructPtr gsp));
62 NLM_EXTERN void AddPID PROTO ((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG));
63 NLM_EXTERN void Add_trid PROTO ((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out));
64 NLM_EXTERN Int2 MakeGBSelectNote PROTO ((CharPtr ptr, SeqFeatPtr sfp));
65 static void GetProtRefComment PROTO ((SeqFeatPtr sfp, BioseqPtr bsp, Asn2ffJobPtr ajp, OrganizeProtPtr opp, NoteStructPtr nsp, Uint1 method));
66 NLM_EXTERN Int2 MiscFeatOrphanGenes PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp, Int2 index));
67 Int2 CheckForQual PROTO ((GBQualPtr gbqual, CharPtr string_q, CharPtr string_v));
68 NLM_EXTERN GBQualPtr AddModifsToGBQual PROTO ((GBEntryPtr gbp, GBQualPtr gbqual));
69 NLM_EXTERN GBQualPtr AddOrgRefModToGBQual PROTO ((OrgRefPtr orp, GBQualPtr gbqual));
70 NLM_EXTERN Int2 CheckForEqualSign PROTO ((CharPtr qual));
71 NLM_EXTERN CharPtr GetProductFromCDS PROTO ((ValNodePtr product, ValNodePtr location, Int4 length));
72 NLM_EXTERN void PrepareSourceFeatQuals PROTO ((SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modif));
73 static Int2 CheckForExtraChars PROTO ((CharPtr note));
74 NLM_EXTERN GBQualPtr AddBioSourceToGBQual PROTO((Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release));
75 NLM_EXTERN Boolean delete_qual PROTO((GBQualPtr PNTR qlist, CharPtr qual));
76 
77 typedef struct {
78 	CharPtr name;
79 	Uint1   num;
80 } ORGMOD;
81 
82 #define num_subtype 25
83 CharPtr subtype[num_subtype] = {
84 "chromosome", "map", "clone", "sub_clone", "haplotype", "genotype", "sex",
85 "cell_line", "cell_type", "tissue_type", "clone_lib", "dev_stage",
86 "frequency", "germline", "rearranged", "lab_host", "pop_variant",
87 "tissue_lib", "plasmid", "transposon", "insertion_seq", "plastid", "country",
88 "segment", "endogenous_virus"};
89 
90 #define num_genome 15
91 static CharPtr genome[num_genome] = {"unknown", "genomic", "chloroplast", "chromoplast", "kinetoplast", "mitochondrion", "plastid", "macronuclear",
92 "extrachrom", "plasmid", "transposon", "insertion_seq", "cyanelle", "proviral", "virion"};
93 
94 /*______________________________________________________________________
95 **
96 **	This code is not currently used.
97 **	I do not remove this piece of code, just comment it out.
98 **	-- Dmitri Lukyanov
99 */
100 #if 0
101 
102 #define num_biomol 7
103 static CharPtr biomol[num_biomol] = {"genomic", "RNA", "mRNA", "rRNA",
104 "tRNA", "snRNA", "scRNA"};
105 
106 #endif
107 /*______________________________________________________________________
108 */
109 
110 ORGMOD orgmod_subtype[34] = {
111 	{ "strain", 2 }, {"sub_strain", 3}, {"type", 4}, {"subtype", 5},
112 	{"variety", 6},	{"serotype",7}, {"serogroup",8}, {"serovar", 9},
113 	{"cultivar", 10}, {"pathovar", 11}, {"chemovar", 12}, {"biovar", 13},
114 	{"biotype", 14}, {"group", 15}, {"subgroup", 16}, {"isolate", 17},
115 	{"common", 18}, {"acronym", 19}, {"dosage", 20}, {"nat_host", 21},
116 	{"sub_species", 22}, {"specimen_voucher", 23}, {"authority", 24},
117 	{"forma", 25}, {"forma_specialis", 26}, {"ecotype", 27},
118 	{"synonym", 28}, {"anamorph", 29}, {"teleomorph", 30}, {"breed", 31},
119 	{"old_lineage", 253}, {"old_name", 254}, {"note", 255}, { NULL, 0 }
120 };
121 
122 /*
123 CharPtr dbtag[DBNUM] = {
124   "PIDe", "PIDd", "PIDg", "PID", "FLYBASE",
125   "GDB", "MIM", "SGD", "SWISS-PROT", "CK",
126   "SPTREMBL", "ATCC", "ATCC (inhost)", "ATCC (dna)", "taxon",
127   "BDGP_EST", "dbEST", "dbSTS", "MGD", "PIR",
128   "GI", "RiceGenes", "UniGene", "LocusID", "dbSNP",
129   "RATMAP", "RGD", "CDD", "UniSTS", "InterimID", "COG", "GO", "niaEST",
130   "GeneID", "BDGP_INS", "SoyBase",
131   };
132 */
133 
134 CharPtr dbtag[DBNUM] = {
135   "PIDe", "PIDd", "PIDg", "PID",
136   "AceView/WormGenes",
137   "ATCC",
138   "ATCC(in host)",
139   "ATCC(dna)",
140   "BDGP_EST",
141   "BDGP_INS",
142   "CDD",
143   "CK",
144   "COG",
145   "dbEST",
146   "dbSNP",
147   "dbSTS",
148   "ENSEMBL",
149   "ESTLIB",
150   "FANTOM_DB",
151   "FLYBASE",
152   "GABI",
153   "GDB",
154   "GeneDB",
155   "GeneID",
156   "GI",
157   "GO",
158   "GOA",
159   "IFO",
160   "IMGT/LIGM",
161   "IMGT/HLA",
162   "InterimID",
163   "Interpro",
164   "ISFinder",
165   "JCM",
166   "LocusID",
167   "MaizeDB",
168   "MGD",
169   "MGI",
170   "MIM",
171   "NextDB",
172   "niaEST",
173   "PIR",
174   "PSEUDO",
175   "RATMAP",
176   "RiceGenes",
177   "REMTREMBL",
178   "RGD",
179   "RZPD",
180   "SGD",
181   "SoyBase",
182   "SPTREMBL",
183   "SWISS-PROT",
184   "taxon",
185   "UniGene",
186   "UniSTS",
187   "WorfDB",
188   "WormBase",
189   "ZFIN",
190   };
191 
192 
/*************************************************************************
*	Add_dbxref (defined after the IsRefSeq helper that follows):
*	sfp_out: synthetic SeqFeatPtr of type ImpFeat for use in printing.
*	Add_dbxref puts the db_xref qualifiers of a feature on that SeqFeatPtr.
*************************************************************************/
IsRefSeq(BioseqPtr bsp)197 static Boolean IsRefSeq (BioseqPtr bsp)
198 {
199   SeqIdPtr        sip;
200 
201   if (bsp == NULL)
202     return FALSE;
203   for (sip = bsp->id; sip != NULL; sip = sip->next) {
204     if (sip->choice == SEQID_OTHER)
205       return TRUE;
206   }
207   return FALSE;
208 }
209 
/*
 * Add_dbxref
 *
 *	Copies the database cross-references of feature sfp onto the synthetic
 *	print feature sfp_out as "db_xref" qualifiers of the form "db:tag".
 *
 *	ajp      job options; only ajp->mode is consulted.
 *	sfp_out  feature that receives the qualifiers.
 *	sfp      feature whose dbxref list is read; NULL or an empty list is a
 *	         no-op.
 *	bsp      Bioseq handed to IsRefSeq for the WormBase special case.
 *
 *	Filtering:
 *	  - entries lacking a Dbtag, a database name or a tag are skipped
 *	    (they used to abort the loop or reach sprintf with bad pointers);
 *	  - in RELEASE_MODE databases that are not listed in dbtag[] are dropped;
 *	  - on anything but a coding region the PID databases (the first four
 *	    dbtag[] entries) are dropped.
 */
static void Add_dbxref (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, SeqFeatPtr sfp, BioseqPtr bsp)
{
	/* dbtag[] indices below this value are the PID databases */
	enum { FIRST_NON_PID_DBTAG = 4 };

	Int4 id;
	Int2 i;
	ValNodePtr vnp;
	DbtagPtr db;
	CharPtr val;

	if (sfp == NULL || sfp->dbxref == NULL) {
		return;
	}
	for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
		db = vnp->data.ptrvalue;
		if (db == NULL || db->db == NULL || db->tag == NULL) {
			continue;  /* nothing printable in this entry */
		}

		id = -1;
		for (i = 0; i < DBNUM; i++) {
			if (StringCmp(db->db, dbtag[i]) == 0) {
				id = i;
				break;
			}
		}
		if (id == -1 && StringCmp (db->db, "WormBase") == 0 && IsRefSeq (bsp)) {
			/* unlisted WormBase is still treated as a known, non-PID
			   database, but only on RefSeq records */
			id = FIRST_NON_PID_DBTAG;
		}
		if (id == -1 && ajp->mode == RELEASE_MODE) {
			continue;  /* drop unknown dbtag */
		}
		if (sfp->data.choice != SEQFEAT_CDREGION &&
			id != -1 && id < FIRST_NON_PID_DBTAG) {
			continue;  /* PID is illegal on non-CDS features */
		}

		if (db->tag->str != NULL) {
			/* "db" + ':' + "str" + NUL */
			val = MemNew(StringLen(db->db) + StringLen(db->tag->str) + 2);
			if (val == NULL) {
				continue;
			}
			sprintf(val, "%s:%s", db->db, db->tag->str);
		} else {
			/* 16 spare bytes hold the separator, an Int4 in decimal, the
			   NUL, and the one-letter growth of the "PID:x" spellings */
			val = MemNew(StringLen(db->db) + 16);
			if (val == NULL) {
				continue;
			}
			if (StringNCmp(db->db, "PIDe", 4) == 0) {
				sprintf(val, "PID:e%ld", (long) db->tag->id);
			} else if (StringNCmp(db->db, "PIDd", 4) == 0) {
				sprintf(val, "PID:d%ld", (long) db->tag->id);
			} else if (StringNCmp(db->db, "PIDg", 4) == 0) {
				sprintf(val, "PID:g%ld", (long) db->tag->id);
			} else {
				sprintf(val, "%s:%ld", db->db, (long) db->tag->id);
			}
		}
		sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
		MemFree(val);
	}
	return;
}	/* Add_dbxref */
276 
CheckSeqIdChoice(SeqIdPtr sip)277 static Boolean CheckSeqIdChoice(SeqIdPtr sip)
278 {
279 	Uint1 ch;
280 	SeqIdPtr si;
281 
282 	for (si = sip; si; si=si->next) {
283 		ch = si->choice;
284 		if (ch == SEQID_GI || ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ ||
285 			ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
286 			return TRUE;
287 		}
288 	}
289 	return FALSE;
290 }
291 
/*
 * Return the first id in the chain starting at sip whose choice is GI,
 * GenBank, EMBL, DDBJ, OTHER, TPG, TPE or TPD, or NULL when there is none.
 * The returned pointer refers into the caller's chain; it is not a copy.
 */
static SeqIdPtr GetSeqIdChoice(SeqIdPtr sip)
{
	SeqIdPtr cur = sip;
	Uint1 kind;
	Boolean accepted;

	while (cur != NULL) {
		kind = cur->choice;
		accepted = (Boolean) (kind == SEQID_GI      ||
		                      kind == SEQID_GENBANK ||
		                      kind == SEQID_EMBL    ||
		                      kind == SEQID_DDBJ    ||
		                      kind == SEQID_OTHER   ||
		                      kind == SEQID_TPG     ||
		                      kind == SEQID_TPE     ||
		                      kind == SEQID_TPD);
		if (accepted) {
			return cur;
		}
		cur = cur->next;
	}
	return NULL;
}
306 
CheckSeqIdAccVer(SeqIdPtr sip)307 static Boolean CheckSeqIdAccVer(SeqIdPtr sip)
308 {
309 	Uint1 ch;
310 	SeqIdPtr si;
311 	TextSeqIdPtr tsip;
312 
313 	for (si = sip; si; si=si->next) {
314 		ch = si->choice;
315 		if (ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ || ch == SEQID_OTHER ||
316 			ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
317 			tsip = si->data.ptrvalue;
318 			if (tsip->accession != NULL && tsip->version >= 1) {
319 				return TRUE;
320 			}
321 		}
322 	}
323 	return FALSE;
324 }
325 
/*
 * GetNonGeneQuals
 *
 *	Transfers the GBQuals of sfp_in to sfp_out:
 *	  - gene, product, standard_name, map, EC_number and anticodon are not
 *	    copied;
 *	  - the value of a note is pushed onto nsp rather than copied;
 *	  - a db_xref whose value does not start with a dbtag[] name is dropped
 *	    in RELEASE_MODE;
 *	  - everything else (transl_table included) is copied unchanged.
 *
 *	Afterwards the "evidence" qualifier of sfp_out is reconciled with
 *	sfp_out->exp_ev: values 1 and 2 force "experimental" and
 *	"not_experimental"; when exp_ev is unset an upper-case spelling of
 *	either value is lower-cased in place.
 */
static void GetNonGeneQuals (Int2 mode, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, NoteStructPtr nsp)
{
	static CharPtr not_copied[] = {
		"gene", "product", "standard_name", "map", "EC_number",
		"anticodon"	/* This is done by DotRNAQuals */
	};
	Int2 not_copied_cnt = (Int2) (sizeof(not_copied) / sizeof(not_copied[0]));
	GBQualPtr src, dst;
	CharPtr forced = NULL;
	Boolean has_evidence, skip;
	Int2 k;

	for (src = sfp_in->qual; src != NULL; src = src->next) {
		skip = FALSE;
		for (k = 0; k < not_copied_cnt && !skip; k++) {
			if (StringCmp(src->qual, not_copied[k]) == 0) {
				skip = TRUE;
			}
		}
		if (skip) {
			continue;
		}
		if (StringCmp(src->qual, "note") == 0) {
			CpNoteToCharPtrStack(nsp, NULL, src->val);
			continue;
		}
		if (StringCmp(src->qual, "db_xref") == 0) {
			for (k = 0; k < DBNUM; k++) {
				if (StringNCmp(src->val, dbtag[k], StringLen(dbtag[k])) == 0) {
					break;
				}
			}
			if (mode == RELEASE_MODE && k == DBNUM) {
				continue;  /* drop unknown dbtag */
			}
		}
		/* transl_table lands here too; it is also captured by
		   GetGeneticCode */
		sfp_out->qual = AddGBQual(sfp_out->qual, src->qual, src->val);
	}

	has_evidence = GBQualPresent("evidence", sfp_out->qual);
	if (sfp_out->exp_ev) {
		if (sfp_out->exp_ev == 1) {
			forced = "experimental";
		} else if (sfp_out->exp_ev == 2) {
			forced = "not_experimental";
		}
		if (has_evidence == FALSE) {
			if (forced != NULL) {
				sfp_out->qual =
					AddGBQual(sfp_out->qual, "evidence", forced);
			}
		} else {
			/* overwrite the first existing evidence qualifier */
			for (dst = sfp_out->qual; dst != NULL; dst = dst->next) {
				if (StringCmp(dst->qual, "evidence") != 0) {
					continue;
				}
				dst->val = MemFree(dst->val);
				if (forced != NULL) {
					dst->val = StringSave(forced);
				}
				break;
			}
		}
	} else if (has_evidence == TRUE) {
		for (dst = sfp_out->qual; dst != NULL; dst = dst->next) {
			if (StringCmp(dst->qual, "evidence") != 0) {
				continue;
			}
			/* same length, so the value can be rewritten in place */
			if (StringCmp(dst->val, "EXPERIMENTAL") == 0) {
				StringCpy(dst->val, "experimental");
			} else if (StringCmp(dst->val, "NOT_EXPERIMENTAL") == 0) {
				StringCpy(dst->val, "not_experimental");
			}
			break;
		}
	}
	return;
}	/* GetNonGeneQuals */
401 
402 /*****************************************************************************
403 *LookForPartialImpFeat
404 *
405 *	This function first looks for the sfp->qual of type "partial".
406 *	If found the qual is deleted and the variable "partial" is
407 *	set equal to TRUE.  If "partial" is TRUE or if sfp->partial
408 *	is TRUE, FlatAnnotPartial is called (modified version of Karl Sirotkin's
409 *	program) to see if sfp->partial should really be TRUE.
410 *	WARNING: sfp should be an ImpFeatPtr
411 *
412 *	written by Tom Madden (12/7/93)
413 *****************************************************************************/
LookForPartialImpFeat(SeqFeatPtr sfp,Boolean use_product)414 static void LookForPartialImpFeat(SeqFeatPtr sfp, Boolean use_product)
415 
416 {
417 	Boolean partial=FALSE;
418 	GBQualPtr curq, gbqual, lastq=NULL, tmpqual;
419 
420 	gbqual = sfp->qual;
421 
422 	while (gbqual && (StringCmp(gbqual->qual, "partial")==0))
423 	{
424 		partial = TRUE;
425 		tmpqual = gbqual->next;
426 		gbqual->next = NULL;
427 		gbqual = GBQualFree(gbqual);
428 		gbqual = tmpqual;
429 	}
430 
431 	if (gbqual)
432 	{
433 		for (lastq=gbqual, curq=gbqual->next; curq; curq=curq->next)
434 		{
435 			if (StringCmp(curq->qual, "partial") == 0)
436 			{
437 				partial = TRUE;
438 				lastq->next = curq->next;
439 				curq->next = NULL;
440 				curq = GBQualFree(curq);
441 				curq = lastq;
442 			}
443 			else
444 				lastq = curq;
445 		}
446 	}
447 
448 	sfp->qual = gbqual;
449 
450 	if (partial == TRUE || sfp->partial == TRUE)
451 		sfp->partial = FlatAnnotPartial(sfp, use_product);
452 }	/* LookForPartialImpFeat */
453 
SeqCodeNameGet(SeqCodeTablePtr table,Uint1 residue,Boolean error_msgs)454 static CharPtr SeqCodeNameGet (SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
455 {
456 	int index=residue - table -> start_at;
457 	static CharPtr oops = "?";
458 
459 	if (index >= 0 && index < (int) table -> num){
460 		return (table -> names) [index];
461 	}else {
462 		if (error_msgs == TRUE)
463 			ErrPostEx(SEV_WARNING, CTX_NCBI2GB, 1,
464 			"asn2ff: %c(%d) > max in SeqCode table=%d",
465 			(char) residue, (int) residue, (int) table -> num);
466 		return oops;
467 	}
468 }
469 
470 /***************************************************************************
471 *CharPtr Get3LetterSymbol (Uint1 seq_code, SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
472 *
473 *	if (ASN2FF_IUPACAA_ONLY == TRUE) then
474 *	Check if the residue is legal in iupacaa; if not, return 'X', if so,
475 *	return the three letter code from iupacaa3.
476 *
477 *	if (ASN2FF_IUPACAA_ONLY != TRUE) then
478 *	Then do a translation, if necessary, then get th three letter code
479 *	from iupacaa3.
480 *
481 ***************************************************************************/
482 
Get3LetterSymbol(Uint1 seq_code,SeqCodeTablePtr table,Uint1 residue,Boolean error_msgs)483 static CharPtr Get3LetterSymbol (Uint1 seq_code, SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
484 {
485 	static CharPtr bad_symbol= "OTHER";
486 	CharPtr ptr, retval=NULL;
487 	Int2 index;
488 	SeqCodeTablePtr table_3aa;
489 	SeqMapTablePtr smtp;
490 	Uint1 code, new_residue;
491 
492 	if (residue == 42) {  /* stop codon in NCBIeaa */
493 		retval = "TERM";
494 		return retval;
495 	}
496 	if (ASN2FF_IUPACAA_ONLY == TRUE)
497 		code = Seq_code_iupacaa;
498 	else
499 		code = Seq_code_ncbieaa;
500 
501 	if (code != seq_code)
502 	{/* if code and seq_code are identical, then smtp is NULL?? */
503 		smtp = SeqMapTableFind(seq_code, code);
504 		new_residue = SeqMapTableConvert(smtp, residue);
505 	}
506 	else
507 		new_residue = residue;
508 
509 /* The following looks for non-symbols (255) and "Undetermined" (88) */
510 	if ((int) new_residue == 255 || (int) new_residue == 88)
511 		retval = bad_symbol;
512 	else
513 	{
514 		ptr = SeqCodeNameGet(table, residue, error_msgs);
515 
516 		table_3aa=SeqCodeTableFind (Seq_code_iupacaa3);
517 		if (ptr != NULL && *ptr != '\0' && table_3aa != NULL)
518 		{
519 			for (index=0; index < (int) table_3aa->num; index++)
520 			{
521 				if (StringCmp(ptr, (table_3aa->names) [index]) == 0)
522 				{
523 					retval = (table_3aa->symbols) [index];
524 					break;
525 				}
526 			}
527 		}
528 	}
529 
530 	return retval;
531 
532 }	/* Get3LetterSymbol */
533 
GetNameFromOrgName(OrgNamePtr orgname)534 static CharPtr GetNameFromOrgName(OrgNamePtr orgname)
535 {
536 	BinomialOrgNamePtr bi;
537 	CharPtr name = NULL, virus, newname;
538 	Int2 len=0;
539 	Boolean first;
540 	OrgNamePtr org;
541 
542 	switch(orgname->choice)
543 	{
544 		case 1:			/*binomial*/
545 			bi = (BinomialOrgNamePtr) orgname->data;
546 			len = StringLen(bi->genus);
547 			if (bi->species) {
548 				len += StringLen(bi->species);
549 			}
550 			name = MemNew(len + 2);
551 			StringCpy(name, bi->genus);
552 			if (bi->species) {
553 				name = StringCat(name, " ");
554 				name = StringCat(name, bi->species);
555 			} else {
556 				name = StringCat(name, " sp.");
557 			}
558 		break;
559 		case 2:			/*virus*/
560 			virus = (CharPtr) orgname->data;
561 			name = MemNew(StringLen(virus));
562 			StringCpy(name, virus);
563 		break;
564 		case 3:			/*hybrid*/
565 			first = TRUE;
566 			for (org = (OrgNamePtr) orgname->data; org; org=org->next) {
567 				newname = GetNameFromOrgName(org);
568 				len += StringLen(newname) + 3;
569 			}
570 			name = MemNew(len + 1);
571 			for (org = (OrgNamePtr) orgname->data; org; org=org->next) {
572 				newname = GetNameFromOrgName(org);
573 				if (first == TRUE) {
574 					name = StringCat(name, newname);
575 					first = FALSE;
576 				} else {
577 					name = StringCat(name, " x ");
578 					name = StringCat(name, newname);
579 				}
580 			}
581 		break;
582 		case 4:			/*namedhybrid*/
583 			bi = (BinomialOrgNamePtr) orgname->data;
584 			len = StringLen(bi->genus);
585 			if (bi->species) {
586 				len += StringLen(bi->species);
587 			}
588 			name = MemNew(len + 4);
589 			StringCpy(name, bi->genus);
590 			if (bi->species) {
591 				name = StringCat(name, " x ");
592 				name = StringCat(name, bi->species);
593 			}
594 		break;
595 		case 5:			/*partial*/
596 	/* not implemented yet */
597 			ErrPostStr(SEV_WARNING, 0, 0, "Partial name in OrgName.name");
598 		break;
599 		default:
600 		break;
601 	}
602 	return name;
603 }
604 
PrintSourceFeat(Asn2ffJobPtr ajp,GBEntryPtr gbp)605 NLM_EXTERN void PrintSourceFeat(Asn2ffJobPtr ajp, GBEntryPtr gbp)
606 
607 {
608 	BioseqPtr bsp;
609 	Char location[40];
610 	ImpFeatPtr ifp;
611 	Int2  status = -1, /* mol = -1, -- UNUSED */ i, bsize=0;
612 	NoteStructPtr nsp = NULL;
613 	OrgRefPtr orp=NULL;
614 	SeqFeatPtr sfp_in, sfp_out=NULL, sfp;
615 	SeqIntPtr sip;
616 	SeqLocPtr slp, keep_loc;
617 	ValNodePtr vnp=NULL;
618 	BioSourcePtr biosp = NULL;
619 	OrgModPtr omp;
620 	SortStructPtr pss, ps=NULL, bs = NULL, po=NULL;
621 	DescrStructPtr ds;
622 	CharPtr name;
623 
624 	if (gbp == NULL) {
625 		return;
626 	}
627 	if (gbp->feat) {
628 		nsp=gbp->feat->source_notes;
629 		po = gbp->feat->Orglist;
630 		ps = gbp->feat->Sourcelist;
631 		bs = gbp->feat->Biosrclist;
632 		bsize = gbp->feat->biosrcsize;
633 	}
634 	ds = gbp->source_info;
635 	bsp = gbp->bsp;
636 	if (ajp->slp) {
637 		return;
638 	}
639 	sprintf(location, "1..%ld", (long) (bsp->length));
640     sfp_out = ajp->sfp_out;
641 	ifp = sfp_out->data.value.ptrvalue;
642 	ifp->key = StringSave("source");
643 	if (ajp->slp) {
644 		slp = AsnIoMemCopy(ajp->slp,
645 					(AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
646 	} else {
647 		slp = (SeqLocPtr) ValNodeNew(NULL);
648 		slp->choice = SEQLOC_INT;
649 		sip = SeqIntNew();
650 		sip->from = 0;
651 		sip->to = (bsp->length)-1;
652 		sip->id = SeqIdDup(SeqIdFindBest (bsp->id, 0));
653 		slp->data.ptrvalue = sip;
654 	}
655 	sfp_out->location = slp;
656 	if (ds != NULL) {
657 		vnp = ds->vnp;
658 		keep_loc = AsnIoMemCopy(slp,
659 					(AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
660 		if (vnp && vnp->choice == Seq_descr_source) {
661 			biosp = vnp->data.ptrvalue;
662 			if (biosp->is_focus == TRUE) {
663 				sfp_out->qual = AddGBQual(sfp_out->qual,
664 										"focus", NULL);
665 				if (StringNCmp(gbp->div, "SYN", 3) != 0) {
666 					for (pss=bs, i= 0; pss && i < bsize; i++, pss++) {
667 						if (pss->sfp == NULL)
668 							continue;
669 						sfp_out->location =
670 							SeqLocSubtract(sfp_out->location,
671 												 pss->sfp->location);
672 					}
673 				}
674 			}
675 		}
676 		if (sfp_out->location == NULL) {
677 			sfp_out->location = keep_loc;
678 		}
679 		else
680 			SeqLocFree(keep_loc);
681 	}
682 	flat2asn_install_feature_user_string("source", ifp->loc);
683 	if (gbp->feat && gbp->feat->sfpSourcesize != 0) {
684 		if ((sfp_in = ps->sfp) == NULL) {
685 			GatherItemWithLock(ps->entityID, ps->itemID, ps->itemtype,
686 									&sfp_in, find_item);
687 		}
688 		if (sfp_out->qual != NULL)
689 			sfp_out->qual = GBQualFree(sfp_out->qual);
690 		NoteStructReset(nsp);
691 		PrepareSourceFeatQuals(sfp_in, sfp_out, gbp, FALSE);
692 		Add_dbxref(ajp, sfp_out, sfp_in, bsp);
693 		status = ValidateNAImpFeat(sfp_out);
694 		if (status < 0) {
695 /* source feat is probably missing organism name, add
696 		and try again.  Don't delete old quals! */
697 			if (ds != NULL) {
698 				vnp = ds->vnp;
699 				if (vnp->choice == Seq_descr_source) {
700 					biosp = vnp->data.ptrvalue;
701 					orp = (OrgRefPtr) biosp->org;
702 				} else if (vnp->choice == Seq_descr_org) {
703 					orp = (OrgRefPtr) vnp->data.ptrvalue;
704 				}
705 			} else if (gbp->feat && gbp->feat->sfpOrgsize != 0) {
706 				if ((sfp = po->sfp) == NULL) {
707 					GatherItemWithLock(po->entityID, po->itemID, po->itemtype,
708 									&sfp, find_item);
709 				}
710 				if (sfp != NULL) {
711 					orp = (OrgRefPtr) sfp->data.value.ptrvalue;
712 				}
713 			}
714 			if (orp) {
715 				if (ajp->orgname && orp->orgname) {
716 					name = GetNameFromOrgName(orp->orgname);
717 					sfp_out->qual = AddGBQual(sfp_out->qual,
718 										"organism", name);
719 					MemFree(name);
720 				} else if (orp->taxname) {
721 					sfp_out->qual = AddGBQual(sfp_out->qual,
722 										"organism", orp->taxname);
723 					if (orp->common && sfp_in->comment != NULL)
724 						CpNoteToCharPtrStack(nsp, NULL, orp->common);
725 				} else if (orp->common) {
726 					if (StrStr(orp->common, "virus") ||
727 					    StrStr(orp->common, "Virus") ||
728 					    StrStr(orp->common, "phage") ||
729 					    StrStr(orp->common, "Phage") ||
730 					    StrStr(orp->common, "viroid") ||
731 					    StrStr(orp->common, "Viroid")) {
732 						sfp_out->qual = AddGBQual(sfp_out->qual,
733 											"organism", orp->common);
734 					}
735 				}
736 			}
737 			status = ValidateNAImpFeat(sfp_out);
738 		}
739 	}
740 	if (status < 0) {
741 		if (ds != NULL) {
742 			if ((vnp = ds->vnp) != NULL) {
743 				if (vnp->choice == Seq_descr_source) {
744 					biosp = vnp->data.ptrvalue;
745 					orp = (OrgRefPtr) biosp->org;
746 				} else if (vnp->choice == Seq_descr_org) {
747 					orp = (OrgRefPtr) vnp->data.ptrvalue;
748 				}
749 			}
750 		} else if (gbp->feat && gbp->feat->sfpOrgsize != 0 && po->sfp != NULL) {
751 				orp = (OrgRefPtr) (po->sfp)->data.value.ptrvalue;
752 		} else {
753 			orp = NULL;
754 		}
755 		if (orp) {
756 			if (nsp) {
757 				NoteStructReset(nsp);
758 			}
759 			if (sfp_out->qual != NULL)
760 				sfp_out->qual = GBQualFree(sfp_out->qual);
761 			if (ajp->orgname && orp->orgname) {
762 				name = GetNameFromOrgName(orp->orgname);
763 				sfp_out->qual = AddGBQual(sfp_out->qual,
764 									"organism", name);
765 				MemFree(name);
766 			} else if (orp->taxname) {
767 				sfp_out->qual = AddGBQual(sfp_out->qual,
768 					"organism", orp->taxname);
769 			} else if (orp->common) {
770 				if (StrStr(orp->common, "virus") ||
771 				    StrStr(orp->common, "Virus") ||
772 				    StrStr(orp->common, "phage") ||
773 				    StrStr(orp->common, "Phage") ||
774 				    StrStr(orp->common, "viroid") ||
775 				    StrStr(orp->common, "Viroid")) {
776 					sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
777 															 orp->common);
778 				}
779 			}
780 			if (orp->orgname && orp->orgname->mod) {
781 				omp = orp->orgname->mod;
782 				if (omp->subtype == 0 && omp->subname != NULL) {
783 					CpNoteToCharPtrStack(nsp, NULL, omp->subname);
784 				}
785 			}
786 			sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual, TRUE);
787 			sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
788 		}
789 		if ((vnp=BioseqGetSeqDescr(gbp->bsp, Seq_descr_molinfo, NULL)) != NULL){
790 			/*
791 		 	mfp = vnp->data.ptrvalue;
792 			if (mfp) {
793 				mol = mfp->biomol;
794 			}
795 			-- NO EFFECT */
796 		}
797 		PrepareSourceFeatQuals(NULL, sfp_out, gbp, TRUE);
798 		status = ValidateNAImpFeat(sfp_out);
799 	}
800 /* ----------Organism not found -------------*/
801 	if (status < 0) {
802 		if (sfp_out->qual)
803 			sfp_out->qual = GBQualFree(sfp_out->qual);
804 		sfp_out->qual = AddGBQual(sfp_out->qual, "organism", "unknown");
805 		NoteStructReset(nsp);
806 		if (orp && orp->common)
807 			CpNoteToCharPtrStack(nsp, NULL, orp->common);
808 /*try new first */
809 		if (biosp) {
810 			sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual, TRUE);
811 			if (orp)
812 				sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
813 		}
814 /* try old  then */
815 		sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
816 		PrepareSourceFeatQuals(NULL, sfp_out, gbp, TRUE);
817 		status = ValidateNAImpFeat(sfp_out);
818 	}
819 	flat2asn_delete_feature_user_string();
820 
821 	if (status >= 0 || ASN2FF_VALIDATE_FEATURES == FALSE) {
822 		PrintImpFeat(ajp, gbp->bsp, sfp_out);
823 	}
824 	sfp_out->comment = NULL;
825 	sfp_out->location = SeqLocFree(sfp_out->location);
826 	sfp_out->location = NULL;
827 	sfp_out->product = NULL;
828 	sfp_out->exp_ev = FALSE;
829 	sfp_out->partial = FALSE;
830 	sfp_out->excpt = FALSE;
831 	ifp = sfp_out->data.value.ptrvalue;
832 	if (ifp->key) {
833 		ifp->key = MemFree(ifp->key);
834 	}
835 	if (ifp->loc) {
836 		ifp->loc = MemFree(ifp->loc);
837 	}
838 	if (sfp_out->qual)
839 		sfp_out->qual = GBQualFree(sfp_out->qual);
840 	return;
841 }	/* PrintSourceFeat */
842 
843 /*****************************************************************************
844 *
845 *	Add the quals of the form "/transl_except=(pos: ,aa: )" to the
846 *	SeqFeatPtr sfp_out.
847 *
848 *****************************************************************************/
849 
ComposeCodeBreakQuals(Asn2ffJobPtr ajp,BioseqPtr bsp,SeqFeatPtr sfp_in,SeqFeatPtr sfp_out,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt,NoteStructPtr nsp)850 static void ComposeCodeBreakQuals (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt, NoteStructPtr nsp)
851 
852 {
853 	CdRegionPtr crp;
854 	CharPtr buffer, ptr, pos;
855 	Choice aa;
856 	CodeBreakPtr cbp;
857 	SeqCodeTablePtr table;
858 	SeqLocPtr slp;
859 	Uint1 seq_code=0, the_residue;
860 	Int2 i, buflen;
861 
862 	if ((sfp_in == NULL) || (sfp_in->data.choice != 3)) {
863 		return;
864 	}
865 
866 	crp = (CdRegionPtr) sfp_in->data.value.ptrvalue;
867 
868 	if (crp->code_break != NULL) {
869 		cbp = crp->code_break;
870 		while (cbp != NULL) {
871 			aa = cbp->aa;
872 			switch (aa.choice) {
873 				case 1:
874 					seq_code = 8;
875 					break;
876 				case 2:
877 					seq_code = 7;
878 					break;
879 				case 3:
880 					seq_code = 11;
881 					break;
882 			}
883 			table = NULL;
884 			if (seq_code != 0)
885 				table=SeqCodeTableFind (seq_code);
886 			if (table == NULL) {
887 				continue;
888 			}
889 			if (extra_loc_cnt > 0) {  /* was converted to new coordinates*/
890 				for (i=0; i < extra_loc_cnt; i++) {
891 					if (extra_loc[i] == NULL) {
892 						continue;
893 					}
894 					slp = extra_loc[i];
895 					pos = FlatLoc(bsp, slp);
896 					if (pos) {
897 						the_residue = (Uint1) cbp->aa.value.intvalue;
898 						if (the_residue == 'U') {
899 					CpNoteToCharPtrStack(nsp, NULL, "selenocysteine");
900 						}
901 						ptr = Get3LetterSymbol(seq_code, table,
902 									    the_residue, ajp->error_msgs);
903 						buflen = StringLen(pos) + StringLen(ptr) + 11;
904 						buffer = MemNew(buflen);
905 						sprintf(buffer, "(pos:%s,aa:%s)", pos, ptr);
906 				    	sfp_out->qual = AddGBQual(sfp_out->qual,
907 										    "transl_except", buffer);
908 						MemFree(buffer);
909 						MemFree(pos);
910 			    	} else if (ajp->error_msgs) {
911 				    	ErrPostEx(SEV_WARNING, ERR_FEATURE_CodeBreakLoc,
912 								"Invalid Code-break.location: %s", pos);
913 			    	}
914 				}
915 			} else {
916 				slp = NULL;
917 				while ((slp = SeqLocFindNext(cbp->loc, slp)) != NULL) {
918 					pos = FlatLoc(bsp, slp);
919 			    	if (pos) {
920 				    	the_residue = (Uint1) cbp->aa.value.intvalue;
921 						if (the_residue == 'U') {
922 					CpNoteToCharPtrStack(nsp, NULL, "selenocysteine");
923 						}
924 				    	ptr = Get3LetterSymbol(seq_code, table,
925 									    the_residue, ajp->error_msgs);
926 						buflen = StringLen(pos) + StringLen(ptr) + 11;
927 						buffer = MemNew(buflen);
928 				    	sprintf(buffer, "(pos:%s,aa:%s)", pos, ptr);
929 				    	sfp_out->qual = AddGBQual(sfp_out->qual,
930 										    "transl_except", buffer);
931 						MemFree(buffer);
932 						MemFree(pos);
933 			    	} else if (ajp->error_msgs) {
934 				    	ErrPostEx(SEV_WARNING, ERR_FEATURE_CodeBreakLoc,
935 								"Invalid Code-break.location: %s", pos);
936 					}
937 			    }
938 			}
939 			cbp = cbp->next;
940 		}
941 	}
942 
943 	return;
944 
945 }	/* ComposeCodeBreakQuals */
946 
947 /***********************************************************************
948 *void GetGeneticCode(CharPtr ptr, SeqFeatPtr sfp)
949 *
950 *	returns ONLY non-standard (i.e., id not 0 or 1)
951 *	genetic codes.
952 ***********************************************************************/
953 
GetGeneticCode(CharPtr ptr,SeqFeatPtr sfp)954 static void GetGeneticCode(CharPtr ptr, SeqFeatPtr sfp)
955 
956 {
957 	Boolean code_is_one=FALSE;
958 	CdRegionPtr cdr;
959 	GBQualPtr qual;
960 	ValNodePtr gcp, var;
961 
962 	cdr = sfp->data.value.ptrvalue;
963 	gcp = cdr->genetic_code;
964 
965 	if (gcp != NULL)
966 	{
967 		for (var=gcp->data.ptrvalue; var != NULL; var=var->next)
968 		{
969 			if (var->choice == 2)
970 			{
971 				if (var->data.intvalue != 0 )
972 				{
973 					if (var->data.intvalue == 1)
974 						code_is_one = TRUE;
975 					else
976 						sprintf(ptr, "%ld", (long) (var->data.intvalue));
977 				}
978 				break;
979 			}
980 		}
981 		if (*ptr != '\0')
982 		{
983 			for (qual=sfp->qual; qual; qual=qual->next)
984 			{
985 				if (StringCmp("transl_table", qual->qual) == 0 &&
986 				      	  StringCmp(ptr, qual->val) != 0)
987 				{
988 				      	  ErrPostStr(SEV_WARNING,
989 				 		ERR_FEATURE_GcodeAndTTableClash, "");
990 				          break;
991 				}
992 			}
993 		}
994 		else if (code_is_one == TRUE)
995 		{
996 			for (qual=sfp->qual; qual; qual=qual->next)
997 			{
998 				if (StringCmp("transl_table", qual->qual) == 0 &&
999 				      	  StringCmp("1", qual->val) != 0)
1000 				{
1001 				      	  ErrPostStr(SEV_WARNING,
1002 				 		ERR_FEATURE_GcodeAndTTableClash, "");
1003 				          break;
1004 				}
1005 			}
1006 		}
1007 	}
1008 	else
1009 	{
1010 		for (qual=sfp->qual; qual; qual=qual->next)
1011 			if (StringCmp("transl_table", qual->qual) == 0)
1012 			{
1013 				StringCpy(ptr, qual->val);
1014 				break;
1015 			}
1016 	}
1017 
1018 	return;
1019 }	/* GetGeneticCode */
1020 
/***********************************************************************
*static SeqFeatPtr cleanup_sfp(SeqFeatPtr sfp_out)
*
*	Resets the scratch output feature so it can be reused.
*	The comment, location and product pointers are only cleared, not
*	freed.  The ImpFeat key and loc strings and the qualifier list are
*	released.  Returns "sfp_out" (NULL stays NULL).
***********************************************************************/
static SeqFeatPtr cleanup_sfp(SeqFeatPtr sfp_out)
{
	ImpFeatPtr imp;

	if (sfp_out != NULL) {
		/* pointers that are merely dropped */
		sfp_out->comment = NULL;
		sfp_out->location = NULL;
		sfp_out->product = NULL;

		/* flags back to their defaults */
		sfp_out->exp_ev = FALSE;
		sfp_out->partial = FALSE;
		sfp_out->excpt = FALSE;

		/* strings held by the ImpFeat */
		imp = sfp_out->data.value.ptrvalue;
		if (imp->key != NULL) {
			imp->key = MemFree(imp->key);
		}
		if (imp->loc != NULL) {
			imp->loc = MemFree(imp->loc);
		}

		if (sfp_out->qual != NULL) {
			sfp_out->qual = GBQualFree(sfp_out->qual);
		}
	}
	return sfp_out;
}
1046 
/***********************************************************************
*static GBQualPtr remove_qual(GBQualPtr head, GBQualPtr x)
*
*	Unlinks node "x" from the list starting at "head" without freeing
*	it; x->next is set to NULL when x was found.  Returns the (possibly
*	new) head.  If "x" is not in the list the list is left untouched.
***********************************************************************/
static GBQualPtr remove_qual(GBQualPtr head, GBQualPtr x)
{
	GBQualPtr	prev;

	if (head == NULL) {
		return NULL;
	}
	if (head == x) {
		head = x->next;
		x->next = NULL;
		return head;
	}
	/* look for the node whose successor is x */
	for (prev = head; prev->next != NULL; prev = prev->next) {
		if (prev->next == x) {
			prev->next = x->next;
			x->next = NULL;
			break;
		}
	}
	return head;
}
1068 
/***********************************************************************
*static void PutGeneFirst(SeqFeatPtr sfp)
*
*	For an ImpFeat (data.choice 8) whose key is "gene", moves the first
*	"gene" qualifier to the front of the qualifier list.  Any other
*	feature, or one without such a qualifier, is left unchanged.
***********************************************************************/
static void PutGeneFirst(SeqFeatPtr sfp)

{
	GBQualPtr rest, gene_q;
	ImpFeatPtr imp;

	if (sfp == NULL || sfp->data.choice != 8 || sfp->qual == NULL) {
		return;
	}
	imp = sfp->data.value.ptrvalue;
	if (StringCmp(imp->key, "gene") != 0) {
		return;
	}

	/* locate the first "gene" qualifier */
	gene_q = sfp->qual;
	while (gene_q != NULL && StringCmp("gene", gene_q->qual) != 0) {
		gene_q = gene_q->next;
	}
	if (gene_q == NULL) {
		return;
	}

	/* unlink it, then put it back in front of what remains */
	rest = remove_qual(sfp->qual, gene_q);
	gene_q->next = rest;
	sfp->qual = gene_q;
}	/* PutGeneFirst */
1100 
/***********************************************************************
*static void PutTranslationLast(SeqFeatPtr sfp)
*
*	For an ImpFeat (data.choice 8) whose key is "CDS", moves the first
*	"translation" qualifier to the end of the qualifier list.  The last
*	node of the list is never examined, so a translation that is
*	already last stays where it is.
***********************************************************************/
static void PutTranslationLast(SeqFeatPtr sfp)

{
	GBQualPtr head, before, cur, tail, moved = NULL;
	ImpFeatPtr imp;

	if (sfp == NULL || sfp->data.choice != 8 || sfp->qual == NULL) {
		return;
	}
	imp = sfp->data.value.ptrvalue;
	if (StringCmp(imp->key, "CDS") != 0) {
		return;
	}

	head = sfp->qual;

	/* search every node except the last one */
	before = NULL;
	for (cur = head; cur->next != NULL; before = cur, cur = cur->next) {
		if (StringCmp("translation", cur->qual) == 0) {
			moved = cur;
			break;
		}
	}

	if (moved != NULL) {
		/* unlink ... */
		if (before == NULL) {
			head = moved->next;
		} else {
			before->next = moved->next;
		}
		/* ... walk to the tail (moved->next is non-NULL here) ... */
		tail = moved->next;
		while (tail->next != NULL) {
			tail = tail->next;
		}
		/* ... and append */
		moved->next = NULL;
		tail->next = moved;
	}
	sfp->qual = head;
}	/* PutTranslationLast */
1144 
1145 static CharPtr mrnaevtext1 = "Derived by automated computational analysis";
1146 static CharPtr mrnaevtext2 = "using gene prediction method:";
1147 static CharPtr mrnaevtext3 = "Supporting evidence includes similarity to:";
1148 
mRNAEvidenceComment(UserObjectPtr uop,Boolean add)1149 NLM_EXTERN CharPtr mRNAEvidenceComment(UserObjectPtr uop, Boolean add)
1150 {
1151     ObjectIdPtr		oip;
1152 	UserFieldPtr	ufp, u, uu;
1153 	CharPtr			method = NULL, ptr, ne_name;
1154 	static Char		temp[20];
1155 	Int2			ptrlen=0, np=0, nd=0, nm=0, ne=0;
1156 	Boolean			is_evidence = FALSE;
1157 	Int4			Locus_id = 0;
1158 
1159 	if (uop == NULL) return NULL;
1160 	if ((oip = uop->type) == NULL) return NULL;
1161 	if (StringCmp(oip->str, "ModelEvidence") != 0) return NULL;
1162 	for (ufp=uop->data; ufp; ufp=ufp->next) {
1163 		oip = ufp->label;
1164 		if (StringCmp(oip->str, "Method") == 0) {
1165 			if (ufp->data.ptrvalue) {
1166 				method = StringSave((CharPtr) ufp->data.ptrvalue);
1167 			}
1168 		}
1169 		if (StringCmp(oip->str, "mRNA")==0) {
1170 			is_evidence = TRUE;
1171 			for (u = (UserFieldPtr) ufp->data.ptrvalue;u; u=u->next) {
1172 				for (uu = (UserFieldPtr) u->data.ptrvalue; uu; uu=uu->next) {
1173 				oip = uu->label;
1174 				if (StringCmp(oip->str, "accession") == 0) {
1175 					nm++;
1176 				}
1177 				}
1178 			}
1179 		}
1180 		if (StringCmp(oip->str, "EST")==0) {
1181 			is_evidence = TRUE;
1182 			for (u = (UserFieldPtr) ufp->data.ptrvalue;u; u=u->next) {
1183 				for (uu = (UserFieldPtr) u->data.ptrvalue;uu; uu=uu->next) {
1184 					oip = uu->label;
1185 					if (StringCmp(oip->str, "count") == 0) {
1186 						ne = uu->data.intvalue;
1187 					}
1188 					if (StringCmp(oip->str, "organism") == 0) {
1189 						ne_name = StringSave(( CharPtr) uu->data.ptrvalue);
1190 					}
1191 				}
1192 			}
1193 		}
1194 	}
1195 	ptrlen = StringLen (mrnaevtext1) + StringLen (mrnaevtext2) + StringLen (mrnaevtext3) + StringLen (method) + 25;
1196 	if (np > 0) {
1197 		ptrlen += StringLen("proteins") + 5;
1198 	}
1199 	if (nd > 0) {
1200 		ptrlen += StringLen("domains") + 5;
1201 	}
1202 	if (nm > 0) {
1203 		ptrlen += StringLen("mRNAs") + 5;
1204 	}
1205 	if (ne > 0) {
1206 		ptrlen += StringLen("ESTs") + StringLen(ne_name) + 10;
1207 	}
1208 	ptr = (CharPtr) MemNew(ptrlen) + 1;
1209 	if (add) {
1210 		if (method != NULL) {
1211 			sprintf (ptr, "%s %s %s.", mrnaevtext1, mrnaevtext2, method);
1212 		} else {
1213 			sprintf (ptr, "%s.", mrnaevtext1);
1214 		}
1215 	}
1216 	if (is_evidence) {
1217 		if (add)  StringCat(ptr, " ");
1218 	 StringCat(ptr, "Supporting evidence includes similarity to:");
1219 	}
1220 	if (np > 0) {
1221 	 sprintf(temp, " %d proteins", np);
1222 	 StringCat(ptr, temp);
1223 	}
1224 	if (nd > 0) {
1225 		if (np > 0)
1226 	 		StringCat(ptr, ",");
1227 	 sprintf(temp, " %d domains", np);
1228 	 StringCat(ptr, temp);
1229 	}
1230 	if (nm > 0) {
1231 		if (np > 0 || nd > 0)
1232 	 	StringCat(ptr, ",");
1233 	 if (nm > 1) {
1234 		 sprintf(temp, " %d mRNAs", nm);
1235 	 } else {
1236 		 sprintf(temp, " %d mRNA", nm);
1237 	 }
1238 	 StringCat(ptr, temp);
1239 	}
1240 	if (ne > 0) {
1241 	if ( np > 0 || nm > 0 || nd > 0)
1242 	 	StringCat(ptr, ",");
1243 	 sprintf(temp, " %d %s ESTs", ne, ne_name);
1244 	 StringCat(ptr, temp);
1245 	}
1246 	return ptr;
1247 }
1248 
mRNAFeatEvidenceComment(SeqFeatPtr sfp_in)1249 static CharPtr mRNAFeatEvidenceComment(SeqFeatPtr sfp_in)
1250 {
1251 	RnaRefPtr		rfp;
1252 	UserObjectPtr	uop, obj;
1253     ObjectIdPtr		oip;
1254 	UserFieldPtr	uf;
1255 
1256 	rfp = (RnaRefPtr) sfp_in->data.value.ptrvalue;
1257 	if (rfp->type != 2) { /* mRNA */
1258 		return NULL;
1259 	}
1260 	if ((uop = sfp_in->ext) == NULL)
1261 		return NULL;
1262 	if ((oip = uop->type) == NULL) return NULL;
1263 	if (StringCmp(oip->str, "CombinedFeatureUserObjects") != 0) return NULL;
1264 	for (uf=uop->data; uf; uf=uf->next) {
1265 		obj = (UserObjectPtr) uf->data.ptrvalue;
1266 		return( mRNAEvidenceComment(obj, TRUE));
1267 	}
1268 	return NULL;
1269 }
1270 
PrintNAFeatByNumber(Asn2ffJobPtr ajp,GBEntryPtr gbp)1271 NLM_EXTERN void PrintNAFeatByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1272 {
1273 
1274 	Boolean loc_ok;
1275 	Char genetic_code[3];
1276 	CharPtr ptr=NULL, sptr;
1277 	ImpFeatPtr ifp;
1278 	SeqFeatPtr sfp_in, sfp_out=NULL;
1279 	Int4 status, total_feats, feat_index;
1280 	SortStructPtr p;
1281 
1282 	if (gbp == NULL || gbp->feat == NULL) {
1283 		return;
1284 	}
1285 	feat_index = ajp->pap_index;
1286 	total_feats=gbp->feat->sfpListsize;
1287 	if (total_feats == 0) {
1288 		return;
1289 	}
1290 	sfp_out=ajp->sfp_out;
1291 	if (sfp_out->qual)
1292 		sfp_out->qual = GBQualFree(sfp_out->qual);
1293 	ifp = sfp_out->data.value.ptrvalue;
1294 	if (ifp->loc)
1295 		ifp->loc = MemFree(ifp->loc);
1296 	if (feat_index < total_feats) {
1297 		p = gbp->feat->List + feat_index;
1298 		if (p == NULL)
1299 			return;
1300 		if (p->tempload == TRUE) {
1301 			GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
1302 									&sfp_in, find_item);
1303 		} else {
1304 			sfp_in = p->sfp;
1305 		}
1306 		if (sfp_in == NULL) {
1307 			return;
1308 		}
1309 		if (ajp->mode == PARTIAL_MODE &&
1310 					sfp_in->data.choice != SEQFEAT_CDREGION) {
1311 			sfp_out = cleanup_sfp(sfp_out);
1312 			return;
1313 		}
1314 		status = ConvertToNAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1315 		if (status < 1) {
1316 			sfp_out = cleanup_sfp(sfp_out);
1317 			return;
1318 		}
1319 		if (p->slp != NULL) {
1320 			sfp_out->location = p->slp;
1321 		}
1322 		ifp = sfp_out->data.value.ptrvalue;
1323 		flat2asn_install_feature_user_string(ifp->key, NULL);
1324 		loc_ok=CheckAndGetNAFeatLoc(gbp->bsp, &ptr, sfp_out, TRUE);
1325 		if (loc_ok == TRUE || ASN2FF_VALIDATE_FEATURES == FALSE) {
1326 			ifp->loc = ptr;
1327 		} else {
1328 			flat2asn_delete_feature_user_string();
1329 			flat2asn_install_feature_user_string(ifp->key, ptr);
1330 			MemFree(ptr);
1331 			if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
1332 				ErrPostEx(SEV_WARNING, ERR_FEATURE_Dropped, "Unparsable location");
1333 			}
1334 			sfp_out = cleanup_sfp(sfp_out);
1335 			flat2asn_delete_feature_user_string();
1336 			return;
1337 		}
1338 		flat2asn_delete_feature_user_string();
1339 		flat2asn_install_feature_user_string(ifp->key, ptr);
1340 		if (p->dup == TRUE) {
1341 			if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
1342 				ErrPostEx(SEV_WARNING, ERR_FEATURE_Duplicated,
1343 					"Duplicated feature dropped");
1344 			}
1345 			sfp_out = cleanup_sfp(sfp_out);
1346 			flat2asn_delete_feature_user_string();
1347 			return;
1348 		}
1349 		if (sfp_in->data.choice == SEQFEAT_CDREGION) {
1350 			ComposeCodeBreakQuals(ajp, gbp->bsp, sfp_in, sfp_out,
1351 							p->extra_loc, p->extra_loc_cnt, p->nsp);
1352 			genetic_code[0]='\0';
1353 			if (ASN2FF_TRANSL_TABLE == TRUE) {
1354 				GetGeneticCode(genetic_code, sfp_in);
1355 				if (genetic_code[0] != '\0') {
1356 					sfp_out->qual = AddGBQual(sfp_out->qual,
1357 							"transl_table", genetic_code);
1358 				}
1359 			}
1360 		}
1361 		if (sfp_in->data.choice == SEQFEAT_GENE) {
1362 			if (ajp->show_gene == FALSE) {
1363 				sfp_out = cleanup_sfp(sfp_out);
1364 				flat2asn_delete_feature_user_string();
1365 				return;
1366 			}
1367 		}
1368 		GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, p->nsp);
1369 		LookForPartialImpFeat(sfp_out, FALSE);
1370 		ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1371 		status = ValidateNAImpFeat(sfp_out);
1372 		if (sfp_in->data.choice == SEQFEAT_CDREGION) {
1373 			PutTranslationLast(sfp_out);
1374 		} else if (sfp_in->data.choice == SEQFEAT_GENE) {
1375 			PutGeneFirst(sfp_out);
1376 		} else if (sfp_in->data.choice == SEQFEAT_RNA) {
1377 			if ((sptr = mRNAFeatEvidenceComment(sfp_in)) != NULL) {
1378 				sfp_out->qual =
1379 				     AddGBQual(sfp_out->qual, "note", sptr);
1380 			}
1381 		}
1382 		if (status >= 0 || ASN2FF_VALIDATE_FEATURES == FALSE) {
1383 			PrintImpFeatEx(ajp, gbp->bsp, sfp_out, gbp->gi, p->entityID, p->itemID);
1384 		}
1385 		flat2asn_delete_feature_user_string();
1386 	}
1387 	sfp_out = cleanup_sfp(sfp_out);
1388 	return;
1389 }	/* PrintNAFeatByNumber */
1390 
1391 /***************************************************************************
1392 *PrintAAFeatByNumber
1393 *
1394 *	This function prints out the genpept SeqFeats.
1395 *
1396 **************************************************************************/
1397 
PrintAAFeatByNumber(Asn2ffJobPtr ajp,GBEntryPtr gbp)1398 NLM_EXTERN void PrintAAFeatByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1399 {
1400 	CharPtr ptr=NULL;
1401 	Char genetic_code[3];
1402 	ImpFeatPtr ifp;
1403 	Int2 status;
1404 	Int4 feat_index, total_feats;
1405 	NoteStructPtr nsp;
1406 	SeqFeatPtr sfp_in, sfp_out=NULL;
1407 	SortStructPtr p;
1408 
1409 	if (gbp == NULL || gbp->feat == NULL) {
1410 		return;
1411 	}
1412 	feat_index = ajp->pap_index;
1413 	total_feats=gbp->feat->sfpListsize;
1414 	if (total_feats == 0) {
1415 		return;
1416 	}
1417 	sfp_out=ajp->sfp_out;
1418 	if (sfp_out->qual) {
1419 		sfp_out->qual = GBQualFree(sfp_out->qual);
1420 	}
1421 	ifp = sfp_out->data.value.ptrvalue;
1422 	if (ifp->loc) {
1423 		ifp->loc = MemFree(ifp->loc);
1424 	}
1425 	if (feat_index < total_feats) {
1426 		p = gbp->feat->List + feat_index;
1427 		if (p == NULL || p->dup == TRUE) {
1428 			return;
1429 		}
1430 		if ((sfp_in = p->sfp) == NULL) {
1431 			GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
1432 									&sfp_in, find_item);
1433 		}
1434 		if (sfp_in == NULL) {
1435 			return;
1436 		}
1437 		nsp = p->nsp;
1438 		switch (sfp_in->data.choice) {
1439 /* Note: the functions that CheckAndGetFeatLoc use for
1440 		checking fails on protein locations sometimes. */
1441 			case SEQFEAT_CDREGION:
1442 				GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
1443 				status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1444 				if (status < 0)
1445 					break;
1446 				ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1447 				GetAAFeatLoc(gbp->bsp, &ptr, sfp_in, TRUE);
1448 				ifp->loc = ptr;
1449 				ptr = FlatLoc(gbp->bsp, sfp_in->location);
1450 				sfp_out->qual =
1451 				     AddGBQual(sfp_out->qual, "coded_by", ptr);
1452 				ptr = MemFree(ptr);
1453 				genetic_code[0]='\0';
1454 				if (ASN2FF_TRANSL_TABLE == TRUE) {
1455 					GetGeneticCode(genetic_code, sfp_in);
1456 					if (genetic_code[0] != '\0')
1457 						sfp_out->qual =
1458 							AddGBQual(sfp_out->qual, "transl_table", genetic_code);
1459 				}
1460 				status = ValidateAAImpFeat(sfp_out, TRUE);
1461 				if (status >= 0)
1462 					PrintImpFeat(ajp, gbp->bsp, sfp_out);
1463 				break;
1464 			case SEQFEAT_PROT:
1465 				GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
1466 				AddProteinQuals(sfp_in, sfp_out, nsp);
1467 				status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1468 				if (status < 0)
1469 					break;
1470 				ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1471 				GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
1472 				ifp->loc = ptr;
1473 				status = ValidateAAImpFeat(sfp_out, FALSE);
1474 				if (status >= 0)
1475 					PrintImpFeat(ajp, gbp->bsp, sfp_out);
1476 				break;
1477 			case SEQFEAT_SEQ:
1478 			case SEQFEAT_IMP:
1479 			case SEQFEAT_REGION:
1480 			case SEQFEAT_COMMENT:
1481 			case SEQFEAT_BOND:
1482 			case SEQFEAT_SITE:
1483 			case SEQFEAT_PSEC_STR:
1484 			case SEQFEAT_NON_STD_RESIDUE:
1485 			case SEQFEAT_HET:
1486 				GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
1487 				status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1488 				if (status < 0)
1489 					break;
1490 				ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1491 				GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
1492 				ifp->loc = ptr;
1493 				status = ValidateAAImpFeat(sfp_out, FALSE);
1494 				if (status >= 0)
1495 					PrintImpFeat(ajp, gbp->bsp, sfp_out);
1496 				break;
1497 			case SEQFEAT_GENE:
1498 				if (ajp->show_gene == FALSE) {
1499 					break;
1500 				}
1501 				GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
1502 				status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1503 				if (status < 0)
1504 					break;
1505 				ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1506 				GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
1507 				ifp->loc = ptr;
1508 				status = ValidateAAImpFeat(sfp_out, FALSE);
1509 				if (status >= 0)
1510 					PrintImpFeat(ajp, gbp->bsp, sfp_out);
1511 				break;
1512 			default:
1513 				break;
1514 		}
1515 	}
1516 	sfp_out = cleanup_sfp(sfp_out);
1517 }	/* PrintAAFeatByNumber */
1518 
1519 
1520 /************************************************************************
1521 *GetProductFromCDS(ValNodePtr product, ValNodePtr location, Int4 length)
1522 *
1523 *	Gets the CDS product, using SeqPortNewByLoc
1524 *	The bsp is that of the protein, and comes from the location.  The bsp
1525 *	is found in the calling program anyway, as it's used to get
1526 *	the EC_NUM.
1527 *	The protein sequence comes back in allocated memory.  The user
1528 *	is responsible for deallocating that.
1529 *
1530 *  A check is made (BioseqFind()) that the protein Bioseq is in memory.
1531 *  This guarantees that a fetch is NOT made if it is not memory, to accomodate
1532 *  the splitting of DNA and protein in Entrez. In this case, it's just
1533 *  translated.
1534 *
1535 *************************************************************************/
1536 
GetProductFromCDS(ValNodePtr product,ValNodePtr location,Int4 bsp_length)1537 NLM_EXTERN CharPtr GetProductFromCDS(ValNodePtr product, ValNodePtr location, Int4 bsp_length)
1538 
1539 {
1540 	Boolean at_end=FALSE;
1541 	CharPtr protein_seq=NULL, start_ptr=NULL;
1542 	Int4 length;
1543 	SeqPortPtr spp;
1544 	Uint1 residue, code;
1545 	BioseqPtr bsp;
1546 	SeqIdPtr sip;
1547 
1548 	if (ASN2FF_IUPACAA_ONLY == TRUE)
1549 		code = Seq_code_iupacaa;
1550 	else
1551 		code = Seq_code_ncbieaa;
1552 
1553 	if (product) {
1554 		sip = SeqLocId(product);
1555 		bsp = BioseqFindCore(sip);
1556 		if (bsp != NULL)    /* Bioseq is (or has been) in memory */ {
1557 			length = SeqLocLen(product);
1558 			if (length > 0) {
1559 				if (SeqLocStart(location) == 0 ||
1560 					SeqLocStop(location) == bsp_length-1)
1561 					at_end = TRUE;
1562 				start_ptr = protein_seq =
1563 					(CharPtr) MemNew((size_t) (length*sizeof(CharPtr)));
1564 				spp = SeqPortNewByLoc(product, code);
1565 				spp->do_virtual = TRUE;
1566 				while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
1567 					if ( !IS_residue(residue) && residue != INVALID_RESIDUE )
1568 						continue;
1569 					if (residue == INVALID_RESIDUE)
1570 						residue = (Uint1) 'X';
1571 					*protein_seq = residue;
1572 					protein_seq++;
1573 				}
1574 				SeqPortFree(spp);
1575 				if (at_end) {
1576 					if (StringLen(start_ptr) < GENPEPT_MIN)
1577 						start_ptr = MemFree(start_ptr);
1578 				}
1579 			}
1580 		}
1581 	}
1582 	return start_ptr;
1583 }
1584 
1585 /**************************************************************************
1586 *CharPtr GettRNAaa (tRNAPtr trna, Boolean error_messages)
1587 *
1588 *	Return a pointer containing the amino acid type.
1589 **************************************************************************/
1590 
GettRNAaa(tRNAPtr trna,Boolean error_msgs)1591 static CharPtr GettRNAaa (tRNAPtr trna, Boolean error_msgs)
1592 
1593 {
1594 	CharPtr ptr=NULL;
1595 	SeqCodeTablePtr table;
1596 	Uint1 seq_code;
1597 /*
1598   The choice values used in the tRNA structure do NOT corresond to
1599   the choice(==ENUMs) of Seq-code_type, and the latter are used
1600   by all the utility functions, so we map them...
1601 */
1602 	if ( trna && trna -> aatype) {
1603 		switch (trna -> aatype) {
1604 			case 1:
1605 				seq_code = 2;
1606 				break;
1607 			case 2:
1608 				seq_code = 8;
1609 				break;
1610 			case 3:
1611 				seq_code = 7;
1612 				break;
1613 			case 4:
1614 				seq_code = 11;
1615 				break;
1616 		}
1617 
1618 		if ((table=SeqCodeTableFind (seq_code)) != NULL)
1619 		  ptr = Get3LetterSymbol(seq_code, table, trna->aa, error_msgs);
1620 	}
1621 
1622 	return ptr;
1623 }	/* GettRNAaa */
1624 
1625 /*************************************************************************
1626 *ComposetRNANote (Asn2ffJobPtr ajp, NoteStructPtr nsp, tRNAPtr trna, )
1627 *
1628 *	Add info from Trna-ext to Note stack in the GeneStructPtr.
1629 **************************************************************************/
1630 
ComposetRNANote(Asn2ffJobPtr ajp,NoteStructPtr nsp,tRNAPtr trna)1631 static void ComposetRNANote(Asn2ffJobPtr ajp, NoteStructPtr nsp, tRNAPtr trna)
1632 {
1633 	/*
1634 	Char buffer[25];
1635 	CharPtr ptr = &(buffer[0]);
1636 	Int2 index;
1637 	Uint1 codon[4];
1638 
1639 	if (! trna)
1640 		return;
1641 
1642 	if ((trna->codon)[0] != 255)
1643 	{
1644 		codon[3] = '\0';
1645 		for (index=0; index<6; index++)
1646 		{
1647 			if ((trna->codon)[index] == 255)
1648 				break;
1649 			if (CodonForIndex((trna->codon)[index], Seq_code_iupacna, codon))
1650 			{
1651 				StringCpy(ptr, (CharPtr) codon);
1652 				ptr += 3;
1653 			}
1654 			else
1655 			{
1656 				*ptr = '?';	ptr++;
1657 			}
1658 			if (index<5 && (trna->codon)[index+1] != 255)
1659 			{
1660 				*ptr = ',';	ptr++;
1661 				*ptr = ' ';	ptr++;
1662 			}
1663 		}
1664 		if ((trna->codon)[1] == 255)
1665 		{
1666 			ptr = &buffer[0];
1667 			SaveNoteToCharPtrStack(nsp, "codon recognized:", ptr);
1668 		}
1669 		else
1670 		{
1671 			ptr = &buffer[0];
1672 			SaveNoteToCharPtrStack(nsp, "codons recognized:", ptr);
1673 		}
1674 	}
1675 	return;
1676 	*/
1677 
1678 	Char  buffer [25];
1679 	Int2  num;
1680 
1681 	num = ComposeCodonsRecognizedString (trna, buffer, sizeof (buffer));
1682 	if (num < 1 || StringHasNoText (buffer)) return;
1683 	if (num == 1) {
1684 			SaveNoteToCharPtrStack(nsp, "codon recognized:", buffer);
1685 	} else {
1686 			SaveNoteToCharPtrStack(nsp, "codons recognized:", buffer);
1687 	}
1688 
1689 }	/* ComposetRNANote */
1690 
1691 
1692 /************************************************************************
1693 *	Make the anticodon qualifier and (possible) note to the tRNA
1694 *	with the following paradigm:
1695 *	0.) First look at the new anticodon slot on tRNAPtr
1696 *	if not found do the rest:
1697 **	1.) Look at SeqFeat.ext for a UserObject using the fct. QualLocWrite,
1698 *	if result is not NULL, use this location in anticodon qualifier;
1699 *
1700 *	2.) Look for an anticodon qualifier, use if no QualLocWrite's
1701 *	result was not zero;
1702 *	3.) Make note if neither 1.) or 2.) was true, or there are
1703 *	multiple codons.
1704 *****************************
1705 *	NEW ALGORITHM 07-15-96
1706 *****************************
1707 *	1) aa present?
1708 *	      print /product = tRNA-aa
1709 *	2) codon recognized present?
1710 *	      print /note="codon recognized: codon"
1711 *	3) anticodon and aa present?
1712 *	      print /anticodon=...
1713 *************************************************************************/
DotRNAQuals(Asn2ffJobPtr ajp,GBEntryPtr gbp,SeqFeatPtr sfp_in,SeqFeatPtr sfp_out,NoteStructPtr nsp,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt)1714 static void DotRNAQuals (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, NoteStructPtr nsp, SeqLocPtr PNTR extra_loc,
1715 Int2 extra_loc_cnt)
1716 {
1717 	Boolean found_anticodon=FALSE /*, found_qual=FALSE -- UNUSED */;
1718 	Char buffer[40];
1719 	CharPtr aa_ptr, newptr=NULL, ptr = &(buffer[0]), tmp=NULL;
1720 	GBQualPtr curq;
1721 	RnaRefPtr rrp;
1722 	tRNAPtr trna=NULL;
1723 	SeqLocPtr slp=NULL;
1724 	SeqIntPtr sip;
1725 
1726 	if (sfp_in == NULL) {
1727 		return;
1728 	}
1729 	if (sfp_in->data.choice != SEQFEAT_RNA) {
1730 		return;
1731 	}
1732 	rrp = sfp_in->data.value.ptrvalue;
1733 /* Look for anticodon struct */
1734 	if (rrp->ext.choice == 2) {
1735 		newptr = MemNew(50*sizeof(Char));
1736 		trna = rrp->ext.value.ptrvalue;
1737 		if ((aa_ptr = GettRNAaa(trna, ajp->error_msgs)) != NULL) {
1738 			if (GBQualPresent("product", sfp_out->qual) == FALSE) {
1739 				sprintf(newptr, "tRNA-%s", aa_ptr);
1740 				sfp_out->qual = AddGBQual(sfp_out->qual, "product", newptr);
1741 			}
1742 		}
1743 		if (trna && (slp = trna->anticodon) != NULL && aa_ptr) {
1744 			if (extra_loc_cnt > 0) {
1745 				slp = extra_loc[0];
1746 			}
1747 			if (slp && slp->choice == SEQLOC_INT) {
1748 				sip = slp->data.ptrvalue;
1749 				sprintf(ptr, "%ld..%ld", (long) sip->from+1, (long) sip->to+1);
1750 				sprintf(newptr, "(pos:%s,aa:%s)", ptr, aa_ptr);
1751 				sfp_out->qual = AddGBQual(sfp_out->qual, "anticodon", newptr);
1752 				found_anticodon=TRUE;
1753 			}
1754 		}
1755 	}
1756 	if (! found_anticodon) {
1757 		if (sfp_in->ext) {	/* Look for UserObject */
1758 			tmp = QualLocWrite(sfp_in->ext, ptr);
1759 			if (tmp) {
1760 				newptr = MemNew(50*sizeof(Char));
1761 				rrp = sfp_in->data.value.ptrvalue;
1762 				trna = rrp->ext.value.ptrvalue;
1763 				aa_ptr = GettRNAaa(trna, ajp->error_msgs);
1764 				if (aa_ptr) {
1765 					sprintf(newptr, "(pos:%s,aa:%s)", ptr, aa_ptr);
1766 					sfp_out->qual =
1767 							AddGBQual(sfp_out->qual, "anticodon", newptr);
1768 					found_anticodon=TRUE;
1769 				}
1770 			}
1771 		}
1772 	}
1773 	if (! found_anticodon) {
1774 		/* Look for anticodon qual if no UserObject found */
1775 		for (curq=sfp_in->qual; curq; curq=curq->next)
1776 			if (StringCmp("anticodon", curq->qual) == 0) {
1777 			    sfp_out->qual =
1778 			       AddGBQual(sfp_out->qual, "anticodon", curq->val);
1779 			    /* found_qual=TRUE; -- NO EFFECT */
1780 			    break;
1781 			}
1782 	}
1783 
1784 /* make note "codon recognized*/
1785 	ComposetRNANote(ajp, nsp, trna);
1786 	MemFree(newptr);
1787 
1788 }	/* DotRNAQuals */
1789 
1790 /**************************************************************************
1791 *ConvertToAAImpFeat
1792 *
1793 *	This code copies a SeqFeat into an ImpFeat format for use in
1794 *	producing GenBank format.  Two SeqFeatPtr's should be passed
1795 *	in as arguments (sfp_in, sfp_out).  On the first call, of a
1796 *	number of calls, sfp_out should be NULL so that memory for
1797 *	ImpFeat can be allocated.  On subsequent calls, sfp_out->data.choice
1798 *	should be "8" (for ImpFeats).
1799 *
1800 *	Written by Tom Madden
1801 *
1802 **************************************************************************/
1803 
ConvertToAAImpFeat(Asn2ffJobPtr ajp,GBEntryPtr gbp,SeqFeatPtr sfp_in,SeqFeatPtr PNTR sfpp_out,SortStructPtr p)1804 NLM_EXTERN Int2 ConvertToAAImpFeat (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfpp_out, SortStructPtr p)
1805 {
1806 	BioseqPtr bsp=NULL;
1807 	Char printbuf[41], temp[65];
1808 	CharPtr ptr;
1809 	ImpFeatPtr ifp, ifp_in;
1810 	Int2 retval=1;
1811 	NoteStructPtr nsp;
1812 	GeneStructPtr gsp;
1813 	ProtRefPtr prot;
1814 	SeqFeatPtr sfp_out;
1815 	SeqIdPtr sip=NULL, xid;
1816 	ValNodePtr vnp, vnp1;
1817 
1818 	sfp_out = *sfpp_out;
1819 
1820 	if (sfp_out->data.choice != SEQFEAT_IMP)
1821 		return -1;
1822 
1823 	ifp = (ImpFeatPtr) sfp_out->data.value.ptrvalue;
1824 
1825 	sfp_out->partial = sfp_in->partial;
1826 	sfp_out->comment = sfp_in->comment;
1827 	sfp_out->exp_ev = sfp_in->exp_ev;
1828 	sfp_out->location = sfp_in->location;
1829 
1830 	nsp = p->nsp;
1831 	gsp = p->gsp;
1832 	if (sfp_out->comment) {
1833 		CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_out->comment);
1834 	}
1835 
1836 	switch (sfp_in->data.choice) {
1837 	case SEQFEAT_CDREGION:
1838 		ifp->key = StringSave("CDS");
1839 		break;
1840 	case SEQFEAT_PROT:
1841 		prot = sfp_in->data.value.ptrvalue;
1842 		if (prot->processed == 0 || prot->processed == 1) {
1843 			GetProtRefInfo(ajp->format, gsp, nsp, prot);
1844 			ifp->key = StringSave("Protein");
1845 		} else if (prot->processed == 2) {
1846 			ifp->key = StringSave("mat_peptide");
1847 		} else if (prot->processed == 3) {
1848 			ifp->key = StringSave("sig_peptide");
1849 		} else if (prot->processed == 4) {
1850 			ifp->key = StringSave("transit_peptide");
1851 		}
1852 		if (sfp_in->location) {
1853 			sip = SeqLocId(sfp_in->location);
1854 			if (sip)
1855 				bsp = BioseqFind(sip);
1856 			if (bsp) {
1857 				vnp = bsp->descr;
1858 				for (vnp = bsp->descr; vnp; vnp = vnp->next) {
1859 					if (vnp->choice != Seq_descr_modif) {
1860 						continue;
1861 					}
1862 					for (vnp1 = vnp->data.ptrvalue; vnp1; vnp1=vnp1->next) {
1863 						if (vnp1->data.intvalue == 1) {
1864 							sfp_out->partial = TRUE;
1865 							break;
1866 					    }
1867 					}
1868 				}
1869 			}
1870 		}
1871 		break;
1872 	case SEQFEAT_SEQ:
1873 		ifp->key = StringSave("misc_feature");
1874 		if ((xid=CheckXrefFeat(gbp->bsp, sfp_in)) != NULL)
1875 		{
1876 			SeqIdWrite(xid, printbuf, PRINTID_FASTA_SHORT, 40);
1877 			ptr = &(temp[0]);
1878 			sprintf(ptr, "Cross-reference: %s", printbuf);
1879 			SaveNoteToCharPtrStack(nsp, NULL, ptr);
1880 		}
1881 		else
1882 			retval = 0;
1883 		break;
1884 	case SEQFEAT_IMP:
1885 		ifp_in = (ImpFeatPtr) sfp_in->data.value.ptrvalue;
1886 		ifp->key = StringSave(ifp_in->key);
1887 		break;
1888 	case SEQFEAT_REGION:
1889 		sfp_out->qual =
1890 		AddGBQual(sfp_out->qual, "region_name", sfp_in->data.value.ptrvalue);
1891 		ifp->key = StringSave("Region");
1892 		break;
1893 	case SEQFEAT_COMMENT:
1894 		ifp->key = StringSave("misc_feature");
1895 		break;
1896 	case SEQFEAT_BOND:
1897 		ptr = AsnEnumStr("SeqFeatData.bond",
1898 							(Int2) (sfp_in->data.value.intvalue));
1899 		sfp_out->qual = AddGBQual(sfp_in->qual, "bond_type", ptr);
1900 		ifp->key = StringSave("Bond");
1901 		break;
1902 	case SEQFEAT_SITE:
1903 		ptr = AsnEnumStr("SeqFeatData.site",
1904 							(Int2) (sfp_in->data.value.intvalue));
1905 		sfp_out->qual = AddGBQual(sfp_out->qual, "site_type", ptr);
1906 		ifp->key = StringSave("Site");
1907 		break;
1908 	case SEQFEAT_PSEC_STR:
1909 		ptr = AsnEnumStr("SeqFeatData.psec-str",
1910 								(Int2) (sfp_in->data.value.intvalue));
1911 		sfp_out->qual = AddGBQual(sfp_out->qual, "sec_str_type", ptr);
1912 		ifp->key = StringSave("SecStr");
1913 		break;
1914 	case SEQFEAT_NON_STD_RESIDUE:
1915 		sfp_out->qual =
1916 			AddGBQual(sfp_out->qual, "non-std-residue",
1917 											sfp_in->data.value.ptrvalue);
1918 		ifp->key = StringSave("NonStdResidue");
1919 		break;
1920 	case SEQFEAT_HET:
1921 		sfp_out->qual =
1922 		     AddGBQual(sfp_out->qual, "heterogen", sfp_in->data.value.ptrvalue);
1923 		ifp->key = StringSave("Het");
1924 		break;
1925 	default:
1926 		if (ajp->error_msgs == TRUE)
1927 			ErrPostStr(SEV_WARNING, ERR_FEATURE_UnknownFeatureKey,
1928 				"Unimplemented type of feat in ConvertToAAImpFeat");
1929 		retval = 1;
1930 		break;
1931 	}
1932 
1933 	return retval;
1934 
1935 }	/* ConvertToAAImpFeat */
1936 
1937 /*****************************************************************************
1938 *  CompareTranslation:
1939 *  -- if bsp != translation's value return FALSE
1940 *****************************************************************************/
CompareTranslation(ByteStorePtr bsp,CharPtr qval)1941 static Boolean CompareTranslation(ByteStorePtr bsp, CharPtr qval)
1942 {
1943 	CharPtr		 		ptr;
1944 	Int2			 residue, residue1, residue2;
1945 	Int4			 len, blen;
1946 	Boolean		 done;
1947 
1948 	if (qval == NULL || bsp == NULL) {
1949 		return FALSE;  /* no comparison */
1950 	}
1951 	len = StringLen(qval);
1952 	BSSeek(bsp, 0, SEEK_SET);
1953 
1954 	blen = BSLen(bsp);
1955 	done = FALSE;
1956 	while ((! done) && (len)) {
1957 		  residue1 = qval[(len-1)];
1958 		  if (residue1 == 'X')	/* remove terminal X */
1959 				len--;
1960 		  else
1961 				done = TRUE;
1962 	 }
1963 	 done = FALSE;
1964 	 while ((! done) && (blen)) {
1965 		  BSSeek(bsp, (blen-1), SEEK_SET);
1966 		  residue2 = BSGetByte(bsp);
1967 		  if (residue2 == 'X')
1968 				blen--;
1969 		  else
1970 				done = TRUE;
1971 	 }
1972 		BSSeek(bsp, 0, SEEK_SET);
1973 		if (blen != len) {
1974 			return FALSE;
1975 		} else {
1976 			for (ptr = qval; *ptr != '\0' &&
1977 								(residue = BSGetByte(bsp)) != EOF; ptr++) {
1978 
1979 				 if (residue != *ptr) {
1980 					return FALSE;
1981 				 }
1982 
1983 			 } /* for */
1984 
1985 		 } /* compare two sequences */
1986 			return TRUE;
1987 } /* check */
1988 
/*****************************************************************************
*GatherProductGeneInfo
*
*	Collects protein information for the product of sfp_in and hands it
*	to GetProtRefInfo / GetProtRefComment together with the GeneStruct
*	(gp->gsp) and NoteStruct (gp->nsp) of the feature.
*
*	Nothing is done when sfp_in has no product, when gp is NULL, or when
*	BioseqFindCore does not find the product Bioseq.
*
*	When ajp->useSeqMgrIndexes is set and SeqMgrGetBestProteinFeature
*	returns a protein feature carrying a ProtRef, that ProtRef is used.
*	Otherwise the features on the product are gathered and the ProtRef
*	of the protein feature with the longest location is used.
*
*	gbp is not used by this function; method is passed on unchanged to
*	GetProtRefComment.
*****************************************************************************/
static void  GatherProductGeneInfo (Asn2ffJobPtr ajp, SeqFeatPtr sfp_in, GBEntryPtr gbp, SortStructPtr gp, Uint1 method)
{
	BioseqPtr p_bsp;
	GatherScope gs;
	GeneStructPtr gsp;
	NoteStructPtr nsp;
	Int2 index;
	Int4 length, longest_length=0;
	ProtRefPtr prot=NULL;
	SeqFeatPtr sfp=NULL;
	SeqIdPtr sip;
	ValNodePtr product=NULL;
	OrganizeProtPtr opp;
	SortStructPtr p;
	Uint2 entityID;

	product = sfp_in->product;
	if (product == NULL)
		return;
	if (gp == NULL)
		return;
	gsp = gp->gsp;
	nsp = gp->nsp;
	sip = SeqLocId(product);
	p_bsp = BioseqFindCore(sip);
	if (p_bsp == NULL)    /* Bioseq is (or has been) in memory */
		return;

	/* indexed path: take the best protein feature straight from the index */
	if (ajp->useSeqMgrIndexes) {
		sfp = SeqMgrGetBestProteinFeature (p_bsp, NULL);
		if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
			prot = (ProtRefPtr) sfp->data.value.ptrvalue;
			if (prot != NULL) {
				GetProtRefInfo(ajp->format, gsp, nsp, prot);
				GetProtRefComment(sfp_in, p_bsp, ajp, NULL, nsp, method);
				return;
			}
		}
	}

	/* gather path: collect the features located on the product */
	entityID = ObjMgrGetEntityIDForPointer(p_bsp);
	opp = (OrganizeProtPtr) MemNew(sizeof(OrganizeProt));
	if (opp == NULL)    /* allocation failed, nothing can be collected */
		return;
	opp->size = 0;
	MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
	MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
	gs.ignore[OBJ_SEQANNOT] = FALSE;
	gs.ignore[OBJ_SEQFEAT] = FALSE;
	gs.get_feats_location = TRUE;
	gs.target = product;
	gs.seglevels = 1;
	GatherEntity(entityID, opp, get_prot_feats, &gs);

	if (opp->size > 0)	{
		/* pick the ProtRef of the protein feature with the longest location */
		prot = NULL;
		p = opp->list;
		for (index=0; index < opp->size; index++, p++) {
			if ((sfp = p->sfp) == NULL) {
				GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
								&sfp, find_item);
			}
			if (sfp == NULL) {
				continue;
			}
			if (sfp->data.choice != SEQFEAT_PROT) {
				continue;
			}
			if ((length=SeqLocLen(sfp->location)) == -1)
				continue;
			if (length > longest_length) {
				prot = sfp->data.value.ptrvalue;
				longest_length = length;
			}
		}
		/* same guard as the indexed path: only report a ProtRef that exists */
		if (prot != NULL) {
			GetProtRefInfo(ajp->format, gsp, nsp, prot);
		}
	}
	GetProtRefComment(sfp_in, p_bsp, ajp, opp, nsp, method);

	/* release what the gather callback stored in the list */
	p = opp->list;
	for (index=0; index < opp->size; index++, p++) {
		if (p->gsp)
			GeneStructFree(p->gsp);
		if (p->nsp)
			NoteStructFree(p->nsp);
	}
	MemFree(opp->list);
	MemFree(opp);

	return;
}
2075 
/**************************************************************************
*ConvertToNAImpFeat
*
*	This code copies a SeqFeat into an ImpFeat format for use in
*	producing GenBank format.  Two features should be passed in as
*	arguments: the input SeqFeatPtr (sfp_in) and a pointer to the
*	output SeqFeatPtr (sfpp_out).
*	return status:
*		1: conversion successful
*		0: no conversion, also no error (data in ASN.1 is lost or put
*			out otherwise)
*		-1: an error
**************************************************************************/
2088 
ProductIsLocal(Uint2 entityID,SeqLocPtr product)2089 static Boolean ProductIsLocal (Uint2 entityID, SeqLocPtr product)
2090 
2091 {
2092   BioseqPtr    bsp;
2093   SeqEntryPtr  sep, oldscope;
2094   SeqIdPtr     sip = NULL;
2095   SeqLocPtr    slp;
2096 
2097   slp = SeqLocFindNext (product, NULL);
2098   while (slp != NULL && sip == NULL) {
2099     sip = SeqLocId (slp);
2100     slp = SeqLocFindNext (product, slp);
2101   }
2102   if (sip == NULL) return FALSE;
2103   sep = GetTopSeqEntryForEntityID (entityID);
2104   if (sep == NULL) return FALSE;
2105   oldscope = SeqEntrySetScope (sep);
2106   bsp = BioseqFind (sip);
2107   SeqEntrySetScope (oldscope);
2108   if (bsp != NULL) return TRUE;
2109   return FALSE;
2110 }
2111 
ConvertToNAImpFeat(Asn2ffJobPtr ajp,GBEntryPtr gbp,SeqFeatPtr sfp_in,SeqFeatPtr PNTR sfpp_out,SortStructPtr gp)2112 NLM_EXTERN Int2 ConvertToNAImpFeat (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfpp_out, SortStructPtr gp)
2113 {
2114 	BioseqPtr bsp=gbp->bsp, pbsp=NULL;
2115 	Boolean found_key, non_pseudo = FALSE;
2116 	CdRegionPtr cdr;
2117 	Char buffer[2], printbuf[41], temp[65];
2118 	CharPtr buf_ptr = &(buffer[0]), protein_seq=NULL, ptr = &(temp[0]);
2119 	NoteStructPtr nsp;
2120 	ImpFeatPtr ifp, ifp_in;
2121 	Int2 retval=1;
2122 	Int4 length=0;
2123 	SeqFeatPtr sfp_out;
2124 	SeqIdPtr xid;
2125 	ValNodePtr product;
2126 	ValNodePtr mod, syn;
2127 	BioSourcePtr biosp;
2128 	OrgRefPtr orp;
2129 	RnaRefPtr rrp;
2130 	ByteStorePtr byte_sp;
2131 	/* Int4 len_cds, len_prot; -- UNUSED */
2132 	Uint1 method = 0;
2133 	GeneRefPtr grp;
2134 	Boolean was_gene = FALSE;
2135 	CharPtr key=NULL, tmp;
2136 	GeneStructPtr gsp;
2137 	CharPtr except_msg="No explanation supplied", loc;
2138 
2139 	sfp_out = *sfpp_out;
2140 
2141 	if (sfp_out->data.choice != SEQFEAT_IMP)
2142 		return -1;
2143 
2144 	ifp = (ImpFeatPtr) sfp_out->data.value.ptrvalue;
2145 
2146 	sfp_out->partial = sfp_in->partial;
2147 	sfp_out->comment = sfp_in->comment;
2148 	sfp_out->exp_ev = sfp_in->exp_ev;
2149 	sfp_out->location = sfp_in->location;
2150 	sfp_out->product = sfp_in->product;
2151 	sfp_out->pseudo = sfp_in->pseudo;
2152 
2153 	found_key = GetNAFeatKey(ajp->show_gene, &(key), sfp_in, sfp_out);
2154 	if (!found_key)
2155 		return -1;
2156 	ifp->key = key;
2157 	nsp = gp->nsp;
2158 	gsp=gp->gsp;
2159 	switch (sfp_in->data.choice)
2160 	{
2161 	case SEQFEAT_BIOSRC:
2162 		biosp = sfp_in->data.value.ptrvalue;
2163 		orp = (OrgRefPtr) biosp->org;
2164 		if (orp) {
2165 			if (orp->taxname) {
2166 				sfp_out->qual = AddGBQual(sfp_out->qual,
2167 									"organism", orp->taxname);
2168 			} else if (orp->common) {
2169 				if (StrStr(orp->common, "virus") ||
2170 				    StrStr(orp->common, "Virus") ||
2171 				    StrStr(orp->common, "phage") ||
2172 				    StrStr(orp->common, "Phage") ||
2173 				    StrStr(orp->common, "viroid") ||
2174 				    StrStr(orp->common, "Viroid"))
2175 				{
2176 					sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
2177 															 orp->common);
2178 				}
2179 			}
2180 /* added from OrgRef.mod 03.20.96 */
2181 			for (mod = orp->mod; mod; mod = mod->next) {
2182 				CpNoteToCharPtrStack(nsp, NULL, (CharPtr) mod->data.ptrvalue);
2183 			}
2184 		} else {
2185 			sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
2186 															 "unknown");
2187 		}
2188 		sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual,
2189 		 	TRUE);
2190 		break;
2191 	case SEQFEAT_CDREGION:
2192 		product = sfp_in->product;
2193 		if (ajp->mode == RELEASE_MODE) {
2194 			if (GBQualPresent("pseudo", sfp_in->qual) == FALSE &&
2195 						gsp->pseudo == FALSE && sfp_in->pseudo == FALSE) {
2196 				non_pseudo = TRUE;
2197 			}
2198 		  if (non_pseudo) {
2199 			if (product == NULL) {
2200 				if (ajp->error_msgs == TRUE) {
2201 					loc = SeqLocPrint(sfp_in->location);
2202 					ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2203 						"Dropping CDS due to missing product: %s", loc);
2204 					MemFree(loc);
2205 				}
2206 				return -1;
2207 			}
2208 			if (ajp->forgbrel && CheckSeqIdChoice(SeqLocId(product)) == FALSE) {
2209 				if (ajp->error_msgs == TRUE) {
2210 					loc = SeqLocPrint(sfp_in->location);
2211 					ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2212 						"Dropping CDS due to missing EMBL/DDBJ/GB protein accession: %s", loc);
2213 					MemFree(loc);
2214 				}
2215 				return -1;
2216 			}
2217 			if (ajp->forgbrel && (pbsp = BioseqFindCore(SeqLocId(product))) == NULL) {
2218 				if (ajp->error_msgs == TRUE) {
2219 					loc = SeqLocPrint(sfp_in->location);
2220 					ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2221 						"Dropping CDS due to missing protein: %s", loc);
2222 					MemFree(loc);
2223 				}
2224 				return -1;
2225 			}
2226 			if (pbsp != NULL) {
2227 				if (ajp->forgbrel && CheckSeqIdChoice(pbsp->id) == FALSE) {
2228 						if (ajp->error_msgs == TRUE) {
2229 							loc = SeqLocPrint(sfp_in->location);
2230 							ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2231 				    		"Dropping CDS due to missing EMBL/DDBJ/GB protein accession: %s", loc);
2232 							MemFree(loc);
2233 						}
2234 						return -1;
2235 				}
2236 				if (ajp->show_version == TRUE) {
2237 					if (CheckSeqIdAccVer(pbsp->id) == FALSE) {
2238 						if (ajp->error_msgs == TRUE) {
2239 							loc = SeqLocPrint(sfp_in->location);
2240 							ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2241 								"Dropping CDS due to missing protein accession.version: %s", loc);
2242 							MemFree(loc);
2243 						}
2244 						return -1;
2245 					}
2246 				}
2247 			}
2248 		  }
2249 		}
2250 		cdr = (CdRegionPtr) sfp_in->data.value.ptrvalue;
2251 		if ((GBQualPresent("codon_start", sfp_in->qual)) == FALSE)
2252 		{ /* Above checks if codon_start is already present. */
2253 			if (cdr->frame)
2254 				sprintf(buf_ptr, "%ld", (long) (cdr->frame));
2255 			else
2256 				sprintf(buf_ptr, "1");
2257 			sfp_out->qual = AddGBQual(sfp_out->qual, "codon_start", buf_ptr);
2258 		}
2259 		if (product && (! ajp->genome_view) && (ProductIsLocal (ajp->entityID, product))) {
2260 			byte_sp = ProteinFromCdRegion(sfp_in, FALSE);
2261 
2262 			if (product) {
2263 				length = bsp->length;
2264 				protein_seq = GetProductFromCDS(product, sfp_in->location, length);
2265 /* check conflict flag and fix it */
2266 				if (cdr->conflict == TRUE) {
2267 					if (CompareTranslation(byte_sp, protein_seq)) {
2268 						cdr->conflict = FALSE;
2269 					} else {
2270 						method = METHOD_concept_transl_a;
2271 					}
2272 				}
2273 				if (protein_seq) {
2274 					if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2275 						 gsp->pseudo == FALSE && sfp_in->pseudo == FALSE) {
2276 						sfp_out->qual = AddGBQual(sfp_out->qual,
2277 										"translation", protein_seq);
2278 					}
2279 					MemFree(protein_seq);
2280 				}
2281 			}
2282 			BSFree(byte_sp);
2283 		}
2284 		if (sfp_in->pseudo) {
2285 			sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2286 		}
2287 		if (sfp_in->excpt) {
2288 			if (StringCmp("ribosomal slippage", sfp_in->except_text) == 0 ||
2289 				StringCmp("ribosome slippage", sfp_in->except_text) == 0) {
2290 				sfp_out->qual = AddGBQual(sfp_out->qual,
2291 									"note", sfp_in->except_text);
2292 				sfp_out->excpt = FALSE;
2293 			} else if (StringCmp("trans splicing", sfp_in->except_text) == 0 ||
2294 						StringCmp("trans-splicing", sfp_in->except_text) == 0) {
2295 				sfp_out->excpt = FALSE;
2296 			} else if (sfp_in->except_text) {
2297 				sfp_out->qual = AddGBQual(sfp_out->qual,
2298 									"exception", sfp_in->except_text);
2299 			} else if (GBQualPresent("exception", sfp_in->qual) == TRUE) {
2300 				sfp_out->qual = AddGBQual(sfp_out->qual,
2301 									"exception", sfp_in->qual->val);
2302 			} else if (sfp_out->comment != NULL) {
2303 				sfp_out->qual = AddGBQual(sfp_out->qual,
2304 									"exception", sfp_in->comment);
2305 				sfp_out->comment = NULL;
2306 			} else {
2307 				sfp_out->qual = AddGBQual(sfp_out->qual,
2308 									"exception", except_msg);
2309 			}
2310 		} else {
2311 			if (GBQualPresent("exception", sfp_in->qual) == TRUE) {
2312 				sfp_out->qual = AddGBQual(sfp_out->qual,
2313 									"exception", sfp_in->qual->val);
2314 			}
2315 		}
2316 
2317 		GatherProductGeneInfo(ajp, sfp_in, gbp, gp, method);
2318 
2319 /******************************************************************************
2320 - asn2ff shouldn't generate a de-novo /translation for any
2321   cdregion that lacks a product, regardless of mode or -V setting  2/15/99
2322 ******************************************************************************
2323 		if (protein_seq == NULL && ajp->mode != RELEASE_MODE) {
2324 			protein_seq = BSMerge(byte_sp, NULL);
2325 			if ( protein_seq && protein_seq[0] != '-') {
2326 				len_prot = StringLen(protein_seq);
2327 				SeqLocLen(sfp_in->location) - (cdr->frame - 1);
2328 				if (len_prot >= 6) {
2329 					if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2330 						 gsp->pseudo == FALSE) {
2331 						sfp_out->qual =
2332 							AddGBQual(sfp_out->qual,
2333 								"translation", protein_seq);
2334 					}
2335 				}
2336 			}
2337 			MemFree(protein_seq);
2338 		}
2339 		BSFree(byte_sp);
2340 */
2341 		break;
2342 	case SEQFEAT_RNA:
2343 		rrp = sfp_in->data.value.ptrvalue;
2344 		/* the following code was taken (almost) directly
2345 			from Karl Sirotkin's code.					*/
2346 		switch ( rrp -> type){ /* order of case n: matches tests in
2347                                 is_RNA_type() of genasn.c in
2348                                 GenBankConversion directory */
2349 			case 2:
2350 				break;
2351 			case 255:
2352 				break;
2353 			case 3:
2354 				if (rrp->ext.choice == 1) {
2355 					if ((GBQualPresent("product", sfp_in->qual)) == FALSE) {
2356 						sfp_out->qual = AddGBQual(sfp_out->qual,
2357 								"product", (CharPtr) rrp->ext.value.ptrvalue);
2358 					}
2359 				} else if (rrp->ext.choice == 0 ||
2360 					rrp->ext.choice == 2) {
2361 					DotRNAQuals(ajp, gbp, sfp_in, sfp_out,
2362 							gp->nsp, gp->extra_loc, gp->extra_loc_cnt);
2363 				}
2364 				break;
2365 			case 4:
2366 				break;
2367 			case 1:
2368 				if (rrp->ext.choice == 1) {
2369 					if ((GBQualPresent("product", sfp_in->qual)) == FALSE) {
2370 						sfp_out->qual = AddGBQual(sfp_out->qual,
2371 								"product", (CharPtr) rrp->ext.value.ptrvalue);
2372 					}
2373 				} else if (rrp->ext.choice == 0 ||
2374 					rrp->ext.choice == 2) {
2375 					DotRNAQuals(ajp, gbp, sfp_in, sfp_out,
2376 							gp->nsp, gp->extra_loc, gp->extra_loc_cnt);
2377 				}
2378 				break;
2379 			case 5:
2380 				break;
2381 			case 6:
2382 				break;
2383 		}
2384 
2385 		if (rrp && rrp->pseudo == TRUE) {
2386 			if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE)
2387 				sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2388 		}
2389 
2390 
2391 		break;
2392 	case SEQFEAT_SEQ:
2393 		if ((xid=CheckXrefFeat(bsp, sfp_in)) != NULL) {
2394 			ptr = &(temp[0]);
2395 			SeqIdWrite(xid, printbuf, PRINTID_FASTA_SHORT, 40);
2396 			sprintf(ptr, "Cross-reference: %s", printbuf);
2397 			SaveNoteToCharPtrStack(nsp, NULL, ptr);
2398 		}
2399 		else
2400 			retval = 0;
2401 		break;
2402 	case SEQFEAT_IMP:
2403 		ifp_in = (ImpFeatPtr) sfp_in->data.value.ptrvalue;
2404 		if (ifp_in->loc != NULL)
2405 			ifp->loc = ifp_in->loc;
2406 		if (StringCmp(ifp_in->key, "CDS") == 0) {
2407 			if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2408 				ajp->error_msgs == TRUE)
2409 				ErrPostStr(SEV_INFO, ERR_FEATURE_non_pseudo,
2410 				    "ConvertToNAImpFeat: Non-pseudo ImpFeat CDS found");
2411 			if ((GBQualPresent("translation", sfp_in->qual)) == TRUE &&
2412 												ajp->mode == RELEASE_MODE) {
2413 				if (ajp->error_msgs == TRUE) {
2414 					ErrPostStr(SEV_ERROR, ERR_FEATURE_Dropped,
2415 				    "ImpFeat CDS with /translation found");
2416 				}
2417 				retval = -1;
2418 			}
2419 		}
2420 		break;
2421 	case SEQFEAT_REGION:
2422 		tmp = MemNew(StringLen(sfp_in->data.value.ptrvalue) + 9);
2423 		sprintf(tmp, "Region: %s", (CharPtr ) sfp_in->data.value.ptrvalue);
2424 		sfp_out->qual = AddGBQual(sfp_out->qual, "note", tmp);
2425 		tmp = MemFree(tmp);
2426 		break;
2427 	case SEQFEAT_SITE:
2428 		AddSiteNoteQual(sfp_in, sfp_out);
2429 		break;
2430 	case SEQFEAT_RSITE:
2431 		break;
2432 	case SEQFEAT_COMMENT:
2433 		if(ifp->key != NULL)
2434 			MemFree(ifp->key);
2435 		ifp->key = StringSave("misc_feature");
2436 		break;
2437 	case SEQFEAT_GENE:
2438 		grp = (GeneRefPtr) sfp_in->data.value.ptrvalue;
2439 		if (grp == NULL)
2440 			break;
2441 		syn=grp->syn;
2442 		if (grp->locus ) {
2443 			sfp_out->qual = AddGBQual(sfp_out->qual, "gene", grp->locus);
2444 			was_gene = TRUE;
2445 		} else if (syn != NULL) {
2446 			sfp_out->qual = AddGBQual(sfp_out->qual, "gene",
2447 												syn->data.ptrvalue);
2448 			syn=syn->next;
2449 			was_gene = TRUE;
2450 		}
2451 		if (grp->desc ) {
2452 			if (was_gene) {
2453 				CpNoteToCharPtrStack(nsp, NULL, grp->desc);
2454 			} else {
2455 			/*	s = MemNew(StringLen(grp->desc) + 15);
2456 				sprintf(s, "Description: %s", grp->desc);
2457 				sfp_out->qual = AddGBQual(sfp_out->qual, "gene", s);*/
2458 				sfp_out->qual = AddGBQual(sfp_out->qual, "gene", grp->desc);
2459 			}
2460 		}
2461 		if (grp->allele ) {
2462 			if ((GBQualPresent("allele", sfp_in->qual)) == FALSE)
2463 				sfp_out->qual = AddGBQual(sfp_out->qual, "allele", grp->allele);
2464 		}
2465 		if (grp->maploc ) {
2466 			if ((GBQualPresent("map", sfp_in->qual)) == FALSE)
2467 				sfp_out->qual = AddGBQual(sfp_out->qual, "map", grp->maploc);
2468 		}
2469 		for (; syn; syn=syn->next) {
2470 			CpNoteToCharPtrStack(nsp, NULL, syn->data.ptrvalue);
2471 		}
2472 		if (grp->pseudo == TRUE || sfp_in->pseudo) {
2473 			if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE)
2474 				sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2475 		}
2476 		GetDBXrefFromGene(grp, sfp_out);
2477 		break;
2478 	default:
2479 		if (ajp->error_msgs == TRUE)
2480 			ErrPostStr(SEV_WARNING, ERR_FEATURE_UnknownFeatureKey,
2481 				"Unimplemented type of gbqual in ConvertToNAImpFeat");
2482 		retval = 0;
2483 		break;
2484 	}
2485 	if (gsp->grp) {
2486 		GetDBXrefFromGene(gsp->grp, sfp_out);
2487 	}
2488 	if (sfp_in->pseudo) {
2489 		sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2490 	}
2491 	if (sfp_out->comment) {
2492 		CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_out->comment);
2493 		sfp_out->comment = NULL;
2494 	}
2495 	return retval;
2496 
2497 }	/* ConvertToNAImpFeat */
2498 
2499 /*****************************************************************************
2500 *ValidateNAImpFeat
2501 *
2502 *	This code validates an ImpFeat using some functions from
2503 *	the flat2asn parser.
2504 *
2505 *	If a feat is bad and can't be corrected, -1 is returned.
2506 *
2507 *****************************************************************************/
2508 
ValidateNAImpFeat(SeqFeatPtr sfp)2509 NLM_EXTERN Int2 ValidateNAImpFeat (SeqFeatPtr sfp)
2510 
2511 {
2512 	CharPtr key;
2513 	ImpFeatPtr ifp;
2514 	Int2 index, retval=0, status=0;
2515 
2516 	if (sfp->data.choice != SEQFEAT_IMP) {
2517 		return -1;
2518 	} else {
2519 
2520 		ifp = sfp->data.value.ptrvalue;
2521 		key = StringSave(ifp->key);
2522 		index = GBFeatKeyNameValid(&key, ASN2FF_SHOW_ERROR_MSG);
2523 		if (StringCmp(key, ifp->key) != 0) {
2524 			ifp->key = key;
2525 		} else {
2526 			MemFree(key);
2527 		}
2528 
2529 		if (index == -1) {
2530 			retval = -2;
2531 		} else {
2532 			status = GBFeatKeyQualValid(sfp->cit, index, &sfp->qual,
2533 						ASN2FF_SHOW_ERROR_MSG, ASN2FF_VALIDATE_FEATURES);
2534 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
2535 			if (index == 46 && status == GB_FEAT_ERR_NONE) {
2536 				status = GBFeatKeyQualValid(sfp->cit, index, &sfp->qual,
2537 							ASN2FF_SHOW_ERROR_MSG, TRUE);
2538 			}
2539 #endif
2540 			if (status == GB_FEAT_ERR_NONE) {
2541 				retval = 1;
2542 			} else if (status == GB_FEAT_ERR_REPAIRABLE) {
2543 				retval = 0;
2544 			} else if (status == GB_FEAT_ERR_DROP) {
2545 				retval = -1;
2546 			}
2547 		}
2548 
2549 	}
2550 
2551 	return retval;
2552 }	/* ValidateNAImpFeat */
2553 
2554 /*****************************************************************************
2555 *ValidateAAImpFeat
2556 *
2557 *	This code will validate an ImpFeat using some functions from
2558 *	the flat2asn parser.  Right now it just checks to see that the
2559 *	sfp is an ImpFeat and checks for a partial qualifier.
2560 *
2561 *	If a feat is bad and can't be corrected, -1 is returned.
2562 *
2563 *****************************************************************************/
2564 
ValidateAAImpFeat(SeqFeatPtr sfp,Boolean use_product)2565 NLM_EXTERN Int2 ValidateAAImpFeat (SeqFeatPtr sfp, Boolean use_product)
2566 
2567 {
2568 
2569 	if (sfp->data.choice != SEQFEAT_IMP)
2570 		return -1;
2571 
2572 	LookForPartialImpFeat(sfp, use_product);
2573 
2574 	return 0;
2575 }	/* ValidateAAImpFeat */
2576 
2577 
2578 /*****************************************************************************
2579 *void PrepareSourceFeatQuals(SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modifs)
2580 *
2581 *Normally called from PrintSourceFeat, collects all notes etc. together.
2582 *Note: sfp_out may already have quals when it comes here, they should not
2583 *be deleted!
2584 * 	add_modifs: allows the addition of modifs to be specified, don't add
2585 *		modifs if the source feature is a ImpFeat.
2586 *
2587 *For many cases there is no sfp_in, so that must be checked for.
2588 *****************************************************************************/
2589 
PrepareSourceFeatQuals(SeqFeatPtr sfp_in,SeqFeatPtr sfp_out,GBEntryPtr gbp,Boolean add_modif)2590 NLM_EXTERN void PrepareSourceFeatQuals(SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modif)
2591 
2592 {
2593 	CharPtr note=NULL;
2594 	GBQualPtr qual1;
2595 	NoteStructPtr nsp=NULL;
2596 
2597 	if (gbp->feat) {
2598 		nsp=gbp->feat->source_notes;
2599 	}
2600 	if (sfp_in) {
2601 		for (qual1=sfp_in->qual; qual1; qual1=qual1->next) {
2602 			if (StringCmp(qual1->qual, "note") == 0)
2603 				CpNoteToCharPtrStack(nsp, NULL, qual1->val);
2604 			else
2605 				sfp_out->qual =
2606 					AddGBQual(sfp_out->qual, qual1->qual, qual1->val);
2607 		}
2608 	}
2609 /* not used in new style  */
2610 	if (add_modif == TRUE)
2611 		sfp_out->qual = AddModifsToGBQual(gbp, sfp_out->qual);
2612 /*---------------------    tatiana */
2613 	if (sfp_in && sfp_in->comment) {
2614 		CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_in->comment);
2615 	}
2616 
2617 	if (nsp && nsp->note[0]) {
2618 		note = ComposeNoteFromNoteStruct(nsp, NULL);
2619 		if (note) {
2620 			sfp_out->qual = AddGBQual(sfp_out->qual, "note", note);
2621 			note = MemFree(note);
2622 		}
2623 	}
2624 	if (sfp_in && sfp_in->cit) {
2625 		if (ASN2FF_SHOW_ERROR_MSG)
2626 			ErrPostStr(SEV_WARNING, 0, 0,
2627 				"Unwanted /citation on 'source' feature will be dropped");
2628 	}
2629 
2630 	return;
2631 }
2632 
2633 
2634 /*************************************************************************
2635 *AddProteinQuals
2636 *
2637 *************************************************************************/
2638 
AddProteinQuals(SeqFeatPtr sfp,SeqFeatPtr sfp_out,NoteStructPtr nsp)2639 NLM_EXTERN void AddProteinQuals (SeqFeatPtr sfp, SeqFeatPtr sfp_out, NoteStructPtr nsp)
2640 
2641 {
2642 	ProtRefPtr prp=sfp->data.value.ptrvalue;
2643 	ValNodePtr vnp;
2644 
2645 	if (prp->name != NULL) {
2646 		for (vnp=prp->name; vnp; vnp=vnp->next)
2647 			if (GBQualPresent("product", sfp_out->qual) == FALSE)
2648 				sfp_out->qual =
2649 				     AddGBQual(sfp_out->qual, "product", vnp->data.ptrvalue);
2650 			else
2651 				CpNoteToCharPtrStack(nsp, NULL, vnp->data.ptrvalue);
2652 	}
2653 	if (prp->desc) {
2654 		sfp_out->qual =
2655 		     AddGBQual(sfp_out->qual, "name", prp->desc);
2656 	}
2657 
2658 	for (vnp=prp->ec; vnp; vnp=vnp->next)
2659 		if ((CheckForQual(sfp_out->qual, "EC_number", vnp->data.ptrvalue)) == 0)
2660 			sfp_out->qual =
2661 		    		AddGBQual(sfp_out->qual, "EC_number", vnp->data.ptrvalue);
2662 
2663 	return;
2664 }
2665 
2666 /*______________________________________________________________________
2667 **
2668 **	This code is not currently used.
2669 **	I do not remove this piece of code, just comment it out.
2670 **	-- Dmitri Lukyanov
2671 */
2672 #if 0
2673 
2674 static GBQualPtr RemoveQual(GBQualPtr head, GBQualPtr x)
2675 {
2676 	GBQualPtr	v, p;
2677 
2678 	if (head == NULL) {
2679 		return NULL;
2680 	}
2681 	if (x == head) {
2682 		head = x->next;
2683 		x->next = NULL;
2684 		GBQualFree(x);
2685 		return head;
2686 	}
2687 	for (v = head; v != NULL && v != x; v = v->next) {
2688 		p = v;
2689 	}
2690 	if (v != NULL) {
2691 		p->next = x->next;
2692 		x->next = NULL;
2693 		GBQualFree(x);
2694 	}
2695 	return head;
2696 }
2697 
2698 #endif
2699 /*______________________________________________________________________
2700 */
2701 
/* Adds a db_xref qualifier of the form "GeneID:<synonym>" to sfp_out,
   built from the first synonym of the gene reference in gsp.  Nothing is
   added when gsp has no gene reference, when the key of sfp_out is not
   "CDS", or when there is no (non-NULL) first synonym. */
static void Add_gene_id (GeneStructPtr gsp, SeqFeatPtr sfp_out)
{
	ImpFeatPtr ifp;
	GeneRefPtr grp;
	ValNodePtr vnp;
	CharPtr synonym, val;

	if ((grp = gsp->grp) == NULL)
		return;
	ifp = sfp_out->data.value.ptrvalue;
	if (StringCmp(ifp->key, "CDS") != 0) {
		return;
	}
	if ((vnp = grp->syn) == NULL)  /* no synonyms */
		return;
	synonym = (CharPtr) vnp->data.ptrvalue;
	if (synonym == NULL)           /* nothing to print */
		return;

	/* size the buffer from the synonym instead of using a fixed array:
	   8 = length of "GeneID:" plus the terminating NUL */
	val = MemNew((size_t) (StringLen(synonym) + 8));
	if (val == NULL)
		return;
	sprintf(val, "GeneID:%s", synonym);
	sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
	MemFree(val);
}
2720 
/****************************************************************************
*	Composes the GBQuals for sfp_out using the information in the
*	GeneStructPtr (gsp), and then the quals already on sfp_out.
*
*	Use only the info from the GeneStruct; throw away the quals "gene"
*	and "map" if they are different.  /tatiana  07.11.95/
*	Do not add /map to features other than 'gene'.  /08-29-97/
*	sfp_out: SEQFEAT_IMP
*	map /citation added by Tatiana
**************************************************************************/
ComposeGBQuals(Asn2ffJobPtr ajp,SeqFeatPtr sfp_out,GBEntryPtr gbp,SortStructPtr p,Boolean note_pseudo)2731 NLM_EXTERN void ComposeGBQuals (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, GBEntryPtr gbp, SortStructPtr p, Boolean note_pseudo)
2732 {
2733 	Char temp[65];
2734 	Char buffer[10];
2735 	CharPtr ascii, start, note=NULL, ptr=NULL, tmp;
2736 	GBQualPtr gbqp=NULL, qual1, qnext;
2737 	GeneStructPtr gsp;
2738 	Int2 int_index, status;
2739 	NoteStructPtr nsp;
2740 	PubStructPtr psp;
2741 	SeqFeatPtr sfp = NULL;
2742 	Int2 ascii_len, l;
2743 	ValNodePtr vnp, vnp1;
2744 	ValNodePtr pub, pubq, pubset;
2745 	ImpFeatPtr ifp;
2746 	BioseqPtr bsp;
2747 	Boolean is_contig = FALSE, is_NC = FALSE, is_NG = FALSE;
2748 	SeqIdPtr sid;
2749 	TextSeqIdPtr tsip;
2750 
2751 	if (gbp == NULL || gbp->feat == NULL || p == NULL) {
2752 		return;
2753 	}
2754 	bsp = gbp->bsp;
2755 	for (sid=bsp->id; sid; sid=sid->next) {
2756 		if (sid->choice == SEQID_OTHER) {
2757 			tsip = (TextSeqIdPtr) sid->data.ptrvalue;
2758 			if (StringNCmp(tsip->accession, "NT", 2) == 0) {
2759 				is_contig = TRUE;
2760 			}
2761 			if (StringNCmp(tsip->accession, "NC", 2) == 0
2762 					|| StringNCmp(tsip->accession, "NP", 2) == 0) {
2763 				is_NC = TRUE;
2764 			}
2765 			if (StringNCmp(tsip->accession, "NG", 2) == 0) {
2766 				is_NG = TRUE;
2767 			}
2768 		}
2769 	}
2770 	gsp=p->gsp;
2771 	nsp = p->nsp;
2772 	if ((sfp=p->sfp) == NULL) {
2773 		GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
2774 								&sfp, find_item);
2775 	}
2776 	if (gsp) {
2777 		if (gsp->gene) {
2778 		/*	delete_qual(&(sfp_out->qual), "gene"); */
2779 			for (vnp=gsp->gene; vnp; vnp=vnp->next)
2780 			{
2781 				ascii_len = Sgml2AsciiLen(vnp->data.ptrvalue);
2782 				start = ascii = MemNew((size_t) (10+ascii_len));
2783 				ascii = Sgml2Ascii(vnp->data.ptrvalue, ascii, ascii_len+1);
2784 				if ((GBQualPresent("gene", gbqp)) == FALSE) {
2785 					if ((GBQualPresent("gene", sfp_out->qual)) == FALSE) {
2786 						gbqp=AddGBQual(gbqp, "gene", start);
2787 					}
2788 				}
2789 				start = MemFree(start);
2790 			}
2791 		}
2792 		if (gsp->product) {
2793 			for (vnp=gsp->product; vnp; vnp=vnp->next)
2794 			{
2795 				if (GBQualPresent("product", gbqp) == FALSE &&
2796 					GBQualPresent("product", sfp_out->qual) == FALSE)
2797 						sfp_out->qual = AddGBQual(sfp_out->qual, "product",
2798 														 vnp->data.ptrvalue);
2799 				else
2800 					CpNoteToCharPtrStack(nsp, NULL, vnp->data.ptrvalue);
2801 			}
2802 		}
2803 		if (gsp->standard_name) {
2804 			for (vnp=gsp->standard_name; vnp; vnp=vnp->next)
2805 			{
2806 				if ((CheckForQual(sfp_out->qual, "standard_name",
2807 											 vnp->data.ptrvalue)) == 0) {
2808 					gbqp=AddGBQual(gbqp, "standard_name", vnp->data.ptrvalue);
2809 				}
2810 			}
2811 		}
2812 		if (ajp->show_gene == TRUE) {
2813 			ifp = sfp_out->data.value.ptrvalue;
2814 			if (StringCmp(ifp->key, "gene") == 0) {
2815 				if (gsp->map[0]) {
2816 					gbqp = AddGBQual(gbqp, "map", gsp->map[0]);
2817 				}
2818 			}
2819 		} else {
2820 			if (gsp->map[0]) {
2821 				gbqp = AddGBQual(gbqp, "map", gsp->map[0]);
2822 			}
2823 		}
2824 		for (vnp=gsp->ECNum; vnp; vnp=vnp->next) {
2825 			if ((CheckForQual(sfp_out->qual, "EC_number",
2826 									vnp->data.ptrvalue)) == 0) {
2827 				gbqp=AddGBQual(gbqp, "EC_number", vnp->data.ptrvalue);
2828 			}
2829 		}
2830 		for (vnp=gsp->activity; vnp; vnp=vnp->next) {
2831 			if ((CheckForQual(sfp_out->qual, "function",
2832 									vnp->data.ptrvalue)) == 0) {
2833 				gbqp=AddGBQual(gbqp, "function", vnp->data.ptrvalue);
2834 			}
2835 		}
2836 		if (gsp->pseudo == TRUE) {
2837 			if (note_pseudo == TRUE) {
2838 					CpNoteToCharPtrStack(nsp, NULL, "pseudogene");
2839 			} else if (GBQualPresent("pseudo", gbqp) == FALSE &&
2840 					GBQualPresent("pseudo", sfp_out->qual) == FALSE) {
2841 				gbqp = AddGBQual(gbqp, "pseudo", NULL);
2842 			}
2843 		}
2844 	}
2845 /* Add Experimental note */
2846 	if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION)
2847 	{
2848 		ptr = &(temp[0]);
2849 		status = MakeGBSelectNote(ptr, sfp);
2850 		if (status > 0)
2851 			SaveNoteToCharPtrStack(nsp, NULL, ptr);
2852 		ptr=NULL;
2853 /* gene synonym appears as db-xref
2854 		if (is_NC) {
2855 			Add_gene_id(gsp, sfp_out);
2856 		}
2857 */
2858 	}
2859 	if (nsp && nsp->note[0])
2860 	{
2861 		note = ComposeNoteFromNoteStruct(nsp, gsp);
2862 		if (note)
2863 		{
2864 			gbqp = AddGBQual(gbqp, "note", note);
2865 			note = MemFree(note);
2866 		}
2867 	}
2868 	if (ajp->mode != DIRSUB_MODE) {
2869 		AddPID(ajp, sfp_out, (Boolean) (is_contig || is_NG || is_NC));
2870 	}
2871 	if (is_contig || is_NG || is_NC) {
2872 		if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA) {
2873 			Add_trid(ajp, sfp_out);
2874 		}
2875 	}
2876 	Add_dbxref(ajp, sfp_out, sfp, bsp);
2877 	vnp = gbp->Pub;
2878 	if (sfp && sfp->cit) {
2879 		buffer[0] = '\0';
2880 		pubset = sfp->cit;
2881 		for (pubq = pubset->data.ptrvalue; pubq; pubq = pubq->next) {
2882 			if (pubq->choice == PUB_Equiv) {
2883 				pub = pubq->data.ptrvalue;
2884 				for (; pub != NULL; pub = pub->next) {
2885 					for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
2886 						psp = vnp1->data.ptrvalue;
2887 						if (PubLabelMatch(psp->pub, pub) == 0) {
2888 							sprintf(buffer, "[%ld]", (long) (psp->number));
2889 							 gbqp = AddGBQual(gbqp, "citation", buffer);
2890 							break;
2891 						}
2892 					}
2893 				}
2894 			} else {
2895 				pub = pubq;
2896 				for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
2897 					psp = vnp1->data.ptrvalue;
2898 					if (PubLabelMatch(psp->pub, pub) == 0) {
2899 						sprintf(buffer, "[%ld]", (long) (psp->number));
2900 						 gbqp = AddGBQual(gbqp, "citation", buffer);
2901 						break;
2902 					}
2903 				}
2904 			}
2905 		}
2906 /************** old algorithm for pub matching ****************/
2907 		if (buffer[0] == '\0') {
2908 			for (vnp1=vnp; vnp1; vnp1=vnp1->next)
2909 			{
2910 				psp = vnp1->data.ptrvalue;
2911 				for (int_index=0; int_index<psp->pubcount; int_index++)
2912 					if (sfp == psp->pubfeat[int_index])
2913 					{
2914 						sprintf(buffer, "[%ld]", (long) (psp->number));
2915 						gbqp = AddGBQual(gbqp, "citation", buffer);
2916 					}
2917 			}
2918 		}
2919 	}
2920 	if (gbqp)	/* any gene or note related quals added above? */
2921 	{
2922 		for (qual1=gbqp; qual1->next; qual1=qual1->next)
2923 			;
2924 		qual1->next = sfp_out->qual;
2925 		sfp_out->qual = gbqp;
2926 	}
2927 /* check for the qual gdb_xref */
2928 	for (qual1 = sfp_out->qual; qual1; qual1 = qnext) {
2929 		qnext = qual1->next;
2930 		if (StringCmp(qual1->qual, "gdb_xref") == 0) {
2931 			qual1->qual = StringSave("db_xref");
2932 			l = StringLen(qual1->val);
2933 			tmp = MemNew(l + 5);
2934 			sprintf(tmp, "GDB:%s", qual1->val);
2935 			qual1->val = StringSave(tmp);
2936 			MemFree(tmp);
2937 		}
2938 		if (ajp->show_gene == FALSE) {
2939 /* change qual 'replace' to the old style location operator */
2940 /* changed December 1996 release 100.0 */
2941 		/*	if (StringCmp(qual1->qual, "replace") == 0) {
2942 				ifp = sfp_out->data.value.ptrvalue;
2943 				loc = FlatLoc(gbp->bsp, sfp->location);
2944 				l = StringLen(qual1->val) + StringLen(loc);
2945 				tmp = MemNew(l + 15);
2946 				sprintf(tmp, "replace(%s,\"%s\")", loc, qual1->val);
2947 				MemFree(loc);
2948 				ifp->loc = tmp;
2949 				sfp_out->qual = RemoveQual(sfp_out->qual, qual1);
2950 			}
2951 				*/
2952 		}
2953 	}
2954 	return;
2955 }	/* ComposeGBQuals */
2956 
/*************************************************************************
*static CharPtr tmp_save(CharPtr str)
*
*	Cleans up "str" and returns a freshly allocated copy (Nlm_StringSave)
*	of the cleaned text, or NULL if "str" is NULL or nothing is left.
*
*	n.b.: the clean-up is done IN PLACE on the caller's buffer:
*	  - leading white space and commas are skipped (not overwritten);
*	  - every newline, together with the white space that follows it,
*	    is collapsed into a single blank;
*	  - trailing blanks, tabs, ';', ',', '.' and '"' are removed.
*	The caller owns the returned string.
*************************************************************************/
static CharPtr  tmp_save(CharPtr str)
{
	CharPtr s, ss;

	if (str == NULL) {
		return NULL;
	}
	/* the cast keeps isspace() defined for characters with the high bit set */
	while (isspace((unsigned char) *str) || *str == ',') {
		str++;
	}
	for (s = str; *s != '\0'; s++) {
		if (*s == '\n') {
			for (ss = s+1; isspace((unsigned char) *ss); ss++) continue;
			*s = ' ';
			/* source and destination overlap, so strcpy() is not allowed here */
			memmove(s+1, ss, StringLen(ss) + 1);
		}
	}
	/* walk back from the terminator; never forms a pointer before str */
	s = str + StringLen(str);
	while (s > str && (s[-1] == ' ' || s[-1] == ';' || s[-1] == ',' ||
			s[-1] == '.' || s[-1] == '\"' || s[-1] == '\t')) {
		s--;
		*s = '\0';
	}

	if (*str == '\0') {
	    return NULL;
	}
	return Nlm_StringSave(str);
}
NoteCmp(CharPtr n1,CharPtr n2)2983 static Int2 NoteCmp(CharPtr n1, CharPtr n2)
2984 {
2985 	CharPtr s1, s2;
2986 	Int2 ret = 1;
2987 
2988 	if (n1 == NULL || n2 == NULL)
2989 		return ret;
2990 	s1 = tmp_save(n1);
2991 	s2 = tmp_save(n2);
2992 	if (StringStr(s1, s2) != NULL)
2993 		ret = 0;  /*duplicated */
2994 	MemFree(s1);
2995 	MemFree(s2);
2996 
2997 	return ret;
2998 }
2999 
3000 /****************************************************************************
3001 * CharPtr ComposeNoteFromNoteStruct (NoteStructPtr nsp, GeneStrunctPtr gsp)
3002 *
3003 *	This function composes a "/note" for a SeqFeatPtr from the information
3004 *	in the GeneStructPtr (gsp).
3005 *	The first "for" loop initializes the first CharPtr and a check
3006 *	is done that the information in gsp->note is *not* redundant.  If
3007 *	it is not, first gsp->note_annot is copied onto a CharPtr (this
3008 *	field contains words describing the origin of the info in note, i.e.,
3009 *	"Description"); then the actual note is copied onto the CharPtr.
3010 *	The second "for" loop does the same checking as the first and the
3011 *	concatenation of more "note" strings is performed.
3012 *
3013 *n.b.: the caller is responsible for deallocating the final returned "note".
3014 ***************************************************************************/
ComposeNoteFromNoteStruct(NoteStructPtr nsp,GeneStructPtr gsp)3015 NLM_EXTERN CharPtr ComposeNoteFromNoteStruct (NoteStructPtr nsp, GeneStructPtr gsp)
3016 
3017 {
3018 	Boolean status;
3019 	CharPtr note1=NULL, note2, note3;
3020 	Int2 index, index1, index2, len;
3021 
3022 	for (index=0; index<nsp->note_index; index++) {
3023 		if (gsp) {
3024 			if (CompareStringWithGsp(gsp, nsp->note[index]) != 0) {
3025 				if (nsp->note_annot[index])
3026 					note1 = Cat2Strings(nsp->note_annot[index], nsp->note[index], " ", 0);
3027 				else
3028 					note1 = StringSave(nsp->note[index]);
3029 				len = CheckForExtraChars(note1);
3030 				if (len == 0)
3031 					note1 = MemFree(note1);
3032 				else
3033 					break;
3034 			}
3035 		} else {
3036 			if (nsp->note_annot[index])
3037 				note1 = Cat2Strings(nsp->note_annot[index], nsp->note[index], " ", 0);
3038 			else
3039 				note1 = StringSave(nsp->note[index]);
3040 			len = CheckForExtraChars(note1);
3041 			if (len == 0)
3042 				note1 = MemFree(note1);
3043 			else
3044 				break;
3045 		}
3046 	}
3047 	index++;
3048 
3049 	for (index1=index; index1<nsp->note_index; index1++)
3050 	{
3051 		status = TRUE;
3052 		note2 = nsp->note[index1];
3053 		if (gsp && CompareStringWithGsp(gsp, note2) == 0)
3054 			continue;
3055 
3056 		for (index2=0; index2<index1; index2++) {
3057 			if (gsp)
3058 				if (GeneStringCmp(note2, nsp->note[index2]) == 0)
3059 					status = FALSE;
3060 		}
3061 		if (status == TRUE) {
3062 			if (nsp->note_annot[index1])
3063 				note2 = Cat2Strings(nsp->note_annot[index1], nsp->note[index1], " ", 0);
3064 			else /* rewrite to not always allocate note2 if no annot?????*/
3065 				note2 = StringSave(nsp->note[index1]);
3066 			len = CheckForExtraChars(note1);
3067 			if (NoteCmp(note1, note2) == 0) {
3068 				len = 0;
3069 			}
3070 			if (len > 0) {
3071 				if (note1[len-1] == '.') {
3072 			        	note3 = Cat2Strings(note1, note2, "  ", -1);
3073 				} else {
3074 					note3 = Cat2Strings(note1, note2, "; ", -1);
3075 				}
3076 				note1 = MemFree(note1);
3077 				note2 = MemFree(note2);
3078 				note1 = note3;
3079 				note3 = NULL;
3080 			} else {
3081 				if (note2) {
3082 					note2 = MemFree(note2);
3083 				}
3084 			}
3085 		}
3086 	}
3087 
3088 	return note1;
3089 }	/* ComposeNoteFromNoteStruct */
3090 
3091 /*************************************************************************
3092 *static Int2 CheckForExtraChars(CharPtr note)
3093 *
3094 *	Check for spaces or semi-colons on the ends of notes.
3095 ************************************************************************/
3096 
CheckForExtraChars(CharPtr note)3097 static Int2 CheckForExtraChars(CharPtr note)
3098 
3099 {
3100 	Int2 len=0;
3101 
3102 	if (note != NULL)
3103 	{
3104 		len = StringLen(note);
3105 		while (len > 0)
3106 		{
3107 			if (note[len-1] == ' ' || note[len-1] == ';')
3108 				note[len-1] = '\0';
3109 			else
3110 				break;
3111 			len--;
3112 		}
3113 	}
3114 
3115 	return len;
3116 
3117 }	/* CheckForExtraChars */
3118 
Add_trid(Asn2ffJobPtr ajp,SeqFeatPtr sfp_out)3119 NLM_EXTERN void Add_trid (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out)
3120 
3121 {
3122 	ImpFeatPtr ifp;
3123 	BIG_ID gi = -1;
3124 	SeqIdPtr sip, newid=NULL;
3125 	ValNodePtr product;
3126 	Char buf[MAX_ACCESSION_LEN+5];
3127 
3128 	ifp = sfp_out->data.value.ptrvalue;
3129 	if (StringCmp(ifp->key, "mRNA") != 0) {
3130 		return;
3131 	}
3132 	product = sfp_out->product;
3133 	if (product == NULL) {
3134 		return;
3135 	}
3136 	sip = GetProductSeqId(product);
3137 	if (sip == NULL) return;
3138 	if (sip->choice == SEQID_GI) {
3139 		if ((newid = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
3140 			SeqIdWrite(newid, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3141 		} else {
3142 			sprintf(buf, "%ld", sip->data.intvalue);
3143 		}
3144 	} else {
3145 		SeqIdWrite(sip, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3146 	}
3147 	sfp_out->qual = AddGBQual(sfp_out->qual, "transcript_id", buf);
3148 }
3149 
3150 /*************************************************************************
3151 *	sfp_out: synthetic SeqFeatPtr of type ImpFeat for use in printing.
3152 *
3153 *	This function puts the GI number on a SeqFeatPtr /db_xref of type CDS.
3154 *	Checking is first done to see if this sfp is indeed a CDS, then
3155 *	the PID number is gotten from the product SeqId
3156 *****************************************************************************/
3157 
AddPID(Asn2ffJobPtr ajp,SeqFeatPtr sfp_out,Boolean is_NTorNG)3158 NLM_EXTERN void AddPID (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG)
3159 
3160 {
3161 	ImpFeatPtr ifp;
3162 	BIG_ID gi = -1;
3163 	SeqIdPtr sip, new_id=NULL;
3164 	ValNodePtr product, vnp;
3165 	BioseqPtr p_bsp = NULL;
3166 	DbtagPtr db;
3167 	Char val[20];
3168 	Char buf[MAX_ACCESSION_LEN+1];
3169 
3170 	ifp = sfp_out->data.value.ptrvalue;
3171 	if (StringCmp(ifp->key, "CDS") != 0) {
3172 		return;
3173 	}
3174 	product = sfp_out->product;
3175 	if (product == NULL) {
3176 		return;
3177 	}
3178 	sip = GetProductSeqId(product);
3179 	if (sip) {	/* Get protein bsp	*/
3180 		if (sip->choice == SEQID_GI && is_NTorNG) {
3181 			if ((new_id = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
3182 				SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3183 				SeqIdFree(new_id); /*** need to free it !!! (EY) ***/
3184 			} else {
3185 				sprintf(buf, "%ld", sip->data.intvalue);
3186 			}
3187 			sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
3188 		} else if ((p_bsp = BioseqFind(sip)) != NULL) {
3189 			new_id = GetSeqIdChoice(p_bsp->id);
3190 			if (ajp->forgbrel && new_id == NULL) {
3191 				ErrPostStr(SEV_ERROR, ERR_ACCESSION_NoAccessNum, "");
3192 			} else if (new_id) {
3193 				SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER,
3194 														MAX_ACCESSION_LEN+1);
3195 				sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
3196 			}
3197 		}
3198 	}
3199 	if (p_bsp == NULL) {
3200 		gi = GetGINumFromSip(sip);
3201 		if (gi != -1) {
3202 			if (ajp->show_gi) {
3203 				val[0] = '\0';
3204 				sprintf(val, "PID:g%ld", (long) gi);
3205 				if (val[0] != '\0') {
3206 					sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3207 				}
3208 			}
3209 			if (ajp->show_version) {
3210 				val[0] = '\0';
3211 				sprintf(val, "GI:%ld", (long) gi);
3212 				sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3213 			}
3214 		}
3215 		return;
3216 	}
3217 	for (vnp=p_bsp->id; vnp; vnp=vnp->next) {
3218 		if (vnp->choice == SEQID_GENERAL) {
3219 			db = vnp->data.ptrvalue;
3220 			if (db == NULL) {
3221 				continue;
3222 			}
3223 			val[0] = '\0';
3224 			if (StringNCmp(db->db, "PIDe", 4) == 0) {
3225 					sprintf(val, "PID:e%ld", (long) db->tag->id);
3226 					gi = db->tag->id;
3227 			} else if (StringNCmp(db->db, "PIDd", 4) == 0) {
3228 					sprintf(val, "PID:d%ld", (long) db->tag->id);
3229 					gi = db->tag->id;
3230 			} else if (StringNCmp(db->db, "PID", 3) == 0) {
3231 				if (db->tag && db->tag->str) {
3232 					sprintf(val, "%s:%s", db->db, db->tag->str);
3233 					gi = atoi((db->tag->str)+1);
3234 				}
3235 			}
3236 			if (ajp->show_gi && val[0] != '\0') {
3237 				sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3238 			}
3239 			/*if (ajp->show_version) {
3240 				val[0] = '\0';
3241 				sprintf(val, "GI:%ld", (long) gi);
3242 				sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3243 			}*/
3244 		}
3245 		if (vnp->choice == SEQID_GI) {
3246 			if (ajp->show_gi) {
3247 				val[0] = '\0';
3248 				sprintf(val, "PID:g%ld", (long) vnp->data.intvalue);
3249 				sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3250 			}
3251 			if (ajp->show_version) {
3252 				val[0] = '\0';
3253 				sprintf(val, "GI:%ld", (long) vnp->data.intvalue);
3254 				sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3255 			}
3256 		}
3257 	}
3258 	return;
3259 }	/* AddPID */
3260 
3261 /***************************************************************************
3262 *Int2 MakeGBSelectNote (CharPtr ptr, SeqFeatPtr sfp)
3263 *
3264 *Adds note to CDS GenBankSelect
3265 ***************************************************************************/
MakeGBSelectNote(CharPtr ptr,SeqFeatPtr sfp)3266 NLM_EXTERN Int2 MakeGBSelectNote (CharPtr ptr, SeqFeatPtr sfp)
3267 
3268 {
3269 	Boolean found_select=FALSE, found_match=FALSE;
3270 	CharPtr acc=NULL;
3271 	Int2 number = -1;
3272 	ObjectIdPtr oip=NULL, type;
3273 	UserFieldPtr ufp;
3274 	UserObjectPtr uop=NULL;
3275 
3276 	if (sfp && (uop=sfp->ext) != NULL)
3277 	{
3278 		if (uop->_class && (type=uop->type) != NULL)
3279 		{
3280 			if (StringCmp(uop->_class, "GB-Select") == 0)
3281 				found_select = TRUE;
3282 			if (type->str)
3283 				if (StringCmp(type->str, "SPmatch") == 0)
3284 					found_match = TRUE;
3285 			if (found_match && found_select)
3286 			{
3287 				for (ufp=uop->data; ufp; ufp=ufp->next)
3288 				{
3289 					oip = ufp->label;
3290 					if (oip->id == 2)
3291 					{
3292 						if (ufp->choice == 1)
3293 							acc = ufp->data.ptrvalue;
3294 					}
3295 					else if (oip->id == 3)
3296 					{
3297 						if (ufp->choice == 2)
3298 						{
3299 							number = (Int2) (ufp->data.intvalue);
3300 						}
3301 					}
3302 
3303 				}
3304 				if (number == 1)
3305 					sprintf(ptr,
3306 						"Identical to Swiss-Prot Accession Number %s", acc);
3307 				else if (number == 2 || number == 3)
3308 					sprintf(ptr,
3309 						"Similar to Swiss-Prot Accession Number %s", acc);
3310 			}
3311 		}
3312 	}
3313 	return number;
3314 }
3315 
get_prot_feats(GatherContextPtr gcp)3316 NLM_EXTERN Boolean get_prot_feats (GatherContextPtr gcp)
3317 {
3318 	BioseqPtr	bsp;
3319 	OrganizeProtPtr opp;
3320 	SeqFeatPtr sfp;
3321 	Boolean temp = FALSE;
3322 
3323 	opp = gcp->userdata;
3324 
3325 	switch (gcp->thistype)
3326 	{
3327 		case OBJ_SEQFEAT:
3328 			sfp = (SeqFeatPtr) (gcp->thisitem);
3329 			if (sfp->data.choice == SEQFEAT_PROT ||
3330 					sfp->data.choice == SEQFEAT_REGION ||
3331 					sfp->data.choice == SEQFEAT_BOND ||
3332 						sfp->data.choice == SEQFEAT_SITE) {
3333 				bsp = BioseqFindCore(SeqLocId(sfp->location));
3334 		    	if (gcp->tempload == TRUE) {
3335 		    		temp = TRUE;
3336 		    	}
3337 		    	opp->list = EnlargeSortList(opp->list, opp->size);
3338 				opp->size = StoreFeatTemp(opp->list, sfp, opp->size, bsp, NULL,
3339 					gcp->entityID, gcp->itemID, gcp->thistype,
3340 						gcp->new_loc, NULL, 0, temp);
3341 			}
3342 			break;
3343 		default:
3344 			break;
3345 	}
3346 	return TRUE;
3347 }
3348 
3349 /********************************************************************
3350 *	Int2 CompareStringWithGsp (GeneStructPtr gsp, CharPtr string)
3351 *
3352 *	gsp: GeneStructPtr containing the gene information,
3353 *	gene->synonym in is store in gsp->gene with choice 1 (GetGeneRefInfo)
3354 *	it is not compared to note string
3355 *
3356 *	string: a CharPtr with (possibly) relevant gene information
3357 *		(i.e., gene name, allele, product etc.).
3358 *
3359 *	A comparison is made between string and the information already
3360 *	stored in the gsp.  Following the convention for StringCmp,
3361 *	"0" is returned if a match is found, otherwise "1" is returned.
3362 *	At present (2/7/94) GeneStringCmp is a #define for StringCmp.
3363 ************************************************************************/
3364 
CompareStringWithGsp(GeneStructPtr gsp,CharPtr string)3365 NLM_EXTERN Int2 CompareStringWithGsp (GeneStructPtr gsp, CharPtr string)
3366 
3367 {
3368 	CharPtr ascii, start;
3369 	Int2 ascii_len;
3370 	ValNodePtr vnp;
3371 
3372 	for (vnp=gsp->gene; vnp; vnp=vnp->next)
3373 	{
3374 		if (vnp->choice == 1) {
3375 			continue;
3376 		}
3377 		ascii_len = Sgml2AsciiLen(vnp->data.ptrvalue);
3378 		start = ascii = MemNew((size_t) (10+ascii_len));
3379 		ascii = Sgml2Ascii(vnp->data.ptrvalue, ascii, ascii_len+1);
3380 		if (GeneStringCmp(start, string) == 0)
3381 		{
3382 			start = MemFree(start);
3383 			return 0;
3384 		}
3385 		start = MemFree(start);
3386 	}
3387 	vnp=gsp->product;
3388 	if (vnp != NULL)
3389 	{
3390 		if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3391 			return 0;
3392 	}
3393 	for (vnp=gsp->standard_name; vnp; vnp=vnp->next)
3394 	{
3395 		if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3396 			return 0;
3397 	}
3398 	if (gsp->map[0] && GeneStringCmp(gsp->map[0], string) == 0)
3399 		return 0;
3400 	if (gsp->ECNum)
3401 	for (vnp=gsp->ECNum; vnp; vnp=vnp->next)
3402 	{
3403 		if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3404 			return 0;
3405 	}
3406 
3407 	return 1;
3408 }	/* CompareStringWithGsp */
3409 
GetDBXrefFromGene(GeneRefPtr grp,SeqFeatPtr sfp)3410 NLM_EXTERN void GetDBXrefFromGene (GeneRefPtr grp, SeqFeatPtr sfp)
3411 
3412 {
3413 	CharPtr dbase;
3414 	DbtagPtr dbtp;
3415 	ValNodePtr tmp;
3416 	Char buffer[50];
3417 
3418 	if (grp == NULL) {
3419 		return;
3420 	}
3421 	for (tmp = grp->db; tmp != NULL; tmp=tmp->next) {
3422 	    dbtp = tmp->data.ptrvalue;
3423 	    if (dbtp && dbtp->db && dbtp->tag) {
3424 			dbase = MemNew(StringLen(dbtp->db) + 3);
3425 			sprintf(dbase, "%s:", dbtp->db);
3426 			if (dbtp->tag->str) {
3427 				sprintf(buffer, "%s%s", dbase, dbtp->tag->str);
3428 				sfp->qual = AddGBQual(sfp->qual, "db_xref", buffer);
3429 			} else if (dbtp->tag->id) {
3430 				sprintf(buffer, "%s%ld", dbase, (long) dbtp->tag->id);
3431 				sfp->qual = AddGBQual(sfp->qual, "db_xref", buffer);
3432 			}
3433 			MemFree(dbase);
3434 	    }
3435 	}
3436 
3437 	return;
3438 }
3439 
3440 /****************************************************************************
3441 *	void GetProtRefInfo (GeneStructPtr gsp, NoteStructPtr nsp, ProtRefPtr prp)
3442 *
3443 *	gsp: GeneStructPtr containing gene information
3444 *	prp: ProtRefPtr from a sfp of type protein or a sfp xref.
3445 *
3446 *	If fields are empty on the gsp, and the relevant information
3447 *	is given by the prp, that field is filled on the gsp
3448 ****************************************************************************/
GetProtRefInfo(Uint1 format,GeneStructPtr gsp,NoteStructPtr nsp,ProtRefPtr prp)3449 NLM_EXTERN void GetProtRefInfo (Uint1 format, GeneStructPtr gsp, NoteStructPtr nsp, ProtRefPtr prp)
3450 {
3451 	ValNodePtr tmp, vnp;
3452 
3453 	if (prp == NULL) {
3454 		return;
3455 	}
3456 	for (vnp=prp->name; vnp; vnp=vnp->next) {
3457 		tmp = ValNodeNew(NULL);
3458 		tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
3459 		gsp->product = tie_next(gsp->product, tmp);
3460 	}
3461 	for (vnp=prp->ec; vnp; vnp=vnp->next) {
3462 		tmp = ValNodeNew(NULL);
3463 		tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
3464 		gsp->ECNum = tie_next(gsp->ECNum, tmp);
3465 	}
3466 	for (vnp=prp->activity; vnp; vnp=vnp->next) {
3467 		tmp = ValNodeNew(NULL);
3468 		tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
3469 		gsp->activity = tie_next(gsp->activity, tmp);
3470 	}
3471 	if (format != GENPEPT_FMT) {
3472 		if (prp->desc) {
3473 			SaveNoteToCharPtrStack(nsp, NULL, prp->desc);
3474 		}
3475 	}
3476 	return;
3477 }
3478 
3479 /****************************************************************************
3480 *
3481 *	sfp: SeqFeatPtr for CDS
3482 *	nsp: NoteStructPtr
3483 *
3484 * Used to get comments from the Protein for use in a CDS /note.
3485 *
3486 * Take the main protein ONLY (not sig_peptide mat_peptide)
3487 *
3488 * Will find the Protein Pubs, as they are needed and (presumably) haven't
3489 * been found yet, so as to save "upfront" time when the formatter is
3490 * running in Entrez.
3491 ****************************************************************************/
GetProtRefComment(SeqFeatPtr sfp,BioseqPtr bsp,Asn2ffJobPtr ajp,OrganizeProtPtr opp,NoteStructPtr nsp,Uint1 method)3492 static void GetProtRefComment (SeqFeatPtr sfp, BioseqPtr bsp, Asn2ffJobPtr ajp, OrganizeProtPtr opp, NoteStructPtr nsp, Uint1 method)
3493 {
3494 	Boolean first_done=FALSE, protein=FALSE;
3495 	CharPtr ptr = NULL, string=NULL, string1=NULL, newstring=NULL, temp, s;
3496 	CharPtr conflict_msg_no_protein="Coding region translates with internal stops";
3497 /*	CharPtr except_msg_no_protein="Coding region translates with internal stops for reasons explained in citation. "; -- except_msg_no_protein UNUSED */
3498 	CharPtr conflict_msg="Protein sequence is in conflict with the conceptual translation";
3499 /*	CharPtr except_msg="Protein sequence differs from the conceptual translation for reasons explained in citation. "; -- except_msg UNUSED */
3500 	CdRegionPtr cdr=NULL;
3501 	Int2 total=0, i;
3502 	PubdescPtr pdp;
3503 	PubStructPtr psp;
3504 	SeqFeatPtr sfp_local=NULL;
3505 	ValNodePtr descr, vnp, vnp1, vnp1next, product;
3506 	MolInfoPtr mfp;
3507 	GatherScope gs;
3508 	SeqLocPtr slp;
3509 	ProtRefPtr prot_local;
3510 	SeqMgrFeatContext fcontext;
3511 	SeqMgrDescContext dcontext;
3512 	GatherContext gc;
3513 	SeqFeatPtr psfp;
3514 	ValNodePtr psdp;
3515 	ObjMgrDataPtr omdp;
3516 	SeqSubmitPtr ssp;
3517 	SubmitBlockPtr sbp;
3518 	CharPtr prefix = "";
3519 
3520 	if (ajp->useSeqMgrIndexes) {
3521 		sfp_local = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PROT, 0, &fcontext);
3522 		while (sfp_local != NULL) {
3523 			prot_local = sfp_local->data.value.ptrvalue;
3524 			if (prot_local->processed <= 1) {
3525 				if (first_done) {
3526 					if (StringLen(sfp_local->comment)) {
3527 						string1 = CheckEndPunctuation(sfp_local->comment, '\0');
3528 						if (StringCmp(string, string1) != 0) {
3529 							newstring = Cat2Strings(string, string1, "; ", 0);
3530 							string = MemFree(string);
3531 							string = newstring;
3532 						}
3533 						string1 = MemFree(string1);
3534 					}
3535 				} else {
3536 					if (StringLen(sfp_local->comment)) {
3537 						string = CheckEndPunctuation(sfp_local->comment, '\0');
3538 						first_done = TRUE;
3539 					}
3540 				}
3541 			}
3542 			sfp_local = SeqMgrGetNextFeature (bsp, sfp_local, SEQFEAT_PROT, 0, &fcontext);
3543 		}
3544 	} else if (opp != NULL) {
3545 		for (i = 0; i < opp->size; i++) {
3546 			if ((sfp_local = opp->list[i].sfp) == NULL) {
3547 				continue;
3548 			}
3549 			if (sfp_local->data.choice != SEQFEAT_PROT) {
3550 				continue;
3551 			}
3552 			prot_local = sfp_local->data.value.ptrvalue;
3553 			if (prot_local->processed > 1) {
3554 				continue;
3555 			}
3556 			if (first_done) {
3557 				if (StringLen(sfp_local->comment)) {
3558 					string1 = CheckEndPunctuation(sfp_local->comment, '\0');
3559 					if (StringCmp(string, string1) != 0) {
3560 						newstring = Cat2Strings(string, string1, "; ", 0);
3561 						string = MemFree(string);
3562 						string = newstring;
3563 					}
3564 					string1 = MemFree(string1);
3565 				}
3566 			} else {
3567 				if (StringLen(sfp_local->comment)) {
3568 					string = CheckEndPunctuation(sfp_local->comment, '\0');
3569 					first_done = TRUE;
3570 				}
3571 			}
3572 		}
3573 	}
3574 
3575 	if (bsp && (descr=bsp->descr) != NULL) {
3576 		for (vnp=descr; vnp; vnp=vnp->next) {
3577 			if (vnp->choice == Seq_descr_comment) {
3578 				if (first_done) {
3579 					if (StringLen(vnp->data.ptrvalue)) {
3580 						string1 = CheckEndPunctuation(vnp->data.ptrvalue, '\0');
3581 						if (StringCmp(string, string1) != 0) {
3582 							newstring = Cat2Strings(string, string1, "; ", 0);
3583 							string = MemFree(string);
3584 							string = newstring;
3585 						}
3586 						string1 = MemFree(string1);
3587 					}
3588 				} else {
3589 					if (StringLen(vnp->data.ptrvalue)) {
3590 						string = CheckEndPunctuation(vnp->data.ptrvalue, '\0');
3591 						first_done = TRUE;
3592 					}
3593 				}
3594 			} else if (vnp->choice == Seq_descr_molinfo) {
3595 				mfp = vnp->data.ptrvalue;
3596 				if (mfp && mfp->tech > 1 && mfp->tech != 8) {
3597 					if (mfp->tech == MI_TECH_concept_trans_a) {
3598 				/*		s = StringForSeqMethod(method); */
3599 						s = NULL;
3600 					} else {
3601 						s = StringForSeqTech(mfp->tech);
3602 					}
3603 					if (s!= NULL && *s != '\0') {
3604 						ptr = MemNew(StringLen(s) + 10);
3605 						sprintf(ptr, "Method: %s", s);
3606 					}
3607 					if (first_done) {
3608 						newstring = Cat2Strings(string, ptr, "; ", 0);
3609 						string = MemFree(string);
3610 						string = newstring;
3611 					} else {
3612 						string = StringSave(ptr);
3613 						first_done = TRUE;
3614 					}
3615 					MemFree(ptr);
3616 				}
3617 			} else if (vnp->choice == Seq_descr_method) {
3618 				if (vnp->data.intvalue > 1) {
3619 					if (method == METHOD_concept_transl_a) {
3620 					/*	s = StringForSeqMethod(method);*/
3621 						s = NULL;
3622 					} else {
3623 						s = StringForSeqMethod((Uint1)(vnp->data.intvalue));
3624 					}
3625 					if (s!= NULL && *s != '\0') {
3626 						ptr = MemNew(StringLen(s) + 10);
3627 						sprintf(ptr, "Method: %s", s);
3628 					}
3629 
3630 					if (first_done) {
3631 						newstring = Cat2Strings(string, ptr, "; ", 0);
3632 						string = MemFree(string);
3633 						string = newstring;
3634 					} else {
3635 						string = StringSave(ptr);
3636 						first_done = TRUE;
3637 					}
3638 					MemFree(ptr);
3639 				}
3640 			}
3641 		}
3642 	}
3643 /* gather pubs on protein bioseq do not do checking or sorting*/
3644 	vnp = NULL;
3645 	if (ajp->useSeqMgrIndexes) {
3646 		/* finess calls to get_pubs */
3647 		MemSet ((Pointer) (&gc), 0, sizeof (GatherContext));
3648 		gc.userdata = (Pointer) (&vnp);
3649 		gc.entityID = ajp->entityID;
3650 		psdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
3651 		while (psdp != NULL) {
3652 			gc.thistype = OBJ_SEQDESC;
3653 			gc.itemID = dcontext.itemID;
3654 			gc.thisitem = (Pointer) psdp;
3655 			omdp = dcontext.omdp;
3656 			if (omdp != NULL) {
3657 				gc.parenttype = omdp->datatype;
3658 				gc.parentitem = omdp->dataptr;
3659 			} else {
3660 				gc.parenttype = 0;
3661 				gc.parentitem = NULL;
3662 			}
3663 			get_pubs (&gc);
3664 			psdp = SeqMgrGetNextDescriptor (bsp, psdp, Seq_descr_pub, &dcontext);
3665 		}
3666 		psfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
3667 		while (psfp != NULL) {
3668 			gc.thistype = OBJ_SEQFEAT;
3669 			gc.itemID = dcontext.itemID;
3670 			gc.thisitem = (Pointer) psfp;
3671 			get_pubs (&gc);
3672 			psfp = SeqMgrGetNextFeature (bsp, psfp, SEQFEAT_PUB, 0, &fcontext);
3673 		}
3674 		omdp = ObjMgrGetData (ajp->entityID);
3675 		if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
3676 			ssp = (SeqSubmitPtr) omdp->dataptr;
3677 			if (ssp != NULL) {
3678 				sbp = ssp->sub;
3679 				if (sbp != NULL) {
3680 					gc.thistype = OBJ_SUBMIT_BLOCK;
3681 					gc.itemID = 1;
3682 					gc.thisitem = (Pointer) sbp;
3683 					get_pubs (&gc);
3684 				}
3685 			}
3686 		}
3687 		/* also submit block */
3688 	} else {
3689   		MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
3690 /*	MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
3691 		gs.ignore[OBJ_SEQENTRY] = FALSE;
3692 		gs.ignore[OBJ_BIOSEQ] = FALSE;
3693 		gs.ignore[OBJ_SEQDESC] = FALSE;*/
3694 		gs.ignore[OBJ_SEQSUB] = TRUE;
3695 		gs.ignore[OBJ_SEQSUB_CIT] = TRUE;
3696 		slp = ValNodeNew(NULL);
3697 		slp->choice = SEQLOC_WHOLE;
3698 		slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
3699 		gs.target = slp;
3700 		gs.seglevels = 4;
3701 
3702 		GatherEntity(ajp->entityID, &vnp, get_pubs, &gs);
3703 		if (slp)
3704 			SeqLocFree(slp);
3705 	}
3706 /*	if ((status = CheckPubs(ajp, bsp, &vnp)) < 0) {
3707 			ValNodeFree(vnp);
3708 			vnp = NULL;
3709 	}
3710 	vnp = OrganizePubList(vnp); */
3711 	for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
3712 		psp = vnp1->data.ptrvalue;
3713 		if ((pdp=psp->descr) != NULL) {
3714 			if (pdp->fig) {
3715 				total += 32;
3716 				total += StringLen(pdp->fig);
3717 			}
3718 			if (pdp->maploc) {
3719 				total += 22;
3720 				total += StringLen(pdp->maploc);
3721 			}
3722 		}
3723 	}
3724 
3725 	if (sfp) {
3726 		cdr = (CdRegionPtr) sfp->data.value.ptrvalue;
3727 		product = sfp->product;
3728 		if (product && SeqLocLen(product))
3729 			protein = TRUE;
3730 		if (sfp->excpt)
3731 			total += 112;
3732 		if (cdr && cdr->conflict && (protein || ! sfp->excpt))
3733 			total += 112;
3734 	}
3735 
3736 	string1 = (CharPtr) MemNew(total*sizeof(Char));
3737 
3738 	for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
3739 		psp = vnp1->data.ptrvalue;
3740 		if ((pdp=psp->descr) != NULL) {
3741 			if (pdp->fig) {
3742 
3743 				temp = CheckEndPunctuation(pdp->fig, '\0');
3744 				total = StringLen(string1);
3745 
3746 				sprintf(string1+total, "This sequence comes from %s", temp);
3747 				prefix = "; ";
3748 				temp = MemFree(temp);
3749 			}
3750 			if (pdp->maploc) {
3751 				total = StringLen(string1);
3752 				sprintf(string1+total, "%sMap location %s", prefix, pdp->maploc);
3753 				prefix = "; ";
3754 			}
3755 		}
3756 	}
3757 
3758 	if (sfp) {
3759 		if (cdr && cdr->conflict && (protein || ! sfp->excpt)) {
3760 			total = StringLen(string1);
3761 			sprintf(string1+total, "%s%s", prefix,
3762 					protein?conflict_msg:conflict_msg_no_protein);
3763 		}
3764 	}
3765 	if (string && string1) {
3766 		newstring = Cat2Strings(string, string1, "; ", 0);
3767 		string = MemFree(string);
3768 		string1 = MemFree(string1);
3769 	} else if (string) {
3770 		newstring = string;
3771 	} else if (string1) {
3772 		newstring = string1;
3773 	}
3774 
3775 	if (newstring) {
3776 		SaveNoteToCharPtrStack(nsp, NULL, newstring);
3777 		newstring = MemFree(newstring);
3778 	}
3779 	for (vnp1=vnp; vnp1; vnp1=vnp1next) {
3780 		vnp1next = vnp1->next;
3781 		psp = vnp1->data.ptrvalue;
3782 		FreePubStruct(psp);
3783 		MemFree(vnp1);
3784 	}
3785 	return;
3786 }	/* GetProtRefComment */
3787 
AddModifsToGBQual(GBEntryPtr gbp,GBQualPtr gbqual)3788 NLM_EXTERN GBQualPtr AddModifsToGBQual (GBEntryPtr gbp, GBQualPtr gbqual)
3789 {
3790 	CharPtr ptr;
3791 	ValNodePtr descr, man;
3792 
3793 	descr=BioseqGetSeqDescr(gbp->bsp, Seq_descr_modif, NULL);
3794 	if (descr) {
3795 		for (man = (ValNodePtr) descr-> data.ptrvalue; man != NULL; man = man -> next){
3796 			switch (man -> data.intvalue){
3797 			case 3: case 14:
3798 				ptr = AsnEnumStr("GIBB-mod",
3799 		  		   (Int2) man->data.intvalue);
3800 				if (GBQualPresent(ptr, gbqual) == FALSE)
3801 					gbqual = AddGBQual(gbqual, ptr, " ");
3802 				break;
3803 			case 4:
3804 				if (GBQualPresent("mitochondrion", gbqual) == FALSE)
3805 					gbqual = AddGBQual(gbqual, "mitochondrion", NULL);
3806 				break;
3807 			case 15:
3808 				if (GBQualPresent("insertion_seq", gbqual) == FALSE)
3809 					gbqual = AddGBQual(gbqual, "insertion_seq", " ");
3810 				break;
3811 			case 5: case 6: case 7: case 18: case 19:
3812 				ptr = AsnEnumStr("GIBB-mod",
3813 		  		   (Int2) man->data.intvalue);
3814 				if (GBQualPresent(ptr, gbqual) == FALSE)
3815 					gbqual = AddGBQual(gbqual, ptr, NULL);
3816 				break;
3817 			default:
3818 				break;
3819 			}
3820 		}
3821 	}
3822 	return gbqual;
3823 }	/* AddModifsToGBQual */
3824 
3825 /*************************************************************************
3826 *GBQualPtr AddOrgRefModToGBQual (OrgRefPtr orp, GBQualPtr gbqual);
3827 *
3828 *Add the OrgRef.mod to a source feat.  Note: a few of the quals added
3829 *may be illegal for a source feature, but the validator will catch them
3830 *in the end.
3831 ***************************************************************************/
AddOrgRefModToGBQual(OrgRefPtr orp,GBQualPtr gbqual)3832 NLM_EXTERN GBQualPtr AddOrgRefModToGBQual (OrgRefPtr orp, GBQualPtr gbqual)
3833 
3834 {
3835 	CharPtr mod, ptr, temp_ptr;
3836 	Char temp[ASN2FF_STD_BUF]; /* ASN2FF_STD_BUF (now 35) is longer than
3837 any qual. */
3838 	Int2 index;
3839 	ValNodePtr vnp;
3840 
3841 	if (orp && orp->mod)
3842 	{
3843 		for (vnp=orp->mod; vnp; vnp=vnp->next)
3844 		{
3845 			mod = vnp->data.ptrvalue;
3846 			if (StringNCmp(mod, "citation", 8) == 0)
3847 				continue;
3848 			index=0;
3849 			for (ptr=mod; *ptr != '\0'; ptr++)
3850 			{
3851 				index++;
3852 				if (*ptr == ' ' || *ptr == '=')
3853 				{
3854 					ptr++;
3855 					index--;
3856 					break;
3857 				}
3858 			}
3859 			if (index > ASN2FF_STD_BUF-1)
3860 				continue;
3861 
3862 			temp_ptr = &(temp[0]);
3863 			StringNCpy(temp_ptr, mod, index);
3864 			temp[index] = '\0';
3865 			if ((GBQualNameValid(temp_ptr)) == -1)
3866 				continue;
3867 			if (ptr)
3868 				gbqual = AddGBQual(gbqual, temp_ptr, ptr);
3869 			else
3870 				gbqual = AddGBQual(gbqual, temp_ptr, NULL);
3871 		}
3872 	}
3873 	return gbqual;
3874 }	/* AddOrgRefModToGBQual */
3875 
3876 /*************************************************************************
*GBQualPtr AddBioSourceToGBQual (Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release);
3878 *
3879 *Add the OrgMod.subtypes and SubSource.subtypes to a source feat.
3880 *Add BioSource.genome to a source feat.
3881 *Note: a few of the quals added may be illegal for a source feature,
3882 *but the validator will catch them in the end.
3883 ***************************************************************************/
3884 
3885 static CharPtr organelleQual [] = {
3886   NULL,
3887   NULL,
3888   "plastid:chloroplast",
3889   "plastid:chromoplast",
3890   "mitochondrion:kinetoplast",
3891   "mitochondrion",
3892   "plastid",
3893   NULL,
3894   NULL,
3895   NULL,
3896   NULL,
3897   NULL,
3898   "plastid:cyanelle",
3899   NULL,
3900   NULL,
3901   "nucleomorph",
3902   "plastid:apicoplast",
3903   "plastid:leucoplast",
3904   "plastid:proplastid",
3905   NULL
3906 };
3907 
AddBioSourceToGBQual(Asn2ffJobPtr ajp,NoteStructPtr nsp,BioSourcePtr biosp,GBQualPtr gbqual,Boolean new_release)3908 NLM_EXTERN GBQualPtr AddBioSourceToGBQual (Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release)
3909 {
3910 	CharPtr qual, val = NULL;
3911 	OrgModPtr omp;
3912 	OrgNamePtr onp;
3913 	SubSourcePtr ssp;
3914 	Int2 i;
3915 	Int4 id = -1;
3916 	DbtagPtr db = NULL;
3917 	OrgRefPtr org;
3918 	ValNodePtr vnp;
3919 	CharPtr s;
3920 
3921 	if (biosp == NULL)
3922 		return gbqual;
3923 	if (biosp->genome) {
3924 		i = biosp->genome;
3925 		if (i > 1 && i < 20) {
3926 			val = organelleQual [i];
3927 			if (val != NULL) {
3928 				gbqual = AddGBQual (gbqual, "organelle", val);
3929 			} else if (i < num_genome) {
3930 				qual = genome[i];
3931 				if (qual && (GBQualNameValid(qual)) != -1) {
3932 					if (i == 8) {  /*extrachrom*/
3933 						gbqual = AddGBQual(gbqual, "note", "extrachromosomal");
3934 					} else {
3935 						gbqual = AddGBQual(gbqual, qual, val);
3936 					}
3937 				} else if (qual && i == 8) {
3938 					gbqual = AddGBQual(gbqual, "note", "extrachromosomal");
3939 				}
3940 			}
3941 		}
3942 	}
3943 	org = (OrgRefPtr) biosp->org;
3944 	if (org) {
3945 		if ((onp = (OrgNamePtr) org->orgname) != NULL) {
3946 			for (omp=onp->mod; omp != NULL; omp=omp->next) {
3947 				for (i=0; orgmod_subtype[i].name != NULL; i++) {
3948 					if (omp->subtype == orgmod_subtype[i].num)
3949 						break;
3950 				}
3951 				if (orgmod_subtype[i].name == NULL) {
3952 					continue;
3953 				}
3954 				if (orgmod_subtype[i].num == 253) { /* old_lineage */
3955 					continue;
3956 				}
3957 				if (orgmod_subtype[i].num == 254) { /* old_name */
3958 					continue;
3959 				}
3960 				qual = orgmod_subtype[i].name;
3961 				if (orgmod_subtype[i].num == 21) {   /* nat_hos */
3962 					qual = "specific_host";
3963 				}
3964 				if ((val = omp->subname) == NULL)
3965 					val = "";
3966 				if ((GBQualNameValid(qual)) != -1) {
3967 					gbqual = AddGBQual(gbqual, qual, val);
3968 				} else {
3969 					s = MemNew(StringLen(val) +
3970 							StringLen(qual) + 3);
3971 					sprintf(s, "%s: %s", qual, val);
3972 					CpNoteToCharPtrStack(nsp, NULL, s);
3973 				}
3974 			}
3975 		}
3976 /* add db_xref */
3977 		val = NULL;
3978 		for (vnp=org->db; vnp; vnp=vnp->next) {
3979 			id = -1;
3980 			db = (DbtagPtr) vnp->data.ptrvalue;
3981 			if (db && db->db) {
3982 				for (i =0; i < DBNUM; i++) {
3983 					if (StringCmp(db->db, dbtag[i]) == 0) {
3984 						id = i;
3985 						break;
3986 					}
3987 				}
3988 				if (id == -1) {
3989 					continue;  /* unknown dbtag */
3990 				}
3991 			}
3992 			if (db->tag && db->tag->str) {
3993 				val = MemNew(StringLen(db->db)+StringLen(db->tag->str)+2);
3994 				sprintf(val, "%s:%s", db->db, db->tag->str);
3995 			} else if (db->tag) {
3996 				val = MemNew(StringLen(db->db)+16);
3997 				sprintf(val, "%s:%ld", db->db, (long) db->tag->id);
3998 			}
3999 			if (val[0] != '\0') {
4000 				gbqual = AddGBQual(gbqual, "db_xref", val);
4001 				MemFree(val);
4002 			}
4003 		}
4004 	}
4005 	for (ssp = biosp->subtype; ssp != NULL; ssp=ssp->next) {
4006 		qual = NULL;
4007 		if (ssp->subtype == 255) {
4008 			qual = "note";
4009 		} else if (ssp->subtype > num_subtype) {
4010 			qual = NULL;
4011 		} else if (ssp->subtype > 0) {
4012 			qual = subtype[ssp->subtype - 1];
4013 		} else {
4014 			qual = "?";
4015 		}
4016 		val = ssp->name;
4017 		if (ssp->subtype != 14 && ssp->subtype != 15) {
4018 			if (val == NULL)
4019 				val = "";
4020 			}
4021 		if ((GBQualNameValid(qual)) == -1) {
4022 			if (qual == NULL) {
4023 				qual = "?";
4024 			}
4025 			s = MemNew(StringLen(val) + StringLen(qual) + 3);
4026 			sprintf(s, "%s: %s", qual, val);
4027 			CpNoteToCharPtrStack(nsp, NULL, s);
4028 		} else {
4029 			gbqual = AddGBQual(gbqual, qual, val);
4030 		}
4031 	}
4032 	if (biosp->is_focus == TRUE) {
4033 		gbqual = AddGBQual(gbqual, "focus", NULL);
4034 	}
4035 	return gbqual;
4036 }	/* AddBioSourceToGBQual */
4037 
4038 /****************************************************************************
4039 *PrintImpFeatEx
4040 *
4041 *	This code prints out an ImpFeat in GenBank and HTML format.
4042 *
4043 ****************************************************************************/
PrintImpFeatEx(Asn2ffJobPtr ajp,BioseqPtr bsp,SeqFeatPtr sfp,BIG_ID gi,Int2 entityID,Uint4 itemID)4044 NLM_EXTERN Int2 PrintImpFeatEx (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp, BIG_ID gi, Int2 entityID, Uint4 itemID)
4045 {
4046 	CharPtr flatloc_ptr, key, loc;
4047 	GBQualPtr gbqp;
4048 	ImpFeatPtr ifp;
4049 	Uint1 class_qual, format=ajp->format;
4050 	Int2 class_equal, gbqual_index;
4051 	static CharPtr buf = NULL;
4052 	Uint2 retval;
4053 	ValNodePtr seqid;
4054 	CharPtr p, q;
4055 
4056 	if (sfp == NULL)
4057 		return -1;
4058 	if (sfp->data.choice != SEQFEAT_IMP)
4059 		return -1;
4060 	ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4061 	key = ifp->key;
4062 	loc = ifp->loc;
4063 
4064 	for (seqid = ajp->id_print; seqid; seqid=seqid->next) {
4065 		if (seqid->choice == SEQID_GI) {
4066 		}
4067 	}
4068 	if (format == EMBL_FMT || format == PSEUDOEMBL_FMT ||
4069 						format == EMBLPEPT_FMT)
4070 		ff_StartPrint(5, 21, ASN2FF_EMBL_MAX, "FT");
4071 	else
4072 		ff_StartPrint(5, 21, ASN2FF_GB_MAX, NULL);
4073 
4074 	if (ajp->slp) {
4075 		ff_AddString(key);
4076 	} else {
4077 		www_featkey(key, gi, entityID, itemID);
4078 	}
4079 	TabToColumn(22);
4080 	if (loc == NULL) {
4081 		flatloc_ptr = FlatLoc(bsp, sfp->location);
4082 		if (get_www()) {
4083 			buf = www_featloc(flatloc_ptr);
4084 			ff_AddString(buf);
4085 			MemFree(buf);
4086 		} else {
4087 			ff_AddString(flatloc_ptr);
4088 		}
4089 		MemFree(flatloc_ptr);
4090 	} else {
4091 		if (get_www()) {
4092 			buf = www_featloc(loc);
4093 			ff_AddString(buf);
4094 			MemFree(buf);
4095 		} else {
4096 			ff_AddString(loc);
4097 		}
4098 	}
4099 	if (sfp->partial == TRUE) {
4100 		retval = SeqLocPartialCheck(sfp->location);
4101 		if (retval == SLP_COMPLETE || retval > SLP_OTHER) {
4102 			NewContLine();
4103 			ff_AddString("/partial");
4104 		}
4105 	}
4106 	for (gbqp=sfp->qual; gbqp; gbqp=gbqp->next) {
4107 		gbqual_index = GBQualNameValid(gbqp->qual);
4108 		if (gbqual_index != -1) {
4109 			NewContLine();
4110 			ff_AddChar( '/');
4111 			ff_AddString(gbqp->qual);
4112 			class_qual = ParFlat_GBQual_names[gbqual_index].gbclass;
4113 			if (class_qual == Class_none) {
4114 				class_equal=CheckForEqualSign(gbqp->qual);
4115 				if (class_equal == 1)
4116 					continue;
4117 			}
4118 			ff_AddChar('=');
4119 			if (class_qual == Class_text &&
4120 				StringCmp(gbqp->val, "\"\"") == 0) {
4121 				ff_AddString(gbqp->val);
4122 				continue;
4123 			}
4124 			if (get_www() && (class_qual == Class_text
4125 							|| class_qual == Class_note)) {
4126 				buf = www_featloc(gbqp->val);
4127 			} else {
4128 				buf = StringSave(gbqp->val);
4129 			}
4130 			if (class_qual == Class_text || class_qual == Class_none
4131 				|| class_qual == Class_ecnum || class_qual == Class_note)
4132 				ff_AddString("\"");
4133 			if (class_qual == Class_note) {
4134 				/* start of process tildes */
4135 				if (StringCmp (gbqp->qual, "note") == 0) {
4136 					for (p = buf, q = buf; *p != '\0'; *q++ = *p++) {
4137 						if (*p != '~')
4138 							continue;
4139 						if (p [1] != '~')
4140 							*p = '\n';
4141 						else
4142 							p++;
4143 					}
4144 					*q = '\0';
4145 				}
4146 				/* end of process tildes */
4147 				www_note_gi(buf);
4148 			} else if (class_qual != Class_none) {
4149 				if (StringCmp(gbqp->qual, "transl_table") == 0) {
4150 					www_gcode(buf);
4151 				} else if (StringCmp(gbqp->qual, "db_xref") == 0) {
4152 					www_db_xref(buf);
4153 				} else if (StringCmp(gbqp->qual, "protein_id") == 0 ||
4154 					StringCmp(gbqp->qual, "transcript_id") == 0) {
4155 					www_protein_id(buf);
4156 				} else {
4157 					ff_AddString(buf);
4158 				}
4159 			}
4160 			if (class_qual == Class_text || class_qual == Class_none
4161 				|| class_qual == Class_ecnum || class_qual == Class_note)
4162 				ff_AddString("\"");
4163 			if (buf) {
4164 				MemFree(buf);
4165 			}
4166 		} else if (format == GENPEPT_FMT) {
4167 			if (StringCmp(gbqp->qual, "site_type") == 0) {
4168 				NewContLine();
4169 				ff_AddChar('/');
4170 				ff_AddString(gbqp->qual);
4171 				ff_AddChar('=');
4172 				ff_AddString("\"");
4173 				ff_AddString(gbqp->val);
4174 				ff_AddString("\"");
4175 			} else if (StringCmp(gbqp->qual, "bond_type") == 0) {
4176 				NewContLine();
4177 				ff_AddChar('/');
4178 				ff_AddString(gbqp->qual);
4179 				ff_AddChar('=');
4180 				ff_AddString("\"");
4181 				ff_AddString(gbqp->val);
4182 				ff_AddString("\"");
4183 			} else if (StringCmp(gbqp->qual, "region_name") == 0) {
4184 				NewContLine();
4185 				ff_AddChar('/');
4186 				ff_AddString(gbqp->qual);
4187 				ff_AddChar('=');
4188 				ff_AddString("\"");
4189 				ff_AddString(gbqp->val);
4190 				ff_AddString("\"");
4191 			} else if (StringCmp(gbqp->qual, "sec_str_type") == 0) {
4192 				NewContLine();
4193 				ff_AddChar('/');
4194 				ff_AddString(gbqp->qual);
4195 				ff_AddChar('=');
4196 				ff_AddString("\"");
4197 				ff_AddString(gbqp->val);
4198 				ff_AddString("\"");
4199 			} else if (StringCmp(gbqp->qual, "non-std-residue") == 0) {
4200 				NewContLine();
4201 				ff_AddChar('/');
4202 				ff_AddString(gbqp->qual);
4203 				ff_AddChar('=');
4204 				ff_AddString("\"");
4205 				ff_AddString(gbqp->val);
4206 				ff_AddString("\"");
4207 			} else if (StringCmp(gbqp->qual, "heterogen") == 0) {
4208 				NewContLine();
4209 				ff_AddChar('/');
4210 				ff_AddString(gbqp->qual);
4211 				ff_AddChar('=');
4212 				ff_AddString("\"");
4213 				ff_AddString(gbqp->val);
4214 				ff_AddString("\"");
4215 			} else if (StringCmp(gbqp->qual, "name") == 0) {
4216 				NewContLine();
4217 				ff_AddChar('/');
4218 				ff_AddString(gbqp->qual);
4219 				ff_AddChar('=');
4220 				ff_AddString("\"");
4221 				ff_AddString(gbqp->val);
4222 				ff_AddString("\"");
4223 			} else if (StringCmp(gbqp->qual, "coded_by") == 0) {
4224 				NewContLine();
4225 				ff_AddChar('/');
4226 				ff_AddString(gbqp->qual);
4227 				ff_AddChar('=');
4228 				ff_AddString("\"");
4229 				ff_AddString(gbqp->val);
4230 				ff_AddString("\"");
4231 			}
4232 		} else if (ASN2FF_VALIDATE_FEATURES == FALSE) {
4233 			NewContLine();
4234 			ff_AddChar('/');
4235 			ff_AddString(gbqp->qual);
4236 			if (gbqp->val != NULL && StringLen(gbqp->val) != 0) {
4237 				ff_AddChar('=');
4238 				ff_AddString("\"");
4239 				ff_AddString(gbqp->val);
4240 				ff_AddString("\"");
4241 			}
4242 		}
4243 	}
4244 
4245 	ff_EndPrint();
4246 
4247 	return 1;
4248 } /*PrintImpFeatEx */
4249 
/*
 * Unlinks node x from the list starting at *head and returns it with its
 * next pointer cleared.  *head is updated when x was the first node.
 * Returns NULL when the list is empty or x is not a member of it.
 */
static GBQualPtr extract_qual(GBQualPtr PNTR head, GBQualPtr x)
{
	GBQualPtr	prev, cur;

	if (*head == NULL) {
		return NULL;
	}
	if (*head == x) {
		/* x is the first node: advance the head past it */
		*head = x->next;
		x->next = NULL;
		return x;
	}

	/* walk with a trailing pointer until x or the end is reached */
	prev = *head;
	cur = prev->next;
	while (cur != NULL && cur != x) {
		prev = cur;
		cur = cur->next;
	}
	if (cur == NULL) {
		return NULL;
	}
	prev->next = x->next;
	x->next = NULL;
	return x;
}
/*
 * Appends the list `next` to the end of the list `head`.
 * Returns `next` when `head` is empty, otherwise `head`.
 */
static GBQualPtr tie_next_qual(GBQualPtr head, GBQualPtr next)
{
	GBQualPtr tail;

	if (head == NULL) {
		return next;
	}
	tail = head;
	while (tail->next != NULL) {
		tail = tail->next;
	}
	tail->next = next;
	return head;
}
4285 
4286 /****************************************************************************
4287 *PrintImpFeat
4288 *
4289 *	This code prints out an ImpFeat in GenBank and HTML format.
4290 *
4291 ****************************************************************************/
PrintImpFeat(Asn2ffJobPtr ajp,BioseqPtr bsp,SeqFeatPtr sfp)4292 NLM_EXTERN Int2 PrintImpFeat (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp)
4293 {
4294 	CharPtr flatloc_ptr, key, loc;
4295 	GBQualPtr gbqp;
4296 	ImpFeatPtr ifp;
4297 	Uint1 class_qual, format=ajp->format;
4298 	Int2 class_equal, gbqual_index;
4299 	static CharPtr buf = NULL;
4300 	Uint2 retval;
4301 	Boolean first=TRUE;
4302 	GBQualPtr tmp, gbqpnext, head=NULL;
4303 
4304 	if (sfp == NULL)
4305 		return -1;
4306 	if (sfp->data.choice != SEQFEAT_IMP)
4307 		return -1;
4308 	ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4309 	key = ifp->key;
4310 	loc = ifp->loc;
4311 
4312 	if (format == EMBL_FMT || format == PSEUDOEMBL_FMT ||
4313 						format == EMBLPEPT_FMT)
4314 		ff_StartPrint(5, 21, ASN2FF_EMBL_MAX, "FT");
4315 	else
4316 		ff_StartPrint(5, 21, ASN2FF_GB_MAX, NULL);
4317 	ff_AddString(key);
4318 	TabToColumn(22);
4319 	if (loc == NULL) {
4320 		flatloc_ptr = FlatLoc(bsp, sfp->location);
4321 		if (get_www()) {
4322 			buf = www_featloc(flatloc_ptr);
4323 			ff_AddString(buf);
4324 			MemFree(buf);
4325 		} else {
4326 			ff_AddString(flatloc_ptr);
4327 		}
4328 		MemFree(flatloc_ptr);
4329 	} else {
4330 		if (get_www()) {
4331 			buf = www_featloc(loc);
4332 			ff_AddString(buf);
4333 			MemFree(buf);
4334 		} else {
4335 			ff_AddString(loc);
4336 		}
4337 	}
4338 	if (sfp->partial == TRUE) {
4339 		retval = SeqLocPartialCheck(sfp->location);
4340 		if (retval == SLP_COMPLETE || retval > SLP_OTHER) {
4341 			NewContLine();
4342 			ff_AddString("/partial");
4343 		}
4344 	}
4345 /* put all /note last */
4346 	for (gbqp=sfp->qual; gbqp; gbqp=gbqpnext) {
4347 		gbqpnext=gbqp->next;
4348 		if (StringCmp(gbqp->qual, "note") == 0) {
4349 			tmp = extract_qual(&(sfp->qual), gbqp);
4350 			head = tie_next_qual(head, tmp);
4351 		}
4352 	}
4353 	if (head) {
4354 		sfp->qual = tie_next_qual(sfp->qual, head);
4355 	}
4356 	for (gbqp=sfp->qual; gbqp; gbqp=gbqp->next) {
4357 		gbqual_index = GBQualNameValid(gbqp->qual);
4358 		if (gbqual_index != -1) {
4359 			NewContLine();
4360 			if (first) {
4361 				ff_AddChar( '/');
4362 				ff_AddString(gbqp->qual);
4363 			}
4364 			class_qual = ParFlat_GBQual_names[gbqual_index].gbclass;
4365 			if (class_qual == Class_none) {
4366 				class_equal=CheckForEqualSign(gbqp->qual);
4367 				if (class_equal == 1)
4368 					continue;
4369 			}
4370 			if (first) {
4371 				ff_AddChar('=');
4372 			}
4373 			if (class_qual == Class_text &&
4374 				StringCmp(gbqp->val, "\"\"") == 0) {
4375 			/* an empty string is considered legal */
4376 				ff_AddString(gbqp->val);
4377 				continue;
4378 			}
4379 			if (get_www() && (class_qual == Class_text
4380 							|| class_qual == Class_note)) {
4381 				buf = www_featloc(gbqp->val);
4382 			} else {
4383 				buf = StringSave(gbqp->val);
4384 			}
4385 			if (class_qual == Class_text || class_qual == Class_none
4386 				|| class_qual == Class_ecnum)
4387 				ff_AddString("\"");
4388 			if (first && class_qual == Class_note)
4389 					ff_AddString("\"");
4390 			if (class_qual == Class_note) {
4391 				www_note_gi(buf);
4392 			} else if (class_qual != Class_none) {
4393 				if (StringCmp(gbqp->qual, "transl_table") == 0) {
4394 					www_gcode(buf);
4395 				} else if (StringCmp(gbqp->qual, "db_xref") == 0) {
4396 					www_db_xref(buf);
4397 				} else {
4398 					ff_AddString(buf);
4399 				}
4400 			}
4401 			if (class_qual == Class_text || class_qual == Class_none
4402 				|| class_qual == Class_ecnum)
4403 				ff_AddString("\"");
4404 			if (gbqp->next == NULL && class_qual == Class_note)
4405 				ff_AddString("\"");
4406 			if (buf) {
4407 				MemFree(buf);
4408 			}
4409 			if (class_qual == Class_note) {
4410 				if (first == TRUE)
4411 					first = FALSE;
4412 			}
4413 		} else if (format == GENPEPT_FMT) {
4414 			if (StringCmp(gbqp->qual, "site_type") == 0) {
4415 				NewContLine();
4416 				ff_AddChar('/');
4417 				ff_AddString(gbqp->qual);
4418 				ff_AddChar('=');
4419 				ff_AddString("\"");
4420 				ff_AddString(gbqp->val);
4421 				ff_AddString("\"");
4422 			} else if (StringCmp(gbqp->qual, "bond_type") == 0) {
4423 				NewContLine();
4424 				ff_AddChar('/');
4425 				ff_AddString(gbqp->qual);
4426 				ff_AddChar('=');
4427 				ff_AddString("\"");
4428 				ff_AddString(gbqp->val);
4429 				ff_AddString("\"");
4430 			} else if (StringCmp(gbqp->qual, "region_name") == 0) {
4431 				NewContLine();
4432 				ff_AddChar('/');
4433 				ff_AddString(gbqp->qual);
4434 				ff_AddChar('=');
4435 				ff_AddString("\"");
4436 				ff_AddString(gbqp->val);
4437 				ff_AddString("\"");
4438 			} else if (StringCmp(gbqp->qual, "sec_str_type") == 0) {
4439 				NewContLine();
4440 				ff_AddChar('/');
4441 				ff_AddString(gbqp->qual);
4442 				ff_AddChar('=');
4443 				ff_AddString("\"");
4444 				ff_AddString(gbqp->val);
4445 				ff_AddString("\"");
4446 			} else if (StringCmp(gbqp->qual, "non-std-residue") == 0) {
4447 				NewContLine();
4448 				ff_AddChar('/');
4449 				ff_AddString(gbqp->qual);
4450 				ff_AddChar('=');
4451 				ff_AddString("\"");
4452 				ff_AddString(gbqp->val);
4453 				ff_AddString("\"");
4454 			} else if (StringCmp(gbqp->qual, "heterogen") == 0) {
4455 				NewContLine();
4456 				ff_AddChar('/');
4457 				ff_AddString(gbqp->qual);
4458 				ff_AddChar('=');
4459 				ff_AddString("\"");
4460 				ff_AddString(gbqp->val);
4461 				ff_AddString("\"");
4462 			} else if (StringCmp(gbqp->qual, "name") == 0) {
4463 				NewContLine();
4464 				ff_AddChar('/');
4465 				ff_AddString(gbqp->qual);
4466 				ff_AddChar('=');
4467 				ff_AddString("\"");
4468 				ff_AddString(gbqp->val);
4469 				ff_AddString("\"");
4470 			} else if (StringCmp(gbqp->qual, "coded_by") == 0) {
4471 				NewContLine();
4472 				ff_AddChar('/');
4473 				ff_AddString(gbqp->qual);
4474 				ff_AddChar('=');
4475 				ff_AddString("\"");
4476 				ff_AddString(gbqp->val);
4477 				ff_AddString("\"");
4478 			}
4479 		} else if (ASN2FF_VALIDATE_FEATURES == FALSE) {
4480 			NewContLine();
4481 			ff_AddChar('/');
4482 			ff_AddString(gbqp->qual);
4483 			if (gbqp->val != NULL && StringLen(gbqp->val) != 0) {
4484 				ff_AddChar('=');
4485 				ff_AddString("\"");
4486 				ff_AddString(gbqp->val);
4487 				ff_AddString("\"");
4488 			}
4489 		}
4490 	}
4491 
4492 	ff_EndPrint();
4493 
4494 	return 1;
4495 } /*PrintImpFeat */
4496 
4497 #define NOEQUALTOTAL 13
CheckForEqualSign(CharPtr qual)4498 NLM_EXTERN Int2 CheckForEqualSign(CharPtr qual)
4499 				/* this have to be changed. Tatiana 02.28.95 */
4500 {
4501 	Int2 i;
4502 	static CharPtr NoEqualSign[NOEQUALTOTAL] = {
4503 	"chloroplast",
4504 	"chromoplast",
4505 	"cyanelle",
4506 	"germline",
4507 	"kinetoplast",
4508 	"macronuclear",
4509 	"mitochondrion",
4510 	"partial",
4511 	"proviral",
4512 	"pseudo",
4513 	"rearranged",
4514 	"virion",
4515 	"focus"
4516 	};
4517 
4518 	if (qual == NULL)
4519 		return -1;
4520 
4521 	for (i=0; i < NOEQUALTOTAL; i++)
4522 		if (StringICmp(qual, NoEqualSign[i]) == 0)
4523 			return 1;
4524 
4525 	return 0;
4526 
4527 }
4528 
4529 /*-------------------------- delete_qual() ----------------------------*/
4530 /*************************************************************************
4531 *   delete_qual:
4532 *   -- return TRUE if found the "qual" in the "qlist", also remove
4533 *      the "qual" from list
4534 *                                                                7-8-93
4535 **************************************************************************/
delete_qual(GBQualPtr PNTR qlist,CharPtr qual)4536 NLM_EXTERN Boolean delete_qual(GBQualPtr PNTR qlist, CharPtr qual)
4537 {
4538    GBQualPtr curq, preq;
4539 
4540    for (preq = NULL, curq = *qlist; curq != NULL; curq = curq->next) {
4541        if (StringCmp(curq->qual, qual) == 0) {
4542           if (preq == NULL)
4543              preq = *qlist = curq->next;
4544           else
4545              preq->next = curq->next;
4546 
4547           curq->next = NULL;
4548           GBQualFree(curq);
4549           curq = NULL;
4550 
4551           return (TRUE);
4552        }
4553 
4554        preq = curq;
4555    }
4556 
4557    return (FALSE);
4558 
4559 }
4560