1 /*   asn2ff6.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2ff6.c
27 *
28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov
29 *
30 * Version Creation Date:   7/15/95
31 *
32 * $Revision: 6.69 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * $Log: asn2ff6.c,v $
39 * Revision 6.69  2006/07/13 17:06:38  bollin
40 * use Uint4 instead of Uint2 for itemID values
41 * removed unused variables
42 * resolved compiler warnings
43 *
44 * Revision 6.68  2002/02/15 18:30:55  kans
45 * no longer change snoRNA to misc_RNA
46 *
47 * Revision 6.67  2001/12/28 21:37:10  kans
48 * allow sfp->product to be SEQLOC_EQUIV
49 *
50 * Revision 6.66  2001/12/21 20:21:06  cavanaug
51 * old_locus_fmt now controls generated of *old* LOCUS line format
52 *
53 * Revision 6.65  2001/12/05 18:13:53  cavanaug
54 * Changes for new LOCUS line format
55 *
56 * Revision 6.64  2001/08/21 17:33:33  kans
57 * snoRNA can show /product
58 *
59 * Revision 6.63  2001/08/07 15:51:08  kans
60 * use NUM_SEQID, added third party annotation seqids
61 *
62 * Revision 6.62  2001/07/18 14:50:13  kans
63 * gather features with gsc.useSeqMgrIndexes if genpept, raw, indexing requested, and IndexedGetDescrForDiv to speed up finding division
64 *
65 * Revision 6.61  2001/07/03 20:01:41  kans
66 * AddGBQual ASN2GNBK_STRIP_NOTE_PERIODS trim trailing tilde first
67 *
68 * Revision 6.60  2001/07/03 00:05:51  kans
69 * TrimSpacesAndJunkFromEnds on genbankblock->source if ASN2GNBK_STRIP_NOTE_PERIODS
70 *
71 * Revision 6.59  2001/06/26 23:43:35  kans
72 * moved second period check to inside last period check
73 *
74 * Revision 6.58  2001/06/26 23:36:06  kans
75 * in AddGBQual if ASN2GNBK_STRIP_NOTE_PERIODS, trim one or two periods at end
76 *
77 * Revision 6.57  2001/06/13 14:41:58  yaschenk
78 * changing increment of 10 to 1024 in EnlargeSortList()
79 *
80 * Revision 6.56  2001/06/04 21:30:52  kans
81 * TrimSpacesAndSemicolons trims leading semicolons as well as leading spaces
82 *
83 * Revision 6.55  2001/06/01 18:46:26  tatiana
84 * NG_ added to ValidateAccession
85 *
86 * Revision 6.54  2001/05/31 23:45:48  kans
87 * if ASN2GNBK_STRIP_NOTE_PERIODS and IsEllipsis, do not strip period
88 *
89 * Revision 6.53  2001/05/29 23:27:47  kans
90 * added support for snoRNA - flatfile prints as misc_RNA for now
91 *
92 * Revision 6.52  2001/04/16 16:51:42  tatiana
93 * GetDivision(): CON division never use for aa
94 *
95 * Revision 6.51  2001/04/06 12:47:43  beloslyu
96 * missing flatloc declaration was added
97 *
98 * Revision 6.50  2001/04/05 21:41:26  tatiana
99 * REGION added in GetLocusPartsAwp()
100 *
101 * Revision 6.49  2001/04/04 22:05:16  kans
102 * In GB_PrintPubs under ASN2GNBK_STRIP_NOTE_PERIODS clean up comma/space/semicolon (TF)
103 *
104 * Revision 6.48  2001/04/04 21:46:56  kans
105 * TrimSpacesAndJunkFromEnds if ASN2GNBK_STRIP_NOTE_PERIODS (TF)
106 *
107 * Revision 6.47  2001/04/02 21:25:19  kans
108 * AddGBQual under ASN2GNBK_STRIP_NOTE_PERIODS also removes ; ; substrings
109 *
110 * Revision 6.46  2001/03/26 17:36:06  kans
111 * added NULL for endogenous-virus to genome prefix array
112 *
113 * Revision 6.45  2001/02/16 16:52:22  tatiana
114 * special case locus for NT_ records
115 *
116 * Revision 6.44  2001/01/26 19:21:48  kans
117 * extrachromosomal into source note, removed macronuclear, extrachrom, plasmid from organism line
118 *
119 * Revision 6.43  2001/01/19 21:51:04  kans
120 * finally got ASN2GNBK_STRIP_NOTE_PERIODS logic right
121 *
122 * Revision 6.42  2001/01/19 18:45:28  kans
123 * another attempt to use ASN2GNBK_STRIP_NOTE_PERIODS to remove extraneous asn2ff/asn2gnbk diffs
124 *
125 * Revision 6.41  2001/01/08 18:36:40  kans
126 * removed ASN2GNBK_STRIP_NOTE_PERIODS - this was not the right place
127 *
128 * Revision 6.40  2001/01/06 22:09:42  kans
129 * added ASN2GNBK_STRIP_NOTE_PERIODS to try to eliminate trivial note discrepancies
130 *
131 * Revision 6.39  2000/11/29 20:46:11  tatiana
132 * HTC division added for MI_TECH_htc
133 *
134 * Revision 6.38  2000/10/24 20:28:44  tatiana
135 * ValidateAccession accepts XP, XM
136 *
137 * Revision 6.37  2000/09/20 21:26:19  tatiana
138 * all organelles adde to ORGANISM line
139 *
140 * Revision 6.36  2000/09/11 18:52:59  tatiana
141 * PUBMED linetype is legal in release mode
142 *
143 * Revision 6.35  2000/08/25 16:16:46  kans
144 * ValidateLocus initializes num_of_digits even if > 1000 segments
145 *
146 * Revision 6.34  2000/08/01 21:09:39  tatiana
147 * ValidateVersion is colld in forgbrel option only
148 *
149 * Revision 6.33  2000/06/29 12:23:30  kans
150 * GenPept on Seq_repr_virtual shown only if is_www || ajp->mode != RELEASE_MODE, earlier kludge of ignoring get_www was probably too broad
151 *
152 * Revision 6.32  2000/06/28 19:31:22  kans
153 * in SeqToAwp always set is_www to TRUE, so virtual sequences show up on non-web applications
154 *
155 * Revision 6.31  2000/06/23 15:42:34  tatiana
156 * removed virion and proviral from ORGANISM line
157 *
158 * Revision 6.30  2000/06/21 15:04:57  tatiana
159 * space added to Virion
160 *
161 * Revision 6.29  2000/06/12 20:49:04  tatiana
162 * new organelles added to ORGANISM filed
163 *
164 * Revision 6.28  2000/06/05 17:51:53  tatiana
165 * increase size of feature arrays to Int4
166 *
167 * Revision 6.27  2000/02/09 19:34:39  kans
168 * added forgbrel flag to Asn2ffJobPtr, currently used to suppress PUBMED line, which was not formally announced in release notes
169 *
170 * Revision 6.26  2000/01/28 17:56:48  kans
171 * show_gi always FALSE to suppress NID and PID, added support for PUBMED line in GenBank format
172 *
173 * Revision 6.25  2000/01/18 17:09:24  tatiana
174 * NP added to ValidateAccession
175 *
176 * Revision 6.24  1999/10/06 20:20:24  bazhin
177 * Removed memory leaks in GeneStructContentFree() and GetPubsAwp()
178 * functions.
179 *
180 * Revision 6.23  1999/09/23 18:09:33  tatiana
181 * ValidateAccession modified for N*_ accession
182 *
183 * Revision 6.22  1999/09/15 18:17:12  tatiana
184 * GRAPHIK_FMT corrected
185 *
186 * Revision 6.18  1999/04/02 19:33:55  tatiana
187 * MI_TECH_htgs_0 added in BioseqGetGBDivCode()
188 *
189 * Revision 6.17  1999/04/01 20:44:12  kans
190 * Int2 lengths to Int4 to allow CountGapsInDeltaSeq with buffer > 32K
191 *
192 * Revision 6.16  1999/03/31 01:09:23  tatiana
193 * ValidateAccession accepts 3+5
194 *
195 * Revision 6.15  1999/03/30 21:00:45  tatiana
196 *  ValidateOtherAccession() added
197 *
198 * Revision 6.14  1999/03/22 23:22:32  tatiana
199 * accession.version modifications
200 *
201 * Revision 6.13  1999/01/12 16:57:55  kans
202 * SeqToAwp checks for null ep before dereferencing
203 *
204 * Revision 6.12  1998/11/24 20:15:03  kans
205 * seqid other has better priority than local so refgene id is used preferentially
206 *
207 * Revision 6.11  1998/10/30 01:12:00  kans
208 * GetPubsAwp GatherEntity filters out OBJ_SEQALIGN - this was being hit many times on big records, and there is no need for asn2ff to see alignments
209 *
210 * Revision 6.10  1998/09/24 17:46:00  kans
211 * fixed GetDBXrefFromGene problem (TT)
212 *
213 * Revision 6.9  1998/06/15 14:59:49  tatiana
214 * UNIX compiler warnings fixed
215 *
216 * Revision 6.8  1998/05/11 21:58:33  tatiana
217 * some functions moved from asn2ff1.c
218 *
219 * Revision 6.7  1998/05/05 19:53:50  tatiana
220 * SEQFEAT_RSITE supressed in GetNAFeatKey()
221 *
222 * Revision 6.6  1998/04/30 21:49:10  tatiana
223 * *** empty log message ***
224 *
225 * Revision 6.5  1998/02/10 17:01:14  tatiana
226 * AddGBQualEx() added
227 *
228 * Revision 6.4  1998/01/13 21:35:20  tatiana
229 *  AsnIoHash moved to asnio.c file
230 *
231 * Revision 6.3  1998/01/13 21:14:50  tatiana
232 * static AsnIoHash changed to AsnIoHash to avoid fubction name collision
233 *
234 * Revision 6.2  1997/12/15 15:53:29  tatiana
235 * features processing has been changed
236 *
237 * Revision 6.1  1997/09/16 15:41:49  kans
238 * added SEQFEAT_SITE case to GetNAFeatKey (TT)
239 *
240 * Revision 5.25  1997/07/28 19:03:59  vakatov
241 * [WIN32,MSVC++]  Restored lost "NCBIOBJ.LIB" pro-DLL modifications
242 *
243  * Revision 5.24  1997/07/28 14:26:11  vakatov
244  * BioseqGetGBDivCode() proto in-sync with its header-located declaration
245  *
246  * Revision 5.23  1997/07/24 23:57:41  tatiana
247  * fixed sfp_order
248  *
249  * Revision 5.22  1997/07/24 15:59:06  tatiana
250  * aaaaaaa bug fixed in Getscblknum
251  *
252  * Revision 5.21  1997/07/16 21:18:42  tatiana
253  *  added sorting by feat type in CompareSfpForHeap()
254  *
255  * Revision 5.20  1997/06/19 18:37:17  vakatov
256  * [WIN32,MSVC++]  Adopted for the "NCBIOBJ.LIB" DLL'ization
257  *
258  * Revision 5.19  1997/05/21 14:43:27  tatiana
259  * fix empty /product in GetNAFeatKey
260  *
261  * Revision 5.17  1997/01/13  22:33:04  tatiana
262  * added CompareGeneName()
263  *
264  * Revision 5.16  1996/12/17  22:47:56  tatiana
265  * added StoreFeatFree()
266  *
267  * Revision 5.15  1996/10/25  22:12:10  tatiana
268  * doesn't add empty ("") val if qual is translation
269  *
270  * Revision 5.14  1996/10/02  15:14:38  tatiana
271  * a bug fixed
272  *
273  * Revision 5.13  1996/10/01  22:42:09  tatiana
274  * fixed duplicated notes in NoteToCharPtrStack
275  *
276  * Revision 5.12  1996/09/09  13:36:02  kans
277  * moved BioseqGetGBDivCode from toasn.[ch] to asn2ff.h/asn2ff6.c
278  *
279  * Revision 5.11  1996/09/03  19:52:49  tatiana
280  * extra_loc added
281  *
282  * Revision 5.10  1996/08/28  21:40:35  tatiana
283  * don't copy new location from gather
284  *
285  * Revision 5.9  1996/08/16  20:34:45  tatiana
286  * GetNAFeatKey() changed
287  *
288  * Revision 5.7  1996/08/09  21:08:57  tatiana
289  * a bug fixed in GetNAFeatKey
290  *
291  * Revision 5.6  1996/07/30  16:35:05  tatiana
292  * Boolean new added to GetNaFeatKey()
293  *
294  * Revision 5.5  1996/07/19  21:38:15  tatiana
295  * ERR_GI_No_GI_Number changed from 	ErrPostEx to	ErrPostStr
296  *
297  * Revision 5.3  1996/07/02  18:11:18  tatiana
298  * calculate hash in StoreFeat
299  *
300  * Revision 5.2  1996/06/14  18:05:03  tatiana
301  * GetNAFeatKey change
302  *
303  * Revision 5.1  1996/06/11  15:26:36  tatiana
304  * GetGINumber is modified to get also embl NI
305  *
306  * Revision 4.17  1996/05/16  21:00:52  tatiana
307  * RemoveRedundantFeats addded
308  *
309  * Revision 4.16  1996/04/29  18:51:42  tatiana
310  * whole_book format added
311  *
312  * Revision 4.15  1996/04/15  14:36:23  tatiana
313  * memory leaks cleaning
314  *
315  * Revision 4.13  1996/02/28  04:53:06  ostell
316  * changes to support segmented master seeuquences
317  *
318  * Revision 4.12  1996/02/15  15:54:51  tatiana
319  * minor clean ups
320  *
321  * Revision 4.11  1996/01/29  22:39:10  tatiana
322  * error posting MODULE
323  *
324  * Revision 4.10  1995/12/20  22:41:56  tatiana
325  * removed redundant functions
326  *
327  * Revision 4.9  1995/12/12  20:21:05  tatiana
328  * CitSub validation fixed
329  *
330  * Revision 4.8  1995/12/10  22:19:31  tatiana
331  * Imprint in CitSub became optional
332  *
333  * Revision 4.7  1995/11/17  21:28:35  kans
334  * asn2ff now uses gather (Tatiana)
335  *
336  * Revision 4.2  1995/08/04  15:26:42  tatiana
337  * bug fixed in GetPubDate (check for Null pointer).
338  *
339  * Revision 4.1  1995/08/01  14:53:08  tatiana
340  * change SeqIdPrint to SeqIdWrite
341  *
342  * Revision 1.57  1995/07/17  19:33:20  kans
343  * parameters combined into Asn2ffJobPtr structure
344 * ==========================================================================
345 */
346 
347 #include <asn2ff6.h>
348 #include <asn2ffp.h>
349 #include <a2ferrdf.h>
350 #include <asn2ffg.h>
351 #include <utilpub.h>
352 #include <ffprint.h>
353 #include <explore.h>
354 #include <sqnutils.h>
355 
356 #define BUF_EXT_LENGTH 4
357 
358 /*---------- order for other id FASTA_LONG (copied from SeqIdWrite) ------- */
359 
360 static Uint1 fasta_order[NUM_SEQID] = {
361 33, /* 0 = not set */
362 20, /* 1 = local Object-id */
363 15,  /* 2 = gibbsq */
364 16,  /* 3 = gibbmt */
365 30, /* 4 = giim Giimport-id */
366 10, /* 5 = genbank */
367 10, /* 6 = embl */
368 10, /* 7 = pir */
369 10, /* 8 = swissprot */
370 15,  /* 9 = patent */
371 10, /* 10 = other TextSeqId */
372 20, /* 11 = general Dbtag */
373 32,  /* 12 = gi */
374 10, /* 13 = ddbj */
375 10, /* 14 = prf */
376 12, /* 15 = pdb */
377 10,  /* 16 = tpg */
378 10,  /* 17 = tpe */
379 10   /* 18 = tpd */
380 };
381 
382 
383 static Uint1 sfp_order[21] = {0,
384 2, /* SEQFEAT_GENE */
385 5, /* SEQFEAT_ORG */
386 3, /* SEQFEAT_CDREGION */
387 5, /* SEQFEAT_PROT */
388 1, /* SEQFEAT_RNA */
389 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5};
390 
391 SeqFeatPtr MakeSyntheticSeqFeat PROTO ((void));
392 ValNodePtr LookForPubsOnFeat PROTO ((SeqFeatPtr sfp, ValNodePtr PubOnFeat));
393 
394 Boolean asn2ff_flags[13];
395 
GeneStructNew(void)396 NLM_EXTERN GeneStructPtr GeneStructNew (void)
397 {
398 	GeneStructPtr gsp;
399 
400 	gsp = (GeneStructPtr) MemNew(sizeof(GeneStruct));
401 	gsp->gene = NULL;
402 	gsp->product = NULL;
403 	gsp->standard_name = NULL;
404 	gsp->map = (CharPtr PNTR) MemNew(sizeof(CharPtr));
405 	gsp->map_index = 0;
406 	gsp->map_size = 1;
407 	gsp->ECNum = NULL;
408 	gsp->activity = NULL;
409 	gsp->grp = NULL;
410 
411 	return gsp;
412 }
413 
NoteStructNew(NoteStructPtr nsp)414 NLM_EXTERN NoteStructPtr NoteStructNew (NoteStructPtr nsp)
415 {
416 	nsp = (NoteStructPtr) MemNew(sizeof(NoteStruct));
417 
418 	nsp->note = (CharPtr PNTR) MemNew(5*sizeof(CharPtr));
419 	nsp->note_annot = (CharPtr PNTR) MemNew(5*sizeof(CharPtr));
420 	nsp->note_alloc = (Uint1 PNTR) MemNew(5*sizeof(Uint1));
421 	nsp->note_index = 0;
422 	nsp->note_size = 5;
423 
424 	return nsp;
425 }
426 
GeneStructContentFree(GeneStructPtr gsp)427 static void GeneStructContentFree(GeneStructPtr gsp)
428 {
429 	ValNodePtr v, vnext;
430 
431 	if (gsp->gene) {
432 		if (gsp->gene->data.ptrvalue != NULL) {
433 			MemFree(gsp->gene->data.ptrvalue);
434 		}
435 		gsp->gene = ValNodeFree(gsp->gene);
436 	}
437 	for (v = gsp->product; v; v = vnext) {
438 		vnext = v->next;
439 		if (v->data.ptrvalue != NULL) {
440 			MemFree(v->data.ptrvalue);
441 		}
442 		MemFree(v);
443 	}
444 	for (v = gsp->standard_name; v != NULL; v = vnext) {
445 		vnext = v->next;
446 		if (v->data.ptrvalue != NULL) {
447 			MemFree(v->data.ptrvalue);
448 		}
449 		MemFree(v);
450 	}
451 	for (v = gsp->ECNum; v; v = vnext) {
452 		vnext = v->next;
453 		if (v->data.ptrvalue != NULL) {
454 			MemFree(v->data.ptrvalue);
455 		}
456 		MemFree(v);
457 	}
458 	for (v = gsp->activity; v; v = vnext) {
459 		vnext = v->next;
460 		if (v->data.ptrvalue != NULL) {
461 			MemFree(v->data.ptrvalue);
462 		}
463 		MemFree(v);
464 	}
465 	if (gsp->grp) {
466 		GeneRefFree(gsp->grp);
467 	}
468 	return;
469 }
470 
GeneStructFree(GeneStructPtr gsp)471 NLM_EXTERN void GeneStructFree (GeneStructPtr gsp)
472 {
473 	if (gsp == NULL)
474 		return;
475 	gsp->map = MemFree(gsp->map);
476 	GeneStructContentFree(gsp);
477 	MemFree(gsp);
478 }
479 
NoteStructFree(NoteStructPtr nsp)480 NLM_EXTERN void NoteStructFree (NoteStructPtr nsp)
481 {
482 	Int2 index;
483 
484 	if (nsp == NULL) {
485 	return;
486 	}
487 	for (index=0; index < nsp->note_index; index++) {
488 			if (nsp->note_alloc[index] == ASN2FLAT_ALLOC)
489 				nsp->note[index] = MemFree(nsp->note[index]);
490 	}
491 	nsp->note = MemFree(nsp->note);
492 	nsp->note_annot = MemFree(nsp->note_annot);
493 	nsp->note_alloc = MemFree(nsp->note_alloc);
494 	MemFree(nsp);
495 }
496 
NoteStructReset(NoteStructPtr nsp)497 NLM_EXTERN void NoteStructReset (NoteStructPtr nsp)
498 {
499 	Int2 index;
500 
501 	if (nsp == NULL) {
502 		return;
503 	}
504 	for (index=0; index<nsp->note_index; index++)
505 	{
506 		if (nsp->note_alloc[index] == ASN2FLAT_ALLOC)
507 			nsp->note[index] = MemFree(nsp->note[index]);
508 		nsp->note[index] = NULL;
509 		nsp->note_annot[index] = NULL;
510 	}
511 	nsp->note_index = 0;
512 }
513 
514 
ListFree(SeqFeatPtr PNTR PNTR List,Int4 range)515 NLM_EXTERN void ListFree (SeqFeatPtr PNTR PNTR List, Int4 range)
516 {
517 	Int4 index;
518 
519 	for (index=0; index < range; index++)
520 		MemFree(List[index]);
521 
522 	MemFree(List);
523 }
524 
525 /***********************************************************************
526 *SeqFeatPtr MakeSyntheticSeqFeat(void)
527 *
528 *	This function allocates a "synthetic" SeqFeatPtr, which is
529 *	used to print the SeqFeats out.  To print out SeqFeats, they
530 *	are copied to this "synthetic" sfp, which is an ImpFeat,
531 *	adjusted, validated, and then a function prints out this ImpFeat.
532 *************************************************************************/
533 
MakeSyntheticSeqFeat(void)534 NLM_EXTERN SeqFeatPtr MakeSyntheticSeqFeat(void)
535 {
536 	ImpFeatPtr ifp;
537 	SeqFeatPtr sfp_out;
538 
539 	sfp_out = SeqFeatNew();
540 	if (sfp_out)
541 	{
542 		sfp_out->data.choice = SEQFEAT_IMP;
543 		sfp_out->qual = NULL;
544 		ifp = sfp_out->data.value.ptrvalue = ImpFeatNew();
545 	/*	ifp->key = (CharPtr) MemNew(20*sizeof(Char)); */
546 /* key may be more than 20 char one day and cause segmentation fault */
547 		ifp->key = NULL;
548 		ifp->loc = NULL;
549 		sfp_out->comment = NULL;
550 		sfp_out->location = NULL;
551 		sfp_out->product = NULL;
552 	}
553 
554 	return sfp_out;
555 }
556 
CpNoteToCharPtrStack(NoteStructPtr nsp,CharPtr annot,CharPtr string)557 NLM_EXTERN void CpNoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string)
558 {
559 	NoteToCharPtrStack(nsp, annot, string, ASN2FLAT_NOT_ALLOC);
560 	return;
561 }
562 
SaveNoteToCharPtrStack(NoteStructPtr nsp,CharPtr annot,CharPtr string)563 NLM_EXTERN void SaveNoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string)
564 {
565 	NoteToCharPtrStack(nsp, annot, string, ASN2FLAT_ALLOC);
566 	return;
567 }
568 
569 
NoteToCharPtrStack(NoteStructPtr nsp,CharPtr annot,CharPtr string,Uint1 alloc)570 NLM_EXTERN void NoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string, Uint1 alloc)
571 {
572 	Int2 index, note_size;
573 
574 	if (nsp)
575 	{
576 		note_size = nsp->note_size;
577 		index = nsp->note_index;
578 	}
579 	else
580 		return;
581 /*** add check for duplicated notes 9-27-96 ***/
582 /*	if (string[StringLen(string)-1] == '.')
583 		string[StringLen(string)-1] = '\0';
584 	for (i = 0; i < note_size; i++) {
585 		if (nsp->note[i] && StringStr(nsp->note[i], string) != NULL) {
586 			return;
587 		}
588 	}
589 */
590 	if (index == note_size)
591 		EnlargeCharPtrStack(nsp, 5);
592 
593 	nsp->note_annot[index] = annot;
594 
595 	if (alloc == ASN2FLAT_NOT_ALLOC)
596 	{
597 		nsp->note_alloc[index] = ASN2FLAT_NOT_ALLOC;
598 		nsp->note[index] = string;
599 	}
600 	else if (alloc == ASN2FLAT_ALLOC)
601 	{
602 		nsp->note_alloc[index] = ASN2FLAT_ALLOC;
603 		nsp->note[index] = StringSave(string);
604 	}
605 
606 	nsp->note_index++;
607 
608 	return;
609 }
610 
EnlargeCharPtrStack(NoteStructPtr nsp,Int2 enlarge)611 NLM_EXTERN void EnlargeCharPtrStack (NoteStructPtr nsp, Int2 enlarge)
612 {
613 	CharPtr PNTR newstr;
614 	CharPtr PNTR new_annot;
615 	Int2 index;
616 	Uint1 PNTR new_alloc;
617 
618 	newstr = (CharPtr PNTR) MemNew((size_t)
619 		((enlarge+(nsp->note_size))*sizeof(CharPtr)));
620 	new_annot = (CharPtr PNTR) MemNew((size_t)
621 		((enlarge+(nsp->note_size))*sizeof(CharPtr)));
622 	new_alloc = (Uint1 PNTR) MemNew((size_t)
623 		((enlarge+(nsp->note_size))*sizeof(Uint1)));
624 
625 	for (index=0; index<(nsp->note_size); index++) {
626 		newstr[index] = nsp->note[index];
627 		new_annot[index] = nsp->note_annot[index];
628 		new_alloc[index] = nsp->note_alloc[index];
629 	}
630 	nsp->note_size += enlarge;
631 	nsp->note = MemFree(nsp->note);
632 	nsp->note_annot = MemFree(nsp->note_annot);
633 	nsp->note_alloc = MemFree(nsp->note_alloc);
634 	nsp->note = newstr;
635 	nsp->note_annot = new_annot;
636 	nsp->note_alloc = new_alloc;
637 }
638 
EnlargeSortList(SortStructPtr List,Int4 size)639 NLM_EXTERN SortStructPtr EnlargeSortList(SortStructPtr List, Int4 size)
640 {
641 	SortStructPtr NewList;
642 
643 	if (size % 1024 == 0) {
644 		NewList = (SortStructPtr) MemNew((size+1024)*sizeof(SortStruct));
645 		if (size > 0) {
646 			MemCopy(NewList, List, (size * sizeof(SortStruct)));
647 			MemFree(List);
648 		}
649 		return NewList;
650 	}
651 	return List;
652 
653 }	/* EnlargeSortList */
654 
CompareSfpForHeap(VoidPtr vp1,VoidPtr vp2)655 NLM_EXTERN int LIBCALLBACK CompareSfpForHeap (VoidPtr vp1, VoidPtr vp2)
656 {
657 
658 	SortStructPtr sp1 = vp1;
659 	SortStructPtr sp2 = vp2;
660 	BioseqPtr bsp;
661 	SeqFeatPtr sfp1, sfp2;
662 	Int2 status = 0;
663 
664 	bsp = sp1->bsp;
665 	sfp1 = sp1->sfp;
666 	sfp2 = sp2->sfp;
667 	if (sfp1 == NULL || sfp2 == NULL) {
668 		return status;
669 	}
670 
671 	status = SeqLocOrder(sfp1->location, sfp2->location, bsp);
672 
673 	if (ABS(status) >= 2 && sp1->seg_bsp) {
674 		status = SeqLocOrder(sfp1->location, sfp2->location, sp1->seg_bsp);
675 	}
676 	if (status == 0 && sfp1->data.choice < 6 && sfp2->data.choice < 6) {
677 		status = sfp_order[sfp1->data.choice] - sfp_order[sfp2->data.choice];
678 	}
679 	return status;
680 }
CompareGeneName(VoidPtr vp1,VoidPtr vp2)681 NLM_EXTERN int LIBCALLBACK CompareGeneName (VoidPtr vp1, VoidPtr vp2)
682 {
683 
684 	SortStructPtr sp1 = vp1;
685 	SortStructPtr sp2 = vp2;
686 	SeqFeatPtr sfp1, sfp2;
687 	GeneRefPtr gr1, gr2;
688 	Int2 status = 0;
689 
690 	sfp1 = sp1->sfp;
691 	sfp2 = sp2->sfp;
692 	if (sfp1 == NULL || sfp2 == NULL) {
693 		return status;
694 	}
695 	if (sfp1->data.choice != SEQFEAT_GENE)
696 		return status;
697 	if (sfp2->data.choice != SEQFEAT_GENE)
698 		return status;
699 	gr1 = (GeneRefPtr) sfp1->data.value.ptrvalue;
700 	gr2 = (GeneRefPtr) sfp2->data.value.ptrvalue;
701 	if (gr1 == NULL || gr2 == NULL)
702 		return status;
703 	status = StringCmp(gr1->locus, gr2->locus);
704 
705 	return status;
706 }
707 
708 /**************************************************************************
709 *	This function returns the gi number
710 *	If no gi number is found, -1 is returned  and a warning is
711 *	issued.
712 *	06-10-96
713 *	This fubction is changed to void. It will find NCBI gi and embl ni
714 *	and fill up gbp structure
715 **************************************************************************/
716 
GetGINumber(GBEntryPtr gbp)717 NLM_EXTERN void GetGINumber(GBEntryPtr gbp)
718 {
719 	Boolean 	found_gi;
720 	ValNodePtr 	vnp;
721 	Int4 		gi = -1;
722 	CharPtr 	ni = NULL;
723     DbtagPtr	dbtag;
724     ObjectIdPtr oid;
725 
726 	found_gi = FALSE;
727 	if (gbp == NULL)
728 		return;
729 	if (gbp->bsp == NULL)
730 		return;
731 	for (vnp=gbp->bsp->id; vnp; vnp=vnp->next) {
732 		if (vnp->choice == SEQID_GI) {
733 			gi = vnp->data.intvalue;
734 			if (gi != 0) {
735 				found_gi = TRUE;
736 				break;
737 			} else {
738 				if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
739 					ErrPostEx(SEV_WARNING, ERR_GI_No_GI_Number,
740 									"Zero gi number: %d", gi);
741 				}
742 			}
743 		} else if (vnp->choice == SEQID_GENERAL) {
744 			dbtag = vnp->data.ptrvalue;
745 			if (StringCmp(dbtag->db, "NID") == 0) {
746 				oid = dbtag->tag;
747 				if (oid->str) {
748 					ni = StringSave(oid->str);
749 				}
750 			}
751 		}
752 	}
753 	if (! found_gi) {
754 		if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
755 			ErrPostStr(SEV_WARNING, ERR_GI_No_GI_Number, "");
756 		}
757 		gi = -1;
758 	}
759 	gbp->gi = gi;
760 	gbp->ni = ni;
761 	return;
762 }
763 
764 /***********************************************************************
765 *
766 *	GetGIs gets the GI's.
767 *
768 ************************************************************************/
GetGIs(Asn2ffJobPtr ajp)769 NLM_EXTERN void GetGIs (Asn2ffJobPtr ajp)
770 {
771 	GBEntryPtr gbp;
772 
773 	for (gbp = ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
774 		GetGINumber(gbp);
775 	}
776 	return;
777 }
778 
GetProductSeqId(ValNodePtr product)779 NLM_EXTERN SeqIdPtr GetProductSeqId(ValNodePtr product)
780 {
781 	SeqIdPtr sip=NULL;
782 	SeqIntPtr seq_int;
783 	SeqLocPtr slp;
784 
785 	if (product)
786 	{
787 		if (product->choice == SEQLOC_WHOLE)
788 		{
789 			sip = (SeqIdPtr) product->data.ptrvalue;
790 		}
791 		else if (product->choice == SEQLOC_INT)
792 		{
793 			seq_int = (SeqIntPtr) product->data.ptrvalue;
794 			sip = seq_int->id;
795 		}
796 		else if (product->choice == SEQLOC_EQUIV)
797 		{
798 			for (slp = (SeqLocPtr) product->data.ptrvalue; slp != NULL; slp = slp->next) {
799 				sip = GetProductSeqId (slp);
800 				if (sip != NULL) return sip;
801 			}
802 		}
803 	}
804 	return sip;
805 }
806 
807 /*****************************************************************************
808 *check_range
809 *
810 *	This function is called by the gbparse functions of Karl Sirotkin
811 *	and determines if the length of a BioSeqPtr is sensible.
812 *	Pointer data is not used !! Tatiana !!
813 *	Tom Madden
814 *****************************************************************************/
815 
check_range(Pointer data,SeqIdPtr seq_id)816 NLM_EXTERN Int4 check_range(Pointer data, SeqIdPtr seq_id)
817 
818 {
819 	BioseqPtr bsp;
820 
821 	bsp = BioseqFind(seq_id);
822 	if (bsp)
823 		return bsp->length;
824 	else
825 		return 0;
826 }	/* check_range */
827 
828 /****************************************************************************
829 *do_loc_errors
830 *
831 *	This function is called both by the gbparse functions of Karl Sirotkin
832 *	and by asn2ff.  If called by gbparse, error messages are stored in
833 *	buffers and a flag is set; if called by asn2ff, the error messages
834 *	are retrieved and the flag reset.
835 *
836 ***************************************************************************/
837 
do_loc_errors(CharPtr front,CharPtr details)838 NLM_EXTERN void do_loc_errors(CharPtr front, CharPtr details)
839 {
840 	ErrPostEx(SEV_INFO, ERR_FEATURE_Bad_location, "%s: %s\n", front, details);
841 }
842 
843 /***************************************************************************
844 *do_no_loc_errors
845 *
846 *	Is used when no error messages are wanted.
847 ****************************************************************************/
848 
do_no_loc_errors(CharPtr front,CharPtr details)849 NLM_EXTERN void do_no_loc_errors(CharPtr front, CharPtr details)
850 {
851 	return;
852 }
853 
854 /***************************************************************************
855 *Boolean GBQualPresent(CharPtr ptr, GBQualPtr gbqual)
856 *
857 *This function check that a qual, that is to be added to the list of qual
858 *isn't already present.
859 ***************************************************************************/
GBQualPresent(CharPtr ptr,GBQualPtr gbqual)860 NLM_EXTERN Boolean GBQualPresent(CharPtr ptr, GBQualPtr gbqual)
861 
862 {
863 	Boolean present=FALSE;
864 	GBQualPtr qual;
865 
866 	for (qual=gbqual; qual; qual=qual->next)
867 		if (StringCmp(ptr, qual->qual) == 0)
868 		{
869 			present = TRUE;
870 			break;
871 		}
872 
873 	return present;
874 }	/* GBQualPresent */
875 
876 /**********************************************************************
877 *Boolean GetNAFeatKey(CharPtr buffer, SeqFeatPtr sfp)
878 *
879 *	This function places the sfp "key" in buffer and returns TRUE
880 *	if successful, it returns FALSE if not successful.
881 *	This function only works for nucleic acid sequences, as the
882 *	keys (for corresponding numbers) are different for peptides.
883 ***********************************************************************/
884 
GetNAFeatKey(Boolean is_new,CharPtr PNTR buffer,SeqFeatPtr sfp,SeqFeatPtr sfp_out)885 NLM_EXTERN Boolean GetNAFeatKey(Boolean is_new, CharPtr PNTR buffer, SeqFeatPtr sfp, SeqFeatPtr sfp_out)
886 {
887 
888 	Boolean retval=TRUE;
889 	ImpFeatPtr ifp;
890 	RnaRefPtr rrp;
891 	CharPtr str = NULL;
892 	Int2 index;
893 
894 
895 	switch (sfp->data.choice)
896 	{
897 	case SEQFEAT_GENE:	/* gene becomes misc_feat for purposes of CheckNAFeat */
898 		if (is_new) {
899 			*buffer = StringSave("gene");
900 		} else {
901 			*buffer = StringSave("misc_feature");
902 		}
903 		break;
904 	case SEQFEAT_CDREGION:
905 		*buffer = StringSave("CDS");
906 		break;
907 	case SEQFEAT_RNA:
908 		rrp = sfp->data.value.ptrvalue;
909 		/* the following code was taken (almost) directly from Karl
910 		Sirotkin's code.					*/
911 		switch ( rrp -> type){
912 			case 1:
913 				*buffer =StringSave("precursor_RNA");
914 				break;
915 			case 2:
916 				*buffer = StringSave("mRNA");
917 				break;
918 			case 3:
919 				*buffer = StringSave("tRNA");
920 				break;
921 			case 4:
922 				*buffer = StringSave("rRNA");
923 				break;
924 			case 5:
925 				*buffer = StringSave("snRNA");
926 				break;
927 			case 6:
928 				*buffer = StringSave("scRNA");
929 				break;
930 			case 7:
931 				*buffer = StringSave("snoRNA"); /* snoRNA */
932 				break;
933 			case 255:
934 				*buffer = StringSave("misc_RNA");
935 				break;
936 		}
937 		switch ( rrp -> type){
938 			case 2:
939 			case 4:
940 			case 5:
941 			case 6:
942 			case 7:
943 			case 255:
944 				if (rrp ->ext.choice == 1 && sfp_out) {
945 					 str = rrp->ext.value.ptrvalue;
946 					 if (str != NULL && *str != '\0') {
947 					 	index = GBFeatKeyNameValid(buffer, FALSE);
948 					 	if (GBQualValidToAdd(index, "product")) {
949 							sfp_out->qual = AddGBQual(sfp_out->qual,
950 									"product", str);
951 						}
952 					}
953 				}
954 				break;
955 		}
956 		break;
957 	case SEQFEAT_IMP:
958 		ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
959 		*buffer = StringSave(ifp->key);
960 		break;
961 	case SEQFEAT_SEQ:
962 	case SEQFEAT_SITE:
963 	case SEQFEAT_REGION:
964 	case SEQFEAT_COMMENT:
965 		*buffer = StringSave("misc_feature");
966 		break;
967 	case SEQFEAT_BIOSRC:
968 		*buffer = StringSave("source");
969 		break;
970 	case SEQFEAT_RSITE:
971 	default:
972 		retval = FALSE;
973 		break;
974 	}
975 
976 	return retval;
977 }	/* GetNAFeatKey */
978 
979 /**************************************************************************
980 *SeqIdPtr CheckXrefFeat (BioseqPtr bsp, SeqFeatPtr sfp)
981 *
982 * 	First the location of the xref is checked to see if it overlaps
983 *	the sequence.  If this feature has a xref that is NOT of type
984 *	genbank, embl, or ddbj, it is put out as a misc_feat.  If it's
985 *	one of genbank, embl, or ddbj, it has been put out as a second
986 *	accession.  If the feature should be put out as a misc_feat, then
987 *	the SeqIdPtr (xid) is returned, otherwise NULL.
988 **************************************************************************/
989 
CheckXrefFeat(BioseqPtr bsp,SeqFeatPtr sfp)990 NLM_EXTERN SeqIdPtr CheckXrefFeat (BioseqPtr bsp, SeqFeatPtr sfp)
991 {
992 
993 	SeqIdPtr xid=NULL;
994 	SeqIntPtr si;
995 	SeqLocPtr xref;
996 	ValNodePtr location;
997 
998 	location = ValNodeNew(NULL);
999 	si = SeqIntNew();
1000 	location->choice = SEQLOC_INT;
1001 	location->data.ptrvalue = si;
1002 	si->from = 0;
1003 	si->to = bsp->length - 1;
1004 	si->id = bsp->id;	/* Don't delete id!! */
1005 	if (SeqLocCompare(sfp->location, location) != 0)
1006 	{
1007 		xref = (SeqLocPtr) sfp->data.value.ptrvalue;
1008 		xid = (SeqIdPtr) xref->data.ptrvalue;
1009 		if (xid->choice != 5 && xid->choice != 6 && xid->choice != 13)
1010 			;
1011 		else
1012 			xid = NULL;
1013 	}
1014 	si->id = NULL;
1015 	SeqIntFree(si);
1016 	ValNodeFree(location);
1017 	return xid;
1018 }
1019 
GetGINumFromSip(SeqIdPtr sip)1020 NLM_EXTERN Int4 GetGINumFromSip (SeqIdPtr sip)
1021 {
1022 	Int4 gi = -1;
1023 	ValNodePtr vnp;
1024 
1025 	for (vnp=sip; vnp; vnp=vnp->next)
1026 		if (vnp->choice == SEQID_GI)
1027 			gi = vnp->data.intvalue;
1028 
1029 	return gi;
1030 }
1031 
1032 /*****************************************************************************
1033 *FlatRefBest
1034 *
1035 *	returns ValNodePtr to best (for FlatFile production) pub in a equiv set
1036 *****************************************************************************/
FlatRefBest(ValNodePtr equiv,Boolean error_msgs,Boolean anything)1037 NLM_EXTERN ValNodePtr FlatRefBest(ValNodePtr equiv, Boolean error_msgs, Boolean anything)
1038 {
1039 	ValNodePtr the_pub, retval = NULL, newpub;
1040 	CitBookPtr cb;
1041 	CitSubPtr cs;
1042 	CitGenPtr cg;
1043 	CitArtPtr ca;
1044 	MedlineEntryPtr ml;
1045 	CitJourPtr jp;
1046 	ImprintPtr ip;
1047 	Boolean good_one;
1048 	Int1 bad_one= 0;
1049 	CharPtr str_ret;
1050 
1051 	if (equiv->choice == PUB_Equiv) {
1052 		newpub = equiv->data.ptrvalue;
1053 	} else {
1054 		newpub = equiv;
1055 	}
1056 	for (the_pub = newpub, good_one = FALSE; the_pub && ! good_one
1057 			; the_pub = the_pub -> next) {
1058 
1059 		switch ( the_pub -> choice) {
1060 
1061       case PUB_Sub:
1062          cs = (CitSubPtr) the_pub -> data.ptrvalue;
1063          if (cs) {
1064 			if ( cs -> imp){
1065 				ip = cs -> imp;
1066 				if ( ip -> date) {
1067 					retval = the_pub;
1068 					good_one = TRUE; /* good for submitted */
1069 				}
1070 			} else if (cs->date) {
1071 					retval = the_pub;
1072 					good_one = TRUE; /* good for submitted */
1073 			}
1074 		}
1075          break;
1076 		case PUB_Man:
1077 		case PUB_Book:
1078 			cb = (CitBookPtr) the_pub -> data.ptrvalue;
1079 			if ( cb -> imp) {
1080 				ip = cb -> imp;
1081 				if ( ip -> date) {
1082 					retval = the_pub;
1083 					good_one = TRUE; /* good for thesis or book */
1084 				}
1085 			}
1086 		break;
1087 		case PUB_Patent:
1088 			retval = the_pub;
1089 			good_one = TRUE; /* might exclude later...*/
1090 		break;
1091 		case PUB_Article:
1092 		case PUB_Medline:
1093 			if ( the_pub -> choice == PUB_Medline) {
1094 				ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1095 				ca = (CitArtPtr) ml -> cit;
1096 
1097 			} else {
1098 				ca = (CitArtPtr) the_pub -> data.ptrvalue;
1099 			}
1100 		if (ca -> fromptr) {
1101 			if (ca -> from ==1) {
1102 				jp = (CitJourPtr) ca -> fromptr;
1103 				if ( jp -> imp) {
1104 					ip = jp -> imp;
1105 					if ( ip -> date) {
1106 						retval = the_pub;
1107 						good_one = TRUE; /* good as it gets */
1108 					}
1109 				}
1110 			} else {
1111 				CitBookPtr book = (CitBookPtr) ca -> fromptr;
1112 					if ( book -> imp) {
1113 						ip = book -> imp;
1114 						if ( ip -> date) {
1115 							retval = the_pub;
1116 							good_one = TRUE; /* good for book */
1117 						}
1118 					}
1119 
1120 			}
1121 		}
1122 			break;
1123 		case PUB_Gen:
1124 			cg = (CitGenPtr) the_pub -> data.ptrvalue;
1125 			if (cg -> cit) {
1126 				str_ret = NULL;
1127 				str_ret = StrStr(cg -> cit ,"Journal=\"");
1128 				if ((str_ret) || (cg->title) || (cg->journal) || (cg->date)) {
1129 					retval = the_pub;  /*unless something better */
1130 				} else {
1131 					if (StringNICmp("unpublished", cg->cit, 11) == 0)
1132 						retval = the_pub;
1133 					else if (StringNICmp("to be published", cg->cit, 15) == 0)
1134 						retval = the_pub;
1135 					else if (StringNICmp("in press", cg->cit, 8) == 0)
1136 						retval = the_pub;
1137 					else if (StringNICmp("submitted", cg->cit, 8) == 0)
1138 						retval = the_pub;
1139 				}
1140 			} else if (cg -> journal) {
1141 				retval = the_pub;  /*unless something better */
1142 			}
1143 
1144 			break;
1145 		case PUB_Proc:
1146 			bad_one = the_pub -> choice;
1147 			break;
1148 		}
1149 	}
1150 
1151 	if (! retval && anything) {
1152 	   for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1153 		if (the_pub->choice == PUB_Muid)
1154 			retval = the_pub;
1155 	   }
1156 	   if (! retval) /* Take anything left over now and hope for the best */
1157 		retval = newpub;
1158 	}
1159 
1160 	if ( ! retval && bad_one != 0) {
1161 		if (error_msgs == TRUE)
1162 			ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference,
1163 			"FlatRefBest: Unimplemented pub type = %d", bad_one);
1164 	}
1165 
1166 	return retval;
1167 }	/* FlatRefBest */
1168 
StoreFeatTemp(SortStruct PNTR List,SeqFeatPtr sfp,Int4 currentsize,BioseqPtr bsp,BioseqPtr seg,Uint2 entityID,Uint4 itemID,Uint2 itemtype,SeqLocPtr slp,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt,Boolean temp)1169 NLM_EXTERN Int4 StoreFeatTemp(SortStruct PNTR List, SeqFeatPtr sfp,
1170 Int4 currentsize, BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt,
1171 Boolean temp)
1172 {
1173 	SeqLocPtr PNTR slpp = NULL;
1174 
1175 	List[currentsize].entityID = entityID;
1176 	List[currentsize].itemID = itemID;
1177 	List[currentsize].itemtype = itemtype;
1178 	List[currentsize].sfp = sfp;
1179 	List[currentsize].bsp = bsp;
1180 	List[currentsize].seg_bsp = seg;
1181 	List[currentsize].dup = FALSE;
1182 	List[currentsize].hash = AsnIoHash(sfp,
1183 						(AsnWriteFunc) SeqFeatAsnWrite);
1184 	List[currentsize].slp = slp;
1185 	if (extra_loc_cnt > 0) {
1186 		slpp = MemNew(extra_loc_cnt*(sizeof(SeqLocPtr)));
1187 		MemCpy(slpp, extra_loc, extra_loc_cnt*(sizeof(SeqLocPtr)));
1188 	}
1189 	List[currentsize].extra_loc = slpp;
1190 	List[currentsize].extra_loc_cnt = extra_loc_cnt;
1191 	List[currentsize].tempload = temp;
1192 	List[currentsize].gsp = NULL;
1193 	List[currentsize].nsp = NoteStructNew(List[currentsize].nsp);
1194 
1195 	currentsize++;
1196 
1197 	return currentsize;
1198 }
1199 
StoreFeat(SortStruct PNTR List,SeqFeatPtr sfp,Int4 currentsize,BioseqPtr bsp,BioseqPtr seg,Uint2 entityID,Uint4 itemID,Uint2 itemtype,SeqLocPtr slp,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt)1200 NLM_EXTERN Int4 StoreFeat(SortStruct PNTR List, SeqFeatPtr sfp, Int4 currentsize,
1201 BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,
1202 SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt)
1203 {
1204 	return StoreFeatFree(List, sfp, currentsize, bsp, seg, entityID, itemID,
1205 					itemtype,slp, extra_loc, extra_loc_cnt, FALSE);
1206 }
1207 
StoreFeatFree(SortStruct PNTR List,SeqFeatPtr sfp,Int4 currentsize,BioseqPtr bsp,BioseqPtr seg,Uint2 entityID,Uint4 itemID,Uint2 itemtype,SeqLocPtr slp,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt,Boolean feat_free)1208 NLM_EXTERN Int4 StoreFeatFree(SortStruct PNTR List, SeqFeatPtr sfp, Int4 currentsize,
1209 BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,
1210 SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt, Boolean feat_free)
1211 {
1212 	SeqLocPtr PNTR slpp = NULL;
1213 
1214 	List[currentsize].entityID = entityID;
1215 	List[currentsize].itemID = itemID;
1216 	List[currentsize].itemtype = itemtype;
1217 	List[currentsize].sfp = sfp;
1218 	List[currentsize].bsp = bsp;
1219 	List[currentsize].seg_bsp = seg;
1220 	List[currentsize].dup = FALSE;
1221 	List[currentsize].hash = AsnIoHash(sfp,
1222 						(AsnWriteFunc) SeqFeatAsnWrite);
1223 	List[currentsize].slp = slp;
1224 	if (extra_loc_cnt > 0) {
1225 		slpp = MemNew(extra_loc_cnt*(sizeof(SeqLocPtr)));
1226 		MemCpy(slpp, extra_loc, extra_loc_cnt*(sizeof(SeqLocPtr)));
1227 	}
1228 	List[currentsize].extra_loc = slpp;
1229 	List[currentsize].extra_loc_cnt = extra_loc_cnt;
1230 	List[currentsize].feat_free = feat_free;
1231 	List[currentsize].gsp = NULL;
1232 	List[currentsize].nsp = NoteStructNew(List[currentsize].nsp);
1233 
1234 	currentsize++;
1235 
1236 	return currentsize;
1237 }
1238 /****************************************************************************
1239 *CharPtr Cat2Strings (CharPtr string1, CharPtr string2, CharPtr separator, Int2 num)
1240 *
1241 * Concatenates two strings (string1 and string2) and separates them by a
1242 * "separator".  If num>0, takes num spaces off the end of string1 on
1243 * concatenation; if num<0 takes all spaces off the end of the complete
1244 * string.
1245 *****************************************************************************/
Cat2Strings(CharPtr string1,CharPtr string2,CharPtr separator,Int2 num)1246 NLM_EXTERN CharPtr Cat2Strings (CharPtr string1, CharPtr string2, CharPtr separator, Int2 num)
1247 
1248 {
1249 	Boolean no_space=FALSE;
1250 	Int4 length1=0, length2=0, length_sep=0, length_total;
1251 	CharPtr newstring=NULL;
1252 
1253 	if (num < 0)
1254 	{
1255 		num=0;
1256 		no_space=TRUE;
1257 	}
1258 
1259 	if (string1 != NULL)
1260 		length1 = StringLen(string1);
1261 	if (string2 != NULL)
1262 		length2 = StringLen(string2);
1263 	if (separator != NULL)
1264 		length_sep = StringLen(separator);
1265 
1266 	length_total = length1+length2+length_sep-num+1;
1267 
1268 	newstring = (CharPtr) MemNew(length_total*sizeof(Char));
1269 
1270 	if (string1 != NULL)
1271 		newstring = StringCat(newstring, string1);
1272 	if ((length1-num) >= 0)
1273 		newstring[length1-num] = '\0';
1274 	if (no_space && length1 > 0)
1275 		while (length1 > 0 && newstring[length1-1] == ' ')
1276 		{
1277 			newstring[length1-1] = '\0';
1278 			length1--;
1279 		}
1280 	if (separator != NULL)
1281 		newstring = StringCat(newstring, separator);
1282 	if (string2 != NULL)
1283 		newstring = StringCat(newstring, string2);
1284 
1285 
1286 	return newstring;
1287 }
1288 
AddGBQualEx(CharPtr PNTR key,GBQualPtr gbqual,CharPtr qual,CharPtr val)1289 NLM_EXTERN GBQualPtr AddGBQualEx (CharPtr PNTR key, GBQualPtr gbqual, CharPtr qual, CharPtr val)
1290 {
1291 	Int2 index;
1292 
1293 	index = GBFeatKeyNameValid(key, FALSE);
1294 	if (GBQualValidToAdd(index,qual)) {
1295 		return AddGBQual(gbqual, qual, val);
1296 	}
1297 	return gbqual;
1298 }
1299 
/************************************************************************
*AddGBQual
*
*	This function makes a new GBQual and gives it a "qual" and a
*	"val".
*	Doesn't add the qual if it's already there /tatiana/
*	Doesn't add an empty ("") val if the qual is translation
***********************************************************************/
#ifdef ASN2GNBK_STRIP_NOTE_PERIODS
/*
 * Returns TRUE when str has text, is at least three characters long and
 * ends in "..."; FALSE otherwise.
 */
static Boolean IsEllipsis (
  CharPtr str
)

{
  size_t  n;

  if (StringHasNoText (str)) return FALSE;
  n = StringLen (str);
  if (n < 3) return FALSE;
  /* the tail starts three characters before the terminator */
  return (Boolean) (StringCmp (str + n - 3, "...") == 0);
}
#endif
1324 
AddGBQual(GBQualPtr gbqual,CharPtr qual,CharPtr val)1325 NLM_EXTERN GBQualPtr AddGBQual (GBQualPtr gbqual, CharPtr qual, CharPtr val)
1326 {
1327 	GBQualPtr curq, note = NULL;
1328 
1329 	if (StringCmp(qual, "translation") == 0) {
1330 		if (val == NULL)
1331 			return gbqual;
1332 		if (*val == '\0')
1333 			return gbqual;
1334 	}
1335 	if (gbqual) {
1336 		if (CheckForQual(gbqual, qual, val) == 1) {
1337 			return gbqual;
1338 		}
1339 		for (curq=gbqual; curq->next != NULL; curq=curq->next)
1340 			continue;
1341 		curq->next = GBQualNew();
1342 		curq = curq->next;
1343 		if (val)
1344 			curq->val = StringSave(val);
1345 		curq->qual = StringSave(qual);
1346 		note = curq;
1347 	} else {
1348 		gbqual = GBQualNew();
1349 		gbqual->next = NULL;
1350 		if (val)
1351 			gbqual->val = StringSave(val);
1352 		gbqual->qual = StringSave(qual);
1353 		note = gbqual;
1354 	}
1355 
1356 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
1357 	if (note != NULL && StringICmp (qual, "note") == 0) {
1358 		size_t len;
1359 		CharPtr p, q;
1360 		len = StringLen (note->val);
1361 		if (len > 0 && note->val [len - 1] == '~') {
1362 			note->val [len - 1] = '\0';
1363 		}
1364 		if (! IsEllipsis (note->val)) {
1365 			len = StringLen (note->val);
1366 			if (len > 0 && note->val [len - 1] == '.') {
1367  				note->val [len - 1] = '\0';
1368 				if (len > 1 && note->val [len - 2] == '.') {
1369  					note->val [len - 2] = '\0';
1370 				}
1371 			}
1372 		}
1373 		TrimSpacesAndJunkFromEnds (note->val,TRUE);
1374 		TrimSpacesAndSemicolons (note->val);
1375 		p = note->val;
1376 		q = note->val;
1377 		while (*p) {
1378 		  if (*p == ';' && p [1] == ' ' && p [2] == ';') {
1379 		    p += 2;
1380 		  } else {
1381 		    *q = *p;
1382 		    p++;
1383 		    q++;
1384 		  }
1385 		}
1386 		*q = '\0';
1387 	}
1388 #endif
1389 
1390 	return gbqual;
1391 }
1392 
1393 /****************************************************************************
1394 *	Int2 CheckForQual(GBQualPtr gbqual, CharPtr string_q, CharPtr string_v)
1395 *
1396 *	Compares string (a potential gbqual->val) against all gbquals.
1397 *	If a match is found, "1" is returned; if not "0".
1398 ****************************************************************************/
1399 
CheckForQual(GBQualPtr gbqual,CharPtr string_q,CharPtr string_v)1400 NLM_EXTERN Int2 CheckForQual (GBQualPtr gbqual, CharPtr string_q, CharPtr string_v)
1401 {
1402 	GBQualPtr curq;
1403 
1404 	for (curq=gbqual; curq; curq=curq->next) {
1405 		if (StringCmp(string_q, curq->qual) == 0) {
1406 			if (curq->val == NULL) {
1407 				curq->val = StringSave(string_v);
1408 				return 1;
1409 			}
1410 			if (StringCmp(string_v, curq->val) == 0) {
1411 				return 1;
1412 			}
1413 		}
1414 	}
1415 	return 0;
1416 }
1417 
1418 
1419 /****************************************************************************
1420 *
1421 *	MakeAnAccession is for last ditch efforts to get an accession
1422 *	after all the normal things have failed.
1423 *
1424 ****************************************************************************/
1425 
MakeAnAccession(CharPtr new_buf,SeqIdPtr seq_id,Int2 buflen)1426 NLM_EXTERN CharPtr MakeAnAccession (CharPtr new_buf, SeqIdPtr seq_id, Int2 buflen)
1427 {
1428 	SeqIdPtr new_id;
1429 
1430 	new_id = SeqIdFindBest(seq_id, SEQID_GENBANK);
1431 	SeqIdWrite(new_id, new_buf, PRINTID_TEXTID_ACCESSION, buflen);
1432 	return new_buf;
1433 
1434 }
1435 
GetGBSourceLine(GBBlockPtr gb)1436 NLM_EXTERN CharPtr GetGBSourceLine (GBBlockPtr gb)
1437 {
1438 	CharPtr source = NULL;
1439 
1440 	if(gb && gb->source)
1441 		source = StringSave(gb->source);
1442 
1443 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
1444 	if (source != NULL) {
1445 		TrimSpacesAndJunkFromEnds (source,TRUE);
1446 	}
1447 #endif
1448 	return source;
1449 }
1450 
FlatOrganelle(Asn2ffJobPtr ajp,GBEntryPtr gbp)1451 NLM_EXTERN CharPtr FlatOrganelle(Asn2ffJobPtr ajp, GBEntryPtr gbp)
1452 {
1453 	CharPtr retval = NULL;
1454 	ValNodePtr man, vnp=NULL;
1455 	static char * organelle_names [] = {
1456 		 "Mitochondrion " ,
1457     "Chloroplast " ,
1458     "Kinetoplast ",
1459     "Cyanelle "};
1460 	BioSourcePtr biosp=NULL;
1461 /*
1462 	static CharPtr genome[] = {
1463 	NULL, NULL, "Chloroplast ", "Chromoplast ", "Kinetoplast ", "Mitochondrion ", "Plastid ", "Macronuclear ", "Extrachrom ", "Plasmid ", NULL, NULL, "Cyanelle ", "Proviral ", "Virion ", "Nucleomorph ", "Apicoplast ", "Leucoplast ", "Proplastid "};
1464 */
1465 	static CharPtr genome[] = {
1466 	NULL, NULL, "Chloroplast ", "Chromoplast ", "Kinetoplast ", "Mitochondrion ", "Plastid ", NULL, NULL, NULL, NULL, NULL, "Cyanelle ", NULL, NULL, "Nucleomorph ", "Apicoplast ", "Leucoplast ", "Proplastid ", NULL};
1467 
1468 /* try new first */
1469 	if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_source)) != NULL)
1470 	{
1471 		biosp = vnp->data.ptrvalue;
1472 	/*	if (biosp->genome < 6 || biosp->genome > 12)*/
1473 			retval = StringSave(genome[biosp->genome]);
1474 	}
1475 /* old next */
1476 	if (biosp == NULL) {
1477 		if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_modif)) != NULL)
1478 		{
1479 			for (man = (ValNodePtr) vnp-> data.ptrvalue; man; man = man -> next)
1480 			{
1481 				switch (man -> data.intvalue){
1482 					case 4: case 5: case 6: case 7:
1483 					if (! retval )
1484 						retval = StringSave(organelle_names
1485 								[man->data.intvalue-4]);
1486 						break;
1487 					default:
1488 						break;
1489 					}
1490 			}
1491 		}
1492 	}
1493 	return retval;
1494 }
1495 
GetNumOfSeqBlks(Asn2ffJobPtr ajp,GBEntryPtr gbp)1496 NLM_EXTERN Int4 GetNumOfSeqBlks (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1497 {
1498         Int4 length, num_of_seqblks;
1499 
1500         length = BioseqGetLen(gbp->bsp);
1501 		if (ajp->slp) {
1502 			length = SeqLocLen(ajp->slp);
1503 		}
1504         num_of_seqblks = ROUNDUP(length, SEQ_BLK_SIZE)/SEQ_BLK_SIZE;
1505 
1506         return num_of_seqblks;
1507 }
1508 
1509 
1510 /*************************************************************************
1511 *	New asn.1 spec - division is in Orgname.div
1512 *	check MolInfo.tech
1513 *	check GBBlock for PAT or SYN
1514 *	get division from Orgname.div (in BioSource)
1515 *	09-05-96
1516 *************************************************************************/
1517 
IndexedGetDescrForDiv(BioseqPtr bsp,DivStructPtr PNTR dspp)1518 static void IndexedGetDescrForDiv (BioseqPtr bsp, DivStructPtr PNTR dspp)
1519 
1520 {
1521 	SeqMgrDescContext context;
1522 	ValNodePtr tmp;
1523 	DivStructPtr	dsp;
1524 	BioSourcePtr bsr;
1525 	MolInfoPtr mol;
1526 	CharPtr gb_div=NULL;
1527 	GBBlockPtr gb;
1528 
1529 	dsp = *dspp;
1530 	tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
1531 			if (tmp != NULL) {
1532 				if (tmp->data.ptrvalue != NULL) {
1533 					mol = (MolInfoPtr) tmp->data.ptrvalue;
1534 					if (mol->tech != 0) {
1535 						if (dsp->tech == 0) {
1536 							dsp->tech = mol->tech;
1537 						} else if (mol->tech != dsp->tech) {
1538 							dsp->was_err = TRUE;
1539 							if (dsp->err_post) {
1540 								ErrPostEx(SEV_WARNING, 0, 0,
1541 								"Different Molinfo in one entry: %d|%d",
1542 									mol->tech, dsp->tech);
1543 							}
1544 							dsp->tech = mol->tech;
1545 						}
1546 						dsp->techID = context.itemID;
1547 						dsp->techtype = OBJ_SEQDESC;
1548 						*dspp = dsp;
1549 					}
1550 				}
1551 			}
1552 
1553 	tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
1554 			while (tmp != NULL && dsp->orgdiv == NULL) {
1555 				bsr = (BioSourcePtr) tmp->data.ptrvalue;
1556 				if (bsr && bsr->org) {
1557 					if (bsr->org->orgname && bsr->org->orgname->div) {
1558 						gb_div = bsr->org->orgname->div;
1559 						if (dsp->orgdiv == NULL) {
1560 							dsp->orgdiv = gb_div;
1561 						} else if (StringCmp(gb_div, dsp->orgdiv) != 0) {
1562 							dsp->was_err = TRUE;
1563 							if (dsp->err_post) {
1564 								ErrPostEx(SEV_WARNING, 0, 0,
1565 							"Different Taxonomy divisions in one entry: %s|%s",
1566 									gb_div, dsp->orgdiv);
1567 							}
1568 							dsp->orgdiv = gb_div;
1569 						}
1570 						dsp->biosrc = bsr;
1571 						dsp->orgID = context.itemID;
1572 						dsp->orgtype = OBJ_SEQDESC;
1573 						*dspp = dsp;
1574 					}
1575 				}
1576 				tmp = SeqMgrGetNextDescriptor (bsp, tmp, Seq_descr_source, &context);
1577 			}
1578 
1579 	tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context);
1580 			if (tmp != NULL) {
1581 				gb = (GBBlockPtr) tmp->data.ptrvalue;
1582 				if (gb->div) {
1583 					gb_div = gb->div;
1584 					if (dsp->gbdiv == NULL) {
1585 						dsp->gbdiv = gb_div;
1586 					} else if (StringCmp(gb_div, dsp->gbdiv) != 0) {
1587 						dsp->was_err = TRUE;
1588 						if (dsp->err_post) {
1589 							ErrPostEx(SEV_WARNING, 0, 0,
1590 							"Different GBBlock divisions in one entry: %s|%s",
1591 								gb_div, dsp->gbdiv);
1592 						}
1593 						dsp->gbdiv = gb_div;
1594 					}
1595 					dsp->gbID = context.itemID;
1596 					dsp->gbtype = OBJ_SEQDESC;
1597 					*dspp = dsp;
1598 				}
1599 			}
1600 }
1601 
GetDescrForDiv(GatherContextPtr gcp)1602 static Boolean GetDescrForDiv (GatherContextPtr gcp)
1603 {
1604 /* find only one (closest to the target!) vnp with given choice */
1605 	ValNodePtr	tmp;
1606 	DivStructPtr	PNTR dspp;
1607 	DivStructPtr	dsp;
1608 	BioSourcePtr bsr;
1609 	MolInfoPtr mol;
1610 	CharPtr gb_div=NULL;
1611 	GBBlockPtr gb;
1612 
1613 	dspp = gcp->userdata;
1614 	dsp = *dspp;
1615 	switch (gcp->thistype)
1616 	{
1617 		case OBJ_SEQDESC:
1618 			tmp = (ValNodePtr) (gcp->thisitem);
1619 			if (tmp->choice == Seq_descr_molinfo) {
1620 				if (tmp->data.ptrvalue != NULL) {
1621 					mol = (MolInfoPtr) tmp->data.ptrvalue;
1622 					if (mol->tech != 0) {
1623 						if (dsp->tech == 0) {
1624 							dsp->tech = mol->tech;
1625 						} else if (mol->tech != dsp->tech) {
1626 							dsp->was_err = TRUE;
1627 							if (dsp->err_post) {
1628 								ErrPostEx(SEV_WARNING, 0, 0,
1629 								"Different Molinfo in one entry: %d|%d",
1630 									mol->tech, dsp->tech);
1631 							}
1632 							dsp->tech = mol->tech;
1633 						}
1634 						dsp->techID = gcp->itemID;
1635 						dsp->techtype = gcp->thistype;
1636 						*dspp = dsp;
1637 					}
1638 				}
1639 			} else if (tmp->choice == Seq_descr_source) {
1640 				bsr = (BioSourcePtr) tmp->data.ptrvalue;
1641 				if (bsr && bsr->org) {
1642 					if (bsr->org->orgname && bsr->org->orgname->div) {
1643 						gb_div = bsr->org->orgname->div;
1644 						if (dsp->orgdiv == NULL) {
1645 							dsp->orgdiv = gb_div;
1646 						} else if (StringCmp(gb_div, dsp->orgdiv) != 0) {
1647 							dsp->was_err = TRUE;
1648 							if (dsp->err_post) {
1649 								ErrPostEx(SEV_WARNING, 0, 0,
1650 							"Different Taxonomy divisions in one entry: %s|%s",
1651 									gb_div, dsp->orgdiv);
1652 							}
1653 							dsp->orgdiv = gb_div;
1654 						}
1655 						dsp->biosrc = bsr;
1656 						dsp->orgID = gcp->itemID;
1657 						dsp->orgtype = gcp->thistype;
1658 						*dspp = dsp;
1659 					}
1660 				}
1661 			} else if (tmp->choice == Seq_descr_genbank) {
1662 				gb = (GBBlockPtr) tmp->data.ptrvalue;
1663 				if (gb->div) {
1664 					gb_div = gb->div;
1665 					if (dsp->gbdiv == NULL) {
1666 						dsp->gbdiv = gb_div;
1667 					} else if (StringCmp(gb_div, dsp->gbdiv) != 0) {
1668 						dsp->was_err = TRUE;
1669 						if (dsp->err_post) {
1670 							ErrPostEx(SEV_WARNING, 0, 0,
1671 							"Different GBBlock divisions in one entry: %s|%s",
1672 								gb_div, dsp->gbdiv);
1673 						}
1674 						dsp->gbdiv = gb_div;
1675 					}
1676 					dsp->gbID = gcp->itemID;
1677 					dsp->gbtype = gcp->thistype;
1678 					*dspp = dsp;
1679 				}
1680 			}
1681 			break;
1682 		default:
1683 			break;
1684 	}
1685 	return TRUE;
1686 }
1687 
1688 /**************************************************************************
1689 *	0 - nothing found
1690 *	1 - return division code OK
1691 *	2 - return division code but errors were found
1692 **************************************************************************/
BioseqGetGBDivCodeEx(BioseqPtr bsp,CharPtr buf,Int2 buflen,Boolean err_post,Boolean useFeatureIndexing)1693 static Int2 BioseqGetGBDivCodeEx (BioseqPtr bsp, CharPtr buf, Int2 buflen, Boolean err_post, Boolean useFeatureIndexing)
1694 {
1695 	GatherScope gsc;
1696 	SeqLocPtr slp = NULL;
1697 	Uint2 bspID;
1698 	DivStructPtr dsp;
1699 	BioSourcePtr bsr = NULL;
1700 	Int2 tech, /*UNUSED*/diff, retval = 0;
1701 	CharPtr orgdiv, gbdiv;
1702 	SeqIdPtr sip;
1703 
1704 	if (buf == NULL)
1705 		return 0;
1706 	*buf = '\0';
1707 /* check for Patent SeqId  */
1708 	for (sip = bsp->id; sip; sip=sip->next) {
1709 		if (sip->choice == SEQID_PATENT) {
1710 			diff = LabelCopy(buf, "PAT", buflen);
1711 			return 1;
1712 		}
1713 	}
1714 	bspID = ObjMgrGetEntityIDForPointer(bsp);
1715 	dsp = MemNew(sizeof(DivStruct));
1716 	dsp->err_post = err_post;
1717 	dsp->entityID = bspID;
1718 	dsp->tech = 0;
1719 	dsp->gbdiv = NULL;
1720 	dsp->orgdiv = NULL;
1721 	dsp->biosrc = NULL;
1722 	dsp->was_err = FALSE;
1723   	MemSet ((Pointer) (&gsc), 0, sizeof (GatherScope));
1724 	MemSet ((Pointer) (gsc.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
1725 	gsc.ignore[OBJ_SEQDESC] = FALSE;
1726 	slp = ValNodeNew(NULL);
1727 	slp->choice = SEQLOC_WHOLE;
1728 	slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
1729 	gsc.target = slp;
1730 
1731 	if (useFeatureIndexing) {
1732 		IndexedGetDescrForDiv (bsp, &dsp);
1733 	} else {
1734 		GatherEntity(bspID, &dsp, GetDescrForDiv, &gsc);
1735 	}
1736 
1737 	SeqLocFree(slp);
1738 	orgdiv = dsp->orgdiv;
1739 	gbdiv = dsp->gbdiv;
1740 	tech = dsp->tech;
1741 	bsr = dsp->biosrc;
1742 	if (dsp->was_err) {
1743 		retval = 2;
1744 	} else {
1745 		retval = 1;
1746 	}
1747 	MemFree(dsp);
1748 	switch (tech) {
1749 		case MI_TECH_est:
1750 			diff = LabelCopy(buf, "EST", buflen);
1751 		break;
1752 		case MI_TECH_sts:  /* Sequence Tagged Site */
1753 			diff = LabelCopy(buf, "STS", buflen);
1754 		break;
1755 		case MI_TECH_survey:
1756 			diff = LabelCopy(buf, "GSS", buflen);
1757 		break;
1758 		case MI_TECH_htc:
1759 			diff = LabelCopy(buf, "HTC", buflen);
1760 		break;
1761 		case MI_TECH_htgs_0:
1762 		case MI_TECH_htgs_1:
1763 		case MI_TECH_htgs_2:
1764 			diff = LabelCopy(buf, "HTG", buflen);
1765 		break;
1766 		default:
1767 		break;
1768 	}
1769 	if (*buf != '\0') {
1770 		return retval;
1771 	}
1772 /*  new slot for synthetic sequences */
1773 	if (bsr && bsr->origin == 5) {
1774 		diff = LabelCopy(buf, "SYN", buflen);
1775 		return retval;
1776 	}
1777 /***** division in GBBlock becomes obsolete  ********/
1778 	if (gbdiv != NULL) {
1779 		if (StringCmp(gbdiv, "PAT") == 0 ||
1780 					StringCmp(gbdiv, "SYN") == 0 || orgdiv == NULL) {
1781 			diff = LabelCopy(buf, gbdiv, buflen);
1782 		return retval;
1783 		}
1784 	}
1785 
1786 /**********/
1787 	if (orgdiv != NULL) {
1788 		diff = LabelCopy(buf, orgdiv, buflen);
1789 		return retval;
1790 	}
1791 	return 0;
1792 }
1793 
BioseqGetGBDivCode(BioseqPtr bsp,CharPtr buf,Int2 buflen,Boolean err_post)1794 NLM_EXTERN Int2 BioseqGetGBDivCode(BioseqPtr bsp, CharPtr buf, Int2 buflen, Boolean err_post)
1795 
1796 {
1797 	return BioseqGetGBDivCodeEx (bsp, buf, buflen, err_post, FALSE);
1798 }
1799 
1800 
1801 /*============================================================================*\
1802  * Function:
1803  *	StrStripSpaces
1804  *
1805  * Purpose:
1806  *	Strips all spaces in string in following manner. If the function
1807  *	meet several spaces (spaces and tabs) in succession it replaces them
1808  *	with one space.
1809  *	Strips all spaces after '(' and before ')'
1810  *
1811 \*----------------------------------------------------------------------------*/
StrStripSpaces(CharPtr str)1812 static void StrStripSpaces(CharPtr str)
1813 {
1814 	CharPtr	new_str;
1815 
1816 	if (str == NULL) {
1817 		return;
1818 	}
1819 
1820 	new_str = str;
1821 	while (*str != '\0') {
1822 		*new_str++ = *str;
1823 		if (*str == ' ' || *str == '\t' || *str == '(') {
1824 			for (str++; *str == ' ' || *str == '\t'; str++) ;
1825 			if (*str == ')' || *str == ',') {
1826 				new_str--;
1827 			}
1828 		} else {
1829 			str++;
1830 		}
1831 	}
1832 	*new_str = '\0';
1833 }
1834 
GetFlatRetract(ValNodePtr pub)1835 static CharPtr GetFlatRetract(ValNodePtr pub)
1836 {
1837 	CitArtPtr cit;
1838 	CitJourPtr jour = NULL;
1839 	CitRetractPtr ret = NULL;
1840 	CharPtr buffer;
1841 	Int2 len;
1842 
1843 	if (pub == NULL)
1844 		return NULL;
1845 	if (pub->choice != PUB_Article)
1846 		return NULL;
1847 	cit = pub->data.ptrvalue;
1848 	if (cit->from == 1) {
1849 		jour = cit->fromptr;
1850 		if (jour && jour->imp) {
1851 			ret = jour->imp->retract;
1852 			if (ret && ret->type == 3) { /* other types can be added later */
1853 				len = StringLen(ret->exp) + 11;
1854 				buffer = (CharPtr) MemNew(len*sizeof(Char));
1855 				sprintf(buffer, "Erratum:[%s]", ret->exp);
1856 				return buffer;
1857 			}
1858 		}
1859 	}
1860 	return NULL;
1861 }
1862 
GetSubmitDescr(ValNodePtr pub)1863 static CharPtr GetSubmitDescr(ValNodePtr pub)
1864 {
1865 	CitSubPtr cs;
1866 
1867 	if (pub == NULL) {
1868 		return NULL;
1869 	}
1870 	if (pub->choice != PUB_Sub) {
1871 		return NULL;
1872 	}
1873 	cs = (CitSubPtr) pub->data.ptrvalue;
1874 	if (cs->descr == NULL) {
1875 		return NULL;
1876 	}
1877 	return (StringSave(cs->descr));
1878 }
1879 
GetMuid(ValNodePtr equiv)1880 static Int4 GetMuid(ValNodePtr equiv)
1881 {
1882 	Int4 muid=0;
1883 	ValNodePtr newpub, the_pub;
1884 	MedlineEntryPtr ml;
1885 
1886 	if (equiv->choice == PUB_Equiv)
1887 		newpub = equiv->data.ptrvalue;
1888 	else
1889 		newpub = equiv;
1890 
1891 	for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1892 		if (the_pub->choice == PUB_Muid) {
1893 			muid = the_pub->data.intvalue;
1894 			break;
1895 		}
1896 		if (the_pub->choice == PUB_Medline) {
1897 			ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1898 			muid = ml->uid;
1899 		}
1900 	}
1901 
1902 	return muid;
1903 
1904 }	/* GetMuid */
1905 
GetPmid(ValNodePtr equiv)1906 static Int4 GetPmid(ValNodePtr equiv)
1907 {
1908 	Int4 pmid=0;
1909 	ValNodePtr newpub, the_pub;
1910 	MedlineEntryPtr ml;
1911 
1912 	if (equiv->choice == PUB_Equiv)
1913 		newpub = equiv->data.ptrvalue;
1914 	else
1915 		newpub = equiv;
1916 
1917 	for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1918 		if (the_pub->choice == PUB_PMid) {
1919 			pmid = the_pub->data.intvalue;
1920 			break;
1921 		}
1922 		if (the_pub->choice == PUB_Medline) {
1923 			ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1924 			pmid = ml->pmid;
1925 		}
1926 	}
1927 
1928 	return pmid;
1929 
1930 }	/* GetPmid */
1931 
1932 /***************************************************************************
1933 * SeqLocPtr GetBaseRangeForCitation (SeqLocPtr loc, SeqLocPtr slp, Int4Ptr start, Int4Ptr stop)
1934 *
1935 *
1936 *	This function finds the start and stop Int4 values for a location.
1937 *	If this is a cmplex location (e.g., SEQLOC_MIX), then the
1938 *	function is called several times, with the returned slp used
1939 *	as an argument on the next round.
1940 *
1941 *	The first call should be with slp set to NULL.
1942 *
1943 ****************************************************************************/
1944 
static SeqLocPtr GetBaseRangeForCitation (SeqLocPtr loc, SeqLocPtr slp, Int4Ptr start, Int4Ptr stop)
{
	Boolean composite = FALSE;	/* TRUE when loc is walked piece by piece */
	SeqLocPtr piece = NULL;		/* the location measured on this call */
	SeqLocPtr following = NULL;	/* handed back so the caller can continue */
	Int4 from, to;

	/* Both outputs stay 0 when there is no range to report. */
	*start = 0;
	*stop = 0;

	switch (loc->choice)
	{
		case SEQLOC_WHOLE:
		case SEQLOC_INT:
			/* A single interval: measure loc itself, nothing follows. */
			piece = loc;
			break;
		case SEQLOC_MIX:
		case SEQLOC_EQUIV:
		case SEQLOC_PACKED_INT:
			/* First call (slp == NULL) starts at the head of the chain. */
			composite = TRUE;
			piece = (slp != NULL) ? slp : (SeqLocPtr) loc->data.ptrvalue;
			break;
		default:
			/* SEQLOC_BOND, SEQLOC_FEAT, SEQLOC_NULL, SEQLOC_EMPTY,
			   SEQLOC_PNT, SEQLOC_PACKED_PNT and any other choice:
			   no range is produced and NULL is returned. */
			break;
	}

	if (piece != NULL)
	{
		from = SeqLocStart(piece);
		if (from >= 0)
		{
			to = SeqLocStop(piece);
			if (to >= 0 && to >= from)
			{ /* +1 for Genbank format. */
				*start = from + 1;
				*stop = to + 1;
			}
		}
		if (composite)
			following = piece->next;
	}

	return following;
}
2004 
2005 /*************************************************************************
2006 *GB_PrintPubs
2007 *
2008 *	"GB_PrintPubs" to dump pubs in Flat File (i.e., Genbank) format.
2009 *
2010 **************************************************************************/
2011 
void GB_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)

{
	/* Pubdesc comments that are suppressed instead of being printed as a REMARK. */
	static CharPtr suppressed_comments [] = {
		"full automatic", "full staff_review", "full staff_entry",
		"simple staff_review", "simple staff_entry", "simple automatic",
		"unannotated automatic", "unannotated staff_review",
		"unannotated staff_entry"
	};

	BioseqPtr bsp = gbp->bsp;
	Boolean patent_skip = FALSE, submit = FALSE;
	Boolean remark_open = FALSE;	/* TRUE once a REMARK block has been started */
	Boolean suppressed, first_tilde_pending;
	Char gibbsq_text[150];
	CharPtr authors = NULL, title = NULL, journal = NULL, descr = NULL;
	CharPtr retract, gibbsq_comment, cursor, unit_label;
	Int2 i, k;
	Int4 gibbsq, muid, pmid, pat_seqid = 0, from = 0, to = 0;
	PubdescPtr pdp;
	SeqLocPtr cit_loc, rest;
	ValNodePtr pub;

	/* Pick the pub to print; give up (optionally with a message) if none. */
	pub = FlatRefBest(psp->pub, ajp->error_msgs,
			(Boolean) (ASN2FF_SHOW_ALL_PUBS ? TRUE : FALSE));
	if (pub == NULL)
	{
		if (ajp->error_msgs == TRUE)
			PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
		return;
	}

	/* Ignorable patent pubs are dropped for every format except GenPept. */
	patent_skip = FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid);
	if (patent_skip == TRUE && ajp->format != GENPEPT_FMT)
	{
		if (ajp->error_msgs == TRUE)
			PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
		return;
	}

	unit_label = (ajp->format != GENPEPT_FMT) ? "(bases " : "(residues ";

	/* ---- REFERENCE line ---- */
	ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
	ff_AddString("REFERENCE");
	TabToColumn(13);
	ff_AddInteger("%ld", (long) psp->number);

	switch (psp->start)
	{
		case 1:
			/* Whole target: either "(sites)" or one overall range. */
			TabToColumn(16);
			if (psp->descr == NULL)
				break;
			if (psp->descr->reftype != 0) {
				ff_AddString("(sites)");
				break;
			}
			ff_AddString(unit_label);
			if (ajp->slp) {
				ff_AddInteger("%ld", (long) (SeqLocStart(ajp->slp) + 1));
				ff_AddString(" to ");
				ff_AddInteger("%ld", (long) (SeqLocStop(ajp->slp) + 1));
			} else {
				ff_AddString("1 to ");
				ff_AddInteger("%ld", (long) bsp->length);
			}
			ff_AddChar(')');
			break;

		case 2:
			/* One range per interval of every citing feature. */
			TabToColumn(16);
			ff_AddString(unit_label);
			for (i = 0; i < psp->citcount; i++) {
				cit_loc = (SeqLocPtr) psp->citfeat[i]->location;
				rest = NULL;
				do {
					rest = GetBaseRangeForCitation (cit_loc, rest, &from, &to);
					if (from != 0 || to != 0) {
						ff_AddInteger("%ld", (long) from);
						ff_AddString(" to ");
						ff_AddInteger("%ld", (long) to);
						/* separator unless this was the very last range */
						if (rest != NULL || i+1 != psp->citcount)
							ff_AddString("; ");
					}
				} while (rest != NULL);
			}
			ff_AddChar(')');
			break;

		case 3:
			TabToColumn(16);
			ff_AddString("(sites)");
			break;

		default:
			if (ajp->error_msgs == TRUE)
				ErrPostEx(SEV_WARNING, CTX_NCBI2GB, 1,
				 "Incorrect start value (%d) in PubStruct\n", psp->start);
			break;
	}
	ff_EndPrint();

	/* ---- AUTHORS ---- */
	authors = FlatAuthor(ajp, pub);
	ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
	ff_AddString("AUTHORS");
	TabToColumn(13);
	if (authors == NULL || *authors == NULLB)
		ff_AddChar('.');
	else
		ff_AddString(authors);
	ff_EndPrint();

	/* ---- TITLE (only when there is a non-empty one) ---- */
	title = FlatPubTitle(pub);
	if (title != NULL && *title != NULLB) {
		ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
		ff_AddString("TITLE");
		TabToColumn(13);
		StrStripSpaces(title);
		ff_AddString(title);
		ff_EndPrint();
	}

	/* ---- JOURNAL ---- */
	journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);
	ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
	ff_AddString("JOURNAL");
	TabToColumn(13);
	if (journal == NULL) {
		ff_AddString("Unpublished");
	} else {
#ifdef ASN2GNBK_STRIP_NOTE_PERIODS
		/* Compact in place: drop the ", " that directly precedes a ';'. */
		CharPtr src = journal;
		CharPtr dst = journal;
		while (*src) {
		  if (*src == ',' && src [1] == ' ' && src [2] == ';') {
		    src += 2;
		  } else {
		    *dst++ = *src++;
		  }
		}
		*dst = '\0';
#endif
		StrStripSpaces(journal);
		ff_AddString(journal);
	}
	ff_EndPrint();

	/* ---- MEDLINE / PUBMED identifiers ---- */
	muid = GetMuid(psp->pub);
	if (muid > 0) {
		ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
		ff_AddString("MEDLINE");
		TabToColumn(13);
		www_muid(muid);
		ff_EndPrint();
	}
	pmid = GetPmid (psp->pub);
	if (pmid > 0) {
		ff_StartPrint(3, 12, ASN2FF_GB_MAX, NULL);
		ff_AddString("PUBMED");
		TabToColumn(13);
		www_muid(pmid);
		ff_EndPrint();
	}

	/* ---- REMARK: up to five parts share one block; the first part that
	   prints opens it, later parts go on continuation lines. ---- */
	pdp = psp->descr;
	if (pdp != NULL && pdp->comment != NULL) {
		suppressed = FALSE;
		for (k = 0; k < (Int2) (sizeof (suppressed_comments) / sizeof (suppressed_comments [0])); k++) {
			if (StringCmp(pdp->comment, suppressed_comments [k]) == 0) {
				suppressed = TRUE;
				break;
			}
		}
		if (! suppressed) {
			ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
			ff_AddString("REMARK  ");
			TabToColumn(13);
			ff_AddStringWithTildes(pdp->comment);
			remark_open = TRUE;
		}
	}

	gibbsq = GetGibbsqStatement(gbp, gibbsq_text);
	if (gibbsq > 0) {
		if (remark_open) {
			NewContLine();
		} else {
			ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
			ff_AddString("REMARK");
			TabToColumn(13);
		}
		ff_AddStringWithTildes(gibbsq_text);
		remark_open = TRUE;
	}

	gibbsq_comment = GetGibbsqComment(gbp);
	if (gibbsq_comment != NULL) {
		if (remark_open) {
			NewContLine();
		} else {
			ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
			ff_AddString("REMARK");
			TabToColumn(13);
		}
		/* '~' means line break, except that the first one is swallowed;
		   double quotes are printed as single quotes. */
		first_tilde_pending = TRUE;
		for (cursor = gibbsq_comment; *cursor != '\0'; cursor++) {
			switch (*cursor) {
				case '~':
					if (first_tilde_pending)
						first_tilde_pending = FALSE;
					else
						NewContLine();
					break;
				case '\"':
					ff_AddChar('\'');
					break;
				default:
					ff_AddChar(*cursor);
					break;
			}
		}
		MemFree(gibbsq_comment);
		remark_open = TRUE;
	}

	retract = GetFlatRetract(pub);
	if (retract != NULL) {
		if (remark_open) {
			NewContLine();
		} else {
			ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
			ff_AddString("REMARK");
			TabToColumn(13);
		}
		ff_AddStringWithTildes(retract);
		remark_open = TRUE;
		MemFree(retract);
	}

	descr = GetSubmitDescr(pub);
	if (descr != NULL) {
		if (remark_open) {
			NewContLine();
		} else {
			ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
			ff_AddString("REMARK");
			TabToColumn(13);
		}
		ff_AddStringWithTildes(descr);
		remark_open = TRUE;
	}

	if (remark_open)
		ff_EndPrint();

	/* Release the strings obtained above. */
	if (authors != NULL)
		MemFree(authors);
	MemFree(descr);
	MemFree(title);
	MemFree(journal);
}	/* GB_PrintPubs */
2281 
2282 /*************************************************************************
2283 *GR_PrintPubs
2284 *
2285 *	"GR_PrintPubs" to dump pubs in Flat File (i.e., Genbank) format.
2286 *
2287 **************************************************************************/
2288 
void GR_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)

{
	/*
	 * Prints one reference with "<BR>" / "<B>" markup: title in bold,
	 * then authors, then journal, then the MEDLINE id when it is > 0.
	 * Nothing is printed when no usable pub is found or when the pub is
	 * an ignorable patent pub (any format except GenPept).
	 */
	BioseqPtr bsp=gbp->bsp;
	Boolean ignore_this=FALSE, submit=FALSE;
	CharPtr authors=NULL,title=NULL,journal=NULL;
	Int4 muid, pat_seqid=0;
	ValNodePtr pub;

	if (ASN2FF_SHOW_ALL_PUBS) {
		pub = FlatRefBest(psp->pub, ajp->error_msgs, TRUE);
	} else {
		pub = FlatRefBest(psp->pub, ajp->error_msgs, FALSE);
	}
	if (pub == NULL)
	{
		if (ajp->error_msgs == TRUE)
			PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
		return;
	}
	ignore_this = FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid);
	if (ajp->format != GENPEPT_FMT)
	{
		if (ignore_this == TRUE)
		{
			if (ajp->error_msgs == TRUE)
				PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
			return;
		}
	}

	/* Title (bold) and authors. */
	ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
	ff_AddString("<BR><BR>");
	title = FlatPubTitle(pub);
	if (title ) {
		if ( *title  != NULLB) {
			StrStripSpaces(title);
			ff_AddString("<B>");
			ff_AddString(title);
			ff_AddString("</B>");
			/* NOTE(review): the print is ended here and again after the
			   authors below, with no ff_StartPrint in between -- confirm
			   this is what the ff_* routines expect. */
			ff_EndPrint();
		}
	}
	authors = FlatAuthor(ajp, pub);

	if (authors && *authors != NULLB) {
		ff_AddString("<BR>");
		ff_AddString(authors);
	} else {
		ff_AddChar('.');
	}
	ff_EndPrint();

	/* Journal, or "Unpublished" when FlatJournal returns nothing. */
	journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);
	ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
	ff_AddString("<BR>");
	if (journal ) {
		StrStripSpaces(journal);
		ff_AddString(journal);
	} else {
		ff_AddString("Unpublished");
	}
	ff_EndPrint();

	muid = GetMuid(psp->pub);
	if (muid > 0) {
		ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
		ff_AddString("<BR>");
		TabToColumn(13);
		www_muid(muid);
		ff_EndPrint();
	}

	/* No PubMed line is emitted: the original author was unsure what this
	   format should generate for it, so the (unused) pmid lookup and its
	   disabled print code have been removed. */

	if (authors)
		MemFree(authors);

	MemFree(title);
	MemFree(journal);

}	/* GR_PrintPubs */
2383 
2384 /*************************************************************************
2385 *EMBL_PrintPubs
2386 *
2387 *	"EMBL_PrintPubs" to dump pubs in FlatFile (EMBL) format.
2388 *
2389 **************************************************************************/
2390 
void EMBL_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)

{
	/*
	 * Prints one reference as EMBL lines, in this order:
	 * XX, RN, RP (depending on psp->start), RC (pub comment, if any),
	 * RA, RT, RL and RX (MEDLINE id, when non-zero).
	 */
	BioseqPtr bsp = gbp->bsp;
	Boolean patent_skip = FALSE, submit = FALSE;
	Char muid_text[15];
	CharPtr authors = NULL, title = NULL, journal = NULL, finished_journal;
	Int2 idx;
	Int4 pat_seqid = 0, muid;
	Int4 from = 0, to = 0, span, widest_span;
	PubdescPtr descr = psp->descr;
	SeqLocPtr cit_loc, rest;
	ValNodePtr pub;

	pub = FlatRefBest(psp->pub, ajp->error_msgs, FALSE);
	if (pub == NULL)
	{
		if (ajp->error_msgs == TRUE)
			ErrPostStr(SEV_WARNING, ERR_REFERENCE_Illegalreference, "FFDumpPubs: Invalid Pub found.");
		return;
	}
	patent_skip = FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid);
	if (patent_skip == TRUE && ASN2FF_IGNORE_PATENT_PUBS != FALSE)
	{
		if (ajp->error_msgs == TRUE)
			ErrPostStr(SEV_WARNING, ERR_REFERENCE_Illegalreference, "FFDumpPubs: Invalid Patent Pub");
		return;
	}

	PrintXX();

	/* RN: reference number in square brackets. */
	ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RN");
	ff_AddChar('[');
	ff_AddInteger("%ld", (long) psp->number);
	ff_AddChar(']');
	ff_EndPrint();

	/* RP: positions the reference applies to. */
	switch (psp->start)
	{
		case 1:
			/* whole sequence */
			ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
			ff_AddString("1-");
			ff_AddInteger("%ld", (long) bsp->length);
			ff_EndPrint();
			break;

		case 2:
			/* ranges taken from the citing features */
			widest_span = 0;
			ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
			for (idx = 0; idx < psp->citcount; idx++)
			{
				cit_loc = (SeqLocPtr) psp->citfeat[idx]->location;
				rest = GetBaseRangeForCitation (cit_loc, NULL, &from, &to);
				if (from != 0 || to != 0)
				{
					/* The first range of a feature is printed only if it
					   is at least as wide as the widest first range
					   printed so far (kept from the original code). */
					span = to - from;
					if (span >= widest_span)
					{
						widest_span = span;
						ff_AddInteger("%ld", (long) from);
						ff_AddChar('-');
						ff_AddInteger("%ld", (long) to);
						if (rest != NULL || idx+1 != psp->citcount)
							ff_AddString(", ");
					}
				}
				/* Remaining pieces of a multi-interval location. */
				while (rest != NULL)
				{
					rest = GetBaseRangeForCitation (cit_loc, rest, &from, &to);
					if (from != 0 || to != 0)
					{
						ff_AddInteger("%ld", (long) from);
						ff_AddChar('-');
						ff_AddInteger("%ld", (long) to);
						if (rest != NULL || idx+1 != psp->citcount)
							ff_AddString(", ");
					}
				}
			}
			ff_EndPrint();
			break;

		case 3:
			/* "sites" only for pseudo-embl. */
			if (ajp->pseudo == TRUE)
			{
				ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
				ff_AddString("(sites)");
				ff_EndPrint();
			}
			break;

		default:
			break;
	}

	/* Fetched before RT is printed because it sets `submit`. */
	journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);

	/* RC: pub comment. */
	if (descr != NULL && descr->comment != NULL)
	{
		ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RC");
		ff_AddString(descr->comment);
		ff_EndPrint();
	}

	/* RA: authors, always terminated by ';'. */
	authors = FlatAuthor(ajp, pub);
	ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RA");
	if (authors != NULL)
		ff_AddString(authors);
	ff_AddChar(';');
	ff_EndPrint();

	/* RT: quoted title (not looked up when `submit` is set), then ';'. */
	ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RT");
	if (! submit)
	{
		title = FlatPubTitle(pub);
		if (title != NULL && *title)
		{
			ff_AddChar('\"');
			StrStripSpaces(title);
			ff_AddString(title);
			ff_AddChar('\"');
		}
	}
	ff_AddChar(';');
	ff_EndPrint();

	/* RL: journal passed through CheckEndPunctuation with '.'. */
	ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RL");
	if (journal != NULL)
	{
		finished_journal = CheckEndPunctuation(journal, '.');
		StrStripSpaces(finished_journal);
		ff_AddString(finished_journal);
		MemFree(finished_journal);
	}
	ff_EndPrint();

	/* RX: MEDLINE id followed by a period. */
	muid = GetMuid(psp->pub);
	if (muid != 0) {
		sprintf(muid_text, "%ld.", (long) muid);
		ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RX");
		ff_AddString("MEDLINE; ");
		ff_AddString(muid_text);
		ff_EndPrint();
	}

	if (authors != NULL)
		MemFree(authors);
	MemFree(title);
	MemFree(journal);
}	/* EMBL_PrintPubs */
2537 
2538 /***************************************************************************
2539 *CharPtr CheckLocusLength (Boolean error_msgs, CharPtr locus, Int2 locus_max, Int2 total_segs)
2540 *
2541 *	Calculate the length of the locus; if it's too long, take characters
2542 *	off the front.  If it's part of a segmented set and the locus ends
2543 *	in a number, add an "S".  If it appears to be an NCBI locus of the
2544 *	form HSU00001, then take two letters off the front.
2545 ***************************************************************************/
static CharPtr CheckLocusLength (Boolean error_msgs, CharPtr locus, Int2 locus_max, Int2 total_segs)

{
	/*
	 * error_msgs: when TRUE, post ERR_LOCUS_ChangedLocusName on truncation.
	 * locus:      NUL-terminated name; rewritten in place when it is cut.
	 * locus_max:  maximum number of characters allowed.
	 * total_segs: > 0 enables the trailing-'S' rule for segmented sets.
	 * Returns locus (possibly shortened); NULL only when locus is NULL.
	 */
	Boolean cut_two=FALSE;
	CharPtr buffer;
	Int2 length, surplus;

	if (locus == NULL)
		return NULL;

	length = (Int2) StringLen(locus);
	/* +2: room for a possible 'S' plus the terminator. */
	buffer = MemNew((length+2)*sizeof(Char));
	if (buffer == NULL)
		return locus;	/* out of memory: leave the name untouched */
	StringCpy(buffer, locus);

	/* Append 'S' to the working copy when the name ends in a digit that is
	   not a lone trailing '0'.  The length guards keep locus[length-1] and
	   locus[length-2] inside the string for empty and one-character names. */
	if (total_segs > 0 && length > 0 && IS_DIGIT(locus[length-1]) != 0)
	{
		if (locus[length-1] != '0' ||
			(length > 1 && IS_DIGIT(locus[length-2]) != 0))
		{
			length++;
			buffer[length-1] = 'S';
			buffer[length] = '\0';
		}
	}

	surplus = length - locus_max;

	if (surplus > 0)
	{
		if (surplus <= 2)
		{	/* Check if this is of the form HSU00001S */
			/* && short-circuits at the terminator, so no index runs
			   past the end of a shorter string. */
			if (IS_ALPHA(buffer[0]) != 0 &&
				IS_ALPHA(buffer[1]) != 0 &&
				IS_ALPHA(buffer[2]) != 0 &&
				IS_DIGIT(buffer[3]) != 0 &&
				IS_DIGIT(buffer[4]) != 0 &&
				IS_DIGIT(buffer[5]) != 0 &&
				IS_DIGIT(buffer[6]) != 0 &&
				IS_DIGIT(buffer[7]) != 0 &&
				buffer[8] == 'S' &&
				buffer[9] == '\0')
					cut_two = TRUE;
		}

		/* Drop characters from the front: two for the HSU00001S form,
		   otherwise exactly the surplus. */
		if (cut_two == TRUE)
			locus = StringCpy(locus, buffer+2);
		else
			locus = StringCpy(locus, buffer+surplus);
		if (error_msgs == TRUE)
		{
			flat2asn_delete_locus_user_string();
			flat2asn_install_locus_user_string(buffer);
			/* NOTE(review): the text says 16 although the limit is the
			   locus_max parameter -- confirm callers always pass 16. */
			ErrPostStr(SEV_INFO, ERR_LOCUS_ChangedLocusName,
				"Locusname length is more than 16, locusname is truncated");
		}
	}

	buffer = MemFree(buffer);

	return locus;
}
2601 
GetPubsAwp(Asn2ffJobPtr ajp,GBEntryPtr gbp)2602 NLM_EXTERN Int4 GetPubsAwp (Asn2ffJobPtr ajp, GBEntryPtr gbp)
2603 {
2604 	GatherScope gs;
2605 	BioseqPtr bsp = NULL;
2606 	ValNodePtr vnp, v;
2607 	SeqLocPtr slp = NULL;
2608 	SeqIdPtr isip;
2609 	Int4 status, i;
2610 	Char buffer[31];
2611 
2612 	bsp = gbp->bsp;
2613 	if (bsp == NULL) {
2614 		return 0;
2615 	}
2616 	isip = bsp->id;
2617 	vnp = NULL;
2618   	MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
2619   	gs.get_feats_location = TRUE;
2620   	if (ajp->genome_view == TRUE) {
2621 		gs.seglevels = 0;
2622   	} else if (ajp->only_one) {
2623 		gs.seglevels = 2;
2624 	} else {
2625 		gs.seglevels = 1;
2626 	}
2627 /*	MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
2628 	gs.ignore[OBJ_SEQDESC] = FALSE;
2629 	gs.ignore[OBJ_SEQANNOT] = FALSE;
2630 	gs.ignore[OBJ_SEQFEAT] = FALSE;
2631 	gs.ignore[OBJ_SEQSUB] = FALSE;
2632 	gs.ignore[OBJ_SEQSUB_CIT] = FALSE;*/
2633 
2634 	MemSet ((Pointer) (gs.ignore), (int)(FALSE), (size_t) (OBJ_MAX * sizeof(Boolean)));
2635 	gs.ignore[OBJ_SEQALIGN] = TRUE; /* this was being hit many times on big records */
2636 
2637 	if (ajp->slp == NULL) {
2638 		slp = ValNodeNew(NULL);
2639 		slp->choice = SEQLOC_WHOLE;
2640 		slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
2641 		gs.target = slp;
2642 	} else {
2643 		gs.target = ajp->slp;
2644 	}
2645 	GatherEntity(ajp->entityID, &vnp, get_pubs, &gs);
2646 	if (slp)
2647 		SeqLocFree(slp);
2648 	if ((status = CheckPubs(ajp, bsp, &vnp)) <= 0) {
2649 		if (ajp->error_msgs == TRUE) {
2650 			MakeAnAccession(buffer, isip, 30);
2651 			flat2asn_delete_locus_user_string();
2652 			flat2asn_install_locus_user_string(buffer);
2653 			flat2asn_delete_accession_user_string();
2654 			flat2asn_install_accession_user_string(buffer);
2655 			ErrPostStr(SEV_ERROR, ERR_REFERENCE_NoValidRefs,
2656 			"No refs found that would result in legal flatfile format");
2657 		}
2658 		/* found something. */
2659 		if (status < 0) {
2660 			ValNodeFree(vnp);
2661 			vnp = NULL;
2662 		}
2663 	}
2664 	gbp->Pub = OrganizePubList(vnp);
2665 	for (v = gbp->Pub, i=0; v != NULL; v= v->next, i++);
2666 
2667 	return i;
2668 }
2669 /*************************************************************************
2670 *	Check for EMBL format first
*	Call BioseqGetGBDivCodeEx to find div for Genbank records
2672 *	Allocate a buffer for division
2673 *	09-05-96
2674 *************************************************************************/
GetDivision(Asn2ffJobPtr ajp,GBEntryPtr gbp)2675 static CharPtr GetDivision(Asn2ffJobPtr ajp, GBEntryPtr gbp)
2676 {
2677 	ValNodePtr vnp;
2678 	MolInfoPtr mol = NULL;
2679 	EMBLBlockPtr eb=NULL;
2680 	BioseqPtr bsp = gbp->bsp;
2681 	Int2 buflen=4;
2682 	CharPtr buffer;
2683 	static CharPtr embl_divs [] = {
2684 	"FUN","INV","MAM","ORG","PHG","PLN","PRI","PRO","ROD","SYN","UNA","VRL",
2685 	"VRT","PAT","EST","STS", "HUM", "HTC"
2686 	};
2687 
2688 	buffer = MemNew(buflen);
2689 	buffer[0] = '\0';
2690 	if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT ||
2691 					ajp->format == EMBLPEPT_FMT) {
2692 		if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_embl)) != NULL) {
2693 			eb = (EMBLBlockPtr) vnp->data.ptrvalue;
2694 		}
2695 		if (eb ) {
2696 			if (eb->div == 255) {
2697 /* kludge for HUM division */
2698 				if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_molinfo)) != NULL) {
2699 					gbp->descr = MemFree(gbp->descr);
2700 					mol = (MolInfoPtr) vnp->data.ptrvalue;
2701 				}
2702 				if (mol) {
2703 					if (mol->tech == MI_TECH_survey) {
2704 						StringNCpy_0(buffer, "GSS", buflen);
2705 						return buffer;
2706 					} else if (mol->tech == MI_TECH_htc) {
2707 						StringNCpy_0(buffer, "HTC", buflen);
2708 						return buffer;
2709 					} else if (mol->tech == MI_TECH_htgs_1
2710 							 || mol->tech == MI_TECH_htgs_2) {
2711 						StringNCpy_0(buffer, "HTG", buflen);
2712 						return buffer;
2713 					}
2714 				} else {
2715 					StringNCpy_0(buffer, embl_divs[16], buflen);  /*HUM */
2716 					return buffer;
2717 				}
2718 			} else {
2719 				StringNCpy_0(buffer, embl_divs[eb->div], buflen);
2720 				return buffer;
2721 			}
2722 		}
2723 		BioseqGetGBDivCodeEx (bsp, buffer, buflen, FALSE, ajp->useSeqMgrIndexes);
2724 		if (buffer[0] == NULLB) {
2725 			StringNCpy_0(buffer, "   ", buflen);
2726 		}
2727 		return buffer;
2728 	}
2729 	BioseqGetGBDivCodeEx (bsp, buffer, buflen, FALSE, ajp->useSeqMgrIndexes);
2730 	if (buffer[0] == NULLB) {
2731 		StringNCpy_0(buffer, "   ", buflen);
2732 	}
2733 	if (gbp->bsp && gbp->bsp->mol == Seq_mol_aa) {
2734 		return buffer;
2735 	}
2736 	if (ajp->genome_view) {
2737 		StringNCpy_0(buffer, "CON", buflen);
2738 	}
2739 	return buffer;
2740 }
2741 
2742 /***************************************************************************
2743 *
2744 *	UseGIforLocus to get the GI number for the locus and accession numbers.
2745 *	and to get division using Gather
2746 *
2747 ***************************************************************************/
2748 
UseGIforLocus(Asn2ffJobPtr ajp)2749 NLM_EXTERN void UseGIforLocus (Asn2ffJobPtr ajp)
2750 {
2751 	CharPtr buffer;
2752 	GBEntryPtr gbp;
2753 
2754 	for (gbp=ajp->asn2ffwep->gbp; gbp; gbp=gbp->next) {
2755 		if (ajp->show_gi) {
2756 			sprintf(gbp->accession, "%ld", (long) (gbp->gi));
2757 			sprintf(gbp->locus, "%-10ld", (long) (gbp->gi));
2758 		} else {
2759 			MemSet((VoidPtr) gbp->accession, ' ', 10);
2760 			MemSet((VoidPtr) gbp->locus, ' ', 10);
2761 		}
2762 		buffer = GetDivision(ajp, gbp);
2763 		if (buffer[0] != NULLB) {
2764 			StringCpy(gbp->div, "   ");
2765 		} else {
2766 			StringNCpy_0(gbp->div, buffer, 4);
2767 		}
2768                 MemFree(buffer);
2769 	}
2770 }
2771 
2772 /*****************************************************************************
2773 *
2774 *	ValidateLocus takes a locus name and assures that the format is
2775 *	proper. if segmented set adds the segment number at the end and
2776 *	returns new locus
2777 *****************************************************************************/
CharPtr ValidateLocus(Asn2ffJobPtr ajp, BioseqPtr bsp, CharPtr base_locus, Int2 total_segs, Int2 num_seg, CharPtr new_buf, CharPtr buf_locus, CharPtr orig_buf)

{
	/*
	 * Writes the locus name to use into new_buf and returns new_buf.
	 *
	 *  - Locus-collision id present (and avoidance enabled): the name
	 *    stored in that id is used.
	 *  - total_segs == 0: buf_locus (or orig_buf when buf_locus is empty),
	 *    shortened by CheckLocusLength if needed.
	 *  - otherwise: base_locus followed by num_seg, zero-padded to the
	 *    number of digits needed for total_segs.
	 */
	Boolean collision=FALSE;
	static Boolean order_init=FALSE;
	Char seg_ext[16], buffer[30];
	DbtagPtr db;
	int dex;
	Int2 length, num_of_digits;
	ObjectIdPtr ob;
	SeqIdPtr best_id, id;
	static Uint1 rel_order[NUM_SEQID];

	if (! order_init)
	{
		/* Mark every id type as unwanted (255) so that only a general id
		   can be selected.  The bound is NUM_SEQID, the size of the
		   array, so no slot keeps the static default of 0 and none is
		   written outside the array. */
		for (dex=0; dex<NUM_SEQID; dex++)
			rel_order[dex] = 255;
		rel_order[SEQID_GENERAL ] = 14;
		order_init = TRUE;
	}

	if (ASN2FF_AVOID_LOCUS_COLL || ASN2FF_REPORT_LOCUS_COLL)
	{	/* Check for LOCUS collisions with Karl's algorithm */
		id = bsp->id;
		best_id = SeqIdSelect( id, rel_order,NUM_SEQID);
		if (best_id != NULL && best_id -> choice == SEQID_GENERAL)
		{
			db = (DbtagPtr) best_id -> data.ptrvalue;
			if (StringCmp(db -> db, LOCUS_COLLISION_DB_NAME) == 0)
			{
				ob = db -> tag;
				if (ob != NULL)
				{
					if (ASN2FF_REPORT_LOCUS_COLL)
					{
						MakeAnAccession(buffer, id, 30);
						flat2asn_delete_locus_user_string();
						flat2asn_install_locus_user_string(buffer);
						flat2asn_delete_accession_user_string();
						flat2asn_install_accession_user_string(buffer);
						ErrPostStr(SEV_WARNING, ERR_LOCUS_LocusNameCollision, "");
					}
					if (ASN2FF_AVOID_LOCUS_COLL)
					{
						collision=TRUE;
						StringNCpy_0(new_buf, ob -> str, MAX_LOCUS_NAME_LEN+1);
					}
				}
			}
		}
	}

	if (! collision)
	{
		if (total_segs == 0)
		{	/* Not a segmented set. */
			length = (Int2) StringLen(buf_locus);
			if (length <= 0)
				new_buf = StringCpy(new_buf, orig_buf);
			else
				new_buf = StringCpy(new_buf, buf_locus);

			new_buf = CheckLocusLength (ajp->error_msgs, new_buf, MAX_LOCUS_NAME_LEN, 0);
		}
		else
		{
			/* Width of the numeric suffix follows the segment count. */
			if (total_segs < 10)
				num_of_digits = 1;
			else if (total_segs < 100)
				num_of_digits = 2;
			else if (total_segs < 1000)
				num_of_digits = 3;
			else
			{
				num_of_digits = 4;
				ErrPostStr(SEV_INFO, ERR_SEGMENT_MoreThan1000Segs, "");
			}

			/* Zero-padded segment number.  Letting sprintf do the padding
			   gives the same text as before for ordinary input and stays
			   well defined when num_seg has four or more digits or more
			   digits than total_segs (the old hand-rolled offset was
			   uninitialised or negative in those cases).  An Int2 needs
			   at most six characters, so seg_ext cannot overflow. */
			sprintf(seg_ext, "%0*ld", (int) num_of_digits, (long) num_seg);

			length = (Int2) StringLen(base_locus);
			StringCpy(new_buf, base_locus);
			StringCpy(new_buf + length, seg_ext);
		}
	}

	return new_buf;
}	/* ValidateLocus */
2879 
2880 /***************************************************************************
2881 *	example: NM_000756
2882 ***************************************************************************/
ValidateOtherAccession(CharPtr new_buf,CharPtr orig_buf)2883 static Int2 ValidateOtherAccession(CharPtr new_buf, CharPtr orig_buf)
2884 {
2885 	Int2 count;
2886 	Boolean FirstLetter=FALSE, FiveNum = FALSE;
2887 
2888 	if (orig_buf == NULL || orig_buf[0] == '\0') {
2889 		return -3;
2890 	}
2891 	if (StringLen(orig_buf) >= 10) {
2892 		return -4;
2893 	}
2894 	if (orig_buf[0] != 'N') {
2895 		return -1;
2896 	}
2897 	if (orig_buf[2] != '_') {
2898 		return -1;
2899 	}
2900 	for (count=3; count < 8; count++) {
2901 		if(! IS_DIGIT(orig_buf[count]))
2902 			break;
2903 	}
2904 	if (count == 8 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2905 		StringCpy(new_buf, orig_buf);
2906 		return 0;
2907 
2908 	} else {
2909 		return -1;
2910 	}
2911 }
2912 
2913 
2914 /****************************************************************************
2915 *
2916 *	ValidateAccession takes an accession number and makes sure it is
2917 *	in the proper format (starts with a capital letter that is followed
2918 *	by five numbers).
2919 *
2920 *	Return values are:
2921 *	 0: no problem
2922 *	-1: Accession did not start with a letter (or two letters)
2923 *	-2: Accession did not contain five numbers (or six numbers after 2 letters)
2924 *	-3: the original Accession number to be validated was NULL
2925 *	-4: the original Accession number is too long (>10)
2926 *
2927 ****************************************************************************/
ValidateAccession(CharPtr new_buf,CharPtr orig_buf)2928 Int2 ValidateAccession(CharPtr new_buf, CharPtr orig_buf)
2929 {
2930 	Int2 count, start_count, stop_count;
2931 	Boolean FirstLetter=FALSE, FiveNum = FALSE;
2932 
2933 	if (orig_buf == NULL || orig_buf[0] == '\0') {
2934 		return -3;
2935 	}
2936 	if (StringLen(orig_buf) >= 10) {
2937 		return -4;
2938 	}
2939 	if (orig_buf[0] < 'A' || orig_buf[0] > 'Z') {
2940 		return -1;
2941 	} else {
2942 		FirstLetter = TRUE;
2943 	}
2944 	for (count=1; count < 5; count++) {
2945 		if(! IS_DIGIT(orig_buf[count]))
2946 			break;
2947 	}
2948 	if (count == 5 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' '))
2949 		FiveNum = TRUE;
2950 
2951 	if (FirstLetter == TRUE) {
2952 		if (FiveNum == TRUE) {           /* 1 + 5 accession*/
2953 			StringCpy(new_buf, orig_buf);
2954 			return 0;
2955 		} else if (IS_ALPHA(orig_buf[1])) {      /* 2 + 6 accession */
2956 			if (orig_buf[1] < 'A' || orig_buf[1] > 'Z') {
2957 				return -1;
2958 			}
2959 			start_count = 2;
2960 			stop_count = 7;
2961 			if (orig_buf[0] == 'N' || orig_buf[0] == 'X') {
2962 				if ((orig_buf[1] == 'M' || orig_buf[1] == 'C'
2963 						|| orig_buf[1] == 'T'  || orig_buf[1] == 'P'
2964 													 || orig_buf[1] == 'G')
2965 												&&  orig_buf[2] == '_') {
2966 						start_count = 3;
2967 						stop_count = 8;
2968 				}
2969 			}
2970 			for (count=start_count; count < stop_count; count++) {
2971 				if(! IS_DIGIT(orig_buf[count]))
2972 					break;
2973 			}
2974 			if (count == stop_count && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2975 				StringCpy(new_buf, orig_buf);
2976 				return 0;
2977 			} else if (IS_ALPHA(orig_buf[2])) {      /* 3 + 5 accession */
2978 				if (orig_buf[0] =='A' || orig_buf[0] == 'B' || orig_buf[0] == 'C') {
2979 					for (count=3; count < 7; count++) {
2980 						if(! IS_DIGIT(orig_buf[count]))
2981 							break;
2982 					}
2983 					if (count == 7 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2984 						StringCpy(new_buf, orig_buf);
2985 						return 0;
2986 					} else {
2987 						return -2;
2988 					}
2989 				} else {
2990 					return -2;
2991 				}
2992 			} else {
2993 				return -2;
2994 			}
2995 		} else {
2996 			return -2;
2997 		}
2998 	} else {
2999 		return -1;
3000 	}
3001 }
3002 
3003 /**************************************************************************
3004 *MakeBaseAccession
3005 *
*	MakeBaseAccession takes a BioseqPtr bsp and returns an
3007 *	accession if 1.) the set is segmented, and 2.) there is
3008 *	an accession at a higher level.  Otherwise NULL is returned.
3009 *	The user should deallocate the CharPtr.
3010 **************************************************************************/
3011 
MakeBaseAccession(BioseqPtr bsp)3012 CharPtr MakeBaseAccession (BioseqPtr bsp)
3013 
3014 {
3015 	Char buffer[MAX_ACCESSION_LEN+1];
3016 	CharPtr buf_acc=buffer;
3017 	Int2 status = -1;
3018 	SeqIdPtr sip, isip;
3019 	TextSeqIdPtr tsip;
3020 
3021 
3022 	if (bsp == NULL)
3023 		return NULL;
3024 	isip = bsp->id;
3025 	sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3026 	if (sip && (sip->choice == SEQID_GENBANK ||
3027 		sip->choice == SEQID_EMBL ||
3028 		sip->choice == SEQID_PIR ||
3029 		sip->choice == SEQID_SWISSPROT ||
3030 		sip->choice == SEQID_DDBJ ||
3031 		sip->choice == SEQID_PRF ||
3032 		sip->choice == SEQID_OTHER ||
3033 		sip->choice == SEQID_TPG ||
3034 		sip->choice == SEQID_TPE ||
3035 		sip->choice == SEQID_TPD))
3036 	{
3037 		tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3038 		switch (sip->choice) {
3039 			case SEQID_GENBANK:
3040 			case SEQID_EMBL:
3041 			case SEQID_DDBJ:
3042 			case SEQID_TPG:
3043 			case SEQID_TPE:
3044 			case SEQID_TPD:
3045 			case SEQID_PIR:
3046 			case SEQID_SWISSPROT:
3047 				status = ValidateAccession(buf_acc, tsip->accession);
3048 		}
3049 	}
3050 	if (status < 0)
3051 		return NULL;
3052 
3053 	return (StringSave(buf_acc));
3054 }
3055 
3056 /***************************************************************************
3057 *
3058 *	MakeBaseLocus takes a Asn2ffJobPtr and a CharPtr (base_locus)
3059 *	and returns a CharPtr which is the new base_locus.  Checking is
3060 *	done to assure suitability of the new base locus name (i.e.,
3061 *	no more than 15 characters for less than 10 segments and no more
3062 *	than 14 characters for 10 or more segments).
3063 *
3064 ***************************************************************************/
3065 
MakeBaseLocusAwp(Asn2ffJobPtr ajp,CharPtr base_locus)3066 CharPtr MakeBaseLocusAwp (Asn2ffJobPtr ajp, CharPtr base_locus)
3067 
3068 {
3069 	BioseqPtr bsp, bbsp = NULL;
3070 	Int2  index, length, base_locus_max, name_len, num_of_digits, num_seg;
3071 	SeqIdPtr sip, bsip=NULL, isip=NULL;
3072 	TextSeqIdPtr tsip = NULL, btsip=NULL;
3073 	ObjectIdPtr obj;
3074 	Char buffer[21], temp_buf[21];
3075 	CharPtr localbuf=buffer, name, ptr=temp_buf;
3076 	CharPtr tmp = "SEG_";
3077 	Asn2ffWEPtr awp;
3078 	GBEntryPtr	gbp;
3079 
3080 	base_locus[0] = '\0';
3081 	awp = ajp->asn2ffwep;
3082 	num_seg = awp->total_seg;
3083 	if (num_seg < 10)
3084 		num_of_digits = 1;
3085 	else if (num_seg < 100)
3086 		num_of_digits = 2;
3087 	else if (num_seg < 1000)
3088 		num_of_digits = 3;
3089 	else
3090 	{
3091 		ErrPostStr(SEV_INFO, ERR_SEGMENT_MoreThan1000Segs, "");
3092 	}
3093 	base_locus_max = MAX_LOCUS_NAME_LEN - num_of_digits;
3094 /* look for base locus in segmented bioseq */
3095 	awp = ajp->asn2ffwep;
3096 	bbsp = awp->seg; /* segmented Bioseq in segmented set */
3097 	if (bbsp) {
3098 		bsip = SeqIdSelect(bbsp->id, fasta_order, NUM_SEQID);
3099 	}
3100 	if (bsip && (bsip->choice == SEQID_GENBANK ||
3101 				bsip->choice == SEQID_EMBL ||
3102 				bsip->choice == SEQID_DDBJ ||
3103 				bsip->choice == SEQID_SWISSPROT ||
3104 				bsip->choice == SEQID_PIR ||
3105 				bsip->choice == SEQID_OTHER ||
3106 				bsip->choice == SEQID_TPG ||
3107 				bsip->choice == SEQID_TPE ||
3108 				bsip->choice == SEQID_TPD)) {
3109 		btsip = (TextSeqIdPtr) bsip->data.ptrvalue;
3110 	}
3111 	if (btsip && StringLen(btsip->name) > 0) {
3112 		localbuf = StringCpy(localbuf, btsip->name);
3113 		if (StringNCmp(localbuf, tmp, 4) == 0) {
3114 		/* check if name starts with "SEG_", remove if it does. */
3115 		    StringCpy(ptr, localbuf+4);
3116 		    length = StringLen(ptr);
3117 		    ptr[length] = '\0';
3118 		    if (ptr[length-1] == '1') {
3119 		       bsp = awp->gbp->bsp;
3120 		       isip = bsp->id;
3121 		       sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3122 		       if (sip &&
3123 		       	(name=((TextSeqIdPtr)sip->data.ptrvalue)->name) != NULL) {
3124 		          name_len = StringLen(name);
3125 		          if (name_len == length) {
3126 		             if (name[length-1] == ptr[length-1])
3127 		    	       for (index=2; index >= num_of_digits; index++) {
3128 		               /* The following is *really* '0'! */
3129 		                  if (ptr[length-index] == '0') {
3130 		                     if (ptr[length-index] == name[length-index]) {
3131 		                        StringNCpy(base_locus, ptr, length-index);
3132 		                        base_locus[length-index] = '\0';
3133 		                     } else {
3134 		                        StringNCpy(base_locus, ptr, length-index+1);
3135 		                        base_locus[length-index+1] = '\0';
3136 		                     }
3137 		                  } else {
3138 		                     StringNCpy(base_locus, ptr, length-index+1);
3139 		                     base_locus[length-index+1] = '\0';
3140 				     break;
3141 		                  }
3142 		               }
3143 		            }
3144 		         }
3145 		    }
3146 		    /* If nothing else worked, use base locus anyway. */
3147 		    if (base_locus[0] == '\0')
3148 		    	StringCpy(base_locus, ptr);
3149 		}
3150 		if (base_locus[0] == '\0')
3151 			StringCpy(base_locus, btsip->name);
3152 
3153 		/*check for length, truncate if necessary.	*/
3154 		base_locus = CheckLocusLength (ajp->error_msgs, base_locus, base_locus_max, num_seg);
3155 		return base_locus;
3156 	}
3157 
3158 /* Look for at least one sensible locus in all segments. */
3159 	for (gbp = awp->gbp; gbp; gbp=gbp->next) {
3160 		bsp = gbp->bsp;
3161 		isip = bsp->id;
3162 		sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3163 		if (sip && (sip->choice == SEQID_GENBANK ||
3164 					sip->choice == SEQID_EMBL ||
3165 					sip->choice == SEQID_DDBJ ||
3166 					sip->choice == SEQID_SWISSPROT ||
3167 					sip->choice == SEQID_OTHER ||
3168 					sip->choice == SEQID_PIR ||
3169 					sip->choice == SEQID_TPG ||
3170 					sip->choice == SEQID_TPE ||
3171 					sip->choice == SEQID_TPD)) {
3172 			tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3173 		}
3174 		if (tsip && tsip->name && StringLen(tsip->name) > 0) {
3175 			base_locus = StringCpy(base_locus, tsip->name);
3176 			length = StringLen(base_locus);
3177 			base_locus[length-num_of_digits] = '\0';
3178 			base_locus = CheckLocusLength (ajp->error_msgs,
3179 									base_locus, base_locus_max, num_seg);
3180 			return base_locus;
3181 		}
3182 	}
3183 
3184 /* No option left but to take the first locus name.*/
3185 	bsp = awp->gbp->bsp;
3186 	isip = bsp->id;
3187 	sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3188 	if (sip && sip->choice == SEQID_LOCAL) {
3189 		obj = (ObjectIdPtr) sip->data.ptrvalue;
3190 		if ( obj->str == NULL) {
3191 			sprintf(base_locus, "%ld", (long)(obj->id));
3192 		} else {
3193 			base_locus = StringCpy(base_locus, obj->str);
3194 		}
3195 	} else if (sip && (sip->choice == SEQID_GENBANK ||
3196 				sip->choice == SEQID_EMBL ||
3197 				sip->choice == SEQID_SWISSPROT ||
3198 				sip->choice == SEQID_DDBJ ||
3199 				sip->choice == SEQID_PRF ||
3200 				sip->choice == SEQID_PDB ||
3201 				sip->choice == SEQID_OTHER ||
3202 				sip->choice == SEQID_PIR ||
3203 				sip->choice == SEQID_TPG ||
3204 				sip->choice == SEQID_TPE ||
3205 				sip->choice == SEQID_TPD)) {
3206 		tsip = (TextSeqIdPtr)sip->data.ptrvalue;
3207 		base_locus = StringCpy(base_locus, tsip->name);
3208 	}
3209 	base_locus = CheckLocusLength (ajp->error_msgs, base_locus, base_locus_max, num_seg);
3210 	return base_locus;
3211 
3212 }	/* MakeBaseLocusAwp */
3213 
ValidateVersion(SeqIdPtr sid,Asn2ffJobPtr ajp)3214 static Boolean ValidateVersion(SeqIdPtr sid, Asn2ffJobPtr ajp)
3215 {
3216 	TextSeqIdPtr tsip;
3217 
3218 	if (ajp->forgbrel == FALSE)
3219 		return TRUE;
3220 	switch (sid->choice) {
3221 	case SEQID_GENBANK:
3222 	case SEQID_EMBL:
3223 	case SEQID_DDBJ:
3224 	case SEQID_OTHER:
3225 	case SEQID_TPG:
3226 	case SEQID_TPE:
3227 	case SEQID_TPD:
3228 		tsip = (TextSeqIdPtr) sid->data.ptrvalue;
3229 		if (tsip->version == 0 || tsip->version == INT2_MIN) {
3230 			return FALSE;
3231 		}
3232 	}
3233 	return TRUE;
3234 }
3235 
GetLocusPartsAwp(Asn2ffJobPtr ajp)3236 NLM_EXTERN void GetLocusPartsAwp (Asn2ffJobPtr ajp)
3237 {
3238 	BioseqPtr bsp=NULL;
3239 	Asn2ffWEPtr awp;
3240 	SeqIdPtr sip, isip;
3241 	Int2 num_seg=0, total_segs=0;
3242 	TextSeqIdPtr tsip;
3243 	Char buf_a[MAX_ACCESSION_LEN+1], buf_l[MAX_ACCESSION_LEN+1],
3244 		 base_l[MAX_ACCESSION_LEN+1];
3245 	CharPtr buffer, buf_acc=buf_a, buf_locus=buf_l, base_locus=base_l, base_a;
3246 	GBEntryPtr gbp;
3247 	CharPtr loc;
3248 	Int2 acc_len;
3249 
3250 	awp = ajp->asn2ffwep;
3251 	if (ajp->slp) {
3252 		for (gbp = awp->gbp; gbp; gbp = gbp->next) {
3253 			buffer = GetDivision(ajp, gbp);
3254 			if (buffer[0] != NULLB) {
3255 				StringNCpy_0(gbp->div, buffer, 4);
3256 				MemFree(buffer);
3257 			}
3258 			if ((bsp = BioseqFindFromSeqLoc(ajp->slp)) != NULL) {
3259 				CharPtr flatloc;
3260 
3261 				isip = SeqIdSelect(gbp->bsp->id, fasta_order, NUM_SEQID);
3262 				if (isip == NULL)
3263 					isip = gbp->bsp->id;
3264 				SeqIdWrite(isip,
3265 					buf_acc, PRINTID_TEXTID_ACCESSION, MAX_ACCESSION_LEN);
3266 
3267 				if (ajp->old_locus_fmt == TRUE)
3268 				  sprintf(gbp->locus, "%-10s", buf_acc);
3269 				else
3270 				  sprintf(gbp->locus, "%-16s", buf_acc);
3271 
3272 				flatloc =  FlatLoc(bsp, ajp->slp);
3273 				sprintf(gbp->accession, "%s REGION: %s", buf_acc, flatloc);
3274 				flatloc = MemFree(flatloc);
3275 				if (ajp->show_version) {
3276 					SeqIdWrite(isip,
3277 					buf_acc, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3278 					StringNCpy_0(gbp->version,
3279 								 buf_acc, MAX_ACCESSION_LEN+1);
3280 				}
3281 			} else {
3282 				loc = SeqLocPrint(ajp->slp);
3283 				StringNCpy_0(gbp->locus,  loc, MAX_LOCUS_NAME_LEN+1);
3284 				acc_len = MIN(StringLen(loc), 60);
3285 				StringNCpy_0(gbp->accession, loc, acc_len+1);
3286 				MemFree(loc);
3287 			}
3288 		}
3289 		return;
3290 	}
3291 	if (ajp->only_one) {
3292 		for (gbp = awp->gbp; gbp; gbp = gbp->next) {
3293 			if (gbp->bsp == NULL) {
3294 				continue;
3295 			}
3296 			bsp = gbp->bsp;
3297 			GetGINumber(gbp);
3298 			buffer = GetDivision(ajp, gbp);
3299 			if (buffer[0] != NULLB) {
3300 				StringNCpy_0(gbp->div, buffer, 4);
3301 				MemFree(buffer);
3302 			}
3303 			isip = SeqIdSelect(gbp->bsp->id, fasta_order, NUM_SEQID);
3304 			if (isip == NULL)
3305 				isip = gbp->bsp->id;
3306 			SeqIdWrite(isip, buf_acc,
3307 					PRINTID_TEXTID_ACCESSION, MAX_ACCESSION_LEN+1);
3308 			StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3309 
3310 			if (ajp->old_locus_fmt == TRUE)
3311 			  sprintf(gbp->locus, "%-10s", buf_acc);
3312 			else
3313 			  sprintf(gbp->locus, "%-16s", buf_acc);
3314 
3315 			if (ajp->show_version) {
3316 				SeqIdWrite(isip, buf_acc,
3317 					PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3318 				StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+1);
3319 			}
3320 		}
3321 		return;
3322 	}
3323 	total_segs = awp->total_seg;
3324 	base_a = MakeBaseAccession(awp->seg);
3325 	base_locus = MakeBaseLocusAwp(ajp, base_locus);
3326 	StringNCpy_0(ajp->asn2ffwep->base_name, base_locus, 11);
3327 
3328 	for (gbp = awp->gbp; gbp != NULL; gbp = gbp->next) {
3329 		if (gbp->bsp == NULL) {
3330 			continue;
3331 		}
3332 		bsp = gbp->bsp;
3333 		if ((isip = gbp->bsp->id) == NULL) {
3334 			continue;
3335 		}
3336 		buffer = GetDivision(ajp, gbp);
3337 		if (buffer[0] != NULLB) {
3338 			StringNCpy_0(gbp->div, buffer, 4);
3339 			MemFree(buffer);
3340 		}
3341 		num_seg = gbp->num_seg;
3342 		sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3343 		if (sip == NULL) {
3344 			sip = isip;
3345 		}
3346 		switch (sip->choice) {
3347 		    case SEQID_GENBANK:
3348 	    	case SEQID_EMBL:
3349 	    	case SEQID_DDBJ:
3350 	    	case SEQID_OTHER:
3351 			case SEQID_TPG:
3352 			case SEQID_TPE:
3353 			case SEQID_TPD:
3354 				tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3355 				if ((ValidateAccession(buf_acc, tsip->accession)) < 0) {
3356 					if (base_a != NULL) {
3357 						StringNCpy_0(buf_acc, base_a, MAX_ACCESSION_LEN+1);
3358 					} else {
3359 						buf_acc = MakeAnAccession(buf_acc, isip,
3360 													MAX_ACCESSION_LEN+1);
3361 					}
3362 				}
3363 				buf_locus = ValidateLocus(ajp, bsp, base_locus,
3364 					total_segs, num_seg, buf_locus, tsip->name, buf_acc);
3365 				StringNCpy_0(gbp->accession,
3366 					buf_acc, MAX_ACCESSION_LEN+1);
3367 				if (sip->choice == SEQID_OTHER
3368 						&& StringNCmp(tsip->accession, "NT_", 3) == 0) {
3369 					if (ajp->old_locus_fmt == TRUE)
3370 					  sprintf(gbp->locus, "%-10s", buf_acc);
3371 					else
3372 					  sprintf(gbp->locus, "%-16s", buf_acc);
3373 				} else {
3374 					if (ajp->old_locus_fmt == TRUE)
3375 					  sprintf(gbp->locus, "%-10s", buf_locus);
3376 					else
3377 					  sprintf(gbp->locus, "%-16s", buf_locus);
3378 				}
3379 				num_seg--;
3380 			if (ajp->show_version) {
3381 				if (ValidateVersion(sip, ajp) == FALSE) {
3382 					gbp->bsp = NULL;
3383 					ErrPostEx(SEV_ERROR, ERR_ACCESSION_No_VERSION_Number, "%s", gbp->accession);
3384 					continue;
3385 				}
3386 				SeqIdWrite(sip, buf_acc,
3387 					PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+6);
3388 				StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+6);
3389 			}
3390 				break;
3391 		    case SEQID_LOCAL:
3392 				if ((((ObjectIdPtr)sip->data.ptrvalue)->str) == NULL) {
3393 					buf_acc[0] = 'X';
3394 					sprintf(buf_acc+1, "%ld",
3395 						(long)((ObjectIdPtr)sip->data.ptrvalue)->id);
3396 				} else {
3397 					StringNCpy_0(buf_acc,
3398 				 ((ObjectIdPtr)sip->data.ptrvalue)->str, MAX_ACCESSION_LEN+1);
3399 				}
3400 				buf_locus = ValidateLocus(ajp, bsp, base_locus,
3401 				total_segs, num_seg,buf_locus,  buf_acc, buf_acc);
3402 				StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3403 
3404 				if (ajp->old_locus_fmt == TRUE)
3405 				  sprintf(gbp->locus, "%-10s", buf_locus);
3406 				else
3407 				  sprintf(gbp->locus, "%-16s", buf_locus);
3408 
3409 				num_seg--;
3410 				break;
3411 		   case SEQID_GI:
3412 			sprintf(buf_acc, "%ld", (long) (sip->data.intvalue));
3413 			buf_locus = ValidateLocus(ajp, bsp, base_locus,
3414 					total_segs, num_seg, buf_locus, buf_acc, buf_acc);
3415 			StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3416 
3417 			if (ajp->old_locus_fmt == TRUE)
3418 			  sprintf(gbp->locus, "%-10s", buf_locus);
3419 			else
3420 			  sprintf(gbp->locus, "%-16s", buf_locus);
3421 
3422 			num_seg--;
3423 			break;
3424 
3425 		   case SEQID_PIR:
3426 		   case SEQID_SWISSPROT:
3427 			tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3428 			if ((ValidateAccession(buf_acc, tsip->accession)) < 0) {
3429 				if (base_a != NULL) {
3430 					StringNCpy_0(buf_acc, base_a, MAX_ACCESSION_LEN+1);
3431 				} else {
3432 					buf_acc = MakeAnAccession(buf_acc,
3433 						isip, MAX_ACCESSION_LEN);
3434 				}
3435 			}
3436 			if (ajp->show_version) {
3437 				SeqIdWrite(sip, buf_acc,
3438 					PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+6);
3439 				StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+6);
3440 			}
3441 			buf_locus = ValidateLocus(ajp, bsp, base_locus,
3442 				total_segs, num_seg, buf_locus, tsip->name, buf_acc);
3443 			StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3444 			if (sip->choice == SEQID_OTHER
3445 					&& StringNCmp(tsip->accession, "NT_", 3) == 0) {
3446 				if (ajp->old_locus_fmt == TRUE)
3447 				  sprintf(gbp->locus, "%-10s", buf_acc);
3448 				else
3449 				  sprintf(gbp->locus, "%-16s", buf_acc);
3450 			} else {
3451 				if (ajp->old_locus_fmt == TRUE)
3452 				  sprintf(gbp->locus, "%-10s", buf_locus);
3453 				else
3454 				  sprintf(gbp->locus, "%-16s", buf_locus);
3455 			}
3456 			num_seg--;
3457 
3458 			break;
3459 		   default:
3460 			buf_acc = MakeAnAccession(buf_acc, isip, MAX_ACCESSION_LEN+1);
3461 			buf_locus = ValidateLocus(ajp, bsp, base_locus,
3462 				total_segs, num_seg, buf_locus, buf_acc, buf_acc);
3463 			StringNCpy_0(gbp->accession, buf_acc,
3464 													 MAX_ACCESSION_LEN+1);
3465 			if (ajp->old_locus_fmt == TRUE)
3466 			  sprintf(gbp->locus, "%-10s", buf_locus);
3467 			else
3468 			  sprintf(gbp->locus, "%-16s", buf_locus);
3469 
3470 			num_seg--;
3471 			break;
3472 		}
3473 	}
3474 	if (base_a != NULL)
3475 		base_a = MemFree(base_a);
3476 
3477 }
3478 /**************************************************************************
3479 *	Looks in the descriptor  and feature->xref for any extra-accessions.
3480 **************************************************************************/
3481 
AddExtraAccessions(Asn2ffJobPtr ajp,GBEntryPtr gbp)3482 NLM_EXTERN void AddExtraAccessions(Asn2ffJobPtr ajp, GBEntryPtr gbp)
3483 
3484 {
3485 	BioseqPtr bsp;
3486 	Char buffer[10];
3487 	CharPtr ptr=buffer, ac;
3488 	EMBLBlockPtr eb;
3489 	GBBlockPtr gb;
3490 	Int2 index, status;
3491 	SeqFeatPtr sfp;
3492 	SeqIdPtr xid;
3493 	SeqIntPtr si;
3494 	SeqLocPtr xref;
3495 	TextSeqIdPtr text;
3496 	ValNodePtr extra_access=NULL, location=NULL, vnp;
3497 	SortStructPtr p;
3498 	Boolean /*UNUSED*/ncbi = FALSE;
3499 
3500 	if (gbp == NULL) {
3501 		return;
3502 	}
3503 	if ((bsp = gbp->bsp) == NULL) {
3504 		return;
3505 	}
3506 	ac = gbp->accession;
3507 	if (ac && *ac == 'U') {
3508 		ncbi = TRUE;
3509 	}
3510 	for (vnp = bsp->descr; vnp; vnp=vnp->next) {
3511 		if (vnp->choice == Seq_descr_genbank) {
3512 			break;
3513 		}
3514 	}
3515 	if (vnp != NULL) {
3516 		gb = (GBBlockPtr) vnp->data.ptrvalue;
3517 		extra_access = gb->extra_accessions;
3518 		if (extra_access != NULL) {
3519 			for (vnp=extra_access; vnp != NULL; vnp=vnp->next) {
3520 				status = ValidateAccession(ptr, vnp->data.ptrvalue);
3521 				if (status == 0) {
3522 					if (ajp->format == EMBL_FMT || ajp->format ==
3523 						 PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3524 						ff_AddChar(';');
3525 					} else {
3526 						ff_AddChar(' ');
3527 					}
3528 				/*	www_extra_acc(ptr, ncbi); */
3529 					ff_AddString( ptr);
3530 				}
3531 			}
3532 		}
3533 	}
3534 	for (vnp = bsp->descr; vnp; vnp=vnp->next) {
3535 		if (vnp->choice == Seq_descr_embl) {
3536 			break;
3537 		}
3538 	}
3539 
3540 	if (vnp != NULL) {
3541 		eb = (EMBLBlockPtr) vnp->data.ptrvalue;
3542 		extra_access = eb->extra_acc;
3543 		if (extra_access != NULL) {
3544 			for (vnp=extra_access; vnp != NULL; vnp=vnp->next) {
3545 				status = ValidateAccession(ptr, vnp->data.ptrvalue);
3546 				if (status == 0) {
3547 					if (ajp->format == EMBL_FMT || ajp->format ==
3548 						 PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3549 						ff_AddChar(';');
3550 					} else {
3551 						ff_AddChar(' ');
3552 					}
3553 				/*	www_extra_acc(ptr, ncbi); */
3554 					ff_AddString( ptr);
3555 				}
3556 			}
3557 		}
3558 	}
3559 	if (gbp->feat) {
3560 		p = gbp->feat->Xreflist;
3561 		for (index=0; index < gbp->feat->sfpXrefsize; index++, p++) {
3562 			if (location == NULL) {
3563 				location = ValNodeNew(NULL);
3564 				si = SeqIntNew();
3565 				location->choice = SEQLOC_INT;
3566 				location->data.ptrvalue = si;
3567 			}
3568 			si->from = 0;
3569 			bsp = gbp->bsp;
3570 			si->to = bsp->length - 1;
3571 			si->id = bsp->id;	/* Don't delete id!! */
3572 			if ((sfp = p->sfp) == NULL) {
3573 				GatherItemWithLock(p->entityID,
3574 					p->itemID, p->itemtype, &sfp, find_item);
3575 			}
3576 			if (sfp == NULL) {
3577 				continue;
3578 			}
3579 			if (SeqLocCompare(sfp->location, location) != 0) {
3580 				xref = (SeqLocPtr) sfp->data.value.ptrvalue;
3581 				xid = (SeqIdPtr) xref->data.ptrvalue;
3582 				if (xid->choice == 5 || xid->choice == 6 ||
3583 					xid->choice == 13) {
3584 					text = (TextSeqIdPtr) xid->data.ptrvalue;
3585 					status = ValidateAccession(ptr, text->accession);
3586 					if (status == 0) {
3587 						if (ajp->format == EMBL_FMT || ajp->format ==
3588 							 PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3589 							ff_AddChar(';');
3590 						} else {
3591 							ff_AddChar(' ');
3592 						}
3593 					/*	www_extra_acc(ptr, ncbi); */
3594 						ff_AddString( ptr);
3595 					}
3596 				}
3597 			}
3598 		}
3599 	}
3600 
3601 	if (location) {
3602 		si->id = NULL;
3603 		SeqIntFree(si);
3604 		ValNodeFree(location);
3605 	}
3606 
3607 	return;
3608 }static Boolean CompareToAwpList (BioseqPtr bsp, Asn2ffWEPtr	awp)
3609 
3610 {
3611 	GBEntryPtr gbp;
3612 
3613 	if (bsp == NULL) {
3614 		return FALSE;
3615 	}
3616 	for (gbp = awp->gbp; gbp != NULL; gbp = gbp->next) {
3617 		if (bsp == gbp->bsp) {
3618 			return TRUE;
3619 		}
3620 	}
3621 	return FALSE;
3622 }
3623 
GBEntryNew(void)3624 static GBEntryPtr GBEntryNew(void)
3625 {
3626 	GBEntryPtr gbp;
3627 
3628 	gbp = (GBEntryPtr) MemNew(sizeof(GBEntry));
3629 	gbp->feat = NULL;
3630 	gbp->descr = NULL;
3631 	gbp->source_info = NULL;
3632 	gbp->comm = NULL;
3633 	gbp->map = FALSE;
3634 
3635 	return gbp;
3636 }
3637 
/*
 * tie_next_gbp
 *
 *	Ties next to the end of the chain starting at head and returns the
 *	chain's first node: next itself when head is NULL, head otherwise.
 */
static GBEntryPtr tie_next_gbp(GBEntryPtr head, GBEntryPtr next)
{
	GBEntryPtr tail;

	if (head == NULL) {
		return next;
	}

	/* advance to the last node of the existing chain */
	tail = head;
	while (tail->next != NULL) {
		tail = tail->next;
	}
	tail->next = next;

	return head;
}
3652 
CreateGBEntry(Asn2ffWEPtr awp,BioseqPtr bsp,Int2 eID,Int2 iID,Int2 itype)3653 static GBEntryPtr CreateGBEntry(Asn2ffWEPtr awp, BioseqPtr bsp,
3654 Int2 eID, Int2 iID, Int2 itype)
3655 {
3656 	GBEntryPtr	gbep;
3657 
3658 	gbep = GBEntryNew();
3659 	gbep->bsp = bsp;
3660 	gbep->length = bsp->length;
3661 	gbep->entityID = eID;
3662 	gbep->itemID = iID;
3663 	gbep->itemtype = itype;
3664 	awp->gbp = tie_next_gbp(awp->gbp, gbep);
3665 
3666 	return gbep;
3667 }
3668 
3669 /************************************************************************
3670 *	SeqToAwp()
3671 *		gather callback to create a list of GenBank entries
3672 *************************************************************************/
3673 
SeqToAwp(GatherContextPtr gcp)3674 NLM_EXTERN Boolean SeqToAwp (GatherContextPtr gcp)
3675 
3676 {
3677 	BioseqPtr bsp;
3678 	SeqEntryPtr ep;
3679 	BioseqSetPtr bssp;
3680 	SeqLocPtr slp;
3681 	Asn2ffWEPtr	awp;
3682 	Asn2ffJobPtr ajp;
3683 	GBEntryPtr	gbep;
3684 	SeqIdPtr isip, sip;
3685 	Uint1 format;
3686 	Boolean is_www = get_www();
3687 
3688 	ajp = (Asn2ffJobPtr) gcp->userdata;
3689 	awp = ajp->asn2ffwep;
3690 	format = ajp->format;
3691 	switch (gcp->thistype)
3692 	{
3693 		case OBJ_BIOSEQ:
3694 			bsp = gcp->thisitem;
3695 			if (bsp->repr == Seq_repr_seg) {
3696 				if (ajp->genome_view || ajp->only_one) {
3697 					gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3698 						gcp->itemID, gcp->thistype);
3699 						if (ajp->only_one && !ajp->map_view) {
3700 							return FALSE;
3701 						}
3702 				}
3703 				if (ISA_na(bsp->mol) && (format == GENBANK_FMT ||
3704 					format == EMBL_FMT || format == PSEUDOEMBL_FMT
3705 						|| format == GRAPHIK_FMT)) {
3706 					awp->seg = bsp;
3707 				} else if (ISA_aa(bsp->mol) &&
3708 					(format == GENPEPT_FMT || format == EMBLPEPT_FMT
3709 						|| format == GRAPHIK_FMT)) {
3710 					awp->seg = bsp;
3711 				}
3712 			}
3713 			if (ASN2FF_LOOK_FOR_SEQ == FALSE) {
3714 				if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT
3715 					|| (ISA_aa(bsp->mol) && format == GRAPHIK_FMT)) {
3716 					if (ISA_aa(bsp->mol) && (bsp->repr == Seq_repr_raw
3717 		   	|| bsp->repr == Seq_repr_const || bsp->repr == Seq_repr_delta
3718 		   	|| 	((is_www || ajp->mode != RELEASE_MODE) && bsp->repr == Seq_repr_virtual))) {
3719 						gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3720 							gcp->itemID, gcp->thistype);
3721 						++awp->total_seg;
3722 						gbep->num_seg = awp->total_seg;
3723 					}
3724 				} else {
3725 					if (ISA_na(bsp->mol) && (bsp->repr == Seq_repr_raw
3726 		   		|| bsp->repr == Seq_repr_const|| bsp->repr == Seq_repr_delta
3727 		   		|| 	(is_www && bsp->repr == Seq_repr_virtual))) {
3728 						if (ASN2FF_LOCAL_ID == FALSE) {
3729 							sip = SeqIdSelect(bsp->id, fasta_order, NUM_SEQID);
3730 							if (sip && sip->choice != SEQID_LOCAL) {
3731 								gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3732 									gcp->itemID, gcp->thistype);
3733 								++awp->total_seg;
3734 								gbep->num_seg = awp->total_seg;
3735 							}
3736 						} else {
3737 							gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3738 								gcp->itemID, gcp->thistype);
3739 							++awp->total_seg;
3740 							gbep->num_seg = awp->total_seg;
3741 						}
3742 					} else if (ISA_na(bsp->mol) && bsp->repr == Seq_repr_map &&
3743 							ajp->map_view) {
3744 							gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3745 								gcp->itemID, gcp->thistype);
3746 							gbep->map = TRUE;
3747 					}
3748 				}
3749 			} else {
3750 				if (bsp->seq_ext_type == 1) {
3751 					slp = bsp->seq_ext;
3752 					while (slp) {
3753 						bsp = BioseqFind(SeqLocId(slp));
3754 						if (bsp->repr == Seq_repr_raw ||
3755 							bsp->repr == Seq_repr_const
3756 							|| bsp->repr == Seq_repr_delta
3757 							|| (is_www && bsp->repr == Seq_repr_virtual)) {
3758 							if (CompareToAwpList(bsp, awp) == FALSE) {
3759 								if (ASN2FF_LOCAL_ID == FALSE) {
3760 									isip = bsp->id;
3761 									sip = SeqIdSelect(isip,
3762 										fasta_order, NUM_SEQID);
3763 									if (sip && sip->choice != SEQID_LOCAL) {
3764 										gbep = CreateGBEntry(awp, bsp,
3765 											gcp->entityID, gcp->itemID,
3766 												gcp->thistype);
3767 										++awp->total_seg;
3768 										gbep->num_seg = awp->total_seg;
3769 									} else if (sip->choice == SEQID_LOCAL &&
3770 										(format == GENPEPT_FMT ||
3771 												format == EMBLPEPT_FMT)) {
3772 										gbep = CreateGBEntry(awp, bsp,
3773 											gcp->entityID, gcp->itemID,
3774 													gcp->thistype);
3775 										++awp->total_seg;
3776 										gbep->num_seg = awp->total_seg;
3777 									} else {
3778 										gbep = CreateGBEntry(awp, bsp,
3779 											gcp->entityID, gcp->itemID,
3780 												gcp->thistype);
3781 										++awp->total_seg;
3782 										gbep->num_seg = awp->total_seg;
3783 								}
3784 								}
3785 							}
3786 						}
3787 						slp = slp->next;
3788 					}
3789 				} else if (ISA_na(bsp->mol) && (bsp->repr == Seq_repr_raw ||
3790 						bsp->repr == Seq_repr_const
3791 							|| bsp->repr == Seq_repr_delta
3792 							|| (is_www && bsp->repr == Seq_repr_virtual))) {
3793 						if (CompareToAwpList(bsp, awp) == FALSE) {
3794 						if (ASN2FF_LOCAL_ID == FALSE) {
3795 							isip = bsp->id;
3796 							sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3797 							if (sip && sip->choice != SEQID_LOCAL) {
3798 								gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3799 											gcp->itemID, gcp->thistype);
3800 								++awp->total_seg;
3801 								gbep->num_seg = awp->total_seg;
3802 							} else if (sip->choice == SEQID_LOCAL &&
3803 									(format == GENPEPT_FMT ||
3804 												format == EMBLPEPT_FMT)) {
3805 								gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3806 											gcp->itemID, gcp->thistype);
3807 								++awp->total_seg;
3808 								gbep->num_seg = awp->total_seg;
3809 							} else {
3810 								gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3811 											gcp->itemID, gcp->thistype);
3812 								++awp->total_seg;
3813 								gbep->num_seg = awp->total_seg;
3814 							}
3815 						}
3816 					}
3817 				}
3818 			}
3819 			break;
3820 		case OBJ_BIOSEQSET:
3821 			bssp = (BioseqSetPtr) gcp->thisitem;
3822 			if (bssp->_class == 4) {/*parts*/
3823 				ep = bssp->seq_set;
3824 				if (ep != NULL) {
3825 					bsp = ep->data.ptrvalue;
3826 					if (ISA_na(bsp->mol) && (format == GENBANK_FMT ||
3827 						format == EMBL_FMT || format == PSEUDOEMBL_FMT)) {
3828 						awp->parts = bssp;
3829 					} else if (ISA_aa(bsp->mol) &&
3830 						(format == GENPEPT_FMT || format == EMBLPEPT_FMT)) {
3831 						awp->parts = bssp;
3832 					}
3833 				}
3834 			}
3835 			break;
3836 		default:
3837 			break;
3838 
3839 	}
3840 	return TRUE;
3841 
3842 }
3843 
3844 
3845 
3846