1 /* asn2ff6.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2ff6.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov
29 *
30 * Version Creation Date: 7/15/95
31 *
32 * $Revision: 6.69 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * $Log: asn2ff6.c,v $
39 * Revision 6.69 2006/07/13 17:06:38 bollin
40 * use Uint4 instead of Uint2 for itemID values
41 * removed unused variables
42 * resolved compiler warnings
43 *
44 * Revision 6.68 2002/02/15 18:30:55 kans
45 * no longer change snoRNA to misc_RNA
46 *
47 * Revision 6.67 2001/12/28 21:37:10 kans
48 * allow sfp->product to be SEQLOC_EQUIV
49 *
50 * Revision 6.66 2001/12/21 20:21:06 cavanaug
51 * old_locus_fmt now controls generated of *old* LOCUS line format
52 *
53 * Revision 6.65 2001/12/05 18:13:53 cavanaug
54 * Changes for new LOCUS line format
55 *
56 * Revision 6.64 2001/08/21 17:33:33 kans
57 * snoRNA can show /product
58 *
59 * Revision 6.63 2001/08/07 15:51:08 kans
60 * use NUM_SEQID, added third party annotation seqids
61 *
62 * Revision 6.62 2001/07/18 14:50:13 kans
63 * gather features with gsc.useSeqMgrIndexes if genpept, raw, indexing requested, and IndexedGetDescrForDiv to speed up finding division
64 *
65 * Revision 6.61 2001/07/03 20:01:41 kans
66 * AddGBQual ASN2GNBK_STRIP_NOTE_PERIODS trim trailing tilde first
67 *
68 * Revision 6.60 2001/07/03 00:05:51 kans
69 * TrimSpacesAndJunkFromEnds on genbankblock->source if ASN2GNBK_STRIP_NOTE_PERIODS
70 *
71 * Revision 6.59 2001/06/26 23:43:35 kans
72 * moved second period check to inside last period check
73 *
74 * Revision 6.58 2001/06/26 23:36:06 kans
75 * in AddGBQual if ASN2GNBK_STRIP_NOTE_PERIODS, trim one or two periods at end
76 *
77 * Revision 6.57 2001/06/13 14:41:58 yaschenk
78 * changing increment of 10 to 1024 in EnlargeSortList()
79 *
80 * Revision 6.56 2001/06/04 21:30:52 kans
81 * TrimSpacesAndSemicolons trims leading semicolons as well as leading spaces
82 *
83 * Revision 6.55 2001/06/01 18:46:26 tatiana
84 * NG_ added to ValidateAccession
85 *
86 * Revision 6.54 2001/05/31 23:45:48 kans
87 * if ASN2GNBK_STRIP_NOTE_PERIODS and IsEllipsis, do not strip period
88 *
89 * Revision 6.53 2001/05/29 23:27:47 kans
90 * added support for snoRNA - flatfile prints as misc_RNA for now
91 *
92 * Revision 6.52 2001/04/16 16:51:42 tatiana
93 * GetDivision(): CON division never use for aa
94 *
95 * Revision 6.51 2001/04/06 12:47:43 beloslyu
96 * missing flatloc declaration was added
97 *
98 * Revision 6.50 2001/04/05 21:41:26 tatiana
99 * REGION added in GetLocusPartsAwp()
100 *
101 * Revision 6.49 2001/04/04 22:05:16 kans
102 * In GB_PrintPubs under ASN2GNBK_STRIP_NOTE_PERIODS clean up comma/space/semicolon (TF)
103 *
104 * Revision 6.48 2001/04/04 21:46:56 kans
105 * TrimSpacesAndJunkFromEnds if ASN2GNBK_STRIP_NOTE_PERIODS (TF)
106 *
107 * Revision 6.47 2001/04/02 21:25:19 kans
108 * AddGBQual under ASN2GNBK_STRIP_NOTE_PERIODS also removes ; ; substrings
109 *
110 * Revision 6.46 2001/03/26 17:36:06 kans
111 * added NULL for endogenous-virus to genome prefix array
112 *
113 * Revision 6.45 2001/02/16 16:52:22 tatiana
114 * special case locus for NT_ records
115 *
116 * Revision 6.44 2001/01/26 19:21:48 kans
117 * extrachromosomal into source note, removed macronuclear, extrachrom, plasmid from organism line
118 *
119 * Revision 6.43 2001/01/19 21:51:04 kans
120 * finally got ASN2GNBK_STRIP_NOTE_PERIODS logic right
121 *
122 * Revision 6.42 2001/01/19 18:45:28 kans
123 * another attempt to use ASN2GNBK_STRIP_NOTE_PERIODS to remove extraneous asn2ff/asn2gnbk diffs
124 *
125 * Revision 6.41 2001/01/08 18:36:40 kans
126 * removed ASN2GNBK_STRIP_NOTE_PERIODS - this was not the right place
127 *
128 * Revision 6.40 2001/01/06 22:09:42 kans
129 * added ASN2GNBK_STRIP_NOTE_PERIODS to try to eliminate trivial note discrepancies
130 *
131 * Revision 6.39 2000/11/29 20:46:11 tatiana
132 * HTC division added for MI_TECH_htc
133 *
134 * Revision 6.38 2000/10/24 20:28:44 tatiana
135 * ValidateAccession accepts XP, XM
136 *
137 * Revision 6.37 2000/09/20 21:26:19 tatiana
138 * all organelles adde to ORGANISM line
139 *
140 * Revision 6.36 2000/09/11 18:52:59 tatiana
141 * PUBMED linetype is legal in release mode
142 *
143 * Revision 6.35 2000/08/25 16:16:46 kans
144 * ValidateLocus initializes num_of_digits even if > 1000 segments
145 *
146 * Revision 6.34 2000/08/01 21:09:39 tatiana
147 * ValidateVersion is colld in forgbrel option only
148 *
149 * Revision 6.33 2000/06/29 12:23:30 kans
150 * GenPept on Seq_repr_virtual shown only if is_www || ajp->mode != RELEASE_MODE, earlier kludge of ignoring get_www was probably too broad
151 *
152 * Revision 6.32 2000/06/28 19:31:22 kans
153 * in SeqToAwp always set is_www to TRUE, so virtual sequences show up on non-web applications
154 *
155 * Revision 6.31 2000/06/23 15:42:34 tatiana
156 * removed virion and proviral from ORGANISM line
157 *
158 * Revision 6.30 2000/06/21 15:04:57 tatiana
159 * space added to Virion
160 *
161 * Revision 6.29 2000/06/12 20:49:04 tatiana
162 * new organelles added to ORGANISM filed
163 *
164 * Revision 6.28 2000/06/05 17:51:53 tatiana
165 * increase size of feature arrays to Int4
166 *
167 * Revision 6.27 2000/02/09 19:34:39 kans
168 * added forgbrel flag to Asn2ffJobPtr, currently used to suppress PUBMED line, which was not formally announced in release notes
169 *
170 * Revision 6.26 2000/01/28 17:56:48 kans
171 * show_gi always FALSE to suppress NID and PID, added support for PUBMED line in GenBank format
172 *
173 * Revision 6.25 2000/01/18 17:09:24 tatiana
174 * NP added to ValidateAccession
175 *
176 * Revision 6.24 1999/10/06 20:20:24 bazhin
177 * Removed memory leaks in GeneStructContentFree() and GetPubsAwp()
178 * functions.
179 *
180 * Revision 6.23 1999/09/23 18:09:33 tatiana
181 * ValidateAccession modified for N*_ accession
182 *
183 * Revision 6.22 1999/09/15 18:17:12 tatiana
184 * GRAPHIK_FMT corrected
185 *
186 * Revision 6.18 1999/04/02 19:33:55 tatiana
187 * MI_TECH_htgs_0 added in BioseqGetGBDivCode()
188 *
189 * Revision 6.17 1999/04/01 20:44:12 kans
190 * Int2 lengths to Int4 to allow CountGapsInDeltaSeq with buffer > 32K
191 *
192 * Revision 6.16 1999/03/31 01:09:23 tatiana
193 * ValidateAccession accepts 3+5
194 *
195 * Revision 6.15 1999/03/30 21:00:45 tatiana
196 * ValidateOtherAccession() added
197 *
198 * Revision 6.14 1999/03/22 23:22:32 tatiana
199 * accession.version modifications
200 *
201 * Revision 6.13 1999/01/12 16:57:55 kans
202 * SeqToAwp checks for null ep before dereferencing
203 *
204 * Revision 6.12 1998/11/24 20:15:03 kans
205 * seqid other has better priority than local so refgene id is used preferentially
206 *
207 * Revision 6.11 1998/10/30 01:12:00 kans
208 * GetPubsAwp GatherEntity filters out OBJ_SEQALIGN - this was being hit many times on big records, and there is no need for asn2ff to see alignments
209 *
210 * Revision 6.10 1998/09/24 17:46:00 kans
211 * fixed GetDBXrefFromGene problem (TT)
212 *
213 * Revision 6.9 1998/06/15 14:59:49 tatiana
214 * UNIX compiler warnings fixed
215 *
216 * Revision 6.8 1998/05/11 21:58:33 tatiana
217 * some functions moved from asn2ff1.c
218 *
219 * Revision 6.7 1998/05/05 19:53:50 tatiana
220 * SEQFEAT_RSITE supressed in GetNAFeatKey()
221 *
222 * Revision 6.6 1998/04/30 21:49:10 tatiana
223 * *** empty log message ***
224 *
225 * Revision 6.5 1998/02/10 17:01:14 tatiana
226 * AddGBQualEx() added
227 *
228 * Revision 6.4 1998/01/13 21:35:20 tatiana
229 * AsnIoHash moved to asnio.c file
230 *
231 * Revision 6.3 1998/01/13 21:14:50 tatiana
232 * static AsnIoHash changed to AsnIoHash to avoid fubction name collision
233 *
234 * Revision 6.2 1997/12/15 15:53:29 tatiana
235 * features processing has been changed
236 *
237 * Revision 6.1 1997/09/16 15:41:49 kans
238 * added SEQFEAT_SITE case to GetNAFeatKey (TT)
239 *
240 * Revision 5.25 1997/07/28 19:03:59 vakatov
241 * [WIN32,MSVC++] Restored lost "NCBIOBJ.LIB" pro-DLL modifications
242 *
243 * Revision 5.24 1997/07/28 14:26:11 vakatov
244 * BioseqGetGBDivCode() proto in-sync with its header-located declaration
245 *
246 * Revision 5.23 1997/07/24 23:57:41 tatiana
247 * fixed sfp_order
248 *
249 * Revision 5.22 1997/07/24 15:59:06 tatiana
250 * aaaaaaa bug fixed in Getscblknum
251 *
252 * Revision 5.21 1997/07/16 21:18:42 tatiana
253 * added sorting by feat type in CompareSfpForHeap()
254 *
255 * Revision 5.20 1997/06/19 18:37:17 vakatov
256 * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
257 *
258 * Revision 5.19 1997/05/21 14:43:27 tatiana
259 * fix empty /product in GetNAFeatKey
260 *
261 * Revision 5.17 1997/01/13 22:33:04 tatiana
262 * added CompareGeneName()
263 *
264 * Revision 5.16 1996/12/17 22:47:56 tatiana
265 * added StoreFeatFree()
266 *
267 * Revision 5.15 1996/10/25 22:12:10 tatiana
268 * doesn't add empty ("") val if qual is translation
269 *
270 * Revision 5.14 1996/10/02 15:14:38 tatiana
271 * a bug fixed
272 *
273 * Revision 5.13 1996/10/01 22:42:09 tatiana
274 * fixed duplicated notes in NoteToCharPtrStack
275 *
276 * Revision 5.12 1996/09/09 13:36:02 kans
277 * moved BioseqGetGBDivCode from toasn.[ch] to asn2ff.h/asn2ff6.c
278 *
279 * Revision 5.11 1996/09/03 19:52:49 tatiana
280 * extra_loc added
281 *
282 * Revision 5.10 1996/08/28 21:40:35 tatiana
283 * don't copy new location from gather
284 *
285 * Revision 5.9 1996/08/16 20:34:45 tatiana
286 * GetNAFeatKey() changed
287 *
288 * Revision 5.7 1996/08/09 21:08:57 tatiana
289 * a bug fixed in GetNAFeatKey
290 *
291 * Revision 5.6 1996/07/30 16:35:05 tatiana
292 * Boolean new added to GetNaFeatKey()
293 *
294 * Revision 5.5 1996/07/19 21:38:15 tatiana
295 * ERR_GI_No_GI_Number changed from ErrPostEx to ErrPostStr
296 *
297 * Revision 5.3 1996/07/02 18:11:18 tatiana
298 * calculate hash in StoreFeat
299 *
300 * Revision 5.2 1996/06/14 18:05:03 tatiana
301 * GetNAFeatKey change
302 *
303 * Revision 5.1 1996/06/11 15:26:36 tatiana
304 * GetGINumber is modified to get also embl NI
305 *
306 * Revision 4.17 1996/05/16 21:00:52 tatiana
307 * RemoveRedundantFeats addded
308 *
309 * Revision 4.16 1996/04/29 18:51:42 tatiana
310 * whole_book format added
311 *
312 * Revision 4.15 1996/04/15 14:36:23 tatiana
313 * memory leaks cleaning
314 *
315 * Revision 4.13 1996/02/28 04:53:06 ostell
316 * changes to support segmented master seeuquences
317 *
318 * Revision 4.12 1996/02/15 15:54:51 tatiana
319 * minor clean ups
320 *
321 * Revision 4.11 1996/01/29 22:39:10 tatiana
322 * error posting MODULE
323 *
324 * Revision 4.10 1995/12/20 22:41:56 tatiana
325 * removed redundant functions
326 *
327 * Revision 4.9 1995/12/12 20:21:05 tatiana
328 * CitSub validation fixed
329 *
330 * Revision 4.8 1995/12/10 22:19:31 tatiana
331 * Imprint in CitSub became optional
332 *
333 * Revision 4.7 1995/11/17 21:28:35 kans
334 * asn2ff now uses gather (Tatiana)
335 *
336 * Revision 4.2 1995/08/04 15:26:42 tatiana
337 * bug fixed in GetPubDate (check for Null pointer).
338 *
339 * Revision 4.1 1995/08/01 14:53:08 tatiana
340 * change SeqIdPrint to SeqIdWrite
341 *
342 * Revision 1.57 1995/07/17 19:33:20 kans
343 * parameters combined into Asn2ffJobPtr structure
344 * ==========================================================================
345 */
346
347 #include <asn2ff6.h>
348 #include <asn2ffp.h>
349 #include <a2ferrdf.h>
350 #include <asn2ffg.h>
351 #include <utilpub.h>
352 #include <ffprint.h>
353 #include <explore.h>
354 #include <sqnutils.h>
355
356 #define BUF_EXT_LENGTH 4
357
358 /*---------- order for other id FASTA_LONG (copied from SeqIdWrite) ------- */
359
360 static Uint1 fasta_order[NUM_SEQID] = {
361 33, /* 0 = not set */
362 20, /* 1 = local Object-id */
363 15, /* 2 = gibbsq */
364 16, /* 3 = gibbmt */
365 30, /* 4 = giim Giimport-id */
366 10, /* 5 = genbank */
367 10, /* 6 = embl */
368 10, /* 7 = pir */
369 10, /* 8 = swissprot */
370 15, /* 9 = patent */
371 10, /* 10 = other TextSeqId */
372 20, /* 11 = general Dbtag */
373 32, /* 12 = gi */
374 10, /* 13 = ddbj */
375 10, /* 14 = prf */
376 12, /* 15 = pdb */
377 10, /* 16 = tpg */
378 10, /* 17 = tpe */
379 10 /* 18 = tpd */
380 };
381
382
383 static Uint1 sfp_order[21] = {0,
384 2, /* SEQFEAT_GENE */
385 5, /* SEQFEAT_ORG */
386 3, /* SEQFEAT_CDREGION */
387 5, /* SEQFEAT_PROT */
388 1, /* SEQFEAT_RNA */
389 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5};
390
/* Forward declarations: MakeSyntheticSeqFeat is defined further down in
 * this file; LookForPubsOnFeat is not defined in the part of the file
 * shown here. */
SeqFeatPtr MakeSyntheticSeqFeat PROTO ((void));
ValNodePtr LookForPubsOnFeat PROTO ((SeqFeatPtr sfp, ValNodePtr PubOnFeat));

/* Global array of 13 Boolean flags.
 * NOTE(review): presumably the storage behind the ASN2FF_* option macros
 * (e.g. ASN2FF_SHOW_ERROR_MSG used below) -- confirm in the headers. */
Boolean asn2ff_flags[13];
395
GeneStructNew(void)396 NLM_EXTERN GeneStructPtr GeneStructNew (void)
397 {
398 GeneStructPtr gsp;
399
400 gsp = (GeneStructPtr) MemNew(sizeof(GeneStruct));
401 gsp->gene = NULL;
402 gsp->product = NULL;
403 gsp->standard_name = NULL;
404 gsp->map = (CharPtr PNTR) MemNew(sizeof(CharPtr));
405 gsp->map_index = 0;
406 gsp->map_size = 1;
407 gsp->ECNum = NULL;
408 gsp->activity = NULL;
409 gsp->grp = NULL;
410
411 return gsp;
412 }
413
NoteStructNew(NoteStructPtr nsp)414 NLM_EXTERN NoteStructPtr NoteStructNew (NoteStructPtr nsp)
415 {
416 nsp = (NoteStructPtr) MemNew(sizeof(NoteStruct));
417
418 nsp->note = (CharPtr PNTR) MemNew(5*sizeof(CharPtr));
419 nsp->note_annot = (CharPtr PNTR) MemNew(5*sizeof(CharPtr));
420 nsp->note_alloc = (Uint1 PNTR) MemNew(5*sizeof(Uint1));
421 nsp->note_index = 0;
422 nsp->note_size = 5;
423
424 return nsp;
425 }
426
GeneStructContentFree(GeneStructPtr gsp)427 static void GeneStructContentFree(GeneStructPtr gsp)
428 {
429 ValNodePtr v, vnext;
430
431 if (gsp->gene) {
432 if (gsp->gene->data.ptrvalue != NULL) {
433 MemFree(gsp->gene->data.ptrvalue);
434 }
435 gsp->gene = ValNodeFree(gsp->gene);
436 }
437 for (v = gsp->product; v; v = vnext) {
438 vnext = v->next;
439 if (v->data.ptrvalue != NULL) {
440 MemFree(v->data.ptrvalue);
441 }
442 MemFree(v);
443 }
444 for (v = gsp->standard_name; v != NULL; v = vnext) {
445 vnext = v->next;
446 if (v->data.ptrvalue != NULL) {
447 MemFree(v->data.ptrvalue);
448 }
449 MemFree(v);
450 }
451 for (v = gsp->ECNum; v; v = vnext) {
452 vnext = v->next;
453 if (v->data.ptrvalue != NULL) {
454 MemFree(v->data.ptrvalue);
455 }
456 MemFree(v);
457 }
458 for (v = gsp->activity; v; v = vnext) {
459 vnext = v->next;
460 if (v->data.ptrvalue != NULL) {
461 MemFree(v->data.ptrvalue);
462 }
463 MemFree(v);
464 }
465 if (gsp->grp) {
466 GeneRefFree(gsp->grp);
467 }
468 return;
469 }
470
GeneStructFree(GeneStructPtr gsp)471 NLM_EXTERN void GeneStructFree (GeneStructPtr gsp)
472 {
473 if (gsp == NULL)
474 return;
475 gsp->map = MemFree(gsp->map);
476 GeneStructContentFree(gsp);
477 MemFree(gsp);
478 }
479
NoteStructFree(NoteStructPtr nsp)480 NLM_EXTERN void NoteStructFree (NoteStructPtr nsp)
481 {
482 Int2 index;
483
484 if (nsp == NULL) {
485 return;
486 }
487 for (index=0; index < nsp->note_index; index++) {
488 if (nsp->note_alloc[index] == ASN2FLAT_ALLOC)
489 nsp->note[index] = MemFree(nsp->note[index]);
490 }
491 nsp->note = MemFree(nsp->note);
492 nsp->note_annot = MemFree(nsp->note_annot);
493 nsp->note_alloc = MemFree(nsp->note_alloc);
494 MemFree(nsp);
495 }
496
NoteStructReset(NoteStructPtr nsp)497 NLM_EXTERN void NoteStructReset (NoteStructPtr nsp)
498 {
499 Int2 index;
500
501 if (nsp == NULL) {
502 return;
503 }
504 for (index=0; index<nsp->note_index; index++)
505 {
506 if (nsp->note_alloc[index] == ASN2FLAT_ALLOC)
507 nsp->note[index] = MemFree(nsp->note[index]);
508 nsp->note[index] = NULL;
509 nsp->note_annot[index] = NULL;
510 }
511 nsp->note_index = 0;
512 }
513
514
ListFree(SeqFeatPtr PNTR PNTR List,Int4 range)515 NLM_EXTERN void ListFree (SeqFeatPtr PNTR PNTR List, Int4 range)
516 {
517 Int4 index;
518
519 for (index=0; index < range; index++)
520 MemFree(List[index]);
521
522 MemFree(List);
523 }
524
525 /***********************************************************************
526 *SeqFeatPtr MakeSyntheticSeqFeat(void)
527 *
528 * This function allocates a "synthetic" SeqFeatPtr, which is
529 * used to print the SeqFeats out. To print out SeqFeats, they
530 * are copied to this "synthetic" sfp, which is an ImpFeat,
531 * adjusted, validated, and then a function prints out this ImpFeat.
532 *************************************************************************/
533
MakeSyntheticSeqFeat(void)534 NLM_EXTERN SeqFeatPtr MakeSyntheticSeqFeat(void)
535 {
536 ImpFeatPtr ifp;
537 SeqFeatPtr sfp_out;
538
539 sfp_out = SeqFeatNew();
540 if (sfp_out)
541 {
542 sfp_out->data.choice = SEQFEAT_IMP;
543 sfp_out->qual = NULL;
544 ifp = sfp_out->data.value.ptrvalue = ImpFeatNew();
545 /* ifp->key = (CharPtr) MemNew(20*sizeof(Char)); */
546 /* key may be more than 20 char one day and cause segmentation fault */
547 ifp->key = NULL;
548 ifp->loc = NULL;
549 sfp_out->comment = NULL;
550 sfp_out->location = NULL;
551 sfp_out->product = NULL;
552 }
553
554 return sfp_out;
555 }
556
CpNoteToCharPtrStack(NoteStructPtr nsp,CharPtr annot,CharPtr string)557 NLM_EXTERN void CpNoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string)
558 {
559 NoteToCharPtrStack(nsp, annot, string, ASN2FLAT_NOT_ALLOC);
560 return;
561 }
562
SaveNoteToCharPtrStack(NoteStructPtr nsp,CharPtr annot,CharPtr string)563 NLM_EXTERN void SaveNoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string)
564 {
565 NoteToCharPtrStack(nsp, annot, string, ASN2FLAT_ALLOC);
566 return;
567 }
568
569
NoteToCharPtrStack(NoteStructPtr nsp,CharPtr annot,CharPtr string,Uint1 alloc)570 NLM_EXTERN void NoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string, Uint1 alloc)
571 {
572 Int2 index, note_size;
573
574 if (nsp)
575 {
576 note_size = nsp->note_size;
577 index = nsp->note_index;
578 }
579 else
580 return;
581 /*** add check for duplicated notes 9-27-96 ***/
582 /* if (string[StringLen(string)-1] == '.')
583 string[StringLen(string)-1] = '\0';
584 for (i = 0; i < note_size; i++) {
585 if (nsp->note[i] && StringStr(nsp->note[i], string) != NULL) {
586 return;
587 }
588 }
589 */
590 if (index == note_size)
591 EnlargeCharPtrStack(nsp, 5);
592
593 nsp->note_annot[index] = annot;
594
595 if (alloc == ASN2FLAT_NOT_ALLOC)
596 {
597 nsp->note_alloc[index] = ASN2FLAT_NOT_ALLOC;
598 nsp->note[index] = string;
599 }
600 else if (alloc == ASN2FLAT_ALLOC)
601 {
602 nsp->note_alloc[index] = ASN2FLAT_ALLOC;
603 nsp->note[index] = StringSave(string);
604 }
605
606 nsp->note_index++;
607
608 return;
609 }
610
EnlargeCharPtrStack(NoteStructPtr nsp,Int2 enlarge)611 NLM_EXTERN void EnlargeCharPtrStack (NoteStructPtr nsp, Int2 enlarge)
612 {
613 CharPtr PNTR newstr;
614 CharPtr PNTR new_annot;
615 Int2 index;
616 Uint1 PNTR new_alloc;
617
618 newstr = (CharPtr PNTR) MemNew((size_t)
619 ((enlarge+(nsp->note_size))*sizeof(CharPtr)));
620 new_annot = (CharPtr PNTR) MemNew((size_t)
621 ((enlarge+(nsp->note_size))*sizeof(CharPtr)));
622 new_alloc = (Uint1 PNTR) MemNew((size_t)
623 ((enlarge+(nsp->note_size))*sizeof(Uint1)));
624
625 for (index=0; index<(nsp->note_size); index++) {
626 newstr[index] = nsp->note[index];
627 new_annot[index] = nsp->note_annot[index];
628 new_alloc[index] = nsp->note_alloc[index];
629 }
630 nsp->note_size += enlarge;
631 nsp->note = MemFree(nsp->note);
632 nsp->note_annot = MemFree(nsp->note_annot);
633 nsp->note_alloc = MemFree(nsp->note_alloc);
634 nsp->note = newstr;
635 nsp->note_annot = new_annot;
636 nsp->note_alloc = new_alloc;
637 }
638
EnlargeSortList(SortStructPtr List,Int4 size)639 NLM_EXTERN SortStructPtr EnlargeSortList(SortStructPtr List, Int4 size)
640 {
641 SortStructPtr NewList;
642
643 if (size % 1024 == 0) {
644 NewList = (SortStructPtr) MemNew((size+1024)*sizeof(SortStruct));
645 if (size > 0) {
646 MemCopy(NewList, List, (size * sizeof(SortStruct)));
647 MemFree(List);
648 }
649 return NewList;
650 }
651 return List;
652
653 } /* EnlargeSortList */
654
CompareSfpForHeap(VoidPtr vp1,VoidPtr vp2)655 NLM_EXTERN int LIBCALLBACK CompareSfpForHeap (VoidPtr vp1, VoidPtr vp2)
656 {
657
658 SortStructPtr sp1 = vp1;
659 SortStructPtr sp2 = vp2;
660 BioseqPtr bsp;
661 SeqFeatPtr sfp1, sfp2;
662 Int2 status = 0;
663
664 bsp = sp1->bsp;
665 sfp1 = sp1->sfp;
666 sfp2 = sp2->sfp;
667 if (sfp1 == NULL || sfp2 == NULL) {
668 return status;
669 }
670
671 status = SeqLocOrder(sfp1->location, sfp2->location, bsp);
672
673 if (ABS(status) >= 2 && sp1->seg_bsp) {
674 status = SeqLocOrder(sfp1->location, sfp2->location, sp1->seg_bsp);
675 }
676 if (status == 0 && sfp1->data.choice < 6 && sfp2->data.choice < 6) {
677 status = sfp_order[sfp1->data.choice] - sfp_order[sfp2->data.choice];
678 }
679 return status;
680 }
CompareGeneName(VoidPtr vp1,VoidPtr vp2)681 NLM_EXTERN int LIBCALLBACK CompareGeneName (VoidPtr vp1, VoidPtr vp2)
682 {
683
684 SortStructPtr sp1 = vp1;
685 SortStructPtr sp2 = vp2;
686 SeqFeatPtr sfp1, sfp2;
687 GeneRefPtr gr1, gr2;
688 Int2 status = 0;
689
690 sfp1 = sp1->sfp;
691 sfp2 = sp2->sfp;
692 if (sfp1 == NULL || sfp2 == NULL) {
693 return status;
694 }
695 if (sfp1->data.choice != SEQFEAT_GENE)
696 return status;
697 if (sfp2->data.choice != SEQFEAT_GENE)
698 return status;
699 gr1 = (GeneRefPtr) sfp1->data.value.ptrvalue;
700 gr2 = (GeneRefPtr) sfp2->data.value.ptrvalue;
701 if (gr1 == NULL || gr2 == NULL)
702 return status;
703 status = StringCmp(gr1->locus, gr2->locus);
704
705 return status;
706 }
707
708 /**************************************************************************
709 * This function returns the gi number
710 * If no gi number is found, -1 is returned and a warning is
711 * issued.
712 * 06-10-96
713 * This fubction is changed to void. It will find NCBI gi and embl ni
714 * and fill up gbp structure
715 **************************************************************************/
716
GetGINumber(GBEntryPtr gbp)717 NLM_EXTERN void GetGINumber(GBEntryPtr gbp)
718 {
719 Boolean found_gi;
720 ValNodePtr vnp;
721 Int4 gi = -1;
722 CharPtr ni = NULL;
723 DbtagPtr dbtag;
724 ObjectIdPtr oid;
725
726 found_gi = FALSE;
727 if (gbp == NULL)
728 return;
729 if (gbp->bsp == NULL)
730 return;
731 for (vnp=gbp->bsp->id; vnp; vnp=vnp->next) {
732 if (vnp->choice == SEQID_GI) {
733 gi = vnp->data.intvalue;
734 if (gi != 0) {
735 found_gi = TRUE;
736 break;
737 } else {
738 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
739 ErrPostEx(SEV_WARNING, ERR_GI_No_GI_Number,
740 "Zero gi number: %d", gi);
741 }
742 }
743 } else if (vnp->choice == SEQID_GENERAL) {
744 dbtag = vnp->data.ptrvalue;
745 if (StringCmp(dbtag->db, "NID") == 0) {
746 oid = dbtag->tag;
747 if (oid->str) {
748 ni = StringSave(oid->str);
749 }
750 }
751 }
752 }
753 if (! found_gi) {
754 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
755 ErrPostStr(SEV_WARNING, ERR_GI_No_GI_Number, "");
756 }
757 gi = -1;
758 }
759 gbp->gi = gi;
760 gbp->ni = ni;
761 return;
762 }
763
764 /***********************************************************************
765 *
766 * GetGIs gets the GI's.
767 *
768 ************************************************************************/
GetGIs(Asn2ffJobPtr ajp)769 NLM_EXTERN void GetGIs (Asn2ffJobPtr ajp)
770 {
771 GBEntryPtr gbp;
772
773 for (gbp = ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
774 GetGINumber(gbp);
775 }
776 return;
777 }
778
GetProductSeqId(ValNodePtr product)779 NLM_EXTERN SeqIdPtr GetProductSeqId(ValNodePtr product)
780 {
781 SeqIdPtr sip=NULL;
782 SeqIntPtr seq_int;
783 SeqLocPtr slp;
784
785 if (product)
786 {
787 if (product->choice == SEQLOC_WHOLE)
788 {
789 sip = (SeqIdPtr) product->data.ptrvalue;
790 }
791 else if (product->choice == SEQLOC_INT)
792 {
793 seq_int = (SeqIntPtr) product->data.ptrvalue;
794 sip = seq_int->id;
795 }
796 else if (product->choice == SEQLOC_EQUIV)
797 {
798 for (slp = (SeqLocPtr) product->data.ptrvalue; slp != NULL; slp = slp->next) {
799 sip = GetProductSeqId (slp);
800 if (sip != NULL) return sip;
801 }
802 }
803 }
804 return sip;
805 }
806
807 /*****************************************************************************
808 *check_range
809 *
810 * This function is called by the gbparse functions of Karl Sirotkin
811 * and determines if the length of a BioSeqPtr is sensible.
812 * Pointer data is not used !! Tatiana !!
813 * Tom Madden
814 *****************************************************************************/
815
check_range(Pointer data,SeqIdPtr seq_id)816 NLM_EXTERN Int4 check_range(Pointer data, SeqIdPtr seq_id)
817
818 {
819 BioseqPtr bsp;
820
821 bsp = BioseqFind(seq_id);
822 if (bsp)
823 return bsp->length;
824 else
825 return 0;
826 } /* check_range */
827
/****************************************************************************
*do_loc_errors
*
*	Location-error callback.  According to the original header it is
*	called both by the gbparse functions of Karl Sirotkin and by
*	asn2ff.
*
*	As written it simply posts one informational (SEV_INFO)
*	ERR_FEATURE_Bad_location message of the form "front: details";
*	nothing is buffered and no flag is set or reset here.
*
*	front	text printed before the colon
*	details	text printed after the colon
***************************************************************************/

NLM_EXTERN void do_loc_errors(CharPtr front, CharPtr details)
{
    ErrPostEx(SEV_INFO, ERR_FEATURE_Bad_location, "%s: %s\n", front, details);
}
842
843 /***************************************************************************
844 *do_no_loc_errors
845 *
846 * Is used when no error messages are wanted.
847 ****************************************************************************/
848
do_no_loc_errors(CharPtr front,CharPtr details)849 NLM_EXTERN void do_no_loc_errors(CharPtr front, CharPtr details)
850 {
851 return;
852 }
853
854 /***************************************************************************
855 *Boolean GBQualPresent(CharPtr ptr, GBQualPtr gbqual)
856 *
857 *This function check that a qual, that is to be added to the list of qual
858 *isn't already present.
859 ***************************************************************************/
GBQualPresent(CharPtr ptr,GBQualPtr gbqual)860 NLM_EXTERN Boolean GBQualPresent(CharPtr ptr, GBQualPtr gbqual)
861
862 {
863 Boolean present=FALSE;
864 GBQualPtr qual;
865
866 for (qual=gbqual; qual; qual=qual->next)
867 if (StringCmp(ptr, qual->qual) == 0)
868 {
869 present = TRUE;
870 break;
871 }
872
873 return present;
874 } /* GBQualPresent */
875
876 /**********************************************************************
877 *Boolean GetNAFeatKey(CharPtr buffer, SeqFeatPtr sfp)
878 *
879 * This function places the sfp "key" in buffer and returns TRUE
880 * if successful, it returns FALSE if not successful.
881 * This function only works for nucleic acid sequences, as the
882 * keys (for corresponding numbers) are different for peptides.
883 ***********************************************************************/
884
GetNAFeatKey(Boolean is_new,CharPtr PNTR buffer,SeqFeatPtr sfp,SeqFeatPtr sfp_out)885 NLM_EXTERN Boolean GetNAFeatKey(Boolean is_new, CharPtr PNTR buffer, SeqFeatPtr sfp, SeqFeatPtr sfp_out)
886 {
887
888 Boolean retval=TRUE;
889 ImpFeatPtr ifp;
890 RnaRefPtr rrp;
891 CharPtr str = NULL;
892 Int2 index;
893
894
895 switch (sfp->data.choice)
896 {
897 case SEQFEAT_GENE: /* gene becomes misc_feat for purposes of CheckNAFeat */
898 if (is_new) {
899 *buffer = StringSave("gene");
900 } else {
901 *buffer = StringSave("misc_feature");
902 }
903 break;
904 case SEQFEAT_CDREGION:
905 *buffer = StringSave("CDS");
906 break;
907 case SEQFEAT_RNA:
908 rrp = sfp->data.value.ptrvalue;
909 /* the following code was taken (almost) directly from Karl
910 Sirotkin's code. */
911 switch ( rrp -> type){
912 case 1:
913 *buffer =StringSave("precursor_RNA");
914 break;
915 case 2:
916 *buffer = StringSave("mRNA");
917 break;
918 case 3:
919 *buffer = StringSave("tRNA");
920 break;
921 case 4:
922 *buffer = StringSave("rRNA");
923 break;
924 case 5:
925 *buffer = StringSave("snRNA");
926 break;
927 case 6:
928 *buffer = StringSave("scRNA");
929 break;
930 case 7:
931 *buffer = StringSave("snoRNA"); /* snoRNA */
932 break;
933 case 255:
934 *buffer = StringSave("misc_RNA");
935 break;
936 }
937 switch ( rrp -> type){
938 case 2:
939 case 4:
940 case 5:
941 case 6:
942 case 7:
943 case 255:
944 if (rrp ->ext.choice == 1 && sfp_out) {
945 str = rrp->ext.value.ptrvalue;
946 if (str != NULL && *str != '\0') {
947 index = GBFeatKeyNameValid(buffer, FALSE);
948 if (GBQualValidToAdd(index, "product")) {
949 sfp_out->qual = AddGBQual(sfp_out->qual,
950 "product", str);
951 }
952 }
953 }
954 break;
955 }
956 break;
957 case SEQFEAT_IMP:
958 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
959 *buffer = StringSave(ifp->key);
960 break;
961 case SEQFEAT_SEQ:
962 case SEQFEAT_SITE:
963 case SEQFEAT_REGION:
964 case SEQFEAT_COMMENT:
965 *buffer = StringSave("misc_feature");
966 break;
967 case SEQFEAT_BIOSRC:
968 *buffer = StringSave("source");
969 break;
970 case SEQFEAT_RSITE:
971 default:
972 retval = FALSE;
973 break;
974 }
975
976 return retval;
977 } /* GetNAFeatKey */
978
979 /**************************************************************************
980 *SeqIdPtr CheckXrefFeat (BioseqPtr bsp, SeqFeatPtr sfp)
981 *
982 * First the location of the xref is checked to see if it overlaps
983 * the sequence. If this feature has a xref that is NOT of type
984 * genbank, embl, or ddbj, it is put out as a misc_feat. If it's
985 * one of genbank, embl, or ddbj, it has been put out as a second
986 * accession. If the feature should be put out as a misc_feat, then
987 * the SeqIdPtr (xid) is returned, otherwise NULL.
988 **************************************************************************/
989
CheckXrefFeat(BioseqPtr bsp,SeqFeatPtr sfp)990 NLM_EXTERN SeqIdPtr CheckXrefFeat (BioseqPtr bsp, SeqFeatPtr sfp)
991 {
992
993 SeqIdPtr xid=NULL;
994 SeqIntPtr si;
995 SeqLocPtr xref;
996 ValNodePtr location;
997
998 location = ValNodeNew(NULL);
999 si = SeqIntNew();
1000 location->choice = SEQLOC_INT;
1001 location->data.ptrvalue = si;
1002 si->from = 0;
1003 si->to = bsp->length - 1;
1004 si->id = bsp->id; /* Don't delete id!! */
1005 if (SeqLocCompare(sfp->location, location) != 0)
1006 {
1007 xref = (SeqLocPtr) sfp->data.value.ptrvalue;
1008 xid = (SeqIdPtr) xref->data.ptrvalue;
1009 if (xid->choice != 5 && xid->choice != 6 && xid->choice != 13)
1010 ;
1011 else
1012 xid = NULL;
1013 }
1014 si->id = NULL;
1015 SeqIntFree(si);
1016 ValNodeFree(location);
1017 return xid;
1018 }
1019
GetGINumFromSip(SeqIdPtr sip)1020 NLM_EXTERN Int4 GetGINumFromSip (SeqIdPtr sip)
1021 {
1022 Int4 gi = -1;
1023 ValNodePtr vnp;
1024
1025 for (vnp=sip; vnp; vnp=vnp->next)
1026 if (vnp->choice == SEQID_GI)
1027 gi = vnp->data.intvalue;
1028
1029 return gi;
1030 }
1031
1032 /*****************************************************************************
1033 *FlatRefBest
1034 *
1035 * returns ValNodePtr to best (for FlatFile production) pub in a equiv set
1036 *****************************************************************************/
FlatRefBest(ValNodePtr equiv,Boolean error_msgs,Boolean anything)1037 NLM_EXTERN ValNodePtr FlatRefBest(ValNodePtr equiv, Boolean error_msgs, Boolean anything)
1038 {
1039 ValNodePtr the_pub, retval = NULL, newpub;
1040 CitBookPtr cb;
1041 CitSubPtr cs;
1042 CitGenPtr cg;
1043 CitArtPtr ca;
1044 MedlineEntryPtr ml;
1045 CitJourPtr jp;
1046 ImprintPtr ip;
1047 Boolean good_one;
1048 Int1 bad_one= 0;
1049 CharPtr str_ret;
1050
1051 if (equiv->choice == PUB_Equiv) {
1052 newpub = equiv->data.ptrvalue;
1053 } else {
1054 newpub = equiv;
1055 }
1056 for (the_pub = newpub, good_one = FALSE; the_pub && ! good_one
1057 ; the_pub = the_pub -> next) {
1058
1059 switch ( the_pub -> choice) {
1060
1061 case PUB_Sub:
1062 cs = (CitSubPtr) the_pub -> data.ptrvalue;
1063 if (cs) {
1064 if ( cs -> imp){
1065 ip = cs -> imp;
1066 if ( ip -> date) {
1067 retval = the_pub;
1068 good_one = TRUE; /* good for submitted */
1069 }
1070 } else if (cs->date) {
1071 retval = the_pub;
1072 good_one = TRUE; /* good for submitted */
1073 }
1074 }
1075 break;
1076 case PUB_Man:
1077 case PUB_Book:
1078 cb = (CitBookPtr) the_pub -> data.ptrvalue;
1079 if ( cb -> imp) {
1080 ip = cb -> imp;
1081 if ( ip -> date) {
1082 retval = the_pub;
1083 good_one = TRUE; /* good for thesis or book */
1084 }
1085 }
1086 break;
1087 case PUB_Patent:
1088 retval = the_pub;
1089 good_one = TRUE; /* might exclude later...*/
1090 break;
1091 case PUB_Article:
1092 case PUB_Medline:
1093 if ( the_pub -> choice == PUB_Medline) {
1094 ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1095 ca = (CitArtPtr) ml -> cit;
1096
1097 } else {
1098 ca = (CitArtPtr) the_pub -> data.ptrvalue;
1099 }
1100 if (ca -> fromptr) {
1101 if (ca -> from ==1) {
1102 jp = (CitJourPtr) ca -> fromptr;
1103 if ( jp -> imp) {
1104 ip = jp -> imp;
1105 if ( ip -> date) {
1106 retval = the_pub;
1107 good_one = TRUE; /* good as it gets */
1108 }
1109 }
1110 } else {
1111 CitBookPtr book = (CitBookPtr) ca -> fromptr;
1112 if ( book -> imp) {
1113 ip = book -> imp;
1114 if ( ip -> date) {
1115 retval = the_pub;
1116 good_one = TRUE; /* good for book */
1117 }
1118 }
1119
1120 }
1121 }
1122 break;
1123 case PUB_Gen:
1124 cg = (CitGenPtr) the_pub -> data.ptrvalue;
1125 if (cg -> cit) {
1126 str_ret = NULL;
1127 str_ret = StrStr(cg -> cit ,"Journal=\"");
1128 if ((str_ret) || (cg->title) || (cg->journal) || (cg->date)) {
1129 retval = the_pub; /*unless something better */
1130 } else {
1131 if (StringNICmp("unpublished", cg->cit, 11) == 0)
1132 retval = the_pub;
1133 else if (StringNICmp("to be published", cg->cit, 15) == 0)
1134 retval = the_pub;
1135 else if (StringNICmp("in press", cg->cit, 8) == 0)
1136 retval = the_pub;
1137 else if (StringNICmp("submitted", cg->cit, 8) == 0)
1138 retval = the_pub;
1139 }
1140 } else if (cg -> journal) {
1141 retval = the_pub; /*unless something better */
1142 }
1143
1144 break;
1145 case PUB_Proc:
1146 bad_one = the_pub -> choice;
1147 break;
1148 }
1149 }
1150
1151 if (! retval && anything) {
1152 for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1153 if (the_pub->choice == PUB_Muid)
1154 retval = the_pub;
1155 }
1156 if (! retval) /* Take anything left over now and hope for the best */
1157 retval = newpub;
1158 }
1159
1160 if ( ! retval && bad_one != 0) {
1161 if (error_msgs == TRUE)
1162 ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference,
1163 "FlatRefBest: Unimplemented pub type = %d", bad_one);
1164 }
1165
1166 return retval;
1167 } /* FlatRefBest */
1168
StoreFeatTemp(SortStruct PNTR List,SeqFeatPtr sfp,Int4 currentsize,BioseqPtr bsp,BioseqPtr seg,Uint2 entityID,Uint4 itemID,Uint2 itemtype,SeqLocPtr slp,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt,Boolean temp)1169 NLM_EXTERN Int4 StoreFeatTemp(SortStruct PNTR List, SeqFeatPtr sfp,
1170 Int4 currentsize, BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt,
1171 Boolean temp)
1172 {
1173 SeqLocPtr PNTR slpp = NULL;
1174
1175 List[currentsize].entityID = entityID;
1176 List[currentsize].itemID = itemID;
1177 List[currentsize].itemtype = itemtype;
1178 List[currentsize].sfp = sfp;
1179 List[currentsize].bsp = bsp;
1180 List[currentsize].seg_bsp = seg;
1181 List[currentsize].dup = FALSE;
1182 List[currentsize].hash = AsnIoHash(sfp,
1183 (AsnWriteFunc) SeqFeatAsnWrite);
1184 List[currentsize].slp = slp;
1185 if (extra_loc_cnt > 0) {
1186 slpp = MemNew(extra_loc_cnt*(sizeof(SeqLocPtr)));
1187 MemCpy(slpp, extra_loc, extra_loc_cnt*(sizeof(SeqLocPtr)));
1188 }
1189 List[currentsize].extra_loc = slpp;
1190 List[currentsize].extra_loc_cnt = extra_loc_cnt;
1191 List[currentsize].tempload = temp;
1192 List[currentsize].gsp = NULL;
1193 List[currentsize].nsp = NoteStructNew(List[currentsize].nsp);
1194
1195 currentsize++;
1196
1197 return currentsize;
1198 }
1199
StoreFeat(SortStruct PNTR List,SeqFeatPtr sfp,Int4 currentsize,BioseqPtr bsp,BioseqPtr seg,Uint2 entityID,Uint4 itemID,Uint2 itemtype,SeqLocPtr slp,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt)1200 NLM_EXTERN Int4 StoreFeat(SortStruct PNTR List, SeqFeatPtr sfp, Int4 currentsize,
1201 BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,
1202 SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt)
1203 {
1204 return StoreFeatFree(List, sfp, currentsize, bsp, seg, entityID, itemID,
1205 itemtype,slp, extra_loc, extra_loc_cnt, FALSE);
1206 }
1207
StoreFeatFree(SortStruct PNTR List,SeqFeatPtr sfp,Int4 currentsize,BioseqPtr bsp,BioseqPtr seg,Uint2 entityID,Uint4 itemID,Uint2 itemtype,SeqLocPtr slp,SeqLocPtr PNTR extra_loc,Int2 extra_loc_cnt,Boolean feat_free)1208 NLM_EXTERN Int4 StoreFeatFree(SortStruct PNTR List, SeqFeatPtr sfp, Int4 currentsize,
1209 BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,
1210 SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt, Boolean feat_free)
1211 {
1212 SeqLocPtr PNTR slpp = NULL;
1213
1214 List[currentsize].entityID = entityID;
1215 List[currentsize].itemID = itemID;
1216 List[currentsize].itemtype = itemtype;
1217 List[currentsize].sfp = sfp;
1218 List[currentsize].bsp = bsp;
1219 List[currentsize].seg_bsp = seg;
1220 List[currentsize].dup = FALSE;
1221 List[currentsize].hash = AsnIoHash(sfp,
1222 (AsnWriteFunc) SeqFeatAsnWrite);
1223 List[currentsize].slp = slp;
1224 if (extra_loc_cnt > 0) {
1225 slpp = MemNew(extra_loc_cnt*(sizeof(SeqLocPtr)));
1226 MemCpy(slpp, extra_loc, extra_loc_cnt*(sizeof(SeqLocPtr)));
1227 }
1228 List[currentsize].extra_loc = slpp;
1229 List[currentsize].extra_loc_cnt = extra_loc_cnt;
1230 List[currentsize].feat_free = feat_free;
1231 List[currentsize].gsp = NULL;
1232 List[currentsize].nsp = NoteStructNew(List[currentsize].nsp);
1233
1234 currentsize++;
1235
1236 return currentsize;
1237 }
1238 /****************************************************************************
1239 *CharPtr Cat2Strings (CharPtr string1, CharPtr string2, CharPtr separator, Int2 num)
1240 *
1241 * Concatenates two strings (string1 and string2) and separates them by a
1242 * "separator". If num>0, takes num spaces off the end of string1 on
1243 * concatenation; if num<0 takes all spaces off the end of the complete
1244 * string.
1245 *****************************************************************************/
Cat2Strings(CharPtr string1,CharPtr string2,CharPtr separator,Int2 num)1246 NLM_EXTERN CharPtr Cat2Strings (CharPtr string1, CharPtr string2, CharPtr separator, Int2 num)
1247
1248 {
1249 Boolean no_space=FALSE;
1250 Int4 length1=0, length2=0, length_sep=0, length_total;
1251 CharPtr newstring=NULL;
1252
1253 if (num < 0)
1254 {
1255 num=0;
1256 no_space=TRUE;
1257 }
1258
1259 if (string1 != NULL)
1260 length1 = StringLen(string1);
1261 if (string2 != NULL)
1262 length2 = StringLen(string2);
1263 if (separator != NULL)
1264 length_sep = StringLen(separator);
1265
1266 length_total = length1+length2+length_sep-num+1;
1267
1268 newstring = (CharPtr) MemNew(length_total*sizeof(Char));
1269
1270 if (string1 != NULL)
1271 newstring = StringCat(newstring, string1);
1272 if ((length1-num) >= 0)
1273 newstring[length1-num] = '\0';
1274 if (no_space && length1 > 0)
1275 while (length1 > 0 && newstring[length1-1] == ' ')
1276 {
1277 newstring[length1-1] = '\0';
1278 length1--;
1279 }
1280 if (separator != NULL)
1281 newstring = StringCat(newstring, separator);
1282 if (string2 != NULL)
1283 newstring = StringCat(newstring, string2);
1284
1285
1286 return newstring;
1287 }
1288
AddGBQualEx(CharPtr PNTR key,GBQualPtr gbqual,CharPtr qual,CharPtr val)1289 NLM_EXTERN GBQualPtr AddGBQualEx (CharPtr PNTR key, GBQualPtr gbqual, CharPtr qual, CharPtr val)
1290 {
1291 Int2 index;
1292
1293 index = GBFeatKeyNameValid(key, FALSE);
1294 if (GBQualValidToAdd(index,qual)) {
1295 return AddGBQual(gbqual, qual, val);
1296 }
1297 return gbqual;
1298 }
1299
1300 /************************************************************************
1301 *AddGBQual
1302 *
*	This function makes a new GBQual and adds a "val" and
*	a "qual" to it.
1305 * doesn't add qual if it's already there /tatiana/
1306 * doesn't add empty ("") val if qual is translation
1307 ***********************************************************************/
#ifdef ASN2GNBK_STRIP_NOTE_PERIODS
/* Returns TRUE when str has text (per StringHasNoText), is at least three
 * characters long and its last three characters are "...". */
static Boolean IsEllipsis (
  CharPtr str
)

{
    size_t n;

    if (StringHasNoText (str)) {
        return FALSE;
    }

    n = StringLen (str);
    if (n < 3) {
        return FALSE;
    }

    /* check the three trailing characters, leftmost first */
    if (str [n - 3] != '.') {
        return FALSE;
    }
    if (str [n - 2] != '.') {
        return FALSE;
    }
    return (Boolean) (str [n - 1] == '.');
}
#endif
1324
AddGBQual(GBQualPtr gbqual,CharPtr qual,CharPtr val)1325 NLM_EXTERN GBQualPtr AddGBQual (GBQualPtr gbqual, CharPtr qual, CharPtr val)
1326 {
1327 GBQualPtr curq, note = NULL;
1328
1329 if (StringCmp(qual, "translation") == 0) {
1330 if (val == NULL)
1331 return gbqual;
1332 if (*val == '\0')
1333 return gbqual;
1334 }
1335 if (gbqual) {
1336 if (CheckForQual(gbqual, qual, val) == 1) {
1337 return gbqual;
1338 }
1339 for (curq=gbqual; curq->next != NULL; curq=curq->next)
1340 continue;
1341 curq->next = GBQualNew();
1342 curq = curq->next;
1343 if (val)
1344 curq->val = StringSave(val);
1345 curq->qual = StringSave(qual);
1346 note = curq;
1347 } else {
1348 gbqual = GBQualNew();
1349 gbqual->next = NULL;
1350 if (val)
1351 gbqual->val = StringSave(val);
1352 gbqual->qual = StringSave(qual);
1353 note = gbqual;
1354 }
1355
1356 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
1357 if (note != NULL && StringICmp (qual, "note") == 0) {
1358 size_t len;
1359 CharPtr p, q;
1360 len = StringLen (note->val);
1361 if (len > 0 && note->val [len - 1] == '~') {
1362 note->val [len - 1] = '\0';
1363 }
1364 if (! IsEllipsis (note->val)) {
1365 len = StringLen (note->val);
1366 if (len > 0 && note->val [len - 1] == '.') {
1367 note->val [len - 1] = '\0';
1368 if (len > 1 && note->val [len - 2] == '.') {
1369 note->val [len - 2] = '\0';
1370 }
1371 }
1372 }
1373 TrimSpacesAndJunkFromEnds (note->val,TRUE);
1374 TrimSpacesAndSemicolons (note->val);
1375 p = note->val;
1376 q = note->val;
1377 while (*p) {
1378 if (*p == ';' && p [1] == ' ' && p [2] == ';') {
1379 p += 2;
1380 } else {
1381 *q = *p;
1382 p++;
1383 q++;
1384 }
1385 }
1386 *q = '\0';
1387 }
1388 #endif
1389
1390 return gbqual;
1391 }
1392
1393 /****************************************************************************
1394 * Int2 CheckForQual(GBQualPtr gbqual, CharPtr string_q, CharPtr string_v)
1395 *
1396 * Compares string (a potential gbqual->val) against all gbquals.
1397 * If a match is found, "1" is returned; if not "0".
1398 ****************************************************************************/
1399
CheckForQual(GBQualPtr gbqual,CharPtr string_q,CharPtr string_v)1400 NLM_EXTERN Int2 CheckForQual (GBQualPtr gbqual, CharPtr string_q, CharPtr string_v)
1401 {
1402 GBQualPtr curq;
1403
1404 for (curq=gbqual; curq; curq=curq->next) {
1405 if (StringCmp(string_q, curq->qual) == 0) {
1406 if (curq->val == NULL) {
1407 curq->val = StringSave(string_v);
1408 return 1;
1409 }
1410 if (StringCmp(string_v, curq->val) == 0) {
1411 return 1;
1412 }
1413 }
1414 }
1415 return 0;
1416 }
1417
1418
1419 /****************************************************************************
1420 *
1421 * MakeAnAccession is for last ditch efforts to get an accession
1422 * after all the normal things have failed.
1423 *
1424 ****************************************************************************/
1425
MakeAnAccession(CharPtr new_buf,SeqIdPtr seq_id,Int2 buflen)1426 NLM_EXTERN CharPtr MakeAnAccession (CharPtr new_buf, SeqIdPtr seq_id, Int2 buflen)
1427 {
1428 SeqIdPtr new_id;
1429
1430 new_id = SeqIdFindBest(seq_id, SEQID_GENBANK);
1431 SeqIdWrite(new_id, new_buf, PRINTID_TEXTID_ACCESSION, buflen);
1432 return new_buf;
1433
1434 }
1435
GetGBSourceLine(GBBlockPtr gb)1436 NLM_EXTERN CharPtr GetGBSourceLine (GBBlockPtr gb)
1437 {
1438 CharPtr source = NULL;
1439
1440 if(gb && gb->source)
1441 source = StringSave(gb->source);
1442
1443 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
1444 if (source != NULL) {
1445 TrimSpacesAndJunkFromEnds (source,TRUE);
1446 }
1447 #endif
1448 return source;
1449 }
1450
FlatOrganelle(Asn2ffJobPtr ajp,GBEntryPtr gbp)1451 NLM_EXTERN CharPtr FlatOrganelle(Asn2ffJobPtr ajp, GBEntryPtr gbp)
1452 {
1453 CharPtr retval = NULL;
1454 ValNodePtr man, vnp=NULL;
1455 static char * organelle_names [] = {
1456 "Mitochondrion " ,
1457 "Chloroplast " ,
1458 "Kinetoplast ",
1459 "Cyanelle "};
1460 BioSourcePtr biosp=NULL;
1461 /*
1462 static CharPtr genome[] = {
1463 NULL, NULL, "Chloroplast ", "Chromoplast ", "Kinetoplast ", "Mitochondrion ", "Plastid ", "Macronuclear ", "Extrachrom ", "Plasmid ", NULL, NULL, "Cyanelle ", "Proviral ", "Virion ", "Nucleomorph ", "Apicoplast ", "Leucoplast ", "Proplastid "};
1464 */
1465 static CharPtr genome[] = {
1466 NULL, NULL, "Chloroplast ", "Chromoplast ", "Kinetoplast ", "Mitochondrion ", "Plastid ", NULL, NULL, NULL, NULL, NULL, "Cyanelle ", NULL, NULL, "Nucleomorph ", "Apicoplast ", "Leucoplast ", "Proplastid ", NULL};
1467
1468 /* try new first */
1469 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_source)) != NULL)
1470 {
1471 biosp = vnp->data.ptrvalue;
1472 /* if (biosp->genome < 6 || biosp->genome > 12)*/
1473 retval = StringSave(genome[biosp->genome]);
1474 }
1475 /* old next */
1476 if (biosp == NULL) {
1477 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_modif)) != NULL)
1478 {
1479 for (man = (ValNodePtr) vnp-> data.ptrvalue; man; man = man -> next)
1480 {
1481 switch (man -> data.intvalue){
1482 case 4: case 5: case 6: case 7:
1483 if (! retval )
1484 retval = StringSave(organelle_names
1485 [man->data.intvalue-4]);
1486 break;
1487 default:
1488 break;
1489 }
1490 }
1491 }
1492 }
1493 return retval;
1494 }
1495
GetNumOfSeqBlks(Asn2ffJobPtr ajp,GBEntryPtr gbp)1496 NLM_EXTERN Int4 GetNumOfSeqBlks (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1497 {
1498 Int4 length, num_of_seqblks;
1499
1500 length = BioseqGetLen(gbp->bsp);
1501 if (ajp->slp) {
1502 length = SeqLocLen(ajp->slp);
1503 }
1504 num_of_seqblks = ROUNDUP(length, SEQ_BLK_SIZE)/SEQ_BLK_SIZE;
1505
1506 return num_of_seqblks;
1507 }
1508
1509
1510 /*************************************************************************
1511 * New asn.1 spec - division is in Orgname.div
1512 * check MolInfo.tech
1513 * check GBBlock for PAT or SYN
1514 * get division from Orgname.div (in BioSource)
1515 * 09-05-96
1516 *************************************************************************/
1517
IndexedGetDescrForDiv(BioseqPtr bsp,DivStructPtr PNTR dspp)1518 static void IndexedGetDescrForDiv (BioseqPtr bsp, DivStructPtr PNTR dspp)
1519
1520 {
1521 SeqMgrDescContext context;
1522 ValNodePtr tmp;
1523 DivStructPtr dsp;
1524 BioSourcePtr bsr;
1525 MolInfoPtr mol;
1526 CharPtr gb_div=NULL;
1527 GBBlockPtr gb;
1528
1529 dsp = *dspp;
1530 tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
1531 if (tmp != NULL) {
1532 if (tmp->data.ptrvalue != NULL) {
1533 mol = (MolInfoPtr) tmp->data.ptrvalue;
1534 if (mol->tech != 0) {
1535 if (dsp->tech == 0) {
1536 dsp->tech = mol->tech;
1537 } else if (mol->tech != dsp->tech) {
1538 dsp->was_err = TRUE;
1539 if (dsp->err_post) {
1540 ErrPostEx(SEV_WARNING, 0, 0,
1541 "Different Molinfo in one entry: %d|%d",
1542 mol->tech, dsp->tech);
1543 }
1544 dsp->tech = mol->tech;
1545 }
1546 dsp->techID = context.itemID;
1547 dsp->techtype = OBJ_SEQDESC;
1548 *dspp = dsp;
1549 }
1550 }
1551 }
1552
1553 tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
1554 while (tmp != NULL && dsp->orgdiv == NULL) {
1555 bsr = (BioSourcePtr) tmp->data.ptrvalue;
1556 if (bsr && bsr->org) {
1557 if (bsr->org->orgname && bsr->org->orgname->div) {
1558 gb_div = bsr->org->orgname->div;
1559 if (dsp->orgdiv == NULL) {
1560 dsp->orgdiv = gb_div;
1561 } else if (StringCmp(gb_div, dsp->orgdiv) != 0) {
1562 dsp->was_err = TRUE;
1563 if (dsp->err_post) {
1564 ErrPostEx(SEV_WARNING, 0, 0,
1565 "Different Taxonomy divisions in one entry: %s|%s",
1566 gb_div, dsp->orgdiv);
1567 }
1568 dsp->orgdiv = gb_div;
1569 }
1570 dsp->biosrc = bsr;
1571 dsp->orgID = context.itemID;
1572 dsp->orgtype = OBJ_SEQDESC;
1573 *dspp = dsp;
1574 }
1575 }
1576 tmp = SeqMgrGetNextDescriptor (bsp, tmp, Seq_descr_source, &context);
1577 }
1578
1579 tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context);
1580 if (tmp != NULL) {
1581 gb = (GBBlockPtr) tmp->data.ptrvalue;
1582 if (gb->div) {
1583 gb_div = gb->div;
1584 if (dsp->gbdiv == NULL) {
1585 dsp->gbdiv = gb_div;
1586 } else if (StringCmp(gb_div, dsp->gbdiv) != 0) {
1587 dsp->was_err = TRUE;
1588 if (dsp->err_post) {
1589 ErrPostEx(SEV_WARNING, 0, 0,
1590 "Different GBBlock divisions in one entry: %s|%s",
1591 gb_div, dsp->gbdiv);
1592 }
1593 dsp->gbdiv = gb_div;
1594 }
1595 dsp->gbID = context.itemID;
1596 dsp->gbtype = OBJ_SEQDESC;
1597 *dspp = dsp;
1598 }
1599 }
1600 }
1601
GetDescrForDiv(GatherContextPtr gcp)1602 static Boolean GetDescrForDiv (GatherContextPtr gcp)
1603 {
1604 /* find only one (closest to the target!) vnp with given choice */
1605 ValNodePtr tmp;
1606 DivStructPtr PNTR dspp;
1607 DivStructPtr dsp;
1608 BioSourcePtr bsr;
1609 MolInfoPtr mol;
1610 CharPtr gb_div=NULL;
1611 GBBlockPtr gb;
1612
1613 dspp = gcp->userdata;
1614 dsp = *dspp;
1615 switch (gcp->thistype)
1616 {
1617 case OBJ_SEQDESC:
1618 tmp = (ValNodePtr) (gcp->thisitem);
1619 if (tmp->choice == Seq_descr_molinfo) {
1620 if (tmp->data.ptrvalue != NULL) {
1621 mol = (MolInfoPtr) tmp->data.ptrvalue;
1622 if (mol->tech != 0) {
1623 if (dsp->tech == 0) {
1624 dsp->tech = mol->tech;
1625 } else if (mol->tech != dsp->tech) {
1626 dsp->was_err = TRUE;
1627 if (dsp->err_post) {
1628 ErrPostEx(SEV_WARNING, 0, 0,
1629 "Different Molinfo in one entry: %d|%d",
1630 mol->tech, dsp->tech);
1631 }
1632 dsp->tech = mol->tech;
1633 }
1634 dsp->techID = gcp->itemID;
1635 dsp->techtype = gcp->thistype;
1636 *dspp = dsp;
1637 }
1638 }
1639 } else if (tmp->choice == Seq_descr_source) {
1640 bsr = (BioSourcePtr) tmp->data.ptrvalue;
1641 if (bsr && bsr->org) {
1642 if (bsr->org->orgname && bsr->org->orgname->div) {
1643 gb_div = bsr->org->orgname->div;
1644 if (dsp->orgdiv == NULL) {
1645 dsp->orgdiv = gb_div;
1646 } else if (StringCmp(gb_div, dsp->orgdiv) != 0) {
1647 dsp->was_err = TRUE;
1648 if (dsp->err_post) {
1649 ErrPostEx(SEV_WARNING, 0, 0,
1650 "Different Taxonomy divisions in one entry: %s|%s",
1651 gb_div, dsp->orgdiv);
1652 }
1653 dsp->orgdiv = gb_div;
1654 }
1655 dsp->biosrc = bsr;
1656 dsp->orgID = gcp->itemID;
1657 dsp->orgtype = gcp->thistype;
1658 *dspp = dsp;
1659 }
1660 }
1661 } else if (tmp->choice == Seq_descr_genbank) {
1662 gb = (GBBlockPtr) tmp->data.ptrvalue;
1663 if (gb->div) {
1664 gb_div = gb->div;
1665 if (dsp->gbdiv == NULL) {
1666 dsp->gbdiv = gb_div;
1667 } else if (StringCmp(gb_div, dsp->gbdiv) != 0) {
1668 dsp->was_err = TRUE;
1669 if (dsp->err_post) {
1670 ErrPostEx(SEV_WARNING, 0, 0,
1671 "Different GBBlock divisions in one entry: %s|%s",
1672 gb_div, dsp->gbdiv);
1673 }
1674 dsp->gbdiv = gb_div;
1675 }
1676 dsp->gbID = gcp->itemID;
1677 dsp->gbtype = gcp->thistype;
1678 *dspp = dsp;
1679 }
1680 }
1681 break;
1682 default:
1683 break;
1684 }
1685 return TRUE;
1686 }
1687
1688 /**************************************************************************
1689 * 0 - nothing found
1690 * 1 - return division code OK
1691 * 2 - return division code but errors were found
1692 **************************************************************************/
BioseqGetGBDivCodeEx(BioseqPtr bsp,CharPtr buf,Int2 buflen,Boolean err_post,Boolean useFeatureIndexing)1693 static Int2 BioseqGetGBDivCodeEx (BioseqPtr bsp, CharPtr buf, Int2 buflen, Boolean err_post, Boolean useFeatureIndexing)
1694 {
1695 GatherScope gsc;
1696 SeqLocPtr slp = NULL;
1697 Uint2 bspID;
1698 DivStructPtr dsp;
1699 BioSourcePtr bsr = NULL;
1700 Int2 tech, /*UNUSED*/diff, retval = 0;
1701 CharPtr orgdiv, gbdiv;
1702 SeqIdPtr sip;
1703
1704 if (buf == NULL)
1705 return 0;
1706 *buf = '\0';
1707 /* check for Patent SeqId */
1708 for (sip = bsp->id; sip; sip=sip->next) {
1709 if (sip->choice == SEQID_PATENT) {
1710 diff = LabelCopy(buf, "PAT", buflen);
1711 return 1;
1712 }
1713 }
1714 bspID = ObjMgrGetEntityIDForPointer(bsp);
1715 dsp = MemNew(sizeof(DivStruct));
1716 dsp->err_post = err_post;
1717 dsp->entityID = bspID;
1718 dsp->tech = 0;
1719 dsp->gbdiv = NULL;
1720 dsp->orgdiv = NULL;
1721 dsp->biosrc = NULL;
1722 dsp->was_err = FALSE;
1723 MemSet ((Pointer) (&gsc), 0, sizeof (GatherScope));
1724 MemSet ((Pointer) (gsc.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
1725 gsc.ignore[OBJ_SEQDESC] = FALSE;
1726 slp = ValNodeNew(NULL);
1727 slp->choice = SEQLOC_WHOLE;
1728 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
1729 gsc.target = slp;
1730
1731 if (useFeatureIndexing) {
1732 IndexedGetDescrForDiv (bsp, &dsp);
1733 } else {
1734 GatherEntity(bspID, &dsp, GetDescrForDiv, &gsc);
1735 }
1736
1737 SeqLocFree(slp);
1738 orgdiv = dsp->orgdiv;
1739 gbdiv = dsp->gbdiv;
1740 tech = dsp->tech;
1741 bsr = dsp->biosrc;
1742 if (dsp->was_err) {
1743 retval = 2;
1744 } else {
1745 retval = 1;
1746 }
1747 MemFree(dsp);
1748 switch (tech) {
1749 case MI_TECH_est:
1750 diff = LabelCopy(buf, "EST", buflen);
1751 break;
1752 case MI_TECH_sts: /* Sequence Tagged Site */
1753 diff = LabelCopy(buf, "STS", buflen);
1754 break;
1755 case MI_TECH_survey:
1756 diff = LabelCopy(buf, "GSS", buflen);
1757 break;
1758 case MI_TECH_htc:
1759 diff = LabelCopy(buf, "HTC", buflen);
1760 break;
1761 case MI_TECH_htgs_0:
1762 case MI_TECH_htgs_1:
1763 case MI_TECH_htgs_2:
1764 diff = LabelCopy(buf, "HTG", buflen);
1765 break;
1766 default:
1767 break;
1768 }
1769 if (*buf != '\0') {
1770 return retval;
1771 }
1772 /* new slot for synthetic sequences */
1773 if (bsr && bsr->origin == 5) {
1774 diff = LabelCopy(buf, "SYN", buflen);
1775 return retval;
1776 }
1777 /***** division in GBBlock becomes obsolete ********/
1778 if (gbdiv != NULL) {
1779 if (StringCmp(gbdiv, "PAT") == 0 ||
1780 StringCmp(gbdiv, "SYN") == 0 || orgdiv == NULL) {
1781 diff = LabelCopy(buf, gbdiv, buflen);
1782 return retval;
1783 }
1784 }
1785
1786 /**********/
1787 if (orgdiv != NULL) {
1788 diff = LabelCopy(buf, orgdiv, buflen);
1789 return retval;
1790 }
1791 return 0;
1792 }
1793
BioseqGetGBDivCode(BioseqPtr bsp,CharPtr buf,Int2 buflen,Boolean err_post)1794 NLM_EXTERN Int2 BioseqGetGBDivCode(BioseqPtr bsp, CharPtr buf, Int2 buflen, Boolean err_post)
1795
1796 {
1797 return BioseqGetGBDivCodeEx (bsp, buf, buflen, err_post, FALSE);
1798 }
1799
1800
1801 /*============================================================================*\
1802 * Function:
1803 * StrStripSpaces
1804 *
1805 * Purpose:
1806 * Strips all spaces in string in following manner. If the function
1807 * meet several spaces (spaces and tabs) in succession it replaces them
1808 * with one space.
1809 * Strips all spaces after '(' and before ')'
1810 *
1811 \*----------------------------------------------------------------------------*/
StrStripSpaces(CharPtr str)1812 static void StrStripSpaces(CharPtr str)
1813 {
1814 CharPtr new_str;
1815
1816 if (str == NULL) {
1817 return;
1818 }
1819
1820 new_str = str;
1821 while (*str != '\0') {
1822 *new_str++ = *str;
1823 if (*str == ' ' || *str == '\t' || *str == '(') {
1824 for (str++; *str == ' ' || *str == '\t'; str++) ;
1825 if (*str == ')' || *str == ',') {
1826 new_str--;
1827 }
1828 } else {
1829 str++;
1830 }
1831 }
1832 *new_str = '\0';
1833 }
1834
GetFlatRetract(ValNodePtr pub)1835 static CharPtr GetFlatRetract(ValNodePtr pub)
1836 {
1837 CitArtPtr cit;
1838 CitJourPtr jour = NULL;
1839 CitRetractPtr ret = NULL;
1840 CharPtr buffer;
1841 Int2 len;
1842
1843 if (pub == NULL)
1844 return NULL;
1845 if (pub->choice != PUB_Article)
1846 return NULL;
1847 cit = pub->data.ptrvalue;
1848 if (cit->from == 1) {
1849 jour = cit->fromptr;
1850 if (jour && jour->imp) {
1851 ret = jour->imp->retract;
1852 if (ret && ret->type == 3) { /* other types can be added later */
1853 len = StringLen(ret->exp) + 11;
1854 buffer = (CharPtr) MemNew(len*sizeof(Char));
1855 sprintf(buffer, "Erratum:[%s]", ret->exp);
1856 return buffer;
1857 }
1858 }
1859 }
1860 return NULL;
1861 }
1862
GetSubmitDescr(ValNodePtr pub)1863 static CharPtr GetSubmitDescr(ValNodePtr pub)
1864 {
1865 CitSubPtr cs;
1866
1867 if (pub == NULL) {
1868 return NULL;
1869 }
1870 if (pub->choice != PUB_Sub) {
1871 return NULL;
1872 }
1873 cs = (CitSubPtr) pub->data.ptrvalue;
1874 if (cs->descr == NULL) {
1875 return NULL;
1876 }
1877 return (StringSave(cs->descr));
1878 }
1879
GetMuid(ValNodePtr equiv)1880 static Int4 GetMuid(ValNodePtr equiv)
1881 {
1882 Int4 muid=0;
1883 ValNodePtr newpub, the_pub;
1884 MedlineEntryPtr ml;
1885
1886 if (equiv->choice == PUB_Equiv)
1887 newpub = equiv->data.ptrvalue;
1888 else
1889 newpub = equiv;
1890
1891 for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1892 if (the_pub->choice == PUB_Muid) {
1893 muid = the_pub->data.intvalue;
1894 break;
1895 }
1896 if (the_pub->choice == PUB_Medline) {
1897 ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1898 muid = ml->uid;
1899 }
1900 }
1901
1902 return muid;
1903
1904 } /* GetMuid */
1905
GetPmid(ValNodePtr equiv)1906 static Int4 GetPmid(ValNodePtr equiv)
1907 {
1908 Int4 pmid=0;
1909 ValNodePtr newpub, the_pub;
1910 MedlineEntryPtr ml;
1911
1912 if (equiv->choice == PUB_Equiv)
1913 newpub = equiv->data.ptrvalue;
1914 else
1915 newpub = equiv;
1916
1917 for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1918 if (the_pub->choice == PUB_PMid) {
1919 pmid = the_pub->data.intvalue;
1920 break;
1921 }
1922 if (the_pub->choice == PUB_Medline) {
1923 ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1924 pmid = ml->pmid;
1925 }
1926 }
1927
1928 return pmid;
1929
1930 } /* GetPmid */
1931
1932 /***************************************************************************
1933 * SeqLocPtr GetBaseRangeForCitation (SeqLocPtr loc, SeqLocPtr slp, Int4Ptr start, Int4Ptr stop)
1934 *
1935 *
1936 * This function finds the start and stop Int4 values for a location.
1937 * If this is a cmplex location (e.g., SEQLOC_MIX), then the
1938 * function is called several times, with the returned slp used
1939 * as an argument on the next round.
1940 *
1941 * The first call should be with slp set to NULL.
1942 *
1943 ****************************************************************************/
1944
GetBaseRangeForCitation(SeqLocPtr loc,SeqLocPtr slp,Int4Ptr start,Int4Ptr stop)1945 static SeqLocPtr GetBaseRangeForCitation (SeqLocPtr loc, SeqLocPtr slp, Int4Ptr start, Int4Ptr stop)
1946 {
1947 Int4 tmp_start, tmp_stop, tmp_range;
1948
1949 *start = 0;
1950 *stop = 0;
1951
1952 switch (loc->choice)
1953 {
1954 case SEQLOC_BOND:
1955 case SEQLOC_FEAT:
1956 case SEQLOC_NULL:
1957 case SEQLOC_EMPTY:
1958 slp = NULL;
1959 break;
1960 case SEQLOC_WHOLE:
1961 case SEQLOC_INT:
1962 if ((tmp_start = SeqLocStart(loc)) >= 0 &&
1963 (tmp_stop = SeqLocStop(loc)) >= 0)
1964 {
1965 tmp_range = tmp_stop - tmp_start;
1966 if (tmp_range >= 0)
1967 { /* +1 for Genbank format. */
1968 *start = tmp_start+1;
1969 *stop = tmp_stop+1;
1970 }
1971 }
1972 slp = NULL;
1973 break;
1974 case SEQLOC_MIX:
1975 case SEQLOC_EQUIV:
1976 case SEQLOC_PACKED_INT:
1977 if (slp == NULL)
1978 slp = loc->data.ptrvalue;
1979 if (slp != NULL)
1980 {
1981 if ((tmp_start = SeqLocStart(slp)) >= 0 &&
1982 (tmp_stop = SeqLocStop(slp)) >= 0)
1983 {
1984 tmp_range = tmp_stop - tmp_start;
1985 if (tmp_range >= 0)
1986 { /* +1 for Genbank format. */
1987 *start = tmp_start+1;
1988 *stop = tmp_stop+1;
1989 }
1990 }
1991 slp = slp->next;
1992 }
1993 break;
1994 case SEQLOC_PACKED_PNT:
1995 case SEQLOC_PNT:
1996 slp = NULL;
1997 break;
1998 default:
1999 slp = NULL;
2000 break;
2001 }
2002 return slp;
2003 }
2004
2005 /*************************************************************************
2006 *GB_PrintPubs
2007 *
2008 * "GB_PrintPubs" to dump pubs in Flat File (i.e., Genbank) format.
2009 *
2010 **************************************************************************/
2011
GB_PrintPubs(Asn2ffJobPtr ajp,GBEntryPtr gbp,PubStructPtr psp)2012 void GB_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)
2013
2014 {
2015
2016 BioseqPtr bsp=gbp->bsp;
2017 Boolean first_time, ignore_this=FALSE, submit=FALSE, tag;
2018 Char buffer[150];
2019 CharPtr authors=NULL,title=NULL,journal=NULL,string_start, string, retract;
2020 CharPtr descr = NULL;
2021 Int2 i;
2022 Int4 gibbsq, muid, pmid, pat_seqid=0, start=0, stop=0;
2023 PubdescPtr pdp;
2024 SeqFeatPtr sfp;
2025 SeqLocPtr loc, slp;
2026 ValNodePtr pub;
2027
2028 if (ASN2FF_SHOW_ALL_PUBS) {
2029 pub = FlatRefBest(psp->pub, ajp->error_msgs, TRUE);
2030 } else {
2031 pub = FlatRefBest(psp->pub, ajp->error_msgs, FALSE);
2032 }
2033 if (pub == NULL)
2034 {
2035 if (ajp->error_msgs == TRUE)
2036 PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
2037 return;
2038 }
2039 ignore_this = FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid);
2040 if (ajp->format != GENPEPT_FMT)
2041 {
2042 if (ignore_this == TRUE)
2043 {
2044 if (ajp->error_msgs == TRUE)
2045 PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
2046 return;
2047 }
2048 }
2049
2050 ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
2051 ff_AddString("REFERENCE");
2052 TabToColumn(13);
2053 ff_AddInteger("%ld", (long) psp->number);
2054 if (psp->start == 1) {
2055 TabToColumn(16);
2056 if (psp->descr != NULL) {
2057 if (psp->descr->reftype != 0) {
2058 ff_AddString("(sites)");
2059 } else {
2060 if (ajp->format != GENPEPT_FMT) {
2061 ff_AddString("(bases ");
2062 } else {
2063 ff_AddString("(residues ");
2064 }
2065 if (ajp->slp) {
2066 ff_AddInteger("%ld", (long) (SeqLocStart(ajp->slp) + 1));
2067 ff_AddString(" to ");
2068 ff_AddInteger("%ld", (long) (SeqLocStop(ajp->slp) + 1));
2069 } else {
2070 ff_AddString("1 to ");
2071 ff_AddInteger("%ld", (long) bsp->length);
2072 }
2073 ff_AddChar(')');
2074 }
2075 }
2076 }
2077 else if (psp->start == 2) {
2078 TabToColumn(16);
2079 if (ajp->format != GENPEPT_FMT)
2080 ff_AddString("(bases ");
2081 else
2082 ff_AddString("(residues ");
2083 for (i=0; i<psp->citcount; i++) {
2084 sfp = psp->citfeat[i];
2085 loc = (SeqLocPtr) sfp->location;
2086 slp = GetBaseRangeForCitation (loc, NULL, &start, &stop);
2087 if (start != 0 || stop != 0) {
2088 ff_AddInteger("%ld", (long) start);
2089 ff_AddString(" to ");
2090 ff_AddInteger("%ld", (long) stop);
2091 if (slp != NULL || i+1 != psp->citcount)
2092 ff_AddString("; ");
2093 }
2094 while (slp != NULL) {
2095 slp = GetBaseRangeForCitation (loc, slp, &start, &stop);
2096 if (start != 0 || stop != 0) {
2097 ff_AddInteger("%ld", (long) start);
2098 ff_AddString(" to ");
2099 ff_AddInteger("%ld", (long) stop);
2100 if (slp != NULL || i+1 != psp->citcount)
2101 ff_AddString("; ");
2102 }
2103 }
2104 }
2105 ff_AddChar(')');
2106 } else if (psp->start == 3) {
2107 TabToColumn(16);
2108 ff_AddString("(sites)");
2109 } else {
2110 if (ajp->error_msgs == TRUE)
2111 ErrPostEx(SEV_WARNING, CTX_NCBI2GB, 1,
2112 "Incorrect start value (%d) in PubStruct\n", psp->start);
2113 }
2114 ff_EndPrint();
2115
2116 authors = FlatAuthor(ajp, pub);
2117 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2118 ff_AddString("AUTHORS");
2119 TabToColumn(13);
2120
2121 if (authors && *authors != NULLB) {
2122 ff_AddString(authors);
2123 } else {
2124 ff_AddChar('.');
2125 }
2126 ff_EndPrint();
2127
2128 title = FlatPubTitle(pub);
2129 if (title ) {
2130 if ( *title != NULLB) {
2131 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2132 ff_AddString("TITLE");
2133 TabToColumn(13);
2134 StrStripSpaces(title);
2135 ff_AddString(title);
2136 ff_EndPrint();
2137 }
2138 }
2139
2140 journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);
2141 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2142 ff_AddString("JOURNAL");
2143 TabToColumn(13);
2144 if (journal ) {
2145 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
2146 CharPtr p, q;
2147 p = journal;
2148 q = journal;
2149 while (*p) {
2150 if (*p == ',' && p [1] == ' ' && p [2] == ';') {
2151 p += 2;
2152 } else {
2153 *q = *p;
2154 p++;
2155 q++;
2156 }
2157 }
2158 *q = '\0';
2159 #endif
2160 StrStripSpaces(journal);
2161 ff_AddString(journal);
2162 } else {
2163 ff_AddString("Unpublished");
2164 }
2165 ff_EndPrint();
2166
2167 muid = GetMuid(psp->pub);
2168 if (muid > 0) {
2169 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2170 ff_AddString("MEDLINE");
2171 TabToColumn(13);
2172 www_muid(muid);
2173 ff_EndPrint();
2174 }
2175 pmid = GetPmid (psp->pub);
2176 if (pmid > 0) {
2177 ff_StartPrint(3, 12, ASN2FF_GB_MAX, NULL);
2178 ff_AddString("PUBMED");
2179 TabToColumn(13);
2180 www_muid(pmid);
2181 ff_EndPrint();
2182 }
2183
2184 tag = FALSE;
2185 pdp = psp->descr;
2186 if (pdp != NULL && pdp->comment != NULL) {
2187 if (StringCmp(pdp->comment, "full automatic") != 0 &&
2188 StringCmp(pdp->comment, "full staff_review") != 0 &&
2189 StringCmp(pdp->comment, "full staff_entry") != 0 &&
2190 StringCmp(pdp->comment, "simple staff_review") != 0 &&
2191 StringCmp(pdp->comment, "simple staff_entry") != 0 &&
2192 StringCmp(pdp->comment, "simple automatic") != 0 &&
2193 StringCmp(pdp->comment, "unannotated automatic") != 0 &&
2194 StringCmp(pdp->comment, "unannotated staff_review") != 0 &&
2195 StringCmp(pdp->comment, "unannotated staff_entry") != 0)
2196 {
2197 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2198 ff_AddString("REMARK ");
2199 TabToColumn(13);
2200 ff_AddStringWithTildes(pdp->comment);
2201 tag = TRUE;
2202 }
2203 }
2204 string = &buffer[0];
2205 gibbsq = GetGibbsqStatement(gbp, string);
2206 if (gibbsq > 0) {
2207 if (tag != TRUE) {
2208 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2209 ff_AddString("REMARK");
2210 TabToColumn(13);
2211 } else {
2212 NewContLine();
2213 }
2214 ff_AddStringWithTildes(string);
2215 tag = TRUE;
2216 }
2217 string = GetGibbsqComment(gbp);
2218 if (string) {
2219 string_start = string;
2220 if (tag != TRUE) {
2221 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2222 ff_AddString("REMARK");
2223 TabToColumn(13);
2224 } else {
2225 NewContLine();
2226 }
2227 first_time = TRUE;
2228 /* Can't this be rewritten to use ff_AddString???? That would be faster! */
2229 while (*string != '\0') {
2230 if (*string == '~') {
2231 if (first_time == FALSE)
2232 NewContLine();
2233 else
2234 first_time = FALSE;
2235 } else if (*string == '\"') {
2236 *string = '\'';
2237 ff_AddChar(*string);
2238 } else {
2239 ff_AddChar(*string);
2240 }
2241 string++;
2242 }
2243 string_start = MemFree(string_start);
2244 tag=TRUE;
2245 }
2246 retract = GetFlatRetract(pub);
2247 if (retract) {
2248 if (tag != TRUE) {
2249 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2250 ff_AddString("REMARK");
2251 TabToColumn(13);
2252 } else {
2253 NewContLine();
2254 }
2255 ff_AddStringWithTildes(retract);
2256 tag = TRUE;
2257 MemFree(retract);
2258 }
2259 descr = GetSubmitDescr(pub);
2260 if (descr) {
2261 if (tag != TRUE) {
2262 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2263 ff_AddString("REMARK");
2264 TabToColumn(13);
2265 } else {
2266 NewContLine();
2267 }
2268 ff_AddStringWithTildes(descr);
2269 tag = TRUE;
2270 }
2271 if (tag == TRUE)
2272 ff_EndPrint();
2273
2274 if (authors)
2275 MemFree(authors);
2276
2277 MemFree(descr);
2278 MemFree(title);
2279 MemFree(journal);
2280 } /* GB_PrintPubs */
2281
2282 /*************************************************************************
2283 *GR_PrintPubs
2284 *
2285 * "GR_PrintPubs" to dump pubs in Flat File (i.e., Genbank) format.
2286 *
2287 **************************************************************************/
2288
/*
 * GR_PrintPubs
 *
 * Prints one reference as HTML-tagged text: "<BR><BR>" followed by the
 * bold title, "<BR>" plus the author list (or "." when there is none),
 * "<BR>" plus the journal (or "Unpublished"), and, when GetMuid() returns
 * a positive value, that identifier via www_muid().
 *
 *   ajp - job options; format and error_msgs are read here
 *   gbp - entry being printed; gbp->bsp is passed to the patent test
 *   psp - publication to print
 *
 * Nothing is printed when FlatRefBest() finds no usable pub, or when
 * FlatIgnoreThisPatentPub() says to ignore it and the format is not
 * GENPEPT_FMT; in both cases a reference error is posted if
 * ajp->error_msgs is TRUE.
 */
void GR_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)
{
    BioseqPtr   bsp = gbp->bsp;
    Boolean     ignore_this = FALSE, submit = FALSE;
    CharPtr     authors = NULL, title = NULL, journal = NULL;
    Int4        muid, pmid, pat_seqid = 0;
    ValNodePtr  pub;

    pub = FlatRefBest(psp->pub, ajp->error_msgs,
                      (Boolean) ((ASN2FF_SHOW_ALL_PUBS) ? TRUE : FALSE));
    if (pub == NULL) {
        if (ajp->error_msgs == TRUE)
            PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
        return;
    }

    ignore_this = FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid);
    if (ajp->format != GENPEPT_FMT && ignore_this == TRUE) {
        if (ajp->error_msgs == TRUE)
            PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
        return;
    }

    /*
     * NOTE(review): when a non-empty title exists, ff_EndPrint() is called
     * here and again after the authors with only one ff_StartPrint() in
     * between.  The pairing is kept exactly as it was because the effect of
     * an unmatched ff_EndPrint() is not visible from this function --
     * confirm against the ff_* implementation before changing it.
     */
    ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
    ff_AddString("<BR><BR>");
    title = FlatPubTitle(pub);
    if (title != NULL && *title != NULLB) {
        StrStripSpaces(title);
        ff_AddString("<B>");
        ff_AddString(title);
        ff_AddString("</B>");
        ff_EndPrint();
    }

    authors = FlatAuthor(ajp, pub);
    if (authors && *authors != NULLB) {
        ff_AddString("<BR>");
        ff_AddString(authors);
    } else {
        ff_AddChar('.');
    }
    ff_EndPrint();

    journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);
    ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
    ff_AddString("<BR>");
    if (journal) {
        StrStripSpaces(journal);
        ff_AddString(journal);
    } else {
        ff_AddString("Unpublished");
    }
    ff_EndPrint();

    muid = GetMuid(psp->pub);
    if (muid > 0) {
        ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
        ff_AddString("<BR>");
        TabToColumn(13);
        www_muid(muid);
        ff_EndPrint();
    }

    /* The value is fetched but deliberately not printed: the original author
       was not sure what this format should emit for it. */
    pmid = GetPmid (psp->pub);

    MemFree(authors);
    MemFree(title);
    MemFree(journal);

} /* GR_PrintPubs */
2383
2384 /*************************************************************************
2385 *EMBL_PrintPubs
2386 *
2387 * "EMBL_PrintPubs" to dump pubs in FlatFile (EMBL) format.
2388 *
2389 **************************************************************************/
2390
/*
 * EMBL_PrintPubs
 *
 * Prints one reference using EMBL two-letter line tags, in this order:
 * RN (reference number), RP (positions), RC (comment, when present),
 * RA (authors), RT (title), RL (journal) and RX (when GetMuid() is
 * non-zero).
 *
 *   ajp - job options; error_msgs and pseudo are read here
 *   gbp - entry being printed; gbp->bsp supplies the sequence length
 *   psp - publication to print
 *
 * Returns without printing when no usable pub is found, or when the pub
 * is an ignorable patent pub and ASN2FF_IGNORE_PATENT_PUBS is set.
 */
void EMBL_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)
{
    BioseqPtr   bsp = gbp->bsp;
    PubdescPtr  descr = psp->descr;
    ValNodePtr  pub;
    Boolean     submit = FALSE;
    CharPtr     authors = NULL, title = NULL, journal = NULL, fixed_journal;
    SeqFeatPtr  cit;
    SeqLocPtr   cit_loc, next;
    Int4        pat_seqid = 0;
    Int4        from = 0, to = 0, span, widest;
    Int4        muid;
    Int2        idx;
    Char        muid_text[15];

    pub = FlatRefBest(psp->pub, ajp->error_msgs, FALSE);
    if (pub == NULL) {
        if (ajp->error_msgs == TRUE)
            ErrPostStr(SEV_WARNING, ERR_REFERENCE_Illegalreference, "FFDumpPubs: Invalid Pub found.");
        return;
    }
    if (FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid) == TRUE
        && ASN2FF_IGNORE_PATENT_PUBS != FALSE) {
        if (ajp->error_msgs == TRUE)
            ErrPostStr(SEV_WARNING, ERR_REFERENCE_Illegalreference, "FFDumpPubs: Invalid Patent Pub");
        return;
    }

    PrintXX();

    /* RN: "[n]" */
    ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RN");
    ff_AddChar('[');
    ff_AddInteger("%ld", (long) psp->number);
    ff_AddChar(']');
    ff_EndPrint();

    /* RP: what is printed depends on psp->start */
    switch (psp->start) {
    case 1:
        /* whole sequence */
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
        ff_AddString("1-");
        ff_AddInteger("%ld", (long) bsp->length);
        ff_EndPrint();
        break;

    case 2:
        /* one or more ranges taken from the citing features */
        widest = 0;
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
        for (idx = 0; idx < psp->citcount; idx++) {
            cit = psp->citfeat[idx];
            cit_loc = (SeqLocPtr) cit->location;

            /* The first range of a feature is printed only if it is at
               least as wide as the widest first range printed so far. */
            next = GetBaseRangeForCitation (cit_loc, NULL, &from, &to);
            if (!(from == 0 && to == 0)) {
                span = to - from;
                if (span >= widest) {
                    widest = span;
                    ff_AddInteger("%ld", (long) from);
                    ff_AddChar('-');
                    ff_AddInteger("%ld", (long) to);
                    if (next != NULL || idx + 1 != psp->citcount)
                        ff_AddString(", ");
                }
            }

            /* Remaining ranges of the same feature are always printed. */
            while (next != NULL) {
                next = GetBaseRangeForCitation (cit_loc, next, &from, &to);
                if (from == 0 && to == 0)
                    continue;
                ff_AddInteger("%ld", (long) from);
                ff_AddChar('-');
                ff_AddInteger("%ld", (long) to);
                if (next != NULL || idx + 1 != psp->citcount)
                    ff_AddString(", ");
            }
        }
        ff_EndPrint();
        break;

    case 3:
        /* "sites" only for pseudo-embl. */
        if (ajp->pseudo == TRUE) {
            ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
            ff_AddString("(sites)");
            ff_EndPrint();
        }
        break;

    default:
        break;
    }

    /* Called before RA/RT because it also sets `submit`, which RT needs. */
    journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);

    if (descr && descr->comment) {
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RC");
        ff_AddString(descr->comment);
        ff_EndPrint();
    }

    authors = FlatAuthor(ajp, pub);
    ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RA");
    if (authors)
        ff_AddString(authors);
    ff_AddChar(';');
    ff_EndPrint();

    /* RT: quoted title, omitted when FlatJournal flagged a submission */
    ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RT");
    if (! submit) {
        title = FlatPubTitle(pub);
        if (title != NULL && *title) {
            ff_AddChar('\"');
            StrStripSpaces(title);
            ff_AddString(title);
            ff_AddChar('\"');
        }
    }
    ff_AddChar(';');
    ff_EndPrint();

    ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RL");
    if (journal) {
        fixed_journal = CheckEndPunctuation(journal, '.');
        StrStripSpaces(fixed_journal);
        ff_AddString(fixed_journal);
        MemFree(fixed_journal);
    }
    ff_EndPrint();

    muid = GetMuid(psp->pub);
    if (muid != 0) {
        sprintf(muid_text, "%ld.", (long) muid);
        ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RX");
        ff_AddString("MEDLINE; ");
        ff_AddString(muid_text);
        ff_EndPrint();
    }

    MemFree(authors);
    MemFree(title);
    MemFree(journal);
} /* EMBL_PrintPubs */
2537
2538 /***************************************************************************
2539 *CharPtr CheckLocusLength (Boolean error_msgs, CharPtr locus, Int2 locus_max, Int2 total_segs)
2540 *
2541 * Calculate the length of the locus; if it's too long, take characters
2542 * off the front. If it's part of a segmented set and the locus ends
2543 * in a number, add an "S". If it appears to be an NCBI locus of the
2544 * form HSU00001, then take two letters off the front.
2545 ***************************************************************************/
/*
 * CheckLocusLength
 *
 * Shortens a locus name in place so that it is no longer than locus_max.
 *
 *   error_msgs - when TRUE, an informational message is posted whenever
 *                the name is shortened
 *   locus      - NUL-terminated name; rewritten in place when too long
 *   locus_max  - maximum permitted length
 *   total_segs - > 0 when the record is part of a segmented set
 *
 * For segmented sets an 'S' is appended to a working copy of a name that
 * ends in a digit (unless it ends in a lone '0' that is not preceded by
 * another digit).  The length test is made on that working copy, and locus
 * is rewritten only when the copy is longer than locus_max: normally the
 * leading surplus characters are dropped, but a name shaped like
 * "HSU00001S" that is one or two characters too long loses exactly its
 * first two characters.
 *
 * Returns locus (NULL when locus is NULL).
 */
static CharPtr CheckLocusLength (Boolean error_msgs, CharPtr locus, Int2 locus_max, Int2 total_segs)
{
    Boolean  cut_two = FALSE;
    CharPtr  buffer;
    Int2     length, surplus;

    if (locus == NULL)
        return NULL;

    length = StringLen(locus);
    /* +2: room for the optional 'S' and the terminator */
    buffer = MemNew((length + 2) * sizeof(Char));
    if (buffer == NULL)
        return locus;
    StringCpy(buffer, locus);

    /* Guard the index arithmetic: an empty name has no last character and
       a one-character name has no second-to-last character. */
    if (total_segs > 0 && length > 0 && IS_DIGIT(locus[length-1]) != 0) {
        if (locus[length-1] != '0'
            || (length > 1 && IS_DIGIT(locus[length-2]) != 0)) {
            buffer[length] = 'S';
            length++;
            buffer[length] = '\0';
        }
    }

    surplus = length - locus_max;

    if (surplus > 0) {
        if (surplus <= 2) {
            /* Check if this is of the form HSU00001S.  The && chain stops
               at the first mismatch, so it never reads past the NUL. */
            if (IS_ALPHA(buffer[0]) != 0 &&
                IS_ALPHA(buffer[1]) != 0 &&
                IS_ALPHA(buffer[2]) != 0 &&
                IS_DIGIT(buffer[3]) != 0 &&
                IS_DIGIT(buffer[4]) != 0 &&
                IS_DIGIT(buffer[5]) != 0 &&
                IS_DIGIT(buffer[6]) != 0 &&
                IS_DIGIT(buffer[7]) != 0 &&
                buffer[8] == 'S' &&
                buffer[9] == '\0')
                cut_two = TRUE;
        }

        if (cut_two == TRUE)
            locus = StringCpy(locus, buffer + 2);
        else
            locus = StringCpy(locus, buffer + surplus);

        if (error_msgs == TRUE) {
            /* the untruncated name is what gets attached to the message */
            flat2asn_delete_locus_user_string();
            flat2asn_install_locus_user_string(buffer);
            ErrPostStr(SEV_INFO, ERR_LOCUS_ChangedLocusName,
                "Locusname length is more than 16, locusname is truncated");
        }
    }

    MemFree(buffer);

    return locus;
}
2601
GetPubsAwp(Asn2ffJobPtr ajp,GBEntryPtr gbp)2602 NLM_EXTERN Int4 GetPubsAwp (Asn2ffJobPtr ajp, GBEntryPtr gbp)
2603 {
2604 GatherScope gs;
2605 BioseqPtr bsp = NULL;
2606 ValNodePtr vnp, v;
2607 SeqLocPtr slp = NULL;
2608 SeqIdPtr isip;
2609 Int4 status, i;
2610 Char buffer[31];
2611
2612 bsp = gbp->bsp;
2613 if (bsp == NULL) {
2614 return 0;
2615 }
2616 isip = bsp->id;
2617 vnp = NULL;
2618 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
2619 gs.get_feats_location = TRUE;
2620 if (ajp->genome_view == TRUE) {
2621 gs.seglevels = 0;
2622 } else if (ajp->only_one) {
2623 gs.seglevels = 2;
2624 } else {
2625 gs.seglevels = 1;
2626 }
2627 /* MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
2628 gs.ignore[OBJ_SEQDESC] = FALSE;
2629 gs.ignore[OBJ_SEQANNOT] = FALSE;
2630 gs.ignore[OBJ_SEQFEAT] = FALSE;
2631 gs.ignore[OBJ_SEQSUB] = FALSE;
2632 gs.ignore[OBJ_SEQSUB_CIT] = FALSE;*/
2633
2634 MemSet ((Pointer) (gs.ignore), (int)(FALSE), (size_t) (OBJ_MAX * sizeof(Boolean)));
2635 gs.ignore[OBJ_SEQALIGN] = TRUE; /* this was being hit many times on big records */
2636
2637 if (ajp->slp == NULL) {
2638 slp = ValNodeNew(NULL);
2639 slp->choice = SEQLOC_WHOLE;
2640 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
2641 gs.target = slp;
2642 } else {
2643 gs.target = ajp->slp;
2644 }
2645 GatherEntity(ajp->entityID, &vnp, get_pubs, &gs);
2646 if (slp)
2647 SeqLocFree(slp);
2648 if ((status = CheckPubs(ajp, bsp, &vnp)) <= 0) {
2649 if (ajp->error_msgs == TRUE) {
2650 MakeAnAccession(buffer, isip, 30);
2651 flat2asn_delete_locus_user_string();
2652 flat2asn_install_locus_user_string(buffer);
2653 flat2asn_delete_accession_user_string();
2654 flat2asn_install_accession_user_string(buffer);
2655 ErrPostStr(SEV_ERROR, ERR_REFERENCE_NoValidRefs,
2656 "No refs found that would result in legal flatfile format");
2657 }
2658 /* found something. */
2659 if (status < 0) {
2660 ValNodeFree(vnp);
2661 vnp = NULL;
2662 }
2663 }
2664 gbp->Pub = OrganizePubList(vnp);
2665 for (v = gbp->Pub, i=0; v != NULL; v= v->next, i++);
2666
2667 return i;
2668 }
2669 /*************************************************************************
2670 * Check for EMBL format first
2671 * Call to find div for Genbank records
2672 * Allocate a buffer for division
2673 * 09-05-96
2674 *************************************************************************/
GetDivision(Asn2ffJobPtr ajp,GBEntryPtr gbp)2675 static CharPtr GetDivision(Asn2ffJobPtr ajp, GBEntryPtr gbp)
2676 {
2677 ValNodePtr vnp;
2678 MolInfoPtr mol = NULL;
2679 EMBLBlockPtr eb=NULL;
2680 BioseqPtr bsp = gbp->bsp;
2681 Int2 buflen=4;
2682 CharPtr buffer;
2683 static CharPtr embl_divs [] = {
2684 "FUN","INV","MAM","ORG","PHG","PLN","PRI","PRO","ROD","SYN","UNA","VRL",
2685 "VRT","PAT","EST","STS", "HUM", "HTC"
2686 };
2687
2688 buffer = MemNew(buflen);
2689 buffer[0] = '\0';
2690 if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT ||
2691 ajp->format == EMBLPEPT_FMT) {
2692 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_embl)) != NULL) {
2693 eb = (EMBLBlockPtr) vnp->data.ptrvalue;
2694 }
2695 if (eb ) {
2696 if (eb->div == 255) {
2697 /* kludge for HUM division */
2698 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_molinfo)) != NULL) {
2699 gbp->descr = MemFree(gbp->descr);
2700 mol = (MolInfoPtr) vnp->data.ptrvalue;
2701 }
2702 if (mol) {
2703 if (mol->tech == MI_TECH_survey) {
2704 StringNCpy_0(buffer, "GSS", buflen);
2705 return buffer;
2706 } else if (mol->tech == MI_TECH_htc) {
2707 StringNCpy_0(buffer, "HTC", buflen);
2708 return buffer;
2709 } else if (mol->tech == MI_TECH_htgs_1
2710 || mol->tech == MI_TECH_htgs_2) {
2711 StringNCpy_0(buffer, "HTG", buflen);
2712 return buffer;
2713 }
2714 } else {
2715 StringNCpy_0(buffer, embl_divs[16], buflen); /*HUM */
2716 return buffer;
2717 }
2718 } else {
2719 StringNCpy_0(buffer, embl_divs[eb->div], buflen);
2720 return buffer;
2721 }
2722 }
2723 BioseqGetGBDivCodeEx (bsp, buffer, buflen, FALSE, ajp->useSeqMgrIndexes);
2724 if (buffer[0] == NULLB) {
2725 StringNCpy_0(buffer, " ", buflen);
2726 }
2727 return buffer;
2728 }
2729 BioseqGetGBDivCodeEx (bsp, buffer, buflen, FALSE, ajp->useSeqMgrIndexes);
2730 if (buffer[0] == NULLB) {
2731 StringNCpy_0(buffer, " ", buflen);
2732 }
2733 if (gbp->bsp && gbp->bsp->mol == Seq_mol_aa) {
2734 return buffer;
2735 }
2736 if (ajp->genome_view) {
2737 StringNCpy_0(buffer, "CON", buflen);
2738 }
2739 return buffer;
2740 }
2741
2742 /***************************************************************************
2743 *
2744 * UseGIforLocus to get the GI number for the locus and accession numbers.
2745 * and to get division using Gather
2746 *
2747 ***************************************************************************/
2748
UseGIforLocus(Asn2ffJobPtr ajp)2749 NLM_EXTERN void UseGIforLocus (Asn2ffJobPtr ajp)
2750 {
2751 CharPtr buffer;
2752 GBEntryPtr gbp;
2753
2754 for (gbp=ajp->asn2ffwep->gbp; gbp; gbp=gbp->next) {
2755 if (ajp->show_gi) {
2756 sprintf(gbp->accession, "%ld", (long) (gbp->gi));
2757 sprintf(gbp->locus, "%-10ld", (long) (gbp->gi));
2758 } else {
2759 MemSet((VoidPtr) gbp->accession, ' ', 10);
2760 MemSet((VoidPtr) gbp->locus, ' ', 10);
2761 }
2762 buffer = GetDivision(ajp, gbp);
2763 if (buffer[0] != NULLB) {
2764 StringCpy(gbp->div, " ");
2765 } else {
2766 StringNCpy_0(gbp->div, buffer, 4);
2767 }
2768 MemFree(buffer);
2769 }
2770 }
2771
2772 /*****************************************************************************
2773 *
2774 * ValidateLocus takes a locus name and assures that the format is
2775 * proper. if segmented set adds the segment number at the end and
2776 * returns new locus
2777 *****************************************************************************/
/*
 * ValidateLocus
 *
 * Builds the locus name for one record in new_buf and returns new_buf.
 *
 *   ajp        - job options (error_msgs is forwarded to CheckLocusLength)
 *   bsp        - Bioseq whose ids are searched for a collision-avoidance id
 *   base_locus - common name stem used for members of a segmented set
 *   total_segs - number of segments in the set, 0 if not segmented
 *   num_seg    - number of this segment
 *   new_buf    - output buffer supplied by the caller
 *   buf_locus  - preferred name for a non-segmented record
 *   orig_buf   - name used when buf_locus is empty
 *
 * When locus-collision handling is enabled and the Bioseq carries a
 * general id whose db is LOCUS_COLLISION_DB_NAME, the id's tag string is
 * used as the name.  Otherwise a non-segmented record gets buf_locus (or
 * orig_buf) length-checked by CheckLocusLength, and a segment gets
 * base_locus followed by num_seg, zero-padded to as many digits as
 * total_segs has (at most 4).
 */
CharPtr ValidateLocus(Asn2ffJobPtr ajp, BioseqPtr bsp, CharPtr base_locus, Int2 total_segs, Int2 num_seg, CharPtr new_buf, CharPtr buf_locus, CharPtr orig_buf)
{
    static Boolean  order_init = FALSE;
    static Uint1    rel_order[NUM_SEQID];
    Boolean         collision = FALSE;
    Char            buffer[30], seg_text[16];
    DbtagPtr        db;
    ObjectIdPtr     ob;
    SeqIdPtr        best_id, id;
    int             dex;
    Int2            length, num_of_digits, seg_digits, pad;

    if (! order_init) {
        /* Fill the whole table.  A hard-coded count of 18 was used before,
           which left any further entries at their zero-initialised value
           and would overrun the array if NUM_SEQID were smaller than 18. */
        for (dex = 0; dex < NUM_SEQID; dex++)
            rel_order[dex] = 255;
        rel_order[SEQID_GENERAL] = 14;
        order_init = TRUE;
    }

    if (ASN2FF_AVOID_LOCUS_COLL || ASN2FF_REPORT_LOCUS_COLL) {
        /* Check for LOCUS collisions with Karl's algorithm */
        id = bsp->id;
        best_id = SeqIdSelect(id, rel_order, NUM_SEQID);
        if (best_id != NULL && best_id->choice == SEQID_GENERAL) {
            db = (DbtagPtr) best_id->data.ptrvalue;
            if (StringCmp(db->db, LOCUS_COLLISION_DB_NAME) == 0) {
                ob = db->tag;
                if (ob != NULL) {
                    if (ASN2FF_REPORT_LOCUS_COLL) {
                        MakeAnAccession(buffer, id, 30);
                        flat2asn_delete_locus_user_string();
                        flat2asn_install_locus_user_string(buffer);
                        flat2asn_delete_accession_user_string();
                        flat2asn_install_accession_user_string(buffer);
                        ErrPostStr(SEV_WARNING, ERR_LOCUS_LocusNameCollision, "");
                    }
                    if (ASN2FF_AVOID_LOCUS_COLL) {
                        collision = TRUE;
                        StringNCpy_0(new_buf, ob->str, MAX_LOCUS_NAME_LEN+1);
                    }
                }
            }
        }
    }

    if (collision)
        return new_buf;

    if (total_segs == 0) {
        /* Not a segmented set. */
        if (StringLen(buf_locus) <= 0)
            new_buf = StringCpy(new_buf, orig_buf);
        else
            new_buf = StringCpy(new_buf, buf_locus);

        new_buf = CheckLocusLength (ajp->error_msgs, new_buf, MAX_LOCUS_NAME_LEN, 0);
        return new_buf;
    }

    /* width of the zero-padded segment number */
    if (total_segs < 10)
        num_of_digits = 1;
    else if (total_segs < 100)
        num_of_digits = 2;
    else if (total_segs < 1000)
        num_of_digits = 3;
    else {
        num_of_digits = 4;
        ErrPostStr(SEV_INFO, ERR_SEGMENT_MoreThan1000Segs, "");
    }

    /* Number of characters num_seg occupies.  This used to be left
       uninitialised for num_seg >= 1000 and then fed into a pointer
       offset; it is now always defined.  Values below 10 count as one
       character, exactly as before. */
    sprintf(seg_text, "%ld", (long) num_seg);
    seg_digits = (num_seg < 10) ? 1 : (Int2) StringLen(seg_text);

    length = StringLen(base_locus);
    StringCpy(new_buf, base_locus);

    /* left-pad with zeros; no padding when the number is already wider */
    for (pad = num_of_digits - seg_digits; pad > 0; pad--) {
        new_buf[length] = '0';
        length++;
    }
    StringCpy(new_buf + length, seg_text);

    return new_buf;
} /* ValidateLocus */
2879
2880 /***************************************************************************
2881 * example: NM_000756
2882 ***************************************************************************/
ValidateOtherAccession(CharPtr new_buf,CharPtr orig_buf)2883 static Int2 ValidateOtherAccession(CharPtr new_buf, CharPtr orig_buf)
2884 {
2885 Int2 count;
2886 Boolean FirstLetter=FALSE, FiveNum = FALSE;
2887
2888 if (orig_buf == NULL || orig_buf[0] == '\0') {
2889 return -3;
2890 }
2891 if (StringLen(orig_buf) >= 10) {
2892 return -4;
2893 }
2894 if (orig_buf[0] != 'N') {
2895 return -1;
2896 }
2897 if (orig_buf[2] != '_') {
2898 return -1;
2899 }
2900 for (count=3; count < 8; count++) {
2901 if(! IS_DIGIT(orig_buf[count]))
2902 break;
2903 }
2904 if (count == 8 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2905 StringCpy(new_buf, orig_buf);
2906 return 0;
2907
2908 } else {
2909 return -1;
2910 }
2911 }
2912
2913
2914 /****************************************************************************
2915 *
2916 * ValidateAccession takes an accession number and makes sure it is
2917 * in the proper format (starts with a capital letter that is followed
2918 * by five numbers).
2919 *
2920 * Return values are:
2921 * 0: no problem
2922 * -1: Accession did not start with a letter (or two letters)
2923 * -2: Accession did not contain five numbers (or six numbers after 2 letters)
2924 * -3: the original Accession number to be validated was NULL
2925 * -4: the original Accession number is too long (10 or more characters)
2926 *
2927 ****************************************************************************/
ValidateAccession(CharPtr new_buf,CharPtr orig_buf)2928 Int2 ValidateAccession(CharPtr new_buf, CharPtr orig_buf)
2929 {
2930 Int2 count, start_count, stop_count;
2931 Boolean FirstLetter=FALSE, FiveNum = FALSE;
2932
2933 if (orig_buf == NULL || orig_buf[0] == '\0') {
2934 return -3;
2935 }
2936 if (StringLen(orig_buf) >= 10) {
2937 return -4;
2938 }
2939 if (orig_buf[0] < 'A' || orig_buf[0] > 'Z') {
2940 return -1;
2941 } else {
2942 FirstLetter = TRUE;
2943 }
2944 for (count=1; count < 5; count++) {
2945 if(! IS_DIGIT(orig_buf[count]))
2946 break;
2947 }
2948 if (count == 5 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' '))
2949 FiveNum = TRUE;
2950
2951 if (FirstLetter == TRUE) {
2952 if (FiveNum == TRUE) { /* 1 + 5 accession*/
2953 StringCpy(new_buf, orig_buf);
2954 return 0;
2955 } else if (IS_ALPHA(orig_buf[1])) { /* 2 + 6 accession */
2956 if (orig_buf[1] < 'A' || orig_buf[1] > 'Z') {
2957 return -1;
2958 }
2959 start_count = 2;
2960 stop_count = 7;
2961 if (orig_buf[0] == 'N' || orig_buf[0] == 'X') {
2962 if ((orig_buf[1] == 'M' || orig_buf[1] == 'C'
2963 || orig_buf[1] == 'T' || orig_buf[1] == 'P'
2964 || orig_buf[1] == 'G')
2965 && orig_buf[2] == '_') {
2966 start_count = 3;
2967 stop_count = 8;
2968 }
2969 }
2970 for (count=start_count; count < stop_count; count++) {
2971 if(! IS_DIGIT(orig_buf[count]))
2972 break;
2973 }
2974 if (count == stop_count && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2975 StringCpy(new_buf, orig_buf);
2976 return 0;
2977 } else if (IS_ALPHA(orig_buf[2])) { /* 3 + 5 accession */
2978 if (orig_buf[0] =='A' || orig_buf[0] == 'B' || orig_buf[0] == 'C') {
2979 for (count=3; count < 7; count++) {
2980 if(! IS_DIGIT(orig_buf[count]))
2981 break;
2982 }
2983 if (count == 7 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2984 StringCpy(new_buf, orig_buf);
2985 return 0;
2986 } else {
2987 return -2;
2988 }
2989 } else {
2990 return -2;
2991 }
2992 } else {
2993 return -2;
2994 }
2995 } else {
2996 return -2;
2997 }
2998 } else {
2999 return -1;
3000 }
3001 }
3002
3003 /**************************************************************************
3004 *MakeBaseAccession
3005 *
3006 * MakeBaseAccession takes a BioseqPtr bsp and returns a copy of the
3007 * accession of the Seq-id chosen by SeqIdSelect, provided that id is
3008 * of a supported type and the accession passes ValidateAccession.
3009 * Otherwise NULL is returned. The user should deallocate the CharPtr.
3010 **************************************************************************/
3011
MakeBaseAccession(BioseqPtr bsp)3012 CharPtr MakeBaseAccession (BioseqPtr bsp)
3013
3014 {
3015 Char buffer[MAX_ACCESSION_LEN+1];
3016 CharPtr buf_acc=buffer;
3017 Int2 status = -1;
3018 SeqIdPtr sip, isip;
3019 TextSeqIdPtr tsip;
3020
3021
3022 if (bsp == NULL)
3023 return NULL;
3024 isip = bsp->id;
3025 sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3026 if (sip && (sip->choice == SEQID_GENBANK ||
3027 sip->choice == SEQID_EMBL ||
3028 sip->choice == SEQID_PIR ||
3029 sip->choice == SEQID_SWISSPROT ||
3030 sip->choice == SEQID_DDBJ ||
3031 sip->choice == SEQID_PRF ||
3032 sip->choice == SEQID_OTHER ||
3033 sip->choice == SEQID_TPG ||
3034 sip->choice == SEQID_TPE ||
3035 sip->choice == SEQID_TPD))
3036 {
3037 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3038 switch (sip->choice) {
3039 case SEQID_GENBANK:
3040 case SEQID_EMBL:
3041 case SEQID_DDBJ:
3042 case SEQID_TPG:
3043 case SEQID_TPE:
3044 case SEQID_TPD:
3045 case SEQID_PIR:
3046 case SEQID_SWISSPROT:
3047 status = ValidateAccession(buf_acc, tsip->accession);
3048 }
3049 }
3050 if (status < 0)
3051 return NULL;
3052
3053 return (StringSave(buf_acc));
3054 }
3055
3056 /***************************************************************************
3057 *
3058 * MakeBaseLocus takes a Asn2ffJobPtr and a CharPtr (base_locus)
3059 * and returns a CharPtr which is the new base_locus. Checking is
3060 * done to assure suitability of the new base locus name (i.e.,
3061 * no more than 15 characters for less than 10 segments and no more
3062 * than 14 characters for 10 or more segments).
3063 *
3064 ***************************************************************************/
3065
/*
 * MakeBaseLocusAwp
 *
 * Derives the name stem shared by all members of a segmented set and
 * writes it to base_locus (caller-supplied buffer), which is also the
 * return value.
 *
 * Sources are tried in this order:
 *   1. the name on the text Seq-id of the segmented Bioseq
 *      (ajp->asn2ffwep->seg), with a leading "SEG_" removed and, where it
 *      can be matched against the first segment's name, the trailing
 *      segment number stripped;
 *   2. the first segment carrying a named text Seq-id, minus as many
 *      trailing characters as the segment count has digits;
 *   3. the first segment's local id or text-id name, unmodified.
 * The result is always passed through CheckLocusLength with a limit of
 * MAX_LOCUS_NAME_LEN minus the number of digits in the segment count.
 */
CharPtr MakeBaseLocusAwp (Asn2ffJobPtr ajp, CharPtr base_locus)
{
    BioseqPtr     bsp, bbsp = NULL;
    Int2          index, length, base_locus_max, name_len, num_of_digits, num_seg;
    SeqIdPtr      sip, bsip = NULL, isip = NULL;
    TextSeqIdPtr  tsip = NULL, btsip = NULL;
    ObjectIdPtr   obj;
    Char          buffer[21], temp_buf[21];
    CharPtr       localbuf = buffer, name, ptr = temp_buf;
    CharPtr       tmp = "SEG_";
    Asn2ffWEPtr   awp;
    GBEntryPtr    gbp;

    base_locus[0] = '\0';
    awp = ajp->asn2ffwep;
    num_seg = awp->total_seg;
    if (num_seg < 10)
        num_of_digits = 1;
    else if (num_seg < 100)
        num_of_digits = 2;
    else if (num_seg < 1000)
        num_of_digits = 3;
    else {
        /* Previously left uninitialised on this path; 4 matches what
           ValidateLocus uses for 1000 or more segments. */
        num_of_digits = 4;
        ErrPostStr(SEV_INFO, ERR_SEGMENT_MoreThan1000Segs, "");
    }
    base_locus_max = MAX_LOCUS_NAME_LEN - num_of_digits;

    /* look for base locus in segmented bioseq */
    bbsp = awp->seg;   /* segmented Bioseq in segmented set */
    if (bbsp) {
        bsip = SeqIdSelect(bbsp->id, fasta_order, NUM_SEQID);
    }
    if (bsip && (bsip->choice == SEQID_GENBANK ||
                 bsip->choice == SEQID_EMBL ||
                 bsip->choice == SEQID_DDBJ ||
                 bsip->choice == SEQID_SWISSPROT ||
                 bsip->choice == SEQID_PIR ||
                 bsip->choice == SEQID_OTHER ||
                 bsip->choice == SEQID_TPG ||
                 bsip->choice == SEQID_TPE ||
                 bsip->choice == SEQID_TPD)) {
        btsip = (TextSeqIdPtr) bsip->data.ptrvalue;
    }

    if (btsip && StringLen(btsip->name) > 0) {
        /* bounded copy: the name used to be copied into the 21-byte
           scratch buffer without any length limit */
        StringNCpy_0(localbuf, btsip->name, sizeof(buffer));
        if (StringNCmp(localbuf, tmp, 4) == 0) {
            /* check if name starts with "SEG_", remove if it does. */
            StringCpy(ptr, localbuf + 4);
            length = StringLen(ptr);
            /* length > 0: a bare "SEG_" leaves nothing to inspect */
            if (length > 0 && ptr[length-1] == '1') {
                bsp = awp->gbp->bsp;
                isip = bsp->id;
                sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
                /* NOTE(review): the selected id is treated as a text
                   Seq-id without looking at sip->choice -- confirm that
                   the first segment can only carry a text id here. */
                if (sip &&
                    (name = ((TextSeqIdPtr) sip->data.ptrvalue)->name) != NULL) {
                    name_len = StringLen(name);
                    if (name_len == length && name[length-1] == ptr[length-1]) {
                        /*
                         * Walk left from the second-to-last character,
                         * trimming base_locus while '0' characters are
                         * seen.  NOTE(review): an increasing index paired
                         * with a ">=" test looks unintended; it is kept
                         * as it was, with an upper bound added so that
                         * ptr[length-index] stays inside the buffer.
                         */
                        for (index = 2;
                             index >= num_of_digits && index <= length;
                             index++) {
                            /* The following is *really* '0'! */
                            if (ptr[length-index] == '0') {
                                if (ptr[length-index] == name[length-index]) {
                                    StringNCpy(base_locus, ptr, length-index);
                                    base_locus[length-index] = '\0';
                                } else {
                                    StringNCpy(base_locus, ptr, length-index+1);
                                    base_locus[length-index+1] = '\0';
                                }
                            } else {
                                StringNCpy(base_locus, ptr, length-index+1);
                                base_locus[length-index+1] = '\0';
                                break;
                            }
                        }
                    }
                }
            }
            /* If nothing else worked, use base locus anyway. */
            if (base_locus[0] == '\0')
                StringCpy(base_locus, ptr);
        }
        if (base_locus[0] == '\0')
            StringCpy(base_locus, btsip->name);

        /* check for length, truncate if necessary. */
        base_locus = CheckLocusLength (ajp->error_msgs, base_locus, base_locus_max, num_seg);
        return base_locus;
    }

    /* Look for at least one sensible locus in all segments. */
    for (gbp = awp->gbp; gbp; gbp = gbp->next) {
        bsp = gbp->bsp;
        isip = bsp->id;
        sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
        if (sip && (sip->choice == SEQID_GENBANK ||
                    sip->choice == SEQID_EMBL ||
                    sip->choice == SEQID_DDBJ ||
                    sip->choice == SEQID_SWISSPROT ||
                    sip->choice == SEQID_OTHER ||
                    sip->choice == SEQID_PIR ||
                    sip->choice == SEQID_TPG ||
                    sip->choice == SEQID_TPE ||
                    sip->choice == SEQID_TPD)) {
            tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        }
        if (tsip && tsip->name && StringLen(tsip->name) > 0) {
            base_locus = StringCpy(base_locus, tsip->name);
            length = StringLen(base_locus);
            /* drop the trailing segment digits; a name shorter than that
               is left alone (this used to write before the buffer) */
            if (length >= num_of_digits)
                base_locus[length-num_of_digits] = '\0';
            base_locus = CheckLocusLength (ajp->error_msgs,
                                           base_locus, base_locus_max, num_seg);
            return base_locus;
        }
    }

    /* No option left but to take the first locus name. */
    bsp = awp->gbp->bsp;
    isip = bsp->id;
    sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
    if (sip && sip->choice == SEQID_LOCAL) {
        obj = (ObjectIdPtr) sip->data.ptrvalue;
        if (obj->str == NULL) {
            sprintf(base_locus, "%ld", (long)(obj->id));
        } else {
            base_locus = StringCpy(base_locus, obj->str);
        }
    } else if (sip && (sip->choice == SEQID_GENBANK ||
                       sip->choice == SEQID_EMBL ||
                       sip->choice == SEQID_SWISSPROT ||
                       sip->choice == SEQID_DDBJ ||
                       sip->choice == SEQID_PRF ||
                       sip->choice == SEQID_PDB ||
                       sip->choice == SEQID_OTHER ||
                       sip->choice == SEQID_PIR ||
                       sip->choice == SEQID_TPG ||
                       sip->choice == SEQID_TPE ||
                       sip->choice == SEQID_TPD)) {
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        base_locus = StringCpy(base_locus, tsip->name);
    }
    base_locus = CheckLocusLength (ajp->error_msgs, base_locus, base_locus_max, num_seg);
    return base_locus;

} /* MakeBaseLocusAwp */
3213
ValidateVersion(SeqIdPtr sid,Asn2ffJobPtr ajp)3214 static Boolean ValidateVersion(SeqIdPtr sid, Asn2ffJobPtr ajp)
3215 {
3216 TextSeqIdPtr tsip;
3217
3218 if (ajp->forgbrel == FALSE)
3219 return TRUE;
3220 switch (sid->choice) {
3221 case SEQID_GENBANK:
3222 case SEQID_EMBL:
3223 case SEQID_DDBJ:
3224 case SEQID_OTHER:
3225 case SEQID_TPG:
3226 case SEQID_TPE:
3227 case SEQID_TPD:
3228 tsip = (TextSeqIdPtr) sid->data.ptrvalue;
3229 if (tsip->version == 0 || tsip->version == INT2_MIN) {
3230 return FALSE;
3231 }
3232 }
3233 return TRUE;
3234 }
3235
GetLocusPartsAwp(Asn2ffJobPtr ajp)3236 NLM_EXTERN void GetLocusPartsAwp (Asn2ffJobPtr ajp)
3237 {
3238 BioseqPtr bsp=NULL;
3239 Asn2ffWEPtr awp;
3240 SeqIdPtr sip, isip;
3241 Int2 num_seg=0, total_segs=0;
3242 TextSeqIdPtr tsip;
3243 Char buf_a[MAX_ACCESSION_LEN+1], buf_l[MAX_ACCESSION_LEN+1],
3244 base_l[MAX_ACCESSION_LEN+1];
3245 CharPtr buffer, buf_acc=buf_a, buf_locus=buf_l, base_locus=base_l, base_a;
3246 GBEntryPtr gbp;
3247 CharPtr loc;
3248 Int2 acc_len;
3249
3250 awp = ajp->asn2ffwep;
3251 if (ajp->slp) {
3252 for (gbp = awp->gbp; gbp; gbp = gbp->next) {
3253 buffer = GetDivision(ajp, gbp);
3254 if (buffer[0] != NULLB) {
3255 StringNCpy_0(gbp->div, buffer, 4);
3256 MemFree(buffer);
3257 }
3258 if ((bsp = BioseqFindFromSeqLoc(ajp->slp)) != NULL) {
3259 CharPtr flatloc;
3260
3261 isip = SeqIdSelect(gbp->bsp->id, fasta_order, NUM_SEQID);
3262 if (isip == NULL)
3263 isip = gbp->bsp->id;
3264 SeqIdWrite(isip,
3265 buf_acc, PRINTID_TEXTID_ACCESSION, MAX_ACCESSION_LEN);
3266
3267 if (ajp->old_locus_fmt == TRUE)
3268 sprintf(gbp->locus, "%-10s", buf_acc);
3269 else
3270 sprintf(gbp->locus, "%-16s", buf_acc);
3271
3272 flatloc = FlatLoc(bsp, ajp->slp);
3273 sprintf(gbp->accession, "%s REGION: %s", buf_acc, flatloc);
3274 flatloc = MemFree(flatloc);
3275 if (ajp->show_version) {
3276 SeqIdWrite(isip,
3277 buf_acc, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3278 StringNCpy_0(gbp->version,
3279 buf_acc, MAX_ACCESSION_LEN+1);
3280 }
3281 } else {
3282 loc = SeqLocPrint(ajp->slp);
3283 StringNCpy_0(gbp->locus, loc, MAX_LOCUS_NAME_LEN+1);
3284 acc_len = MIN(StringLen(loc), 60);
3285 StringNCpy_0(gbp->accession, loc, acc_len+1);
3286 MemFree(loc);
3287 }
3288 }
3289 return;
3290 }
3291 if (ajp->only_one) {
3292 for (gbp = awp->gbp; gbp; gbp = gbp->next) {
3293 if (gbp->bsp == NULL) {
3294 continue;
3295 }
3296 bsp = gbp->bsp;
3297 GetGINumber(gbp);
3298 buffer = GetDivision(ajp, gbp);
3299 if (buffer[0] != NULLB) {
3300 StringNCpy_0(gbp->div, buffer, 4);
3301 MemFree(buffer);
3302 }
3303 isip = SeqIdSelect(gbp->bsp->id, fasta_order, NUM_SEQID);
3304 if (isip == NULL)
3305 isip = gbp->bsp->id;
3306 SeqIdWrite(isip, buf_acc,
3307 PRINTID_TEXTID_ACCESSION, MAX_ACCESSION_LEN+1);
3308 StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3309
3310 if (ajp->old_locus_fmt == TRUE)
3311 sprintf(gbp->locus, "%-10s", buf_acc);
3312 else
3313 sprintf(gbp->locus, "%-16s", buf_acc);
3314
3315 if (ajp->show_version) {
3316 SeqIdWrite(isip, buf_acc,
3317 PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3318 StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+1);
3319 }
3320 }
3321 return;
3322 }
3323 total_segs = awp->total_seg;
3324 base_a = MakeBaseAccession(awp->seg);
3325 base_locus = MakeBaseLocusAwp(ajp, base_locus);
3326 StringNCpy_0(ajp->asn2ffwep->base_name, base_locus, 11);
3327
3328 for (gbp = awp->gbp; gbp != NULL; gbp = gbp->next) {
3329 if (gbp->bsp == NULL) {
3330 continue;
3331 }
3332 bsp = gbp->bsp;
3333 if ((isip = gbp->bsp->id) == NULL) {
3334 continue;
3335 }
3336 buffer = GetDivision(ajp, gbp);
3337 if (buffer[0] != NULLB) {
3338 StringNCpy_0(gbp->div, buffer, 4);
3339 MemFree(buffer);
3340 }
3341 num_seg = gbp->num_seg;
3342 sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3343 if (sip == NULL) {
3344 sip = isip;
3345 }
3346 switch (sip->choice) {
3347 case SEQID_GENBANK:
3348 case SEQID_EMBL:
3349 case SEQID_DDBJ:
3350 case SEQID_OTHER:
3351 case SEQID_TPG:
3352 case SEQID_TPE:
3353 case SEQID_TPD:
3354 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3355 if ((ValidateAccession(buf_acc, tsip->accession)) < 0) {
3356 if (base_a != NULL) {
3357 StringNCpy_0(buf_acc, base_a, MAX_ACCESSION_LEN+1);
3358 } else {
3359 buf_acc = MakeAnAccession(buf_acc, isip,
3360 MAX_ACCESSION_LEN+1);
3361 }
3362 }
3363 buf_locus = ValidateLocus(ajp, bsp, base_locus,
3364 total_segs, num_seg, buf_locus, tsip->name, buf_acc);
3365 StringNCpy_0(gbp->accession,
3366 buf_acc, MAX_ACCESSION_LEN+1);
3367 if (sip->choice == SEQID_OTHER
3368 && StringNCmp(tsip->accession, "NT_", 3) == 0) {
3369 if (ajp->old_locus_fmt == TRUE)
3370 sprintf(gbp->locus, "%-10s", buf_acc);
3371 else
3372 sprintf(gbp->locus, "%-16s", buf_acc);
3373 } else {
3374 if (ajp->old_locus_fmt == TRUE)
3375 sprintf(gbp->locus, "%-10s", buf_locus);
3376 else
3377 sprintf(gbp->locus, "%-16s", buf_locus);
3378 }
3379 num_seg--;
3380 if (ajp->show_version) {
3381 if (ValidateVersion(sip, ajp) == FALSE) {
3382 gbp->bsp = NULL;
3383 ErrPostEx(SEV_ERROR, ERR_ACCESSION_No_VERSION_Number, "%s", gbp->accession);
3384 continue;
3385 }
3386 SeqIdWrite(sip, buf_acc,
3387 PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+6);
3388 StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+6);
3389 }
3390 break;
3391 case SEQID_LOCAL:
3392 if ((((ObjectIdPtr)sip->data.ptrvalue)->str) == NULL) {
3393 buf_acc[0] = 'X';
3394 sprintf(buf_acc+1, "%ld",
3395 (long)((ObjectIdPtr)sip->data.ptrvalue)->id);
3396 } else {
3397 StringNCpy_0(buf_acc,
3398 ((ObjectIdPtr)sip->data.ptrvalue)->str, MAX_ACCESSION_LEN+1);
3399 }
3400 buf_locus = ValidateLocus(ajp, bsp, base_locus,
3401 total_segs, num_seg,buf_locus, buf_acc, buf_acc);
3402 StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3403
3404 if (ajp->old_locus_fmt == TRUE)
3405 sprintf(gbp->locus, "%-10s", buf_locus);
3406 else
3407 sprintf(gbp->locus, "%-16s", buf_locus);
3408
3409 num_seg--;
3410 break;
3411 case SEQID_GI:
3412 sprintf(buf_acc, "%ld", (long) (sip->data.intvalue));
3413 buf_locus = ValidateLocus(ajp, bsp, base_locus,
3414 total_segs, num_seg, buf_locus, buf_acc, buf_acc);
3415 StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3416
3417 if (ajp->old_locus_fmt == TRUE)
3418 sprintf(gbp->locus, "%-10s", buf_locus);
3419 else
3420 sprintf(gbp->locus, "%-16s", buf_locus);
3421
3422 num_seg--;
3423 break;
3424
3425 case SEQID_PIR:
3426 case SEQID_SWISSPROT:
3427 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3428 if ((ValidateAccession(buf_acc, tsip->accession)) < 0) {
3429 if (base_a != NULL) {
3430 StringNCpy_0(buf_acc, base_a, MAX_ACCESSION_LEN+1);
3431 } else {
3432 buf_acc = MakeAnAccession(buf_acc,
3433 isip, MAX_ACCESSION_LEN);
3434 }
3435 }
3436 if (ajp->show_version) {
3437 SeqIdWrite(sip, buf_acc,
3438 PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+6);
3439 StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+6);
3440 }
3441 buf_locus = ValidateLocus(ajp, bsp, base_locus,
3442 total_segs, num_seg, buf_locus, tsip->name, buf_acc);
3443 StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3444 if (sip->choice == SEQID_OTHER
3445 && StringNCmp(tsip->accession, "NT_", 3) == 0) {
3446 if (ajp->old_locus_fmt == TRUE)
3447 sprintf(gbp->locus, "%-10s", buf_acc);
3448 else
3449 sprintf(gbp->locus, "%-16s", buf_acc);
3450 } else {
3451 if (ajp->old_locus_fmt == TRUE)
3452 sprintf(gbp->locus, "%-10s", buf_locus);
3453 else
3454 sprintf(gbp->locus, "%-16s", buf_locus);
3455 }
3456 num_seg--;
3457
3458 break;
3459 default:
3460 buf_acc = MakeAnAccession(buf_acc, isip, MAX_ACCESSION_LEN+1);
3461 buf_locus = ValidateLocus(ajp, bsp, base_locus,
3462 total_segs, num_seg, buf_locus, buf_acc, buf_acc);
3463 StringNCpy_0(gbp->accession, buf_acc,
3464 MAX_ACCESSION_LEN+1);
3465 if (ajp->old_locus_fmt == TRUE)
3466 sprintf(gbp->locus, "%-10s", buf_locus);
3467 else
3468 sprintf(gbp->locus, "%-16s", buf_locus);
3469
3470 num_seg--;
3471 break;
3472 }
3473 }
3474 if (base_a != NULL)
3475 base_a = MemFree(base_a);
3476
3477 }
3478 /**************************************************************************
3479 * Looks in the descriptor and feature->xref for any extra-accessions.
3480 **************************************************************************/
3481
AddExtraAccessions(Asn2ffJobPtr ajp,GBEntryPtr gbp)3482 NLM_EXTERN void AddExtraAccessions(Asn2ffJobPtr ajp, GBEntryPtr gbp)
3483
3484 {
3485 BioseqPtr bsp;
3486 Char buffer[10];
3487 CharPtr ptr=buffer, ac;
3488 EMBLBlockPtr eb;
3489 GBBlockPtr gb;
3490 Int2 index, status;
3491 SeqFeatPtr sfp;
3492 SeqIdPtr xid;
3493 SeqIntPtr si;
3494 SeqLocPtr xref;
3495 TextSeqIdPtr text;
3496 ValNodePtr extra_access=NULL, location=NULL, vnp;
3497 SortStructPtr p;
3498 Boolean /*UNUSED*/ncbi = FALSE;
3499
3500 if (gbp == NULL) {
3501 return;
3502 }
3503 if ((bsp = gbp->bsp) == NULL) {
3504 return;
3505 }
3506 ac = gbp->accession;
3507 if (ac && *ac == 'U') {
3508 ncbi = TRUE;
3509 }
3510 for (vnp = bsp->descr; vnp; vnp=vnp->next) {
3511 if (vnp->choice == Seq_descr_genbank) {
3512 break;
3513 }
3514 }
3515 if (vnp != NULL) {
3516 gb = (GBBlockPtr) vnp->data.ptrvalue;
3517 extra_access = gb->extra_accessions;
3518 if (extra_access != NULL) {
3519 for (vnp=extra_access; vnp != NULL; vnp=vnp->next) {
3520 status = ValidateAccession(ptr, vnp->data.ptrvalue);
3521 if (status == 0) {
3522 if (ajp->format == EMBL_FMT || ajp->format ==
3523 PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3524 ff_AddChar(';');
3525 } else {
3526 ff_AddChar(' ');
3527 }
3528 /* www_extra_acc(ptr, ncbi); */
3529 ff_AddString( ptr);
3530 }
3531 }
3532 }
3533 }
3534 for (vnp = bsp->descr; vnp; vnp=vnp->next) {
3535 if (vnp->choice == Seq_descr_embl) {
3536 break;
3537 }
3538 }
3539
3540 if (vnp != NULL) {
3541 eb = (EMBLBlockPtr) vnp->data.ptrvalue;
3542 extra_access = eb->extra_acc;
3543 if (extra_access != NULL) {
3544 for (vnp=extra_access; vnp != NULL; vnp=vnp->next) {
3545 status = ValidateAccession(ptr, vnp->data.ptrvalue);
3546 if (status == 0) {
3547 if (ajp->format == EMBL_FMT || ajp->format ==
3548 PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3549 ff_AddChar(';');
3550 } else {
3551 ff_AddChar(' ');
3552 }
3553 /* www_extra_acc(ptr, ncbi); */
3554 ff_AddString( ptr);
3555 }
3556 }
3557 }
3558 }
3559 if (gbp->feat) {
3560 p = gbp->feat->Xreflist;
3561 for (index=0; index < gbp->feat->sfpXrefsize; index++, p++) {
3562 if (location == NULL) {
3563 location = ValNodeNew(NULL);
3564 si = SeqIntNew();
3565 location->choice = SEQLOC_INT;
3566 location->data.ptrvalue = si;
3567 }
3568 si->from = 0;
3569 bsp = gbp->bsp;
3570 si->to = bsp->length - 1;
3571 si->id = bsp->id; /* Don't delete id!! */
3572 if ((sfp = p->sfp) == NULL) {
3573 GatherItemWithLock(p->entityID,
3574 p->itemID, p->itemtype, &sfp, find_item);
3575 }
3576 if (sfp == NULL) {
3577 continue;
3578 }
3579 if (SeqLocCompare(sfp->location, location) != 0) {
3580 xref = (SeqLocPtr) sfp->data.value.ptrvalue;
3581 xid = (SeqIdPtr) xref->data.ptrvalue;
3582 if (xid->choice == 5 || xid->choice == 6 ||
3583 xid->choice == 13) {
3584 text = (TextSeqIdPtr) xid->data.ptrvalue;
3585 status = ValidateAccession(ptr, text->accession);
3586 if (status == 0) {
3587 if (ajp->format == EMBL_FMT || ajp->format ==
3588 PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3589 ff_AddChar(';');
3590 } else {
3591 ff_AddChar(' ');
3592 }
3593 /* www_extra_acc(ptr, ncbi); */
3594 ff_AddString( ptr);
3595 }
3596 }
3597 }
3598 }
3599 }
3600
3601 if (location) {
3602 si->id = NULL;
3603 SeqIntFree(si);
3604 ValNodeFree(location);
3605 }
3606
3607 return;
3608 }static Boolean CompareToAwpList (BioseqPtr bsp, Asn2ffWEPtr awp)
3609
3610 {
3611 GBEntryPtr gbp;
3612
3613 if (bsp == NULL) {
3614 return FALSE;
3615 }
3616 for (gbp = awp->gbp; gbp != NULL; gbp = gbp->next) {
3617 if (bsp == gbp->bsp) {
3618 return TRUE;
3619 }
3620 }
3621 return FALSE;
3622 }
3623
GBEntryNew(void)3624 static GBEntryPtr GBEntryNew(void)
3625 {
3626 GBEntryPtr gbp;
3627
3628 gbp = (GBEntryPtr) MemNew(sizeof(GBEntry));
3629 gbp->feat = NULL;
3630 gbp->descr = NULL;
3631 gbp->source_info = NULL;
3632 gbp->comm = NULL;
3633 gbp->map = FALSE;
3634
3635 return gbp;
3636 }
3637
/*
 * tie_next_gbp
 *   Appends next to the end of the chain that starts at head.
 *
 *   Returns next when head is NULL, otherwise head.
 */
static GBEntryPtr tie_next_gbp(GBEntryPtr head, GBEntryPtr next)
{
	GBEntryPtr tail;

	if (head == NULL) {
		return next;
	}

	/* Advance to the last node of the chain. */
	tail = head;
	while (tail->next != NULL) {
		tail = tail->next;
	}
	tail->next = next;
	return head;
}
3652
CreateGBEntry(Asn2ffWEPtr awp,BioseqPtr bsp,Int2 eID,Int2 iID,Int2 itype)3653 static GBEntryPtr CreateGBEntry(Asn2ffWEPtr awp, BioseqPtr bsp,
3654 Int2 eID, Int2 iID, Int2 itype)
3655 {
3656 GBEntryPtr gbep;
3657
3658 gbep = GBEntryNew();
3659 gbep->bsp = bsp;
3660 gbep->length = bsp->length;
3661 gbep->entityID = eID;
3662 gbep->itemID = iID;
3663 gbep->itemtype = itype;
3664 awp->gbp = tie_next_gbp(awp->gbp, gbep);
3665
3666 return gbep;
3667 }
3668
3669 /************************************************************************
3670 * SeqToAwp()
3671 * gather callback to create a list of GenBank entries
3672 *************************************************************************/
3673
SeqToAwp(GatherContextPtr gcp)3674 NLM_EXTERN Boolean SeqToAwp (GatherContextPtr gcp)
3675
3676 {
3677 BioseqPtr bsp;
3678 SeqEntryPtr ep;
3679 BioseqSetPtr bssp;
3680 SeqLocPtr slp;
3681 Asn2ffWEPtr awp;
3682 Asn2ffJobPtr ajp;
3683 GBEntryPtr gbep;
3684 SeqIdPtr isip, sip;
3685 Uint1 format;
3686 Boolean is_www = get_www();
3687
3688 ajp = (Asn2ffJobPtr) gcp->userdata;
3689 awp = ajp->asn2ffwep;
3690 format = ajp->format;
3691 switch (gcp->thistype)
3692 {
3693 case OBJ_BIOSEQ:
3694 bsp = gcp->thisitem;
3695 if (bsp->repr == Seq_repr_seg) {
3696 if (ajp->genome_view || ajp->only_one) {
3697 gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3698 gcp->itemID, gcp->thistype);
3699 if (ajp->only_one && !ajp->map_view) {
3700 return FALSE;
3701 }
3702 }
3703 if (ISA_na(bsp->mol) && (format == GENBANK_FMT ||
3704 format == EMBL_FMT || format == PSEUDOEMBL_FMT
3705 || format == GRAPHIK_FMT)) {
3706 awp->seg = bsp;
3707 } else if (ISA_aa(bsp->mol) &&
3708 (format == GENPEPT_FMT || format == EMBLPEPT_FMT
3709 || format == GRAPHIK_FMT)) {
3710 awp->seg = bsp;
3711 }
3712 }
3713 if (ASN2FF_LOOK_FOR_SEQ == FALSE) {
3714 if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT
3715 || (ISA_aa(bsp->mol) && format == GRAPHIK_FMT)) {
3716 if (ISA_aa(bsp->mol) && (bsp->repr == Seq_repr_raw
3717 || bsp->repr == Seq_repr_const || bsp->repr == Seq_repr_delta
3718 || ((is_www || ajp->mode != RELEASE_MODE) && bsp->repr == Seq_repr_virtual))) {
3719 gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3720 gcp->itemID, gcp->thistype);
3721 ++awp->total_seg;
3722 gbep->num_seg = awp->total_seg;
3723 }
3724 } else {
3725 if (ISA_na(bsp->mol) && (bsp->repr == Seq_repr_raw
3726 || bsp->repr == Seq_repr_const|| bsp->repr == Seq_repr_delta
3727 || (is_www && bsp->repr == Seq_repr_virtual))) {
3728 if (ASN2FF_LOCAL_ID == FALSE) {
3729 sip = SeqIdSelect(bsp->id, fasta_order, NUM_SEQID);
3730 if (sip && sip->choice != SEQID_LOCAL) {
3731 gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3732 gcp->itemID, gcp->thistype);
3733 ++awp->total_seg;
3734 gbep->num_seg = awp->total_seg;
3735 }
3736 } else {
3737 gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3738 gcp->itemID, gcp->thistype);
3739 ++awp->total_seg;
3740 gbep->num_seg = awp->total_seg;
3741 }
3742 } else if (ISA_na(bsp->mol) && bsp->repr == Seq_repr_map &&
3743 ajp->map_view) {
3744 gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3745 gcp->itemID, gcp->thistype);
3746 gbep->map = TRUE;
3747 }
3748 }
3749 } else {
3750 if (bsp->seq_ext_type == 1) {
3751 slp = bsp->seq_ext;
3752 while (slp) {
3753 bsp = BioseqFind(SeqLocId(slp));
3754 if (bsp->repr == Seq_repr_raw ||
3755 bsp->repr == Seq_repr_const
3756 || bsp->repr == Seq_repr_delta
3757 || (is_www && bsp->repr == Seq_repr_virtual)) {
3758 if (CompareToAwpList(bsp, awp) == FALSE) {
3759 if (ASN2FF_LOCAL_ID == FALSE) {
3760 isip = bsp->id;
3761 sip = SeqIdSelect(isip,
3762 fasta_order, NUM_SEQID);
3763 if (sip && sip->choice != SEQID_LOCAL) {
3764 gbep = CreateGBEntry(awp, bsp,
3765 gcp->entityID, gcp->itemID,
3766 gcp->thistype);
3767 ++awp->total_seg;
3768 gbep->num_seg = awp->total_seg;
3769 } else if (sip->choice == SEQID_LOCAL &&
3770 (format == GENPEPT_FMT ||
3771 format == EMBLPEPT_FMT)) {
3772 gbep = CreateGBEntry(awp, bsp,
3773 gcp->entityID, gcp->itemID,
3774 gcp->thistype);
3775 ++awp->total_seg;
3776 gbep->num_seg = awp->total_seg;
3777 } else {
3778 gbep = CreateGBEntry(awp, bsp,
3779 gcp->entityID, gcp->itemID,
3780 gcp->thistype);
3781 ++awp->total_seg;
3782 gbep->num_seg = awp->total_seg;
3783 }
3784 }
3785 }
3786 }
3787 slp = slp->next;
3788 }
3789 } else if (ISA_na(bsp->mol) && (bsp->repr == Seq_repr_raw ||
3790 bsp->repr == Seq_repr_const
3791 || bsp->repr == Seq_repr_delta
3792 || (is_www && bsp->repr == Seq_repr_virtual))) {
3793 if (CompareToAwpList(bsp, awp) == FALSE) {
3794 if (ASN2FF_LOCAL_ID == FALSE) {
3795 isip = bsp->id;
3796 sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3797 if (sip && sip->choice != SEQID_LOCAL) {
3798 gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3799 gcp->itemID, gcp->thistype);
3800 ++awp->total_seg;
3801 gbep->num_seg = awp->total_seg;
3802 } else if (sip->choice == SEQID_LOCAL &&
3803 (format == GENPEPT_FMT ||
3804 format == EMBLPEPT_FMT)) {
3805 gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3806 gcp->itemID, gcp->thistype);
3807 ++awp->total_seg;
3808 gbep->num_seg = awp->total_seg;
3809 } else {
3810 gbep = CreateGBEntry(awp, bsp, gcp->entityID,
3811 gcp->itemID, gcp->thistype);
3812 ++awp->total_seg;
3813 gbep->num_seg = awp->total_seg;
3814 }
3815 }
3816 }
3817 }
3818 }
3819 break;
3820 case OBJ_BIOSEQSET:
3821 bssp = (BioseqSetPtr) gcp->thisitem;
3822 if (bssp->_class == 4) {/*parts*/
3823 ep = bssp->seq_set;
3824 if (ep != NULL) {
3825 bsp = ep->data.ptrvalue;
3826 if (ISA_na(bsp->mol) && (format == GENBANK_FMT ||
3827 format == EMBL_FMT || format == PSEUDOEMBL_FMT)) {
3828 awp->parts = bssp;
3829 } else if (ISA_aa(bsp->mol) &&
3830 (format == GENPEPT_FMT || format == EMBLPEPT_FMT)) {
3831 awp->parts = bssp;
3832 }
3833 }
3834 }
3835 break;
3836 default:
3837 break;
3838
3839 }
3840 return TRUE;
3841
3842 }
3843
3844
3845
3846