1 /* edutil.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: edutil.c
27 *
28 * Author: James Ostell
29 *
30 * Version Creation Date: 2/4/94
31 *
32 * $Revision: 6.70 $
33 *
34 * File Description: Sequence editing utilities
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 * $Log: edutil.c,v $
42 * Revision 6.70 2011/07/27 12:50:38 bollin
43 * Corrected bug in SeqLocDeleteEx - set correct flag based on strand of location.
44 *
45 *
46 * Committed on the Free edition of March Hare Software CVSNT Client.
47 * Upgrade to CVS Suite for more features and support:
48 * http://march-hare.com/cvsnt/
49 *
50 * Revision 6.69 2010/07/12 14:32:38 kans
51 * SeqEntryDelFeat calls SeqEntryDelFeatEx
52 *
53 * Revision 6.68 2010/07/12 12:21:49 bollin
54 * Introduced a version of BioseqDelete that uses idx.deleteme to remove features
55 * (instead of freeing them immediately), and fixed bugs in VecScreenTool when
56 * entire Bioseqs are deleted.
57 *
58 * Revision 6.67 2010/06/11 12:03:22 bollin
59 * Added iBOL compliance report, which marks items with low trace as failing.
60 * Also checking in first draft of functions to reverse Quality Scores, not using
61 * until we can verify that they work for float and int graphs.
62 *
63 * Revision 6.66 2009/10/02 19:46:00 kans
64 * address clang static analyzer warnings
65 *
66 * Revision 6.65 2009/03/04 16:34:15 bollin
67 * Added function for removing contigs from scaffolds.
68 *
69 * Revision 6.64 2008/08/26 20:21:01 bollin
70 * Fixed bug in function for converting raw sequences to delta, where the
71 * gap is specified by location and is a replacement rather than an insertion.
72 *
73 * Revision 6.63 2007/07/02 19:17:26 bollin
74 * Corrected functions for inserting and deleting from locations to handle
75 * locations on segmented sets, corrected functions for inserting and deleting
76 * from sequences to adjust the length of the master sequence when adjusting the
77 * length of a segment.
78 *
79 * Revision 6.62 2007/05/08 17:18:32 bollin
80 * Added functions for identifying AGP gap DeltaSeqs
81 *
82 * Revision 6.61 2007/05/07 17:43:03 bollin
83 * Made functions IsDeltaSeqGap and IsDeltaSeqUnknownGap extern.
84 *
85 * Revision 6.60 2007/05/07 17:35:02 kans
86 * can handle Seq-lit.seq-data.gap
87 *
88 * Revision 6.59 2007/05/07 13:28:35 kans
89 * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
90 *
91 * Revision 6.58 2007/01/19 14:55:07 bollin
92 * Do not set partial when deleting location from feature.
93 *
94 * Revision 6.57 2006/07/13 17:06:38 bollin
95 * use Uint4 instead of Uint2 for itemID values
96 * removed unused variables
97 * resolved compiler warnings
98 *
99 * Revision 6.56 2006/04/04 18:00:47 kans
100 * SeqLocAddEx properly returns value to &last argument, makes SeqLocMix from DeltaSeqsToSeqLocs
101 *
102 * Revision 6.55 2006/03/30 19:50:15 kans
103 * DeltaSeqsToSeqLocs calls SeqLocAddEx for efficient list usage
104 *
105 * Revision 6.54 2006/02/07 13:41:29 bollin
106 * added function AdjustFeatureForGapChange, which changes a feature to accommodate
107 * a change in the length of a gap
108 *
109 * Revision 6.53 2005/12/12 14:12:54 bollin
110 * BioseqCopyEx was not correctly handling copying the data contents of a
111 * delta sequence
112 *
113 * Revision 6.52 2005/09/22 19:21:34 bollin
114 * In the sequence editor, if the user inserts Ns into a gap of known length,
115 * the gap length will be increased instead of creating two gaps on either side
116 * with N sequence characters in the middle.
117 *
118 * Revision 6.51 2005/09/13 15:21:57 bollin
119 * fixed bug when inserting characters inside a gap that was incorrectly setting
120 * the lengths of the split gap
121 *
122 * Revision 6.50 2005/09/13 14:14:31 bollin
123 * fixed bug that was preventing the removal of gaps of length 1
124 *
125 * Revision 6.49 2005/07/15 19:01:37 kans
126 * minor fixes for Xcode warnings
127 *
128 * Revision 6.48 2005/05/02 14:20:02 bollin
129 * when inserting gaps, adjust coding region locations to not include gaps.
130 * when removing gaps, if a feature location has intervals that stop and start
131 * again at the point where the gap was removed, connect the intervals.
132 *
133 * Revision 6.47 2005/04/28 20:10:31 bollin
134 * added new function AdjustFeaturesForInsertion which is called by BioseqInsert
135 * and also by a new function in sequin3.c for converting a raw bioseq to a delta
136 * and inserting gaps
137 *
138 * Revision 6.46 2005/04/06 19:33:15 bollin
139 * made it possible to insert and remove gaps from delta sequences
140 *
141 * Revision 6.45 2005/03/18 20:51:10 bollin
142 * only change frame when CDS location has been changed, change anticodon locations
143 * and code breaks when locations have just been shifted
144 *
145 * Revision 6.44 2005/03/08 21:14:44 bollin
146 * strand argument in SeqLocCopyRegion is Seq_strand_minus when features
147 * should be reverse-complemented, does not actually indicate the strand to
148 * which a feature should be copied
149 *
150 * Revision 6.43 2005/02/28 16:53:40 bollin
151 * corrected Unix compiler warnings
152 *
153 * Revision 6.42 2005/02/28 16:08:35 bollin
154 * added utilities for editing delta sequences
155 *
156 * Revision 6.41 2005/01/24 17:00:58 bollin
157 * only change frames, fix code break locations, and fix anticodon locations
158 * when feature location is changed in SeqFeatDelete
159 *
160 * Revision 6.40 2004/11/17 21:19:18 lavr
161 * AffectedFeatFree() to return NULL on afp == NULL
162 *
163 * Revision 6.39 2004/10/08 16:04:16 bollin
164 * added ability to check when an action will remove a feature
165 *
166 * Revision 6.38 2004/10/08 15:19:07 bollin
167 * do not set partial flag when deleting from bioseq location in feature
168 *
169 * Revision 6.37 2004/09/29 18:49:57 bollin
170 * fixed bugs in sequence editing, can now undo a nucleotide deletion that
171 * removes an entire feature location (feature will be restored)
172 *
173 * Revision 6.36 2004/09/23 14:59:51 bollin
174 * moved functions that depend on functions that depend on BLAST functions
175 * into seqpanel.c, made function scalled by those functions extern
176 *
177 * Revision 6.35 2004/09/22 20:12:27 bollin
178 * fixed error in deleting sequence location for point features
179 *
180 * Revision 6.34 2004/09/22 18:20:32 bollin
181 * added functions for playing and unplaying a sequence editor action to translate
182 * a CDS
183 *
184 * Revision 6.33 2004/09/07 14:52:29 bollin
185 * when deleting location from a feature, adjust frame if deleting from 5' end of 5' partial feature.
186 *
187 * Revision 6.32 2004/08/24 13:16:57 bollin
188 * do not free list of product features taken from ObjectMgrDataPtr
189 *
190 * Revision 6.31 2004/08/06 19:56:20 bollin
191 * allow deletion from the end of a sequence
192 *
193 * Revision 6.30 2004/08/05 18:15:02 bollin
194 * when maintaining partials during feature drag, use partial in orig_loc
195 * instead of current feature location
196 *
197 * Revision 6.29 2004/08/05 18:07:03 bollin
198 * maintain partials for features when dragging or sliding intervals
199 *
200 * Revision 6.28 2004/07/30 18:46:55 bollin
201 * added function for reordering intervals after they have been dragged by
202 * the sequence editor
203 *
204 * Revision 6.27 2004/07/30 13:34:50 bollin
205 * in SeqLocCopyRegion, when copying from the minus strand to a non-minus-strand,
206 * be sure to set the strand.
207 *
208 * Revision 6.26 2004/07/28 20:06:19 bollin
209 * added journaling for undo/redo of dragged sequence location changes
210 *
211 * Revision 6.25 2004/07/28 15:22:15 bollin
212 * moved functions for moving feature locations around to edutil.c from
213 * seqpanel.c
214 *
215 * Revision 6.24 2004/07/27 19:46:42 bollin
216 * fixed errors in feature location adjustment when deleting nucleotides
217 * with new sequence editor
218 *
219 * Revision 6.23 2004/07/22 16:08:20 bazhin
220 * Changes to parse gaps of unknown lengths (like "gap(unk100)")
221 * within location strings.
222 *
223 * Revision 6.22 2004/07/12 12:29:45 bollin
224 * moved new sequence editor editing functions here
225 *
226 * Revision 6.21 2003/11/03 19:37:42 bollin
227 * SegLocToPartsEx now handles SEQLOC_PNT as well as SEQLOC_INT
228 *
229 * Revision 6.20 2003/06/03 20:25:34 kans
230 * SeqLocReplaceID works on bonds if both ends bonded to the same Seq-id
231 *
232 * Revision 6.19 2003/02/10 22:57:45 kans
233 * added BioseqCopyEx, which takes a BioseqPtr instead of a SeqIdPtr for the source
234 *
235 * Revision 6.18 2002/07/26 20:15:55 kans
236 * BioseqInsert can do feature indexed collection of features to adjust
237 *
238 * Revision 6.17 2002/07/17 15:39:40 kans
239 * BioseqInsert calls Nlm_BSAdd, need to figure out when not to call
240 *
241 * Revision 6.16 2002/07/11 17:45:53 kans
242 * BioseqInsert does not call Nlm_BSAdd due to a bug in that code
243 *
244 * Revision 6.15 2002/07/02 13:23:42 kans
245 * added SeqLocDeleteEx
246 *
247 * Revision 6.14 2001/06/01 18:07:20 kans
248 * changes to SeqLocAdd to allow one plus and one unknown strand to be accepted
249 *
250 * Revision 6.13 2001/02/23 21:30:09 shkeda
251 * Fixed SeqLocAdd: Int-fuzz pointers should be set to NULL after IntFuzzFree
252 *
253 * Revision 6.12 2001/02/23 01:26:07 ostell
254 * Added support to BioseqDelete() for delta seqs
255 *
256 * Revision 6.11 2000/10/31 17:11:06 kans
257 * SeqLocReplaceID was handling SEQLOC_PACKED_PNT incorrectly
258 *
259 * Revision 6.10 1999/12/20 20:47:12 kans
260 * oldscope test was wrong everywhere
261 *
262 * Revision 6.9 1999/12/15 20:52:16 kans
263 * added IndexedSeqFeatsCopy if SeqMgrFeaturesAreIndexed
264 *
265 * Revision 6.8 1999/12/07 20:32:13 kans
266 * for most editing functions, if BioseqFind failed, temporarily clear scope/try again/reset scope
267 *
268 * Revision 6.7 1999/11/19 19:54:19 kans
269 * SeqLocAdd checks for NULL slp before dereferencing
270 *
271 * Revision 6.6 1998/09/03 20:43:52 kans
272 * added delta bioseq support to BioseqCopy
273 *
274 * Revision 6.5 1998/06/22 20:00:46 kans
275 * DelFeat was a bit too agressive when there were multiple feature tables
276 *
277 * Revision 6.4 1998/06/17 21:50:11 kans
278 * fixed unix compiler warnings, including 64-bit SGI
279 *
280 * Revision 6.3 1997/11/10 19:40:48 bazhin
281 * Fixed incorrect comment for ISAGappedSeqLoc() function.
282 *
283 * Revision 6.2 1997/10/24 19:16:17 bazhin
284 * Added three easy functions GapToSeqLoc(...), ISAGappedSeqLoc(...)
285 * and GappedSeqLocsToDeltaSeqs(...) for processing "gap(...)" tokens
286 * in CONTIG line.
287 *
288 * Revision 6.1 1997/10/10 20:18:02 ostell
289 * removed tab character from SeqLitTag for DeltaSeqsToSeqLoc
290 *
291 * Revision 6.0 1997/08/25 18:05:24 madden
292 * Revision changed to 6.0
293 *
294 * Revision 5.10 1997/07/25 20:34:51 kans
295 * added SegLocToPartsEx
296 *
297 * Revision 5.9 1997/06/19 18:37:30 vakatov
298 * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
299 *
300 * Revision 5.8 1996/12/20 17:59:34 kans
301 * SeqLocCopyRegion already reversed order for Seq_strand_minus, so no need
302 * to reverse it again (JO + JK)
303 *
304 * Revision 5.7 1996/10/21 18:56:19 ostell
305 * made SegLocToParts accept a complicated Seq-loc argument
306 *
307 * Revision 5.6 1996/10/09 17:27:34 chappey
308 * *** empty log message ***
309 *
310 * Revision 5.5 1996/10/09 16:34:59 chappey
311 * added SeqLocReplaceID() that replaces the Seq-Id of a Seq-Loc
312 *
313 * Revision 5.4 1996/07/15 14:43:51 epstein
314 * change SeqLocAdd() so that it merges identical SEQLOC_PNTs
315 *
316 * Revision 5.3 1996/06/12 18:29:41 epstein
317 * move SeqLocIntNew() and SeqLocPntNew() from edutil to sequtil
318 *
319 * Revision 5.1 1996/06/10 15:07:17 epstein
320 * replace make_seq_loc() with SeqLocIntNew() and make_pnt_loc with SeqLocPntNew()
321 *
322 * Revision 5.0 1996/05/28 13:23:23 ostell
323 * Set to revision 5.0
324 *
325 * Revision 4.10 1996/03/19 19:45:24 kans
326 * fix of SegLocToParts (JO)
327 *
328 * Revision 4.9 1996/03/12 22:14:22 ostell
329 * added SeqLocToParts()
330 *
331 * Revision 4.7 1996/02/19 19:58:05 ostell
332 * added support for Code-break and tRNA.anticodon
333 *
334 * Revision 4.6 1996/01/30 16:24:04 ostell
335 * changed name of SeqLocPack() to SeqLocPackage()
336 *
337 * Revision 4.5 1996/01/29 22:03:52 ostell
338 * revised SeqLocAdd
339 * added SeqLocPack
340 *
341 * Revision 4.4 1996/01/10 22:25:25 ostell
342 * added SeqLocIntNew()
343 *
344 * Revision 4.3 1995/12/29 21:31:44 ostell
345 * added mapping functions between delta seq and seq loc, for editing utilities
346 *
347 * Revision 4.2 1995/12/21 02:35:50 ostell
348 * changed call for BSAdd
349 *
350 * Revision 4.1 1995/11/15 20:40:20 ostell
351 * fixed SeqLocCopyPart so it correctly handles SEQLOC_NULL in segmented
352 * records
353 *
354 * Revision 4.0 1995/07/26 13:49:01 ostell
355 * force revision to 4.0
356 *
357 * Revision 1.22 1995/05/15 21:46:05 ostell
358 * added Log line
359 *
360 *
361 *
362 * ==========================================================================
363 */
364
365 #include <edutil.h>
366 #include <explore.h>
367 #include <sqnutils.h>
368 #include <objfdef.h>
369 #include <gather.h>
370
371 /*****************************************************************************
372 *
373 * SeqLocPackage(head)
374 * head is a chain of 1 or more SeqLocs connected by slp->next
375 * Assumes was built by SeqLocAdd to remove redundancy
376 * Frees the last element if it is a NULL.
377 * If more than one element left, then packages the chain into a SEQLOC_MIX,
378 * or SEQLOC_PACKED_INT as appropriate
379 * returns pointer to the head of the resulting single SeqLoc
380 *
381 *****************************************************************************/
SeqLocPackage(SeqLocPtr head)382 NLM_EXTERN SeqLocPtr LIBCALL SeqLocPackage (SeqLocPtr head)
383 {
384 SeqLocPtr newhead = NULL, tmp, prev;
385 Boolean packed_int = TRUE;
386 Int4 ctr = 0;
387
388 if (head == NULL) return head;
389
390 prev = NULL; /* remove trailing NULL */
391 for (tmp = head; tmp->next != NULL; tmp = tmp->next)
392 prev = tmp;
393
394 if (tmp->choice == SEQLOC_NULL)
395 {
396 SeqLocFree(tmp);
397 if (prev != NULL)
398 prev->next = NULL;
399 else
400 return NULL; /* nothing left */
401 }
402
403 for (tmp = head; tmp != NULL; tmp = tmp->next)
404 {
405 ctr++;
406 if (tmp->choice != SEQLOC_INT)
407 packed_int = FALSE;
408 }
409
410 if (ctr == 1)
411 return head;
412
413 newhead = ValNodeNew(NULL);
414 if (packed_int)
415 newhead->choice = SEQLOC_PACKED_INT;
416 else
417 newhead->choice = SEQLOC_MIX;
418 newhead->data.ptrvalue = head;
419
420 return newhead;
421 }
422
423 /*****************************************************************************
424 *
425 * SeqLocAdd(headptr, slp, merge, do_copy)
426 * creates a linked list of SeqLocs.
427 * returns a pointer to the last SeqLoc in the chain
428 * if (merge)
429 * deletes double NULLs or Nulls at start (application must delete at stop)
430 * merges adjacent intervals on the same strand
431 * if (do_copy)
432 * Makes copies of incoming SeqLocs
433 * if incoming is merged, deletes the incoming SeqLoc
434 *
435 *****************************************************************************/
SeqLocAddEx(SeqLocPtr PNTR head,SeqLocPtr PNTR lastp,SeqLocPtr slp,Boolean merge,Boolean do_copy)436 static SeqLocPtr LIBCALL SeqLocAddEx (SeqLocPtr PNTR head, SeqLocPtr PNTR lastp, SeqLocPtr slp, Boolean merge, Boolean do_copy)
437 {
438 SeqLocPtr tmp, last = NULL, retval = NULL;
439 Boolean merged = FALSE; /* intervals were merged */
440
441 if (slp == NULL) return NULL;
442
443 if (lastp != NULL) {
444 last = *lastp;
445 } else if (head != NULL && *head != NULL)
446 {
447 for (tmp = *head; tmp != NULL; tmp = tmp->next)
448 {
449 last = tmp;
450 }
451 }
452
453 if ((slp->choice == SEQLOC_NULL) && (merge)) /* no null at start, or two in a row */
454 {
455 if (last == NULL) /* first one */
456 {
457 merged = TRUE;
458 goto ret;
459 }
460 if (last->choice == SEQLOC_NULL) /* double NULL */
461 {
462 merged = TRUE;
463 goto ret;
464 }
465 }
466
467 if ((last != NULL) && (merge)) /* check for merging intervals */
468 {
469 if ((last->choice == SEQLOC_INT) && (slp->choice == SEQLOC_INT))
470 {
471 SeqIntPtr sip1, sip2;
472 Boolean samestrand;
473 Uint1 strand = Seq_strand_unknown;
474
475 sip1 = (SeqIntPtr)(last->data.ptrvalue);
476 sip2 = (SeqIntPtr)(slp->data.ptrvalue);
477 samestrand = FALSE;
478 if ((sip1->strand == sip2->strand) ||
479 (sip1->strand == Seq_strand_unknown && sip2->strand != Seq_strand_minus) ||
480 (sip1->strand == Seq_strand_unknown && sip2->strand != Seq_strand_minus)) {
481 samestrand = TRUE;
482 if (sip1->strand == Seq_strand_minus || sip1->strand == Seq_strand_minus) {
483 strand = Seq_strand_minus;
484 } else if (sip1->strand == Seq_strand_plus || sip1->strand == Seq_strand_plus) {
485 strand = Seq_strand_plus;
486 } else {
487 strand = Seq_strand_unknown;
488 }
489 }
490 if (samestrand && (SeqIdForSameBioseq(sip1->id, sip2->id)))
491 {
492 if (strand == Seq_strand_minus)
493 {
494 if (sip1->from == (sip2->to + 1)) /* they are adjacent */
495 {
496 sip1->from = sip2->from;
497 sip1->if_from = IntFuzzFree(sip1->if_from);
498 if (sip2->if_from != NULL) /* copy the fuzz */
499 {
500 if (do_copy)
501 sip1->if_from = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->if_from),
502 (AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
503 else
504 {
505 sip1->if_from = sip2->if_from;
506 sip2->if_from = NULL;
507 }
508 sip1->strand = strand;
509 }
510 merged = TRUE;
511 }
512 }
513 else
514 {
515 if (sip1->to == (sip2->from - 1)) /* they are adjacent */
516 {
517 sip1->to = sip2->to;
518 sip1->if_to = IntFuzzFree(sip1->if_to);
519 if (sip2->if_to != NULL) /* copy the fuzz */
520 {
521 if (do_copy)
522 sip1->if_to = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->if_to),
523 (AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
524 else
525 {
526 sip1->if_to = sip2->if_to;
527 sip2->if_to = NULL;
528 }
529 sip1->strand = strand;
530 }
531 merged = TRUE;
532 }
533 }
534 }
535 } else if ((last->choice == SEQLOC_PNT) && (slp->choice == SEQLOC_PNT))
536 {
537 SeqPntPtr sip1, sip2;
538
539 sip1 = (SeqPntPtr)(last->data.ptrvalue);
540 sip2 = (SeqPntPtr)(slp->data.ptrvalue);
541 if ((sip1->strand == sip2->strand) && sip1->point == sip2->point && (SeqIdForSameBioseq(sip1->id, sip2->id)))
542 {
543 sip1->fuzz = IntFuzzFree(sip1->fuzz);
544 if (sip2->fuzz != NULL) /* copy the fuzz */
545 {
546 if (do_copy)
547 sip1->fuzz = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->fuzz),
548 (AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
549 else
550 {
551 sip1->fuzz = sip2->fuzz;
552 sip2->fuzz = NULL;
553 }
554 }
555 merged = TRUE;
556 }
557 }
558 }
559
560 ret:
561 if (! merged) /* then have to add a new one */
562 {
563 if (do_copy)
564 tmp = (SeqLocPtr)AsnIoMemCopy((Pointer)slp, (AsnReadFunc)SeqLocAsnRead, (AsnWriteFunc)SeqLocAsnWrite);
565 else
566 tmp = slp;
567
568 if (tmp != NULL) {
569 tmp->next = NULL;
570 }
571
572 if (last != NULL) {
573 last->next = tmp;
574 } else if (head != NULL) {
575 *head = tmp;
576 }
577 last = tmp;
578 retval = tmp;
579 }
580 else
581 {
582 retval = last;
583 if (! do_copy) /* got to free it here */
584 SeqLocFree(slp);
585 }
586 if (lastp != NULL) {
587 *lastp = last;
588 }
589
590 return retval;
591 }
592
SeqLocAdd(SeqLocPtr PNTR head,SeqLocPtr slp,Boolean merge,Boolean do_copy)593 NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy)
594 {
595 SeqLocPtr tmp, last;
596
597 if (slp == NULL) return NULL;
598
599 last = NULL;
600 if (* head != NULL)
601 {
602 for (tmp = *head; tmp != NULL; tmp = tmp->next)
603 {
604 last = tmp;
605 }
606 }
607 return SeqLocAddEx (head, &last, slp, merge, do_copy);
608 }
609
610 /*****************************************************************************
611 *
612 * SegLocToParts(BioseqPtr seg, SeqLocPtr slp)
613 * seg must be a segmented Bioseq
614 * slp must be a SeqLoc on it
615 * function maps slp to the components of seg
616 * returns a new SeqLocPtr
617 * does not delete slp
618 *
619 *****************************************************************************/
SegLocToPartsEx(BioseqPtr seg,SeqLocPtr slp,Boolean nullsBetween)620 NLM_EXTERN SeqLocPtr LIBCALL SegLocToPartsEx (BioseqPtr seg, SeqLocPtr slp, Boolean nullsBetween)
621 {
622 SeqLocPtr newloc = NULL, tmp, tmp2, tmp3, next, curr;
623 ValNode thead;
624 SeqIdPtr sip, tsip;
625 Int4 left_end, right_end, tlen, tstart;
626 SeqIntPtr sintp;
627 Boolean split, notFirst = FALSE;
628
629 if ((seg == NULL) || (slp == NULL)) return newloc;
630 if (seg->repr != Seq_repr_seg) return newloc;
631
632 sip = SeqLocId(slp);
633 if (sip == NULL) return newloc;
634 if (! SeqIdIn(sip, seg->id)) return newloc;
635
636 MemSet(&thead, 0, sizeof(ValNode));
637 thead.choice = SEQLOC_MIX;
638 thead.data.ptrvalue = seg->seq_ext;
639
640 curr = NULL;
641 while ((curr = SeqLocFindNext(slp, curr)) != NULL)
642 {
643 left_end = 0;
644 tmp = NULL;
645 while ((tmp = SeqLocFindNext(&thead, tmp)) != NULL)
646 {
647 tlen = SeqLocLen(tmp);
648 if (tlen > 0)
649 {
650 right_end = left_end + tlen - 1;
651 tsip = SeqLocId(tmp);
652 tstart = SeqLocStart(tmp);
653 tmp2 = SeqLocCopyRegion(tsip, curr, seg, left_end, right_end, SeqLocStrand(tmp),
654 &split);
655 while (tmp2 != NULL)
656 {
657 next = tmp2->next;
658 tmp2->next = NULL;
659 if (tmp2->choice == SEQLOC_INT)
660 {
661 if (nullsBetween && notFirst) {
662 tmp3 = ValNodeNew (NULL);
663 if (tmp3 != NULL) {
664 tmp3->choice = SEQLOC_NULL;
665 SeqLocAdd (&newloc, tmp3, TRUE, FALSE);
666 }
667 }
668 notFirst = TRUE;
669 sintp = (SeqIntPtr)(tmp2->data.ptrvalue);
670 sintp->from += tstart;
671 sintp->to += tstart;
672 SeqLocAdd(&newloc, tmp2, TRUE, FALSE);
673 }
674 else if (tmp2->choice == SEQLOC_PNT)
675 {
676 if (nullsBetween && notFirst) {
677 tmp3 = ValNodeNew (NULL);
678 if (tmp3 != NULL) {
679 tmp3->choice = SEQLOC_NULL;
680 SeqLocAdd (&newloc, tmp3, TRUE, FALSE);
681 }
682 }
683 notFirst = TRUE;
684 SeqLocAdd (&newloc, tmp2, TRUE, FALSE);
685 }
686 tmp2 = next;
687 }
688 left_end = right_end + 1;
689 }
690 }
691 }
692
693 if (newloc != NULL)
694 newloc = SeqLocPackage(newloc);
695 return newloc;
696 }
697
SegLocToParts(BioseqPtr seg,SeqLocPtr slp)698 NLM_EXTERN SeqLocPtr LIBCALL SegLocToParts (BioseqPtr seg, SeqLocPtr slp)
699
700 {
701 return SegLocToPartsEx (seg, slp, FALSE);
702 }
703
704 static CharPtr seqlitdbtag = "SeqLit";
705 static CharPtr unkseqlitdbtag = "UnkSeqLit";
706 /*****************************************************************************
707 *
708 * ISADeltaSeqsToSeqLoc(slp)
709 * returns Index (> 0) if this (one) SeqLoc was converted from a Delta Seq by
710 * DeltaSeqsToSeqLocs() by looking for the special Dbtag name
711 *
712 *****************************************************************************/
ISADeltaSeqsToSeqLoc(SeqLocPtr slp)713 NLM_EXTERN Int4 LIBCALL ISADeltaSeqsToSeqLoc (SeqLocPtr slp)
714 {
715 SeqIdPtr sip;
716 Int4 retval = 0;
717
718 if (slp == NULL) return retval;
719 sip = SeqLocId(slp);
720 if (sip == NULL) return retval;
721
722 if (sip->choice != SEQID_GENERAL) return retval;
723
724 if (! StringCmp(seqlitdbtag, ((DbtagPtr)(sip->data.ptrvalue))->db) ||
725 ! StringCmp(unkseqlitdbtag, ((DbtagPtr)(sip->data.ptrvalue))->db))
726 retval = (((DbtagPtr)(sip->data.ptrvalue))->tag->id);
727
728 return retval;
729 }
730
/*****************************************************************************
*
*   DeltaSeqsToSeqLocs(dsp)
*   	converts a chain of delta seqs to seqlocs
*   	each SeqLit is converted to SeqLoc of type Int with a SeqId of type
*       Dbtag where db="SeqLit" and objectId.id which is the index of the
*       element in the delta seq chain where 1 is the first one.
*   	Returned SeqLoc is packaged by SeqLocPackage() (a single SeqLoc, a
*       packed-int, or a mix) and must be freed by caller.
*
*****************************************************************************/
DeltaSeqsToSeqLocs(DeltaSeqPtr dsp)741 NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp)
742 {
743 SeqLocPtr head = NULL, thead = NULL, last = NULL;
744 DeltaSeqPtr curr;
745 SeqInt si;
746 Dbtag db;
747 ObjectId oi;
748 ValNode vn, vn2;
749
750 MemSet(&vn, 0, sizeof(ValNode));
751 MemSet(&vn2, 0, sizeof(ValNode));
752 MemSet(&si, 0, sizeof(SeqInt));
753 MemSet(&db, 0, sizeof(Dbtag));
754 MemSet(&oi, 0, sizeof(ObjectId));
755 vn.choice = SEQLOC_INT;
756 vn.data.ptrvalue = &si;
757 si.id = &vn2;
758 vn2.choice = SEQID_GENERAL;
759 vn2.data.ptrvalue = &db;
760 db.db = seqlitdbtag;
761 db.tag = &oi;
762 oi.id = 1;
763
764
765
766 for (curr = dsp; curr != NULL; curr = curr->next)
767 {
768 if (curr->choice == 1) /* a SeqLoc */
769 SeqLocAddEx (&thead, &last, (SeqLocPtr)(curr->data.ptrvalue), TRUE, TRUE);
770 else
771 {
772 si.to = ((SeqLitPtr) (curr->data.ptrvalue))->length - 1;
773 SeqLocAddEx (&thead, &last, &vn, TRUE, TRUE);
774 }
775 oi.id++;
776 }
777
778 head = SeqLocPackage(thead);
779 return head;
780 }
781
/*****************************************************************************
*
*   SeqLocsToDeltaSeqs(dsp, slp)
*   	converts a chain of seqlocs generated by DeltaSeqsToSeqLocs() back into
*         delta seqs. dsp is the original chain of DeltaSeqs, which is required
*         to convert the delta seqs back.
*
*****************************************************************************/
SeqLocsToDeltaSeqs(DeltaSeqPtr dsp,SeqLocPtr slp)790 NLM_EXTERN DeltaSeqPtr LIBCALL SeqLocsToDeltaSeqs (DeltaSeqPtr dsp, SeqLocPtr slp)
791 {
792 DeltaSeqPtr dhead=NULL, dcurr=NULL, dtmp;
793 SeqLocPtr scurr;
794 Int4 ctr, index, strt, stp;
795 SeqIdPtr sip;
796 Uint1 strand, newcode;
797 SeqLitPtr slitp, slitp_new;
798 SeqPortPtr spps;
799 ByteStorePtr bsp;
800 Int2 residue;
801 ValNode vn;
802
803 if ((dsp == NULL) || (slp == NULL))
804 return dhead;
805
806 vn.choice = SEQLOC_MIX;
807 vn.next = NULL;
808 vn.data.ptrvalue = slp;
809 scurr = NULL;
810 while ((scurr = SeqLocFindNext(&vn, scurr)) != NULL)
811 {
812 dcurr = ValNodeNew(dhead);
813 if (dhead == NULL)
814 dhead = dcurr;
815
816 index = ISADeltaSeqsToSeqLoc(scurr);
817
818 if (index == 0) /* just a SeqLoc */
819 {
820 dcurr->choice = 1;
821 dcurr->data.ptrvalue = NULL;
822 dcurr->data.ptrvalue = AsnIoMemCopy((Pointer)scurr, (AsnReadFunc)SeqLocAsnRead, (AsnWriteFunc)SeqLocAsnWrite);
823
824 }
825 else /* convert to a delta seq */
826 {
827 dcurr->choice = 2;
828 sip = SeqLocId(scurr);
829 dtmp = dsp;
830 for (ctr = 1; ctr < index; ctr++)
831 dtmp = dtmp->next;
832
833 if (dtmp->choice != 2) /* wups */
834 {
835 ErrPostEx(SEV_ERROR,0,0,"Wrong type in SeqLocsToDeltaSeqs");
836 dhead = DeltaSeqFree(dhead);
837 return dhead;
838 }
839 slitp = (SeqLitPtr)(dtmp->data.ptrvalue);
840
841 strt = SeqLocStart(scurr);
842 stp = SeqLocStop(scurr);
843 strand = SeqLocStrand(scurr);
844
845 if ((strt == 0) && (stp == (slitp->length - 1)) && (strand != Seq_strand_minus)) /* no change */
846 {
847 dcurr->data.ptrvalue = AsnIoMemCopy((Pointer)slitp, (AsnReadFunc)SeqLitAsnRead, (AsnWriteFunc)SeqLitAsnWrite);
848 }
849 else /* got to copy part of it */
850 {
851 switch (slitp->seq_data_type)
852 {
853 case Seq_code_iupacna:
854 case Seq_code_iupacaa:
855 case Seq_code_ncbi8na:
856 case Seq_code_ncbi8aa:
857 case Seq_code_ncbieaa:
858 case Seq_code_ncbistdaa:
859 case Seq_code_iupacaa3:
860 newcode = slitp->seq_data_type; /* one byte codes.. fine */
861 break;
862 case Seq_code_ncbipna:
863 ErrPostEx(SEV_ERROR,0,0,"Converting from P residue codes");
864 newcode = Seq_code_ncbieaa;
865 break;
866 case Seq_code_ncbipaa:
867 ErrPostEx(SEV_ERROR,0,0,"Converting from P residue codes");
868 case Seq_code_ncbi2na:
869 case Seq_code_ncbi4na:
870 newcode = Seq_code_iupacna;
871 break;
872 case Seq_code_gap:
873 ErrPostEx(SEV_WARNING,0,0,"Seq_code_gap residue code in SeqLocsToDeltaSeqs");
874 return DeltaSeqFree(dhead);
875 break;
876 default:
877 ErrPostEx(SEV_FATAL,0,0,"Unrecognized residue code [%d] in SeqLocsToDeltaSeqs",
878 (int)(slitp->seq_data_type));
879 return DeltaSeqFree(dhead);
880 }
881 spps = MemNew(sizeof(SeqPort));
882 SeqPortSetUpFields (spps, strt, stp, strand, newcode);
883 SeqPortSetUpAlphabet(spps, slitp->seq_data_type, newcode);
884 spps->bp = (ByteStorePtr) slitp->seq_data;
885 slitp_new = SeqLitNew();
886 dcurr->data.ptrvalue = slitp_new;
887 slitp_new->seq_data_type = newcode;
888 slitp_new->length = (stp - strt + 1);
889 bsp = BSNew(slitp_new->length);
890 slitp_new->seq_data = (SeqDataPtr) bsp;
891 SeqPortSeek(spps, 0, SEEK_SET);
892 BSSeek(bsp, 0, SEEK_SET);
893 while (stp >= strt)
894 {
895 residue = SeqPortGetResidue(spps);
896 BSPutByte(bsp, residue);
897 strt++;
898 }
899 SeqPortFree(spps);
900 }
901
902 }
903
904 }
905 return dhead;
906 }
907 /*****************************************************************************
908 *
909 * BioseqDelete (target, from, to, do_feat, do_split)
910 * Deletes the region of sequence between from-to, inclusive, on the
911 * Bioseq whose SeqId is target.
912 * If do_feat, the feature table is updated to reflect the deletion
913 * using SeqEntryDelFeat()
914 * If do_split, the features across the deleted region are split into
915 * two intervals on either side. If not, the feature is just shortened.
916 *****************************************************************************/
BioseqDeleteEx(SeqIdPtr target,Int4 from,Int4 to,Boolean do_feat,Boolean do_split,Boolean mark_deleted_feat)917 NLM_EXTERN Boolean LIBCALL BioseqDeleteEx (SeqIdPtr target, Int4 from, Int4 to, Boolean do_feat, Boolean do_split, Boolean mark_deleted_feat)
918 {
919 Boolean retval = FALSE;
920 BioseqPtr bsp;
921 SeqLocPtr tmp, head;
922 Int4 len, deleted;
923 Int4 totlen, templen, tfrom, tto, diff1, diff2;
924 SeqLocPtr slp, tloc, newhead, prev;
925 ValNode vn;
926 SeqInt si;
927 SeqLocPtr PNTR newheadptr;
928 SeqFeatPtr sfpcurr, sfpnext, sfpprev;
929 Int2 dropped;
930 SeqEntryPtr oldscope;
931 DeltaSeqPtr tdsp = NULL;
932
933 bsp = BioseqFind(target);
934 if (bsp == NULL) {
935 oldscope = SeqEntrySetScope (NULL);
936 if (oldscope != NULL) {
937 bsp = BioseqFind(target);
938 SeqEntrySetScope (oldscope);
939 }
940 }
941 if (bsp == NULL) return retval;
942
943 if ((from < 0) || (from >= bsp->length) || (to < 0) ||
944 (to >= bsp->length) || (from > to)) return retval;
945
946 if (do_feat)
947 SeqEntryDelFeatEx(NULL, target, from, to, do_split, mark_deleted_feat);
948
949 len = to - from + 1;
950 /* if actual sequence present */
951
952 if (((bsp->repr == Seq_repr_raw) || (bsp->repr == Seq_repr_const)) && bsp->seq_data_type != Seq_code_gap)
953 {
954 if (ISA_na(bsp->mol))
955 {
956 if (bsp->seq_data_type != Seq_code_iupacna) /* need 1 byte/base */
957 BioseqRawConvert(bsp, Seq_code_iupacna);
958 }
959 else
960 {
961 if (bsp->seq_data_type != Seq_code_ncbieaa)
962 BioseqRawConvert(bsp, Seq_code_ncbieaa);
963 }
964
965 BSSeek((ByteStorePtr) bsp->seq_data, from, SEEK_SET);
966 deleted = BSDelete((ByteStorePtr) bsp->seq_data, len);
967 if (deleted != len) /* error */
968 ErrPost(CTX_NCBIOBJ, 1, "Delete of %ld residues failed", len);
969 else
970 retval = TRUE;
971 }
972
973 /* update segmented sequence */
974 if ((bsp->repr == Seq_repr_seg) || (bsp->repr == Seq_repr_delta))
975 {
976 head = ValNodeNew(NULL); /* allocate to facilitate SeqLocFree */
977 head->choice = SEQLOC_MIX; /* make a SeqLoc out of the extension */
978 if (bsp->repr == Seq_repr_seg)
979 head->data.ptrvalue = bsp->seq_ext;
980 else
981 {
982 tdsp = (DeltaSeqPtr)(bsp->seq_ext);
983 head->data.ptrvalue = DeltaSeqsToSeqLocs(tdsp);
984 }
985
986 newhead = NULL;
987 newheadptr = &newhead;
988
989 tloc = &vn;
990 MemSet((Pointer)tloc, 0, sizeof(ValNode));
991 MemSet((Pointer)&si, 0, sizeof(SeqInt));
992 tloc->choice = SEQLOC_INT;
993 tloc->data.ptrvalue = (Pointer)(&si);
994
995 slp = NULL;
996 totlen = 0;
997 while ((slp = SeqLocFindNext(head, slp)) != NULL)
998 {
999 templen = SeqLocLen(slp);
1000 tfrom = SeqLocStart(slp);
1001 tto = SeqLocStop(slp);
1002
1003 if (((totlen + templen - 1) < from) || /* before cut */
1004 (totlen > to)) /* after cut */
1005 tmp = SeqLocAdd(newheadptr, slp, TRUE, TRUE); /* add whole SeqLoc */
1006 else
1007 {
1008 retval = 1; /* will modify or drop interval */
1009 diff1 = from - totlen; /* partial beginning? */
1010 diff2 = (templen + totlen - 1) - to; /* partial end? */
1011 si.id = SeqLocId(slp);
1012 si.strand = SeqLocStrand(slp);
1013
1014 if (diff1 > 0) /* partial start */
1015 {
1016 if (si.strand != Seq_strand_minus)
1017 {
1018 si.from = tfrom;
1019 si.to = tfrom + diff1 - 1;
1020 }
1021 else
1022 {
1023 si.from = tto - diff1 + 1;
1024 si.to = tto;
1025 }
1026 tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
1027 }
1028
1029 if (diff2 > 0) /* partial end */
1030 {
1031 if (si.strand != Seq_strand_minus)
1032 {
1033 si.from = tto - diff2 + 1;
1034 si.to = tto;
1035 }
1036 else
1037 {
1038 si.from = tfrom;
1039 si.to = tfrom + diff2 - 1;
1040 }
1041 tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
1042 }
1043
1044 }
1045 totlen += templen;
1046 }
1047
1048 prev = NULL;
1049 for (tmp = newhead; tmp != NULL; tmp = tmp->next)
1050 {
1051 if (tmp->next == NULL) /* last one */
1052 {
1053 if (tmp->choice == SEQLOC_NULL)
1054 {
1055 if (prev != NULL)
1056 prev->next = NULL;
1057 else /* only a NULL left */
1058 {
1059 newhead = NULL;
1060 }
1061 MemFree(tmp);
1062 break;
1063 }
1064 }
1065 prev = tmp;
1066 }
1067
1068 if (bsp->repr == Seq_repr_seg)
1069 bsp->seq_ext = newhead;
1070 else
1071 {
1072 bsp->seq_ext = SeqLocsToDeltaSeqs(tdsp, newhead);
1073 DeltaSeqSetFree(tdsp);
1074 SeqLocSetFree(newhead);
1075 }
1076 SeqLocFree(head);
1077 retval = TRUE;
1078 }
1079
1080 if (bsp->repr == Seq_repr_map) /* map bioseq */
1081 {
1082 sfpprev = NULL;
1083 sfpnext = NULL;
1084 sfpcurr = (SeqFeatPtr)(bsp->seq_ext);
1085 bsp->seq_ext = NULL;
1086 for (; sfpcurr != NULL; sfpcurr = sfpnext)
1087 {
1088 sfpnext = sfpcurr->next;
1089 dropped = SeqFeatDelete(sfpcurr, target, from, to, TRUE);
1090 if (dropped == 2) /* completely gone */
1091 {
1092 SeqFeatFree(sfpcurr);
1093 }
1094 else
1095 {
1096 if (sfpprev == NULL)
1097 bsp->seq_ext = (Pointer)sfpcurr;
1098 else
1099 sfpprev->next = sfpcurr;
1100 sfpcurr->next = NULL;
1101 sfpprev = sfpcurr;
1102 }
1103 }
1104 retval = TRUE;
1105 }
1106
1107 if (bsp->repr == Seq_repr_virtual)
1108 retval = TRUE; /* nothing to do */
1109
1110 if (retval)
1111 bsp->length -= len;
1112 return retval;
1113 }
1114
1115
BioseqDelete(SeqIdPtr target,Int4 from,Int4 to,Boolean do_feat,Boolean do_split)1116 NLM_EXTERN Boolean LIBCALL BioseqDelete (SeqIdPtr target, Int4 from, Int4 to, Boolean do_feat, Boolean do_split)
1117 {
1118 return BioseqDeleteEx (target, from, to, do_feat, do_split, FALSE);
1119 }
1120
1121
1122 /*****************************************************************************
1123 *
1124 * BioseqOverwrite (target, pos, residue, seqcode)
1125 * Overwrites the residue at pos with residue in the
1126 * Bioseq whose SeqId is target.
1127 * residue is iupacna for DNA or ncbieaa for protein
1128 * target MUST be a raw Bioseq right now
1129 *
1130 *****************************************************************************/
BioseqOverwrite(SeqIdPtr target,Int4 pos,Uint1 residue)1131 NLM_EXTERN Boolean LIBCALL BioseqOverwrite (SeqIdPtr target, Int4 pos, Uint1 residue)
1132 {
1133 BioseqPtr bsp;
1134 Boolean retval = FALSE;
1135 SeqEntryPtr oldscope;
1136
1137
1138 bsp = BioseqFind(target);
1139 if (bsp == NULL) {
1140 oldscope = SeqEntrySetScope (NULL);
1141 if (oldscope != NULL) {
1142 bsp = BioseqFind(target);
1143 SeqEntrySetScope (oldscope);
1144 }
1145 }
1146 if (bsp == NULL) return retval;
1147
1148 if ((pos < 0) || (pos >= bsp->length)) return retval;
1149 if (bsp->repr != Seq_repr_raw) return retval;
1150
1151 if (bsp->seq_data_type == Seq_code_gap) return FALSE;
1152
1153 if (ISA_na(bsp->mol))
1154 {
1155 if (bsp->seq_data_type != Seq_code_iupacna) /* need 1 byte/base */
1156 BioseqRawConvert(bsp, Seq_code_iupacna);
1157 }
1158 else
1159 {
1160 if (bsp->seq_data_type != Seq_code_ncbieaa)
1161 BioseqRawConvert(bsp, Seq_code_ncbieaa);
1162 }
1163
1164 BSSeek((ByteStorePtr) bsp->seq_data, pos, SEEK_SET);
1165 BSPutByte((ByteStorePtr) bsp->seq_data, (Int2)(TO_UPPER(residue)));
1166 retval = TRUE;
1167
1168 return retval;
1169 }
1170
1171
1172 /*****************************************************************************
1173 *
1174 * SeqInsertByLoc (target, offset, fragment)
1175 *
1176 *****************************************************************************/
SeqInsertByLoc(SeqIdPtr target,Int4 offset,SeqLocPtr fragment)1177 NLM_EXTERN Boolean LIBCALL SeqInsertByLoc (SeqIdPtr target, Int4 offset, SeqLocPtr fragment)
1178 {
1179 return TRUE;
1180 }
1181
1182
1183 /*****************************************************************************
1184 *
1185 * SeqDeleteByLoc (slp, do_feat, do_split)
1186 *
1187 *****************************************************************************/
SeqDeleteByLocEx(SeqLocPtr slp,Boolean do_feat,Boolean do_split,Boolean mark_deleted_feat)1188 NLM_EXTERN Boolean LIBCALL SeqDeleteByLocEx (SeqLocPtr slp, Boolean do_feat, Boolean do_split, Boolean mark_deleted_feat)
1189 {
1190 SeqLocPtr tmp;
1191 Boolean retval = FALSE;
1192 Int2 numloc, i = 0, ctr, pick, totloc;
1193 SeqLocPtr PNTR locs, PNTR tlocs, PNTR theorder;
1194 BioseqPtr bsp;
1195 Int4 tstart, tstop;
1196
1197 if (slp == NULL) return retval;
1198
1199 numloc = 0;
1200 totloc = 0;
1201 locs = NULL;
1202 tmp = NULL;
1203
1204 while ((tmp = SeqLocFindNext(slp, tmp)) != NULL)
1205 {
1206 switch (tmp->choice)
1207 {
1208 case SEQLOC_INT:
1209 case SEQLOC_PNT:
1210 if (BioseqFind(SeqLocId(tmp)) != NULL)
1211 {
1212 if (numloc == totloc)
1213 {
1214 tlocs = locs;
1215 locs = (SeqLocPtr PNTR)(MemNew((totloc+20) * sizeof(SeqLocPtr)));
1216 MemCopy(locs, tlocs, (size_t)(totloc * sizeof(SeqLocPtr)));
1217 MemFree(tlocs);
1218 totloc += 20;
1219 }
1220 locs[numloc] = tmp;
1221 numloc++;
1222 }
1223 break;
1224 default:
1225 Message(MSG_ERROR, "Unsupported Seqloc [%d] in SeqDeleteByLoc",
1226 (int)(tmp->choice));
1227 break;
1228
1229 }
1230 }
1231
1232 if (! numloc) return retval;
1233
1234
1235 /***********************************************************
1236 *
1237 * first gather all the seqlocs, grouped by Bioseq, and
1238 * ordered from end to beginning. They must be ordered
1239 * before the underlying Bioseq is changed.
1240 *
1241 ***********************************************************/
1242
1243 retval = TRUE;
1244
1245 bsp = NULL;
1246 theorder = (SeqLocPtr PNTR)MemNew((sizeof(SeqLocPtr) * numloc));
1247 for (ctr = 0; ctr < numloc; ctr++)
1248 {
1249 pick = -1; /* flag none found */
1250 if (bsp != NULL)
1251 {
1252 for (i = 0; i < numloc; i++)
1253 {
1254 if (locs[i] != NULL)
1255 {
1256 if (SeqIdIn(SeqLocId(locs[i]), bsp->id))
1257 {
1258 pick = i;
1259 i++;
1260 break;
1261 }
1262 }
1263 }
1264 if (pick < 0)
1265 bsp = NULL; /* no more locs on this bioseq */
1266 }
1267
1268 if (bsp == NULL) /* have to find a new bioseq */
1269 {
1270 for (i = 0; i < numloc; i++)
1271 {
1272 if (locs[i] != NULL)
1273 {
1274 bsp = BioseqFind(SeqLocId(locs[i]));
1275 pick = i;
1276 i++;
1277 break;
1278 }
1279 }
1280 }
1281
1282 while (i < numloc)
1283 {
1284 if (SeqLocOrder(locs[pick], locs[i], bsp) == (-1)) /* it's after */
1285 pick = i;
1286 i++;
1287 }
1288
1289 theorder[ctr] = locs[pick];
1290 locs[pick] = NULL;
1291 }
1292
1293 MemFree(locs); /* finished with original list */
1294
1295 /*************************************************************
1296 *
1297 * Now do the actual deletions
1298 *
1299 *************************************************************/
1300
1301
1302 for (ctr = 0; ctr < numloc; ctr++)
1303 {
1304 tstart = SeqLocStart(theorder[ctr]);
1305 tstop = SeqLocStop(theorder[ctr]);
1306 BioseqDeleteEx(SeqLocId(theorder[ctr]), tstart, tstop, do_feat, do_split, mark_deleted_feat);
1307 }
1308
1309 MemFree(theorder);
1310
1311 return retval;
1312 }
1313
1314
SeqDeleteByLoc(SeqLocPtr slp,Boolean do_feat,Boolean do_split)1315 NLM_EXTERN Boolean LIBCALL SeqDeleteByLoc (SeqLocPtr slp, Boolean do_feat, Boolean do_split)
1316 {
1317 return SeqDeleteByLocEx (slp, do_feat, do_split, FALSE);
1318 }
1319
1320
1321 /*****************************************************************************
1322 *
1323 * SeqFeatDelete()
1324 * 0 = no changes made to location or product
1325 * 1 = changes made but feature still has some location
1326 * 2 = all of sfp->location in deleted interval
1327 *
1328 * if (merge)
1329 * 1) correct numbers > to by subtraction
1330 * 2) do not split intervals spanning the deletion
1331 * else
1332 * 1) do not change numbers > to
1333 * 2) split intervals which span the deletions
1334 *
1335 *****************************************************************************/
SeqFeatDelete(SeqFeatPtr sfp,SeqIdPtr target,Int4 from,Int4 to,Boolean merge)1336 NLM_EXTERN Int2 LIBCALL SeqFeatDelete (SeqFeatPtr sfp, SeqIdPtr target, Int4 from, Int4 to, Boolean merge)
1337 {
1338 ValNode vn;
1339 SeqLocPtr tloc;
1340 SeqInt si;
1341 Boolean changed = FALSE, tmpbool = FALSE;
1342 CdRegionPtr crp;
1343 CodeBreakPtr cbp, prevcbp, nextcbp;
1344 RnaRefPtr rrp;
1345 tRNAPtr trp;
1346 Boolean partial5, partial3;
1347 Uint1 strand;
1348 BioseqPtr bsp;
1349 Int4 new_frame;
1350
1351 tloc = &vn;
1352 MemSet((Pointer)tloc, 0, sizeof(ValNode));
1353 MemSet((Pointer)&si, 0, sizeof(SeqInt));
1354 tloc->choice = SEQLOC_INT;
1355 tloc->data.ptrvalue = (Pointer)(&si);
1356 si.id = target;
1357 si.from = from;
1358 si.to = to;
1359
1360 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
1361 strand = SeqLocStrand (sfp->location);
1362 bsp = BioseqFindFromSeqLoc (sfp->location);
1363 sfp->location = SeqLocDelete(sfp->location, target, from, to, merge, &changed);
1364
1365 sfp->product = SeqLocDelete(sfp->product, target, from, to, merge, &changed);
1366
1367 if (sfp->location == NULL)
1368 return 2;
1369
1370 switch (sfp->data.choice)
1371 {
1372 case SEQFEAT_CDREGION: /* cdregion */
1373 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
1374 if (changed)
1375 {
1376 /* adjust frame */
1377 if ((strand == Seq_strand_minus && bsp != NULL && to == bsp->length - 1 && partial5)
1378 || (strand != Seq_strand_minus && from == 0 && partial5))
1379 {
1380 if (crp->frame == 0)
1381 {
1382 crp->frame = 1;
1383 }
1384 new_frame = crp->frame - ((to - from + 1) % 3);
1385 if (new_frame < 1)
1386 {
1387 new_frame += 3;
1388 }
1389 crp->frame = new_frame;
1390 }
1391 }
1392 /* fix code_break locations */
1393 prevcbp = NULL;
1394 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
1395 {
1396 nextcbp = cbp->next;
1397 cbp->loc = SeqLocDelete(cbp->loc, target, from, to, merge, &tmpbool);
1398 if (cbp->loc == NULL)
1399 {
1400 if (prevcbp != NULL)
1401 prevcbp->next = nextcbp;
1402 else
1403 crp->code_break = nextcbp;
1404 cbp->next = NULL;
1405 CodeBreakFree(cbp);
1406 }
1407 else
1408 prevcbp = cbp;
1409 }
1410 break;
1411 case SEQFEAT_RNA:
1412 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
1413 if (rrp->ext.choice == 2) /* tRNA */
1414 {
1415 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
1416 if (trp->anticodon != NULL)
1417 {
1418 trp->anticodon = SeqLocDelete(trp->anticodon, target, from, to, merge, &tmpbool);
1419 }
1420 }
1421 break;
1422 default:
1423 break;
1424 }
1425
1426 if (changed)
1427 {
1428 return 1;
1429 }
1430 else
1431 return 0;
1432 }
1433
1434 /*****************************************************************************
1435 *
1436 * SeqLocDelete()
1437 * returns altered head or NULL if nothing left.
1438 * sets changed=TRUE if all or part of loc is deleted
1439 * does NOT set changed if location coordinates are only moved
1440 * if (merge) then corrects coordinates upstream of to
1441 * else
1442 * splits intervals covering from-to, does not correct upstream of to
1443 *
1444 *****************************************************************************/
SeqLocDeleteEx(SeqLocPtr head,SeqIdPtr target,Int4 from,Int4 to,Boolean merge,BoolPtr changed,BoolPtr partial5,BoolPtr partial3)1445 NLM_EXTERN SeqLocPtr LIBCALL SeqLocDeleteEx (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3)
1446 {
1447 SeqIntPtr sip, sip2;
1448 SeqPntPtr spp;
1449 PackSeqPntPtr pspp, pspp2;
1450 SeqBondPtr sbp;
1451 SeqIdPtr sidp;
1452 SeqLocPtr slp, tmp, prev, next, thead;
1453 Int4 diff, numpnt, i, tpos;
1454 BioseqPtr bsp;
1455 Boolean part5, part3, first;
1456
1457 if ((head == NULL) || (target == NULL))
1458 return head;
1459
1460 head->next = NULL; /* caller maintains chains */
1461 diff = to - from + 1;
1462
1463 switch (head->choice)
1464 {
1465 case SEQLOC_BOND: /* bond -- 2 seqs */
1466 sbp = (SeqBondPtr)(head->data.ptrvalue);
1467 spp = sbp->a;
1468 if (SeqIdForSameBioseq(spp->id, target))
1469 {
1470 if (spp->point >= from)
1471 {
1472 if (spp->point <= to) /* delete it */
1473 {
1474 *changed = TRUE;
1475 sbp->a = SeqPntFree(spp);
1476 }
1477 else if (merge)
1478 spp->point -= diff;
1479 }
1480 }
1481 spp = sbp->b;
1482 if (spp != NULL)
1483 {
1484 if (SeqIdForSameBioseq(spp->id, target))
1485 {
1486 if (spp->point >= from)
1487 {
1488 if (spp->point <= to) /* delete it */
1489 {
1490 *changed = TRUE;
1491 sbp->b = SeqPntFree(spp);
1492 }
1493 else if (merge)
1494 spp->point -= diff;
1495 }
1496 }
1497 }
1498 if (sbp->a == NULL)
1499 {
1500 if (sbp->b != NULL) /* only a required */
1501 {
1502 sbp->a = sbp->b;
1503 sbp->b = NULL;
1504 }
1505 else
1506 {
1507 head = SeqLocFree(head);
1508 }
1509 }
1510 break;
1511 case SEQLOC_FEAT: /* feat -- can't track yet */
1512 case SEQLOC_NULL: /* NULL */
1513 case SEQLOC_EMPTY: /* empty */
1514 break;
1515 case SEQLOC_WHOLE: /* whole */
1516 sidp = (SeqIdPtr)(head->data.ptrvalue);
1517 if (SeqIdForSameBioseq(sidp, target))
1518 {
1519 bsp = BioseqFind(target);
1520 if (bsp != NULL) /* split it */
1521 {
1522 if ((from == 0) && (to >= (bsp->length - 1)))
1523 { /* complete delete */
1524 head = SeqLocFree(head);
1525 *changed = TRUE;
1526 break;
1527 }
1528
1529 if (! merge) /* split it up */
1530 {
1531 SeqIdFree(sidp);
1532 head->choice = SEQLOC_PACKED_INT;
1533 head->data.ptrvalue = NULL;
1534 slp = NULL;
1535 if (from != 0)
1536 {
1537 sip = SeqIntNew();
1538 sip->from = 0;
1539 sip->to = from - 1;
1540 sip->id = SeqIdDup(target);
1541 slp = ValNodeNew(NULL);
1542 slp->choice = SEQLOC_INT;
1543 slp->data.ptrvalue = sip;
1544 head->data.ptrvalue = slp;
1545 *changed = TRUE;
1546 }
1547 if (to < (bsp->length - 1))
1548 {
1549 sip = SeqIntNew();
1550 sip->from = to + 1;
1551 sip->to = bsp->length - 1;
1552 sip->id = SeqIdDup(target);
1553 tmp = ValNodeNew(NULL);
1554 tmp->choice = SEQLOC_INT;
1555 tmp->data.ptrvalue = sip;
1556 if (slp != NULL)
1557 slp->next = tmp;
1558 else
1559 head->data.ptrvalue = tmp;
1560 *changed = TRUE;
1561 }
1562
1563 }
1564 }
1565 }
1566 break;
1567 case SEQLOC_MIX: /* mix -- more than one seq */
1568 case SEQLOC_EQUIV: /* equiv -- ditto */
1569 case SEQLOC_PACKED_INT: /* packed int */
1570 prev = NULL;
1571 thead = NULL;
1572 part5 = FALSE;
1573 part3 = FALSE;
1574 first = TRUE;
1575 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
1576 {
1577 next = slp->next;
1578 tmp = SeqLocDeleteEx (slp, target, from, to, merge, changed, &part5, &part3);
1579 if (first) {
1580 if (partial5 != NULL) {
1581 *partial5 = part5;
1582 }
1583 }
1584 first = FALSE;
1585 if (tmp != NULL)
1586 {
1587 if (prev != NULL)
1588 {
1589 if ((merge) && (prev->choice == SEQLOC_INT) && (tmp->choice == SEQLOC_INT))
1590 {
1591 sip = (SeqIntPtr)(prev->data.ptrvalue);
1592 sip2 = (SeqIntPtr)(tmp->data.ptrvalue);
1593
1594 if (SeqIdForSameBioseq(sip->id, sip2->id))
1595 {
1596 /* merge intervals? */
1597 if ((sip->strand == Seq_strand_minus) &&
1598 (sip2->strand == Seq_strand_minus))
1599 {
1600 if (sip->from == (sip2->to + 1))
1601 {
1602 sip->from = sip2->from;
1603 sip->if_from = sip2->if_from;
1604 sip2->if_from = NULL;
1605 tmp = SeqLocFree(tmp);
1606 }
1607 }
1608 else if((sip->strand != Seq_strand_minus) &&
1609 (sip2->strand != Seq_strand_minus))
1610 {
1611 if (sip->to == (sip2->from - 1))
1612 {
1613 sip->to = sip2->to;
1614 sip->if_to = sip2->if_to;
1615 sip2->if_to = NULL;
1616 tmp = SeqLocFree(tmp);
1617 }
1618 }
1619 }
1620 }
1621 else if ((prev->choice == SEQLOC_NULL) && (tmp->choice == SEQLOC_NULL))
1622 {
1623 tmp = SeqLocFree(tmp);
1624 *changed = TRUE;
1625 }
1626 }
1627 else if (tmp->choice == SEQLOC_NULL)
1628 {
1629 tmp = SeqLocFree(tmp);
1630 *changed = TRUE;
1631 }
1632
1633 if (tmp != NULL) /* still have one? */
1634 {
1635 if (prev != NULL)
1636 prev->next = tmp;
1637 else
1638 thead = tmp;
1639 prev = tmp;
1640 }
1641 }
1642 else
1643 *changed = TRUE;
1644 }
1645 if (partial3 != NULL) {
1646 *partial3 = part3;
1647 }
1648 if (prev != NULL)
1649 {
1650 if (prev->choice == SEQLOC_NULL) /* ends with NULL */
1651 {
1652 prev = NULL;
1653 for (slp = thead; slp->next != NULL; slp = slp->next)
1654 prev = slp;
1655 if (prev != NULL)
1656 {
1657 prev->next = NULL;
1658 SeqLocFree(slp);
1659 }
1660 else
1661 {
1662 thead = SeqLocFree(thead);
1663 }
1664 *changed = TRUE;
1665 }
1666 }
1667 head->data.ptrvalue = thead;
1668 if (thead == NULL)
1669 head = SeqLocFree(head);
1670 break;
1671 case SEQLOC_INT: /* int */
1672 sip = (SeqIntPtr)(head->data.ptrvalue);
1673 if (SeqIdForSameBioseq(sip->id, target))
1674 {
1675 if (sip->to < from) /* completely before cut */
1676 break;
1677
1678 /* completely contained in cut */
1679 if ((sip->from >= from) && (sip->to <= to))
1680 {
1681 head = SeqLocFree(head);
1682 *changed = TRUE;
1683 break;
1684 }
1685
1686 if (sip->from > to) /* completely past cut */
1687 {
1688 if (merge)
1689 {
1690 sip->from -= diff;
1691 sip->to -= diff;
1692 }
1693 break;
1694 }
1695 /* overlap here */
1696
1697 if (sip->to > to)
1698 {
1699 if (merge)
1700 sip->to -= diff;
1701 }
1702 else /* to inside cut, so partial delete */
1703 {
1704 sip->to = from - 1;
1705 *changed = TRUE;
1706 if (sip->strand == Seq_strand_minus) {
1707 if (partial5 != NULL) {
1708 *partial5 = TRUE;
1709 }
1710 } else if (partial3 != NULL) {
1711 *partial3 = TRUE;
1712 }
1713 }
1714
1715 if (sip->from >= from) /* from inside cut, partial del */
1716 {
1717 *changed = TRUE;
1718 sip->from = to + 1;
1719 if (merge)
1720 sip->from -= diff;
1721 if (sip->strand == Seq_strand_minus) {
1722 if (partial3 != NULL) {
1723 *partial3 = TRUE;
1724 }
1725 } else if (partial5 != NULL) {
1726 *partial5 = TRUE;
1727 }
1728 }
1729
1730 if (merge)
1731 break;
1732
1733 /* interval spans cut.. only in non-merge */
1734 /* have to split */
1735
1736 if ((sip->from < from) && (sip->to > to))
1737 {
1738 *changed = TRUE;
1739 head->choice = SEQLOC_PACKED_INT;
1740 head->data.ptrvalue = NULL;
1741 tmp = ValNodeNew(NULL);
1742 tmp->choice = SEQLOC_INT;
1743 tmp->data.ptrvalue = sip;
1744
1745 sip2 = SeqIntNew();
1746 sip2->from = to + 1;
1747 sip2->to = sip->to;
1748 sip2->strand = sip->strand;
1749 sip2->if_to = sip->if_to;
1750 sip2->id = SeqIdDup(target);
1751 slp = ValNodeNew(NULL);
1752 slp->choice = SEQLOC_INT;
1753 slp->data.ptrvalue = sip2;
1754
1755 sip->if_to = NULL;
1756 sip->to = from - 1;
1757
1758 if (sip->strand == Seq_strand_minus)
1759 {
1760 head->data.ptrvalue = slp;
1761 slp->next = tmp;
1762 }
1763 else
1764 {
1765 head->data.ptrvalue = tmp;
1766 tmp->next = slp;
1767 }
1768
1769 }
1770
1771 }
1772 break;
1773 case SEQLOC_PNT: /* pnt */
1774 spp = (SeqPntPtr)(head->data.ptrvalue);
1775 if (SeqIdForSameBioseq(spp->id, target))
1776 {
1777 if ((spp->point >= from) && (spp->point <= to))
1778 {
1779 head = SeqLocFree(head);
1780 *changed = TRUE;
1781 }
1782 else if (spp->point > to)
1783 {
1784 if (merge)
1785 spp->point -= diff;
1786 }
1787 }
1788 break;
1789 case SEQLOC_PACKED_PNT: /* packed pnt */
1790 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
1791 if (SeqIdForSameBioseq(pspp->id, target))
1792 {
1793 numpnt = PackSeqPntNum(pspp);
1794 pspp2 = PackSeqPntNew();
1795 head->data.ptrvalue = pspp2;
1796 for (i = 0; i < numpnt; i++)
1797 {
1798 tpos = PackSeqPntGet(pspp, i);
1799 if (tpos < from)
1800 PackSeqPntPut(pspp2, tpos);
1801 else
1802 {
1803 if (tpos > to)
1804 {
1805 if (merge)
1806 tpos -= diff;
1807 PackSeqPntPut(pspp2, tpos);
1808 }
1809 else
1810 *changed = TRUE;
1811 }
1812 }
1813 pspp2->id = pspp->id;
1814 pspp->id = NULL;
1815 pspp2->fuzz = pspp->fuzz;
1816 pspp->fuzz = NULL;
1817 pspp2->strand = pspp->strand;
1818 PackSeqPntFree(pspp);
1819 numpnt = PackSeqPntNum(pspp2);
1820 if (! numpnt)
1821 head = SeqLocFree(head);
1822
1823 }
1824 break;
1825 default:
1826 break;
1827 }
1828
1829 return head;
1830 }
1831
SeqLocDelete(SeqLocPtr head,SeqIdPtr target,Int4 from,Int4 to,Boolean merge,BoolPtr changed)1832 NLM_EXTERN SeqLocPtr LIBCALL SeqLocDelete (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
1833
1834 {
1835 return SeqLocDeleteEx (head, target, from, to, merge, changed, NULL, NULL);
1836 }
1837
1838 typedef struct delstruct {
1839 SeqIdPtr sip;
1840 Int4 from, to;
1841 Boolean merge;
1842 } DelStruct, PNTR DelStructPtr;
1843
1844 NLM_EXTERN void DelFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
1845
DelFeat(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)1846 NLM_EXTERN void DelFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1847 {
1848 DelStructPtr dsp;
1849 BioseqPtr bsp;
1850 BioseqSetPtr bssp;
1851 SeqAnnotPtr sap, nextsap;
1852 SeqFeatPtr sfp, nextsfp;
1853 Pointer PNTR prevsap, PNTR prevsfp;
1854
1855 dsp = (DelStructPtr)data;
1856 if (IS_Bioseq(sep))
1857 {
1858 bsp = (BioseqPtr)(sep->data.ptrvalue);
1859 sap = bsp->annot;
1860 prevsap = (Pointer PNTR) &(bsp->annot);
1861 }
1862 else
1863 {
1864 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1865 sap = bssp->annot;
1866 prevsap = (Pointer PNTR) &(bssp->annot);
1867 }
1868
1869 while (sap != NULL)
1870 {
1871 nextsap = sap->next;
1872 if (sap->type == 1) /* feature table */
1873 {
1874 sfp = (SeqFeatPtr) sap->data;
1875 prevsfp = (Pointer PNTR) &(sap->data);
1876 while (sfp != NULL)
1877 {
1878 nextsfp = sfp->next;
1879 if (SeqFeatDelete(sfp, dsp->sip, dsp->from, dsp->to, dsp->merge) == 2)
1880 {
1881 /* location completely gone */
1882 *(prevsfp) = sfp->next;
1883 sfp->next = NULL;
1884 SeqFeatFree(sfp);
1885 } else {
1886 prevsfp = (Pointer PNTR) &(sfp->next);
1887 }
1888 sfp = nextsfp;
1889 }
1890 }
1891
1892 if (sap->data == NULL) /* all features deleted */
1893 {
1894 *(prevsap) = sap->next;
1895 sap->next = NULL;
1896 SeqAnnotFree (sap);
1897 } else {
1898 prevsap = (Pointer PNTR) &(sap->next);
1899 }
1900
1901 sap = nextsap;
1902 }
1903
1904 return;
1905 }
1906
1907
/*****************************************************************************
*
*   MarkDelFeat (sep, data, index, indent)
*       SeqEntryExplore() callback; data points to a DelStruct.
*       Same traversal as DelFeat, but a feature whose location is
*         completely gone (SeqFeatDelete() returns 2) is left in place and
*         flagged by setting idx.deleteme to TRUE.
*       index and indent are not used.
*
*****************************************************************************/
static void MarkDelFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    DelStructPtr request;
    BioseqPtr seq;
    BioseqSetPtr set;
    SeqAnnotPtr annot;
    SeqFeatPtr feat;

    request = (DelStructPtr)data;

    if (! IS_Bioseq(sep))
    {
        set = (BioseqSetPtr)(sep->data.ptrvalue);
        annot = set->annot;
    }
    else
    {
        seq = (BioseqPtr)(sep->data.ptrvalue);
        annot = seq->annot;
    }

    for (; annot != NULL; annot = annot->next)
    {
        if (annot->type != 1)   /* not a feature table */
            continue;

        for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat->next)
        {
            if (SeqFeatDelete(feat, request->sip, request->from, request->to, request->merge) == 2)
            {
                /* location completely gone */
                feat->idx.deleteme = TRUE;
            }
        }
    }
}
1947
1948
1949 /*****************************************************************************
1950 *
1951 * SeqEntryDelFeat(sep, id, from, to, do_split)
1952 * Deletes or truncates features on Bioseq (id) in the range
1953 * from-to, inclusive
1954 *
1955 * Moves features > to left to account for decrease in length
1956 * if do_split, breaks intervals across the deletion
1957 * else just reduces their size
1958 *
1959 * If sep == NULL, then calls SeqEntryFind(id) to set scope to look
1960 * for features.
1961 *
1962 *****************************************************************************/
SeqEntryDelFeatEx(SeqEntryPtr sep,SeqIdPtr sip,Int4 from,Int4 to,Boolean do_split,Boolean mark_deleted_feat)1963 NLM_EXTERN Boolean LIBCALL SeqEntryDelFeatEx (SeqEntryPtr sep, SeqIdPtr sip, Int4 from, Int4 to, Boolean do_split, Boolean mark_deleted_feat)
1964 {
1965
1966 DelStruct ds;
1967
1968 if (sip == NULL)
1969 return FALSE;
1970
1971 if (sep == NULL)
1972 sep = SeqEntryFind(sip);
1973
1974 if (sep == NULL) return FALSE;
1975
1976 ds.sip = sip;
1977 ds.from = from;
1978 ds.to = to;
1979 if (do_split)
1980 ds.merge = FALSE;
1981 else
1982 ds.merge = TRUE;
1983
1984 if (mark_deleted_feat) {
1985 SeqEntryExplore (sep, (Pointer)(&ds), MarkDelFeat);
1986 } else {
1987 SeqEntryExplore(sep, (Pointer)(&ds), DelFeat);
1988 }
1989
1990 return TRUE;
1991 }
1992
1993
SeqEntryDelFeat(SeqEntryPtr sep,SeqIdPtr sip,Int4 from,Int4 to,Boolean do_split)1994 NLM_EXTERN Boolean LIBCALL SeqEntryDelFeat (SeqEntryPtr sep, SeqIdPtr sip, Int4 from, Int4 to, Boolean do_split)
1995 {
1996 return SeqEntryDelFeatEx (sep, sip, from, to, do_split, FALSE);
1997 }
1998
1999
2000 /*****************************************************************************
2001 *
2002 * DescrToFeatures(sep)
2003 * Moves all Seqdescr to features in sep where possible
2004 *
2005 *****************************************************************************/
2006
/* CopyDeltaSeqPtrChain: builds a new chain holding a copy (made with
   AsnIoMemCopy) of each element of dsp.  Choice 1 elements are copied as
   Seq-locs and choice 2 elements as Seq-literals; elements with any other
   choice are left out.  Returns the new chain, or NULL if nothing was
   copied. */
static DeltaSeqPtr CopyDeltaSeqPtrChain (DeltaSeqPtr dsp)
{
    DeltaSeqPtr copy_head = NULL;
    DeltaSeqPtr node;
    SeqLocPtr loc_copy;
    SeqLitPtr lit_copy;

    for (node = dsp; node != NULL; node = node->next) {
        switch (node->choice) {
            case 1:
                loc_copy = AsnIoMemCopy ((SeqLocPtr) node->data.ptrvalue, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
                ValNodeAddPointer (&copy_head, 1, loc_copy);
                break;
            case 2:
                lit_copy = AsnIoMemCopy((SeqLitPtr) node->data.ptrvalue, (AsnReadFunc) SeqLitAsnRead, (AsnWriteFunc) SeqLitAsnWrite);
                ValNodeAddPointer (&copy_head, 2, lit_copy);
                break;
            default:
                break;
        }
    }

    return copy_head;
}
2030
2031 /*****************************************************************************
2032 *
2033 * BioseqCopy(newid, sourceid, from, to, strand, do_feat)
2034 * Creates a new Bioseq from sourceid in the range from-to inclusive.
2035 * If strand==Seq_strand_minus, reverse complements the sequence in
2036 * the copy and (if do_feat) corrects the feature table appropriately.
2037 * Names new Bioseq as newid, if not NULL
2038 * else Creates seqid.local = "Clipboard" if newid is NULL
2039 * If do_feat == TRUE copies appropriate region of feature table from
2040 * sourceid to new copy using SeqFeatsCopy().
2041 *
2042 *****************************************************************************/
BioseqCopyEx(SeqIdPtr newid,BioseqPtr oldbsp,Int4 from,Int4 to,Uint1 strand,Boolean do_feat)2043 NLM_EXTERN BioseqPtr LIBCALL BioseqCopyEx (SeqIdPtr newid, BioseqPtr oldbsp, Int4 from, Int4 to,
2044 Uint1 strand, Boolean do_feat)
2045 {
2046 BioseqPtr newbsp=NULL, tmpbsp;
2047 SeqPortPtr spp=NULL;
2048 ByteStorePtr bsp;
2049 Uint1 seqtype;
2050 ValNodePtr tmp;
2051 ObjectIdPtr oid;
2052 Int4 len, i;
2053 Int2 residue;
2054 ValNode fake;
2055 SeqLocPtr the_segs, head, curr;
2056 Boolean handled = FALSE, split;
2057 SeqFeatPtr sfp, newsfp, lastsfp;
2058 DeltaSeqPtr dsp;
2059 SeqEntryPtr oldscope;
2060
2061
2062 if ((oldbsp == NULL) || (from < 0)) return FALSE;
2063
2064 len = to - from + 1;
2065 if (len <= 0) return NULL;
2066
2067 newbsp = BioseqNew();
2068 if (newid != NULL)
2069 newbsp->id = SeqIdDup(newid);
2070 else
2071 {
2072 tmp = ValNodeNew(NULL);
2073 tmp->choice = SEQID_LOCAL;
2074 oid = ObjectIdNew();
2075 tmp->data.ptrvalue = (Pointer)oid;
2076 oid->str = StringSave("Clipboard");
2077 tmpbsp = BioseqFind(tmp); /* old clipboard present? */
2078 if (tmpbsp == NULL) {
2079 oldscope = SeqEntrySetScope (NULL);
2080 if (oldscope != NULL) {
2081 tmpbsp = BioseqFind(tmp);
2082 SeqEntrySetScope (oldscope);
2083 }
2084 }
2085 if (tmpbsp != NULL)
2086 BioseqFree(tmpbsp);
2087 newbsp->id = tmp;
2088 }
2089
2090 newbsp->repr = oldbsp->repr;
2091 newbsp->mol = oldbsp->mol;
2092 newbsp->length = len;
2093 newbsp->seq_ext_type = oldbsp->seq_ext_type;
2094
2095 if (newbsp->repr == Seq_repr_virtual)
2096 handled = TRUE; /* no more to do */
2097
2098 if (((newbsp->repr == Seq_repr_raw) ||
2099 (newbsp->repr == Seq_repr_const)) && newbsp->seq_data_type != Seq_code_gap)
2100 {
2101 if (ISA_aa(newbsp->mol))
2102 {
2103 seqtype = Seq_code_ncbieaa;
2104 }
2105 else
2106 {
2107 seqtype = Seq_code_iupacna;
2108 }
2109 newbsp->seq_data_type = seqtype;
2110 bsp = BSNew(len);
2111 if (bsp == NULL) goto erret;
2112
2113 newbsp->seq_data = (SeqDataPtr) bsp;
2114 spp = SeqPortNew(oldbsp, from, to, strand, seqtype);
2115 if (spp == NULL) goto erret;
2116
2117 for (i = 0; i < len; i++)
2118 {
2119 residue = SeqPortGetResidue(spp);
2120 if (! IS_residue(residue)) goto erret;
2121 BSPutByte(bsp, residue);
2122 }
2123
2124 SeqPortFree(spp);
2125 handled = TRUE;
2126 }
2127
2128 if ((newbsp->repr == Seq_repr_seg) ||
2129 (newbsp->repr == Seq_repr_ref) ||
2130 (newbsp->repr == Seq_repr_delta))
2131 {
2132 if (newbsp->repr == Seq_repr_seg) /* segmented */
2133 {
2134 fake.choice = SEQLOC_MIX; /* make SEQUENCE OF Seq-loc, into one */
2135 fake.data.ptrvalue = oldbsp->seq_ext;
2136 fake.next = NULL;
2137 the_segs = (SeqLocPtr)&fake;
2138 head = SeqLocCopyPart (the_segs, from, to, strand, FALSE, NULL, NULL);
2139 }
2140 else if (newbsp->repr == Seq_repr_ref) /* reference: is a Seq-loc */
2141 {
2142 head = SeqLocCopyPart ((SeqLocPtr)(oldbsp->seq_ext), from, to,
2143 strand, TRUE, NULL, NULL);
2144 }
2145 else if (newbsp->repr == Seq_repr_delta)
2146 {
2147 dsp = (DeltaSeqPtr)(oldbsp->seq_ext); /* real data is here */
2148
2149 head = CopyDeltaSeqPtrChain (dsp);
2150 }
2151
2152 newbsp->seq_ext = (Pointer)head;
2153 handled = TRUE;
2154 }
2155
2156 if (newbsp->repr == Seq_repr_map)
2157 {
2158 lastsfp = NULL;
2159 for (sfp = (SeqFeatPtr)(oldbsp->seq_ext); sfp != NULL; sfp = sfp->next)
2160 {
2161 split = FALSE;
2162 curr = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
2163 if (curr != NULL) /* got one */
2164 {
2165 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2166 SeqLocFree(newsfp->location);
2167 newsfp->location = curr;
2168 if (split)
2169 newsfp->partial = TRUE;
2170 if (lastsfp == NULL) /* first one */
2171 newbsp->seq_ext = (Pointer)newsfp;
2172 else
2173 lastsfp->next = newsfp;
2174 lastsfp = newsfp;
2175 }
2176 }
2177 handled = TRUE;
2178 }
2179
2180
2181 if (! handled) goto erret;
2182
2183 /* get descriptors */
2184 /* get features */
2185
2186 if (do_feat)
2187 SeqFeatsCopy (newbsp, oldbsp, from, to, strand);
2188
2189 return newbsp;
2190
2191 erret:
2192 BioseqFree(newbsp);
2193 SeqPortFree(spp);
2194 return NULL;
2195 }
2196
BioseqCopy(SeqIdPtr newid,SeqIdPtr sourceid,Int4 from,Int4 to,Uint1 strand,Boolean do_feat)2197 NLM_EXTERN BioseqPtr LIBCALL BioseqCopy (SeqIdPtr newid, SeqIdPtr sourceid, Int4 from, Int4 to,
2198 Uint1 strand, Boolean do_feat)
2199 {
2200 BioseqPtr oldbsp;
2201 SeqEntryPtr oldscope;
2202
2203 if ((sourceid == NULL) || (from < 0)) return FALSE;
2204
2205 oldbsp = BioseqFind(sourceid);
2206 if (oldbsp == NULL) {
2207 oldscope = SeqEntrySetScope (NULL);
2208 if (oldscope != NULL) {
2209 oldbsp = BioseqFind(sourceid);
2210 SeqEntrySetScope (oldscope);
2211 }
2212 }
2213 if (oldbsp == NULL) return NULL;
2214
2215 return BioseqCopyEx (newid, oldbsp, from, to, strand, do_feat);
2216 }
2217
2218 /*****************************************************************************
2219 *
2220 * SeqLocCopyPart (the_segs, from, to, strand, group, first_segp, last_segp)
2221 * cuts out from the_segs the part from offset from to offset to
2222 * reverse complements resulting seqloc if strand == Seq_strand_minus
2223 * if (group) puts resulting intervals into a new Seq-loc (of type
2224 * PACKED_INT if no SEQLOC_NULL, else SEQLOC_MIX).
2225 * Currently this always makes intervals or nulls. Is really for segmented and
2226 * reference sequence extensions
2227 * If first_segp and last_segp are not NULL, then they are filled in with the
2228 * ordinal number of the source segments that remain in the copy, based
2229 * on SeqLocFindNext, where 1 is the first one. Thus if the third and
2230 * fourth segments were copied, first is 3 and last is 4. If the
2231 * location was reverse complemented, first is 4 and last is 3.
2232 *
2233 *****************************************************************************/
SeqLocCopyPart(SeqLocPtr the_segs,Int4 from,Int4 to,Uint1 strand,Boolean group,Int2Ptr first_segp,Int2Ptr last_segp)2234 NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyPart (SeqLocPtr the_segs, Int4 from, Int4 to, Uint1 strand,
2235 Boolean group, Int2Ptr first_segp, Int2Ptr last_segp)
2236 {
2237 SeqLocPtr currseg, newhead, head, prev, curr, last;
2238 Int2 numloc, first_seg = 0, last_seg = 0, seg_ctr = 0;
2239 Int4 oldpos, tlen, tfrom, tto, tstart, tstop, xfrom, xto;
2240 Uint1 tstrand;
2241 SeqIdPtr tid;
2242 SeqIntPtr sip;
2243 Boolean done, started, wasa_null, hada_null;
2244 BioseqPtr bsp;
2245
2246 if (the_segs == NULL) return NULL;
2247 if ((from < 0) || (to < 0)) return NULL;
2248
2249 currseg = NULL;
2250 oldpos = 0; /* position in old sequence */
2251 done = FALSE;
2252 started = FALSE;
2253 head = NULL;
2254 prev = NULL;
2255 numloc = 0;
2256 wasa_null = FALSE;
2257 hada_null = FALSE;
2258 while ((oldpos <= to) && ((currseg = SeqLocFindNext(the_segs, currseg)) != NULL))
2259 {
2260 seg_ctr++;
2261 tlen = SeqLocLen(currseg);
2262 tid = SeqLocId(currseg);
2263 if (tlen < 0) {
2264 bsp = BioseqLockById (tid); /* only necessary for locations of type WHOLE */
2265 tlen = SeqLocLen (currseg);
2266 BioseqUnlock (bsp);
2267 }
2268 tstrand = SeqLocStrand(currseg);
2269 tfrom = SeqLocStart(currseg);
2270 tto = SeqLocStop(currseg);
2271
2272 if (! started)
2273 {
2274 wasa_null = FALSE;
2275 if (((oldpos + tlen - 1) >= from) &&
2276 (currseg->choice != SEQLOC_NULL))
2277 {
2278 tstart = from - oldpos;
2279 started = TRUE;
2280 first_seg = seg_ctr;
2281 }
2282 else
2283 tstart = -1;
2284 }
2285 else
2286 {
2287 if (currseg->choice == SEQLOC_NULL)
2288 {
2289 wasa_null = TRUE;
2290 tstart = -1; /* skip it till later */
2291 }
2292 else
2293 tstart = 0;
2294 }
2295
2296 if (tstart >= 0) /* have a start */
2297 {
2298 if ((oldpos + tlen - 1) >= to)
2299 {
2300 done = TRUE; /* hit the end */
2301 tstop = ((oldpos + tlen - 1) - to);
2302 }
2303 else
2304 tstop = 0;
2305
2306 if (tstrand == Seq_strand_minus)
2307 {
2308 xfrom = tfrom + tstop;
2309 xto = tto - tstart;
2310 }
2311 else
2312 {
2313 xfrom = tfrom + tstart;
2314 xto = tto - tstop;
2315 }
2316
2317 sip = SeqIntNew();
2318 sip->id = SeqIdDup(tid);
2319 sip->strand = tstrand;
2320 sip->from = xfrom;
2321 sip->to = xto;
2322 if (wasa_null) /* previous SEQLOC_NULL */
2323 {
2324 curr = ValNodeAddInt(&head, SEQLOC_NULL, 0);
2325 numloc++;
2326 wasa_null = FALSE;
2327 hada_null = TRUE;
2328 }
2329 curr = ValNodeAddPointer(&head, SEQLOC_INT, (Pointer)sip);
2330 numloc++;
2331 last_seg = seg_ctr;
2332 }
2333
2334 oldpos += tlen;
2335 }
2336
2337 if (strand == Seq_strand_minus) /* reverse order and complement */
2338 {
2339 newhead = NULL;
2340 last = NULL;
2341 while (head != NULL)
2342 {
2343 prev = NULL;
2344 for (curr = head; curr->next != NULL; curr = curr->next)
2345 prev = curr;
2346 if (prev != NULL)
2347 prev->next = NULL;
2348 else
2349 head = NULL;
2350
2351 if (newhead == NULL)
2352 newhead = curr;
2353 else
2354 last->next = curr;
2355 last = curr;
2356 if (curr->choice == SEQLOC_INT)
2357 {
2358 sip = (SeqIntPtr)(curr->data.ptrvalue);
2359 sip->strand = StrandCmp(sip->strand);
2360 }
2361 }
2362
2363 head = newhead;
2364 seg_ctr = last_seg;
2365 last_seg = first_seg;
2366 first_seg = seg_ctr;
2367 }
2368
2369 if ((numloc) && (group))
2370 {
2371 curr = ValNodeNew(NULL);
2372 if (hada_null)
2373 curr->choice = SEQLOC_MIX;
2374 else
2375 curr->choice = SEQLOC_PACKED_INT;
2376 curr->data.ptrvalue = (Pointer)head;
2377 head = curr;
2378 }
2379
2380 if (first_segp != NULL)
2381 *first_segp = first_seg;
2382 if (last_segp != NULL)
2383 *last_segp = last_seg;
2384
2385 return head;
2386 }
2387
2388 /*****************************************************************************
2389 *
2390 * SeqFeatCopy(new, old, from, to, strand)
2391 *
2392 *****************************************************************************/
IndexedSeqFeatsCopy(BioseqPtr newbsp,BioseqPtr oldbsp,Int4 from,Int4 to,Uint1 strand)2393 static Int2 LIBCALL IndexedSeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand)
2394
2395 {
2396 Int2 ctr=0;
2397 SeqFeatPtr sfp, last=NULL, newsfp;
2398 SeqInt si;
2399 ValNode vn;
2400 ValNodePtr region;
2401 SeqLocPtr newloc;
2402 Boolean split = FALSE;
2403 SeqAnnotPtr sap = NULL, saptmp;
2404 CdRegionPtr crp;
2405 CodeBreakPtr cbp, prevcbp, nextcbp;
2406 RnaRefPtr rrp;
2407 tRNAPtr trp;
2408 SeqMgrFeatContext fcontext;
2409
2410 region = &vn;
2411 vn.choice = SEQLOC_INT;
2412 vn.data.ptrvalue = (Pointer)(&si);
2413 si.from = from;
2414 si.to = to;
2415 si.id = oldbsp->id;
2416 si.if_from = NULL;
2417 si.if_to = NULL;
2418
2419 sfp = NULL;
2420 while ((sfp = SeqMgrGetNextFeature (oldbsp, sfp, 0, 0, &fcontext)) != NULL)
2421 {
2422 /* can exit once past rightmost limit */
2423 if (fcontext.left > to) return ctr;
2424
2425 if (fcontext.right >= from && fcontext.left <= to) {
2426
2427 split = FALSE;
2428 newloc = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
2429 if (newloc != NULL) /* got one */
2430 {
2431 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2432 SeqLocFree(newsfp->location);
2433 newsfp->location = newloc;
2434 if (split)
2435 newsfp->partial = TRUE;
2436 if (last == NULL) /* first one */
2437 {
2438 sap = SeqAnnotNew();
2439 if (newbsp->annot == NULL)
2440 newbsp->annot = sap;
2441 else
2442 {
2443 for (saptmp = newbsp->annot; saptmp->next != NULL; saptmp = saptmp->next)
2444 continue;
2445 saptmp->next = sap;
2446 }
2447 sap->type = 1; /* feature table */
2448 sap->data = (Pointer)newsfp;
2449 }
2450 else
2451 last->next = newsfp;
2452 last = newsfp;
2453
2454 switch (newsfp->data.choice)
2455 {
2456 case SEQFEAT_CDREGION: /* cdregion */
2457 crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
2458 prevcbp = NULL;
2459 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
2460 {
2461 nextcbp = cbp->next;
2462 cbp->loc = SeqLocCopyRegion(newbsp->id, cbp->loc, oldbsp, from, to, strand, &split);
2463 if (cbp->loc == NULL)
2464 {
2465 if (prevcbp != NULL)
2466 prevcbp->next = nextcbp;
2467 else
2468 crp->code_break = nextcbp;
2469 cbp->next = NULL;
2470 CodeBreakFree(cbp);
2471 }
2472 else
2473 prevcbp = cbp;
2474 }
2475 break;
2476 case SEQFEAT_RNA:
2477 rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
2478 if (rrp->ext.choice == 2) /* tRNA */
2479 {
2480 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
2481 if (trp->anticodon != NULL)
2482 {
2483 trp->anticodon = SeqLocCopyRegion(newbsp->id, trp->anticodon, oldbsp, from, to, strand, &split);
2484 }
2485 }
2486 break;
2487 default:
2488 break;
2489 }
2490 }
2491 }
2492
2493 }
2494 return ctr;
2495 }
2496
SeqFeatsCopy(BioseqPtr newbsp,BioseqPtr oldbsp,Int4 from,Int4 to,Uint1 strand)2497 NLM_EXTERN Int2 LIBCALL SeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand)
2498 {
2499 Int2 ctr=0;
2500 BioseqContextPtr bcp = NULL;
2501 SeqFeatPtr sfp, last=NULL, newsfp;
2502 SeqInt si;
2503 ValNode vn;
2504 ValNodePtr region;
2505 SeqLocPtr newloc;
2506 Boolean split = FALSE;
2507 SeqAnnotPtr sap = NULL, saptmp;
2508 CdRegionPtr crp;
2509 CodeBreakPtr cbp, prevcbp, nextcbp;
2510 RnaRefPtr rrp;
2511 tRNAPtr trp;
2512 Uint2 entityID;
2513
2514 if (oldbsp == NULL) return ctr;
2515
2516 entityID = ObjMgrGetEntityIDForPointer (oldbsp);
2517 if (entityID > 0 && SeqMgrFeaturesAreIndexed (entityID)) {
2518 /* indexed version should be much faster */
2519 return IndexedSeqFeatsCopy (newbsp, oldbsp, from, to, strand);
2520 }
2521
2522 bcp = BioseqContextNew(oldbsp);
2523 if (bcp == NULL) return ctr;
2524
2525 region = &vn;
2526 vn.choice = SEQLOC_INT;
2527 vn.data.ptrvalue = (Pointer)(&si);
2528 si.from = from;
2529 si.to = to;
2530 si.id = oldbsp->id;
2531 si.if_from = NULL;
2532 si.if_to = NULL;
2533
2534 sfp = NULL;
2535 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
2536 {
2537 split = FALSE;
2538 newloc = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
2539 if (newloc != NULL) /* got one */
2540 {
2541 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2542 SeqLocFree(newsfp->location);
2543 newsfp->location = newloc;
2544 if (split)
2545 newsfp->partial = TRUE;
2546 if (last == NULL) /* first one */
2547 {
2548 sap = SeqAnnotNew();
2549 if (newbsp->annot == NULL)
2550 newbsp->annot = sap;
2551 else
2552 {
2553 for (saptmp = newbsp->annot; saptmp->next != NULL; saptmp = saptmp->next)
2554 continue;
2555 saptmp->next = sap;
2556 }
2557 sap->type = 1; /* feature table */
2558 sap->data = (Pointer)newsfp;
2559 }
2560 else
2561 last->next = newsfp;
2562 last = newsfp;
2563
2564 switch (newsfp->data.choice)
2565 {
2566 case SEQFEAT_CDREGION: /* cdregion */
2567 crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
2568 prevcbp = NULL;
2569 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
2570 {
2571 nextcbp = cbp->next;
2572 cbp->loc = SeqLocCopyRegion(newbsp->id, cbp->loc, oldbsp, from, to, strand, &split);
2573 if (cbp->loc == NULL)
2574 {
2575 if (prevcbp != NULL)
2576 prevcbp->next = nextcbp;
2577 else
2578 crp->code_break = nextcbp;
2579 cbp->next = NULL;
2580 CodeBreakFree(cbp);
2581 }
2582 else
2583 prevcbp = cbp;
2584 }
2585 break;
2586 case SEQFEAT_RNA:
2587 rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
2588 if (rrp->ext.choice == 2) /* tRNA */
2589 {
2590 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
2591 if (trp->anticodon != NULL)
2592 {
2593 trp->anticodon = SeqLocCopyRegion(newbsp->id, trp->anticodon, oldbsp, from, to, strand, &split);
2594 }
2595 }
2596 break;
2597 default:
2598 break;
2599 }
2600 }
2601
2602 }
2603 BioseqContextFree (bcp);
2604 return ctr;
2605 }
2606
2607
SeqLocCopyRegion(SeqIdPtr newid,SeqLocPtr head,BioseqPtr oldbsp,Int4 from,Int4 to,Uint1 strand,BoolPtr split)2608 NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyRegion(SeqIdPtr newid, SeqLocPtr head, BioseqPtr oldbsp,
2609 Int4 from, Int4 to, Uint1 strand, BoolPtr split)
2610 {
2611 SeqLocPtr newhead = NULL, tmp, slp, prev, next, thead;
2612 SeqIntPtr sip, sip2;
2613 SeqPntPtr spp, spp2;
2614 PackSeqPntPtr pspp, pspp2;
2615 SeqBondPtr sbp, sbp2;
2616 SeqIdPtr sidp, oldids;
2617 Int4 numpnt, i, tpos, len, intcnt, othercnt;
2618 Boolean dropped_one;
2619 IntFuzzPtr ifp;
2620 ValNode vn;
2621
2622 if ((head == NULL) || (oldbsp == NULL)) return NULL;
2623
2624 oldids = oldbsp->id;
2625 len = to - from + 1;
2626 switch (head->choice)
2627 {
2628 case SEQLOC_BOND: /* bond -- 2 seqs */
2629 sbp2 = NULL;
2630 sbp = (SeqBondPtr)(head->data.ptrvalue);
2631 vn.choice = SEQLOC_PNT;
2632 vn.data.ptrvalue = sbp->a;
2633 vn.next = NULL;
2634 tmp = SeqLocCopyRegion(newid, (SeqLocPtr)(&vn), oldbsp, from, to, strand, split);
2635 if (tmp != NULL)
2636 {
2637 sbp2 = SeqBondNew();
2638 sbp2->a = (SeqPntPtr)(tmp->data.ptrvalue);
2639 MemFree(tmp);
2640 }
2641 if (sbp->b != NULL)
2642 {
2643 vn.data.ptrvalue = sbp->b;
2644 tmp = SeqLocCopyRegion(newid, (SeqLocPtr)(&vn), oldbsp, from, to, strand, split);
2645 if (tmp != NULL)
2646 {
2647 if (sbp2 == NULL)
2648 {
2649 sbp2 = SeqBondNew();
2650 sbp2->a = (SeqPntPtr)(tmp->data.ptrvalue);
2651 }
2652 else
2653 sbp2->b = (SeqPntPtr)(tmp->data.ptrvalue);
2654 MemFree(tmp);
2655 }
2656 }
2657 if (sbp2 != NULL)
2658 {
2659 newhead = ValNodeNew(NULL);
2660 newhead->choice = SEQLOC_BOND;
2661 newhead->data.ptrvalue = sbp2;
2662 if ((sbp->b != NULL) && (sbp2->b == NULL))
2663 *split = TRUE;
2664 }
2665 break;
2666 case SEQLOC_FEAT: /* feat -- can't track yet */
2667 case SEQLOC_NULL: /* NULL */
2668 case SEQLOC_EMPTY: /* empty */
2669 break;
2670 case SEQLOC_WHOLE: /* whole */
2671 sidp = (SeqIdPtr)(head->data.ptrvalue);
2672 if (SeqIdIn(sidp, oldids))
2673 {
2674 if ((from != 0) || (to != (oldbsp->length - 1)))
2675 {
2676 *split = TRUE;
2677 }
2678 newhead = ValNodeNew(NULL);
2679 sip2 = SeqIntNew();
2680 sip2->id = SeqIdDup(newid);
2681 sip2->from = 0;
2682 sip2->to = to - from;
2683 newhead->choice = SEQLOC_INT;
2684 newhead->data.ptrvalue = (Pointer)sip2;
2685 if (strand == Seq_strand_minus)
2686 {
2687 sip2->strand = Seq_strand_minus;
2688 }
2689 else if (sip2->strand == Seq_strand_minus)
2690 {
2691 sip2->strand = strand;
2692 }
2693 }
2694 break;
2695 case SEQLOC_EQUIV: /* does it stay equiv? */
2696 case SEQLOC_MIX: /* mix -- more than one seq */
2697 case SEQLOC_PACKED_INT: /* packed int */
2698 prev = NULL;
2699 thead = NULL;
2700 dropped_one = FALSE;
2701 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
2702 {
2703 next = slp->next;
2704 tmp = SeqLocCopyRegion(newid, slp, oldbsp, from, to, strand, split);
2705 if (tmp != NULL)
2706 {
2707 if (prev != NULL)
2708 {
2709 if ((prev->choice == SEQLOC_INT) && (tmp->choice == SEQLOC_INT))
2710 {
2711 sip = (SeqIntPtr)(prev->data.ptrvalue);
2712 sip2 = (SeqIntPtr)(tmp->data.ptrvalue);
2713
2714 if ((sip->strand == Seq_strand_minus) &&
2715 (sip2->strand == Seq_strand_minus))
2716 {
2717 if (sip->from == (sip2->to + 1))
2718 {
2719 sip->from = sip2->from;
2720 sip->if_from = sip2->if_from;
2721 sip2->if_from = NULL;
2722 tmp = SeqLocFree(tmp);
2723 }
2724 }
2725 else if((sip->strand != Seq_strand_minus) &&
2726 (sip2->strand != Seq_strand_minus))
2727 {
2728 if (sip->to == (sip2->from - 1))
2729 {
2730 sip->to = sip2->to;
2731 sip->if_to = sip2->if_to;
2732 sip2->if_to = NULL;
2733 tmp = SeqLocFree(tmp);
2734 }
2735 }
2736 }
2737 else if ((prev->choice == SEQLOC_NULL) && (tmp->choice == SEQLOC_NULL))
2738 {
2739 tmp = SeqLocFree(tmp);
2740 dropped_one = TRUE;
2741 }
2742 }
2743 else if (tmp->choice == SEQLOC_NULL)
2744 {
2745 tmp = SeqLocFree(tmp);
2746 dropped_one = TRUE;
2747 }
2748
2749 if (tmp != NULL) /* still have one? */
2750 {
2751 if (prev != NULL)
2752 prev->next = tmp;
2753 else
2754 thead = tmp;
2755 prev = tmp;
2756 }
2757 else
2758 dropped_one = TRUE;
2759 }
2760 else
2761 dropped_one = TRUE;
2762 }
2763 if (prev != NULL)
2764 {
2765 if (prev->choice == SEQLOC_NULL) /* ends with NULL */
2766 {
2767 prev = NULL;
2768 for (slp = thead; slp->next != NULL; slp = slp->next)
2769 prev = slp;
2770 if (prev != NULL)
2771 {
2772 prev->next = NULL;
2773 SeqLocFree(slp);
2774 }
2775 else
2776 {
2777 thead = SeqLocFree(thead);
2778 }
2779 dropped_one = TRUE;
2780 }
2781 }
2782 if (thead != NULL)
2783 {
2784 if (dropped_one)
2785 *split = TRUE;
2786 intcnt = 0;
2787 othercnt = 0;
2788 for (slp = thead; slp != NULL; slp = slp->next)
2789 {
2790 if (slp->choice == SEQLOC_INT)
2791 intcnt++;
2792 else
2793 othercnt++;
2794 }
2795 if ((intcnt + othercnt) > 1)
2796 {
2797 newhead = ValNodeNew(NULL);
2798 if (head->choice == SEQLOC_EQUIV)
2799 newhead->choice = SEQLOC_EQUIV;
2800 else
2801 {
2802 if (othercnt == 0)
2803 newhead->choice = SEQLOC_PACKED_INT;
2804 else
2805 newhead->choice = SEQLOC_MIX;
2806 }
2807
2808 newhead->data.ptrvalue = (Pointer)thead;
2809 }
2810 else /* only one SeqLoc left */
2811 newhead = thead;
2812
2813 }
2814 break;
2815 case SEQLOC_INT: /* int */
2816 sip = (SeqIntPtr)(head->data.ptrvalue);
2817 if (SeqIdIn(sip->id, oldids))
2818 {
2819 if (sip->to < from) /* completely before cut */
2820 break;
2821 if (sip->from > to) /* completely after cut */
2822 break;
2823
2824 sip2 = SeqIntNew();
2825 sip2->id = SeqIdDup(newid);
2826 sip2->strand = sip->strand;
2827
2828 if (sip->to > to)
2829 {
2830 sip2->to = to;
2831 *split = TRUE;
2832 ifp = IntFuzzNew();
2833 ifp->choice = 4; /* lim */
2834 ifp->a = 1; /* greater than */
2835 sip2->if_to = ifp;
2836 }
2837 else
2838 {
2839 sip2->to = sip->to;
2840 if (sip->if_to != NULL)
2841 {
2842 ifp = IntFuzzNew();
2843 MemCopy((Pointer)ifp, (Pointer)(sip->if_to), sizeof(IntFuzz));
2844 sip2->if_to = ifp;
2845 }
2846 }
2847
2848 if (sip->from < from)
2849 {
2850 sip2->from = from;
2851 *split = TRUE;
2852 ifp = IntFuzzNew();
2853 ifp->choice = 4; /* lim */
2854 ifp->a = 2; /* less than */
2855 sip2->if_from = ifp;
2856 }
2857 else
2858 {
2859 sip2->from = sip->from;
2860 if (sip->if_from != NULL)
2861 {
2862 ifp = IntFuzzNew();
2863 MemCopy((Pointer)ifp, (Pointer)(sip->if_from), sizeof(IntFuzz));
2864 sip2->if_from = ifp;
2865 }
2866 }
2867 /* set to region coordinates */
2868 sip2->from -= from;
2869 sip2->to -= from;
2870 IntFuzzClip(sip2->if_from, from, to, strand, split);
2871 IntFuzzClip(sip2->if_to, from, to, strand, split);
2872
2873 if (strand == Seq_strand_minus) /* rev comp */
2874 {
2875 sip2->strand = StrandCmp(sip2->strand);
2876 tpos = len - sip2->from - 1;
2877 sip2->from = len - sip2->to - 1;
2878 sip2->to = tpos;
2879 /* IntFuzz already complemented by IntFuzzClip */
2880 /* just switch order */
2881 ifp = sip2->if_from;
2882 sip2->if_from = sip2->if_to;
2883 sip2->if_to = ifp;
2884 }
2885
2886 newhead = ValNodeNew(NULL);
2887 newhead->choice = SEQLOC_INT;
2888 newhead->data.ptrvalue = (Pointer)sip2;
2889 }
2890 break;
2891 case SEQLOC_PNT: /* pnt */
2892 spp = (SeqPntPtr)(head->data.ptrvalue);
2893 if (SeqIdIn(spp->id, oldids))
2894 {
2895 if ((spp->point >= from) && (spp->point <= to))
2896 {
2897 spp2 = SeqPntNew();
2898 spp2->id = SeqIdDup(newid);
2899 spp2->point = spp->point - from;
2900 spp2->strand = spp->strand;
2901 if (spp->fuzz != NULL)
2902 {
2903 ifp = IntFuzzNew();
2904 spp2->fuzz = ifp;
2905 MemCopy((Pointer)ifp, (Pointer)spp->fuzz, sizeof(IntFuzz));
2906 IntFuzzClip(ifp, from, to, strand, split);
2907 }
2908 if (strand == Seq_strand_minus)
2909 {
2910 spp2->point = len - spp2->point - 1;
2911 spp2->strand = StrandCmp(spp->strand);
2912 }
2913 else if (spp2->strand == Seq_strand_minus)
2914 {
2915 spp2->strand = strand;
2916 }
2917 newhead = ValNodeNew(NULL);
2918 newhead->choice = SEQLOC_PNT;
2919 newhead->data.ptrvalue = (Pointer)spp2;
2920 }
2921 }
2922 break;
2923 case SEQLOC_PACKED_PNT: /* packed pnt */
2924 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
2925 if (SeqIdIn(pspp->id, oldids))
2926 {
2927 numpnt = PackSeqPntNum(pspp);
2928 pspp2 = PackSeqPntNew();
2929 pspp2->strand = pspp->strand;
2930 intcnt = 0; /* use for included points */
2931 othercnt = 0; /* use for exclued points */
2932 for (i = 0; i < numpnt; i++)
2933 {
2934 tpos = PackSeqPntGet(pspp, i);
2935 if ((tpos < from) || (tpos > to))
2936 {
2937 othercnt++;
2938 }
2939 else
2940 {
2941 intcnt++;
2942 PackSeqPntPut(pspp2, tpos - from);
2943 }
2944 }
2945 if (! intcnt) /* no points in region */
2946 {
2947 PackSeqPntFree(pspp2);
2948 break;
2949 }
2950 if (othercnt)
2951 *split = TRUE;
2952 if (pspp->fuzz != NULL)
2953 {
2954 ifp = IntFuzzNew();
2955 MemCopy((Pointer)ifp, (Pointer)(pspp->fuzz), sizeof(IntFuzz));
2956 }
2957 else
2958 ifp = NULL;
2959
2960 if (strand == Seq_strand_minus) /* rev comp */
2961 {
2962 IntFuzzClip(ifp, from, to, strand, split);
2963 pspp = pspp2;
2964 pspp2 = PackSeqPntNew();
2965 pspp2->strand = StrandCmp(pspp->strand);
2966 numpnt = PackSeqPntNum(pspp);
2967 numpnt--;
2968 for (i = numpnt; i >= 0; i--) /* reverse order */
2969 {
2970 tpos = PackSeqPntGet(pspp, i);
2971 PackSeqPntPut(pspp2, (len - tpos - 1));
2972 }
2973 PackSeqPntFree(pspp);
2974 }
2975 else if (pspp2->strand == Seq_strand_minus)
2976 {
2977 pspp2->strand = strand;
2978 }
2979 pspp2->id = SeqIdDup(newid);
2980 pspp2->fuzz = ifp;
2981
2982 newhead = ValNodeNew(NULL);
2983 newhead->choice = SEQLOC_PACKED_PNT;
2984 newhead->data.ptrvalue = (Pointer)pspp2;
2985
2986 }
2987 break;
2988 default:
2989 break;
2990
2991 }
2992 return newhead;
2993 }
2994
2995 /*****************************************************************************
2996 *
2997 * IntFuzzClip()
2998 * returns TRUE if clipped range values
2999 * in all cases, adjusts and/or complements IntFuzz
3000 * Designed for IntFuzz on SeqLocs
3001 *
3002 *****************************************************************************/
IntFuzzClip(IntFuzzPtr ifp,Int4 from,Int4 to,Uint1 strand,BoolPtr split)3003 NLM_EXTERN void LIBCALL IntFuzzClip(IntFuzzPtr ifp, Int4 from, Int4 to, Uint1 strand, BoolPtr split)
3004 {
3005 Int4 len, tmp;
3006
3007 if (ifp == NULL) return;
3008 len = to - from + 1;
3009 switch (ifp->choice)
3010 {
3011 case 1: /* plus/minus - no changes */
3012 case 3: /* percent - no changes */
3013 break;
3014 case 2: /* range */
3015 if (ifp->a > to) /* max */
3016 {
3017 *split = TRUE;
3018 ifp->a = to;
3019 }
3020 if (ifp->a < from)
3021 {
3022 *split = TRUE;
3023 ifp->a = from;
3024 }
3025 if (ifp->b > to) /* min */
3026 {
3027 *split = TRUE;
3028 ifp->b = to;
3029 }
3030 if (ifp->b < from)
3031 {
3032 *split = TRUE;
3033 ifp->b = from;
3034 }
3035 ifp->a -= from; /* adjust to window */
3036 ifp->b -= to;
3037 if (strand == Seq_strand_minus)
3038 {
3039 tmp = len - ifp->a; /* reverse/complement */
3040 ifp->a = len - ifp->b;
3041 ifp->b = tmp;
3042 }
3043 break;
3044 case 4: /* lim */
3045 if (strand == Seq_strand_minus) /* reverse/complement */
3046 {
3047 switch (ifp->a)
3048 {
3049 case 1: /* greater than */
3050 ifp->a = 2;
3051 break;
3052 case 2: /* less than */
3053 ifp->a = 1;
3054 break;
3055 case 3: /* to right of residue */
3056 ifp->a = 4;
3057 break;
3058 case 4: /* to left of residue */
3059 ifp->a = 3;
3060 break;
3061 default:
3062 break;
3063 }
3064 }
3065 break;
3066 }
3067 return;
3068 }
3069
3070 extern void
AdjustFeaturesForInsertion(BioseqPtr tobsp,SeqIdPtr to_id,Int4 pos,Int4 len,Boolean do_split)3071 AdjustFeaturesForInsertion
3072 (BioseqPtr tobsp,
3073 SeqIdPtr to_id,
3074 Int4 pos,
3075 Int4 len,
3076 Boolean do_split)
3077 {
3078 Uint2 entityID;
3079 SeqFeatPtr sfp;
3080 CdRegionPtr crp;
3081 CodeBreakPtr cbp, prevcbp, nextcbp;
3082 RnaRefPtr rrp;
3083 tRNAPtr trp;
3084 SeqMgrFeatContext fcontext;
3085 ValNodePtr prods, vnp;
3086 BioseqContextPtr bcp;
3087 Boolean partial5, partial3, changed;
3088
3089 if (tobsp == NULL || to_id == NULL)
3090 {
3091 return;
3092 }
3093
3094 entityID = ObjMgrGetEntityIDForPointer (tobsp);
3095 if (entityID > 0 && SeqMgrFeaturesAreIndexed (entityID)) {
3096 sfp = NULL;
3097 while ((sfp = SeqMgrGetNextFeature (tobsp, sfp, 0, 0, &fcontext)) != NULL)
3098 {
3099 if (len > 0) {
3100 sfp->location = SeqLocInsert (sfp->location, to_id,pos, len, do_split, NULL);
3101 } else {
3102 changed = FALSE;
3103 partial5 = FALSE;
3104 partial3 = FALSE;
3105 sfp->location = SeqEdSeqLocDelete (sfp->location, tobsp, pos, pos - len - 1, FALSE, &changed, &partial5, &partial3);
3106 if (changed) {
3107 if (sfp->location == NULL) {
3108 sfp->idx.deleteme = TRUE;
3109 }
3110 SetSeqLocPartial (sfp->location, partial5, partial3);
3111 sfp->partial |= partial5 || partial3;
3112 }
3113 }
3114 switch (sfp->data.choice)
3115 {
3116 case SEQFEAT_CDREGION: /* cdregion */
3117 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
3118 prevcbp = NULL;
3119 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
3120 {
3121 nextcbp = cbp->next;
3122 if (len > 0) {
3123 cbp->loc = SeqLocInsert (cbp->loc, to_id,pos, len, do_split, NULL);
3124 } else {
3125 changed = FALSE;
3126 partial5 = FALSE;
3127 partial3 = FALSE;
3128 cbp->loc = SeqEdSeqLocDelete (cbp->loc, tobsp, pos, pos - len + 1, FALSE, &changed, &partial5, &partial3);
3129 if (changed) {
3130 SetSeqLocPartial (cbp->loc, partial5, partial3);
3131 }
3132 }
3133 if (cbp->loc == NULL)
3134 {
3135 if (prevcbp != NULL)
3136 prevcbp->next = nextcbp;
3137 else
3138 crp->code_break = nextcbp;
3139 cbp->next = NULL;
3140 CodeBreakFree (cbp);
3141 }
3142 else
3143 prevcbp = cbp;
3144 }
3145 break;
3146 case SEQFEAT_RNA:
3147 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
3148 if (rrp->ext.choice == 2) /* tRNA */
3149 {
3150 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
3151 if (trp->anticodon != NULL)
3152 {
3153 if (len > 0) {
3154 trp->anticodon = SeqLocInsert (trp->anticodon, to_id,pos, len, do_split, NULL);
3155 } else {
3156 changed = FALSE;
3157 partial5 = FALSE;
3158 partial3 = FALSE;
3159 trp->anticodon = SeqEdSeqLocDelete (trp->anticodon, tobsp, pos, pos - len + 1, FALSE, &changed, &partial5, &partial3);
3160 if (changed) {
3161 SetSeqLocPartial (trp->anticodon, partial5, partial3);
3162 }
3163 }
3164 }
3165 }
3166 break;
3167 default:
3168 break;
3169 }
3170 }
3171
3172 /* adjust features pointing by product */
3173 prods = SeqMgrGetSfpProductList (tobsp);
3174 for (vnp = prods; vnp != NULL; vnp = vnp->next) {
3175 sfp = (SeqFeatPtr) vnp->data.ptrvalue;
3176 if (sfp == NULL) continue;
3177 sfp->product = SeqLocInsert (sfp->product, to_id,pos, len, do_split, NULL);
3178 }
3179
3180 } else {
3181 bcp = BioseqContextNew(tobsp);
3182 sfp = NULL;
3183 /* adjust features pointing by location */
3184 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
3185 {
3186 sfp->location = SeqLocInsert(sfp->location, to_id,pos, len, do_split, NULL);
3187 switch (sfp->data.choice)
3188 {
3189 case SEQFEAT_CDREGION: /* cdregion */
3190 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
3191 prevcbp = NULL;
3192 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
3193 {
3194 nextcbp = cbp->next;
3195 cbp->loc = SeqLocInsert(cbp->loc, to_id,pos, len, do_split, NULL);
3196 if (cbp->loc == NULL)
3197 {
3198 if (prevcbp != NULL)
3199 prevcbp->next = nextcbp;
3200 else
3201 crp->code_break = nextcbp;
3202 cbp->next = NULL;
3203 CodeBreakFree(cbp);
3204 }
3205 else
3206 prevcbp = cbp;
3207 }
3208 break;
3209 case SEQFEAT_RNA:
3210 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
3211 if (rrp->ext.choice == 2) /* tRNA */
3212 {
3213 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
3214 if (trp->anticodon != NULL)
3215 {
3216 trp->anticodon = SeqLocInsert(trp->anticodon, to_id,pos, len, do_split, NULL);
3217 }
3218 }
3219 break;
3220 default:
3221 break;
3222 }
3223 }
3224
3225 sfp = NULL;
3226 /* adjust features pointing by product */
3227 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL)
3228 sfp->product = SeqLocInsert(sfp->product, to_id,pos, len, do_split, NULL);
3229 BioseqContextFree(bcp);
3230 }
3231 }
3232
3233 /*****************************************************************************
3234 *
3235 * BioseqInsert (from_id, from, to, strand, to_id, pos, from_feat, to_feat,
3236 * do_split)
*      Inserts a copy of the region "from"-"to" on "strand" of the Bioseq
3238 * identified by "from_id" into the Bioseq identified by "to_id"
3239 * before "pos".
3240 * if from_feat = TRUE, copies the feature table from "from" and updates
*      its locations to point to the proper residues in "to_id"
3242 * If to_feat = TRUE, updates feature table on "to_id" as well.
3243 * if do_split == TRUE, then splits features in "to_id" (to_feat must
3244 * be TRUE as well). Otherwise expands features at insertion.
3245 *
3246 * All operations are copies. "frombsp" is unchanged.
3247 * Insert will only occur between certain Bioseq.repr classes as below
3248 *
3249 * From Bioseq.repr To Bioseq.repr
3250 *
3251 * virtual raw segmented map
3252 * +---------------------------------------------------
3253 * virtual | length inst SeqLoc length
3254 * +---------------------------------------------------
3255 * raw | error copy SeqLoc error
3256 * +---------------------------------------------------
3257 * segmented | error inst SeqLoc* error
3258 * +---------------------------------------------------
3259 * map | error inst* SeqLoc copy
3260 * +---------------------------------------------------
3261 *
3262 * length = changes length of "to" by length of "from"
3263 * error = insertion not allowed
3264 * inst = "from" instantiated as residues ("N" or "X" for virtual "from")
3265 * inst* = as above, but a restriction map can instantiate other bases
3266 * than "N" for known restriction recognition sites.
3267 * copy = copy of "from" inserted into "to"
3268 * SeqLoc = a SeqLoc added to "to" which points to "from". No copy of residues.
3269 * SeqLoc* = as above, but note that "to" points to "from" directly, not
3270 * what "from" itself may point to.
3271 *
3272 *****************************************************************************/
BioseqInsert(SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,Boolean from_feat,Boolean to_feat,Boolean do_split)3273 NLM_EXTERN Boolean LIBCALL BioseqInsert (SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos,
3274 Boolean from_feat, Boolean to_feat, Boolean do_split)
3275 {
3276 BioseqPtr tobsp, frombsp;
3277 Int4 len, i, ctr, tlen;
3278 Boolean from_type, to_type;
3279 Uint1 seqtype;
3280 SeqAnnotPtr sap, newsap;
3281 SeqFeatPtr sfp, newsfp, prevsfp, sfphead = NULL;
3282 BioseqContextPtr bcp;
3283 Boolean handled = FALSE;
3284 SeqPortPtr spp;
3285 Int2 residue;
3286 Boolean split, added = FALSE, do_bsadd = TRUE;
3287 SeqLocPtr newloc, curr, head, tloc, xloc, yloc, fake;
3288 SeqIntPtr sip;
3289 CdRegionPtr crp;
3290 CodeBreakPtr cbp, prevcbp, nextcbp;
3291 RnaRefPtr rrp;
3292 tRNAPtr trp;
3293 SeqEntryPtr oldscope;
3294
3295 if ((from_id == NULL) || (to_id == NULL)) return FALSE;
3296
3297 tobsp = BioseqFind(to_id);
3298 if (tobsp == NULL) {
3299 oldscope = SeqEntrySetScope (NULL);
3300 if (oldscope != NULL) {
3301 tobsp = BioseqFind(to_id);
3302 SeqEntrySetScope (oldscope);
3303 }
3304 }
3305 if (tobsp == NULL) return FALSE;
3306
3307 len = BioseqGetLen(tobsp);
3308
3309 if (pos == LAST_RESIDUE)
3310 pos = len - 1;
3311 else if (pos == APPEND_RESIDUE) {
3312 pos = len;
3313 }
3314
3315 if ((pos < 0) || (pos > len)) return FALSE;
3316
3317 frombsp = BioseqFind(from_id);
3318 if (frombsp == NULL) {
3319 oldscope = SeqEntrySetScope (NULL);
3320 if (oldscope != NULL) {
3321 frombsp = BioseqFind(from_id);
3322 SeqEntrySetScope (oldscope);
3323 }
3324 }
3325 if (frombsp == NULL) return FALSE;
3326
3327 from_type = ISA_na(frombsp->mol);
3328 to_type = ISA_na(tobsp->mol);
3329
3330 if (from_type != to_type) return FALSE;
3331
3332 len = BioseqGetLen(frombsp);
3333 if (to == LAST_RESIDUE)
3334 to = len - 1;
3335
3336 if ((from < 0) || (to >= len)) return FALSE;
3337
3338 len = to - from + 1;
3339
3340 if (tobsp->repr == Seq_repr_virtual)
3341 {
3342 if (frombsp->repr != Seq_repr_virtual)
3343 return FALSE;
3344
3345 handled = TRUE; /* just length and features */
3346 }
3347
3348 if (((tobsp->repr == Seq_repr_raw) || (tobsp->repr == Seq_repr_const)) && tobsp->seq_data_type != Seq_code_gap)
3349 {
3350 if (ISA_na(tobsp->mol))
3351 {
3352 seqtype = Seq_code_iupacna;
3353 }
3354 else
3355 {
3356 seqtype = Seq_code_ncbieaa;
3357 }
3358
3359 if (tobsp->seq_data_type != seqtype)
3360 BioseqRawConvert(tobsp, seqtype);
3361 BSSeek((ByteStorePtr) tobsp->seq_data, pos, SEEK_SET);
3362 if (do_bsadd) {
3363 Nlm_BSAdd((ByteStorePtr) tobsp->seq_data, len, FALSE);
3364 }
3365
3366 i = 0;
3367
3368 spp = SeqPortNew(frombsp, from, to, strand, seqtype);
3369 while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF)
3370 {
3371 if (! IS_residue(residue))
3372 {
3373 ErrPost(CTX_NCBIOBJ, 1, "Non-residue in BioseqInsert [%d]",
3374 (int)residue);
3375 }
3376 else
3377 {
3378 BSPutByte((ByteStorePtr) tobsp->seq_data, residue);
3379 i++;
3380 }
3381 }
3382 SeqPortFree(spp);
3383
3384 if (i != len)
3385 {
3386 ErrPost(CTX_NCBIOBJ, 1, "Tried to insert %ld residues but %ld went in",
3387 len, i);
3388 return FALSE;
3389 }
3390
3391 handled = TRUE;
3392 }
3393
3394 if ((tobsp->repr == Seq_repr_seg) || (tobsp->repr == Seq_repr_ref))
3395 {
3396 sip = SeqIntNew();
3397 sip->id = SeqIdDup(from_id);
3398 sip->from = from;
3399 sip->to = to;
3400 sip->strand = strand;
3401 tloc = ValNodeNew(NULL);
3402 tloc->choice = SEQLOC_INT;
3403 tloc->data.ptrvalue = (Pointer)sip;
3404 head = NULL;
3405 if (tobsp->repr == Seq_repr_seg)
3406 {
3407 fake = ValNodeNew(NULL);
3408 fake->choice = SEQLOC_MIX;
3409 fake->data.ptrvalue = (Pointer)(tobsp->seq_ext);
3410 }
3411 else
3412 fake = (SeqLocPtr)(tobsp->seq_ext);
3413 curr = NULL;
3414 ctr = 0;
3415 while ((curr = SeqLocFindNext(fake, curr)) != NULL)
3416 {
3417 if ((! added) && (ctr == pos))
3418 {
3419 newloc = SeqLocAdd(&head, tloc, TRUE, TRUE);
3420 added = TRUE;
3421 }
3422 tlen = SeqLocLen(curr);
3423 if ((! added) && ((ctr + tlen) > pos)) /* split interval */
3424 {
3425 yloc = NULL;
3426 xloc = SeqLocAdd(&yloc, curr, TRUE, TRUE);
3427 i = (pos - ctr) + SeqLocStart(curr);
3428 newloc = SeqLocInsert(xloc, SeqLocId(xloc), i, 0, TRUE, NULL);
3429 xloc = newloc;
3430 yloc = newloc->next;
3431 SeqLocAdd(&head, xloc, TRUE, TRUE);
3432 SeqLocAdd(&head, tloc, TRUE, TRUE);
3433 SeqLocAdd(&head, yloc, TRUE, TRUE);
3434 SeqLocFree(xloc);
3435 SeqLocFree(yloc);
3436 added = TRUE;
3437 }
3438 else
3439 newloc = SeqLocAdd(&head, curr, TRUE, TRUE);
3440 ctr += tlen;
3441 }
3442 if ((! added) && (ctr == pos))
3443 {
3444 newloc = SeqLocAdd(&head, tloc, TRUE, TRUE);
3445 added = TRUE;
3446 }
3447 SeqLocFree(tloc);
3448 SeqLocFree(fake);
3449 if (tobsp->repr == Seq_repr_seg)
3450 {
3451 tobsp->seq_ext = (Pointer)head;
3452 }
3453 else
3454 {
3455 tobsp->seq_ext = SeqLocPackage(head);
3456 }
3457 handled = TRUE;
3458 }
3459
3460 if (tobsp->repr == Seq_repr_map)
3461 {
3462 if (! ((frombsp->repr == Seq_repr_map) || (frombsp->repr == Seq_repr_virtual)))
3463 return FALSE;
3464
3465 prevsfp = NULL;
3466 for (sfp = (SeqFeatPtr)(tobsp->seq_ext); sfp != NULL; sfp = sfp->next)
3467 {
3468 sfp->location = SeqLocInsert(sfp->location, to_id, pos, len, TRUE, NULL);
3469 prevsfp = sfp;
3470 }
3471
3472 if (frombsp->repr == Seq_repr_map)
3473 {
3474 for (sfp = (SeqFeatPtr)(frombsp->seq_ext); sfp != NULL; sfp = sfp->next)
3475 {
3476 split = FALSE;
3477 newloc = SeqLocCopyRegion(to_id, sfp->location, frombsp, from, to, strand, &split);
3478 if (newloc != NULL) /* got one */
3479 {
3480 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
3481 SeqLocFree(newsfp->location);
3482 newsfp->location = newloc;
3483 if (split)
3484 newsfp->partial = TRUE;
3485
3486 if (prevsfp == NULL)
3487 tobsp->seq_ext = (Pointer)newsfp;
3488 else
3489 prevsfp->next = newsfp;
3490 prevsfp = newsfp;
3491
3492 newsfp->location = SeqLocInsert(newsfp->location, to_id, 0,
3493 pos, TRUE, to_id);
3494 }
3495 }
3496 }
3497 handled = TRUE;
3498 }
3499
3500 if (! handled) return FALSE;
3501
3502 tobsp->length += len;
3503
3504 if (to_feat) /* fix up sourceid Bioseq feature table(s) */
3505 {
3506 AdjustFeaturesForInsertion (tobsp, to_id, pos, len, do_split);
3507 }
3508
3509 if (from_feat) /* add source Bioseq features to sourceid */
3510 {
3511 bcp = BioseqContextNew(frombsp);
3512 sfp = NULL; /* NOTE: should make NEW feature table */
3513 prevsfp = NULL;
3514 /* is there an old feature table to use? */
3515 for (newsap = tobsp->annot; newsap != NULL; newsap = newsap->next)
3516 {
3517 if (newsap->type == 1) /* feature table */
3518 break;
3519 }
3520 if (newsap != NULL)
3521 { /* create a new one if necessary */
3522 for (prevsfp = (SeqFeatPtr)(newsap->data); prevsfp != NULL;
3523 prevsfp = prevsfp->next)
3524 {
3525 if (prevsfp->next == NULL)
3526 break;
3527 }
3528 }
3529 /* get features by location */
3530 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
3531 { /* copy all old features */
3532 split = FALSE;
3533 newloc = SeqLocCopyRegion(to_id, sfp->location, frombsp, from, to, strand, &split);
3534 if (newloc != NULL) /* got one */
3535 {
3536 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
3537 SeqLocFree(newsfp->location);
3538 newsfp->location = newloc;
3539
3540 if (split)
3541 newsfp->partial = TRUE;
3542
3543 if (prevsfp == NULL)
3544 sfphead = newsfp;
3545 else
3546 prevsfp->next = newsfp;
3547 prevsfp = newsfp;
3548
3549 newsfp->location = SeqLocInsert(newsfp->location, to_id, 0,
3550 pos, TRUE, to_id);
3551 switch (newsfp->data.choice)
3552 {
3553 case SEQFEAT_CDREGION: /* cdregion */
3554 crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
3555 prevcbp = NULL;
3556 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
3557 {
3558 nextcbp = cbp->next;
3559 cbp->loc = SeqLocCopyRegion(to_id, cbp->loc, frombsp, from, to, strand, &split);
3560 if (cbp->loc == NULL)
3561 {
3562 if (prevcbp != NULL)
3563 prevcbp->next = nextcbp;
3564 else
3565 crp->code_break = nextcbp;
3566 cbp->next = NULL;
3567 CodeBreakFree(cbp);
3568 }
3569 else
3570 {
3571 cbp->loc = SeqLocInsert(cbp->loc, to_id, 0,
3572 pos, TRUE, to_id);
3573 prevcbp = cbp;
3574 }
3575 }
3576 break;
3577 case SEQFEAT_RNA:
3578 rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
3579 if (rrp->ext.choice == 2) /* tRNA */
3580 {
3581 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
3582 if (trp->anticodon != NULL)
3583 {
3584 trp->anticodon = SeqLocCopyRegion(to_id, trp->anticodon, frombsp, from, to, strand, &split);
3585 trp->anticodon = SeqLocInsert(trp->anticodon, to_id, 0,
3586 pos, TRUE, to_id);
3587 }
3588 }
3589 break;
3590 default:
3591 break;
3592 }
3593 }
3594 }
3595
3596 sfp = NULL;
3597 /* get features by product */
3598 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL)
3599 { /* copy all old features */
3600 split = FALSE;
3601 newloc = SeqLocCopyRegion(to_id, sfp->product, frombsp, from, to, strand, &split);
3602 if (newloc != NULL) /* got one */
3603 {
3604 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
3605 SeqLocFree(newsfp->product);
3606 newsfp->product = newloc;
3607 if (split)
3608 newsfp->partial = TRUE;
3609
3610 if (prevsfp == NULL)
3611 sfphead = newsfp;
3612 else
3613 prevsfp->next = newsfp;
3614 prevsfp = newsfp;
3615
3616 newsfp->product = SeqLocInsert(newsfp->product, to_id, 0, pos,
3617 TRUE, to_id);
3618 }
3619 }
3620 BioseqContextFree(bcp);
3621
3622
3623 if (sfphead != NULL) /* orphan chain of seqfeats to attach */
3624 {
3625 if (newsap == NULL)
3626 {
3627 for (sap = tobsp->annot; sap != NULL; sap = sap->next)
3628 {
3629 if (sap->next == NULL)
3630 break;
3631 }
3632 newsap = SeqAnnotNew();
3633 newsap->type = 1;
3634 if (sap == NULL)
3635 tobsp->annot = newsap;
3636 else
3637 sap->next = newsap;
3638 }
3639
3640 newsap->data = (Pointer)sfphead;
3641 }
3642 }
3643
3644 return TRUE;
3645 }
3646
3647 /*****************************************************************************
3648 *
3649 * SeqLocInsert()
3650 * alters "head" by inserting "len" residues before "pos" in any SeqLoc
3651 * on the Bioseq "target"
3652 * all SeqLocs not on "target" are unaltered
3653 * for SeqLocs on "target"
3654 * all SeqLocs before "pos" are unaltered
3655 * all SeqLocs >= "pos" are incremented by "len"
3656 * all SeqLocs spanning "pos"
3657 * if "split" == TRUE, are split into two SeqLocs, one to the
3658 * left of the insertion, the other to right
3659 * if "split" != TRUE, the SeqLoc is increased in length to cover
3660 * the insertion
3661 * returns altered head or NULL if nothing left.
3662 * if ("newid" != NULL) replaces "target" with "newid" whether the
3663 * SeqLoc is altered or not.
3664 *
3665 * Usage hints:
3666 * 1) To update a feature location on "target" when 10 residues of
3667 * sequence have been inserted before position 5
3668 * SeqFeatPtr->location = SeqLocInsert ( SeqFeatPtr->location ,
3669 * "target", 5, 10, TRUE, NULL); [for some feature types
3670 * you may want "split" equal FALSE]
3671 * 2) To insert the complete feature table from "source" into a
3672 * different Bioseq "dest" before position 20 in "dest"
3673 * SFP->location = SeqLocInsert(SFP->location, "source", 0, 20,
3674 * FALSE, "dest");
3675 *
3676 *
3677 *****************************************************************************/
SeqLocInsert(SeqLocPtr head,SeqIdPtr target,Int4 pos,Int4 len,Boolean split,SeqIdPtr newid)3678 NLM_EXTERN SeqLocPtr LIBCALL SeqLocInsert (SeqLocPtr head, SeqIdPtr target, Int4 pos, Int4 len,
3679 Boolean split, SeqIdPtr newid)
3680 {
3681 SeqIntPtr sip, sip2;
3682 SeqPntPtr spp;
3683 PackSeqPntPtr pspp, pspp2;
3684 SeqBondPtr sbp;
3685 SeqLocPtr slp, tmp, prev, next, thead, tmp2;
3686 Int4 diff, numpnt, i, tpos;
3687 Uint1 oldchoice;
3688 ValNode vn;
3689 SeqIdPtr sidp;
3690
3691 if ((head == NULL) || (target == NULL))
3692 return head;
3693
3694 head->next = NULL; /* caller maintains chains */
3695
3696 diff = len;
3697
3698 switch (head->choice)
3699 {
3700 case SEQLOC_BOND: /* bond -- 2 seqs */
3701 vn.next = NULL;
3702 vn.choice = SEQLOC_PNT;
3703
3704 sbp = (SeqBondPtr)(head->data.ptrvalue);
3705 vn.data.ptrvalue = (Pointer)(sbp->a);
3706 SeqLocInsert(&vn, target, pos, len, split, newid);
3707 sbp->a = (SeqPntPtr)(vn.data.ptrvalue);
3708 if (sbp->b != NULL)
3709 {
3710 vn.data.ptrvalue = (Pointer)(sbp->b);
3711 SeqLocInsert(&vn, target, pos, len, split, newid);
3712 sbp->b = (SeqPntPtr)(vn.data.ptrvalue);
3713 }
3714 break;
3715 case SEQLOC_FEAT: /* feat -- can't track yet */
3716 case SEQLOC_NULL: /* NULL */
3717 break;
3718 case SEQLOC_EMPTY: /* empty */
3719 case SEQLOC_WHOLE: /* whole */
3720 if (newid != NULL)
3721 {
3722 sidp = (SeqIdPtr)(head->data.ptrvalue);
3723 if (SeqIdForSameBioseq(sidp, target))
3724 {
3725 SeqIdFree(sidp);
3726 sidp = SeqIdDup(newid);
3727 head->data.ptrvalue = (Pointer)sidp;
3728 }
3729 }
3730 break;
3731 case SEQLOC_MIX: /* mix -- more than one seq */
3732 case SEQLOC_EQUIV: /* equiv -- ditto */
3733 case SEQLOC_PACKED_INT: /* packed int */
3734 prev = NULL;
3735 thead = NULL;
3736 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
3737 {
3738 next = slp->next;
3739 oldchoice = slp->choice;
3740 tmp = SeqLocInsert(slp, target, pos, len, split, newid);
3741 if (tmp != NULL)
3742 {
3743 if ((head->choice != SEQLOC_EQUIV) &&
3744 (oldchoice != tmp->choice)) /* split interval? */
3745 {
3746 if ((oldchoice == SEQLOC_INT) &&
3747 (tmp->choice == SEQLOC_PACKED_INT))
3748 {
3749 tmp2 = tmp;
3750 tmp = (SeqLocPtr)(tmp2->data.ptrvalue);
3751 MemFree(tmp2);
3752 while (tmp->next != NULL)
3753 {
3754 if (prev != NULL)
3755 prev->next = tmp;
3756 else
3757 thead = tmp;
3758 prev = tmp;
3759 tmp = tmp->next;
3760 }
3761 }
3762 }
3763 if (prev != NULL)
3764 prev->next = tmp;
3765 else
3766 thead = tmp;
3767 prev = tmp;
3768 }
3769 }
3770 head->data.ptrvalue = thead;
3771 if (thead == NULL)
3772 head = SeqLocFree(head);
3773 break;
3774 case SEQLOC_INT: /* int */
3775 sip = (SeqIntPtr)(head->data.ptrvalue);
3776 if (SeqIdForSameBioseq(sip->id, target))
3777 {
3778 if (newid != NULL) /* change id? */
3779 {
3780 SeqIdFree(sip->id);
3781 sip->id = SeqIdDup(newid);
3782 }
3783
3784 if (sip->to < pos) /* completely before insertion */
3785 {
3786 break;
3787 }
3788
3789 if ((! split) || (sip->from >= pos)) /* interval unbroken */
3790 {
3791 if (sip->from >= pos)
3792 sip->from += len;
3793 sip->to += len;
3794 break;
3795 }
3796
3797 /* split interval */
3798 sip2 = SeqIntNew();
3799 slp = ValNodeNew(NULL);
3800 slp->choice = SEQLOC_INT;
3801 slp->data.ptrvalue = (Pointer)sip2;
3802 sip2->strand = sip->strand;
3803 sip2->id = SeqIdDup(sip->id);
3804
3805 sip2->to = sip->to + len;
3806 sip2->from = pos + len;
3807 sip2->if_to = sip->if_to;
3808 sip->if_to = NULL;
3809 sip->to = pos - 1;
3810 head->next = slp;
3811
3812 if (sip->strand == Seq_strand_minus) /* reverse order */
3813 {
3814 head->data.ptrvalue = (Pointer)sip2;
3815 slp->data.ptrvalue = (Pointer)sip;
3816 }
3817
3818 thead = head; /* make split interval into PACKED_INT */
3819 head = ValNodeNew(NULL);
3820 head->choice = SEQLOC_PACKED_INT;
3821 head->data.ptrvalue = thead;
3822
3823 }
3824 break;
3825 case SEQLOC_PNT: /* pnt */
3826 spp = (SeqPntPtr)(head->data.ptrvalue);
3827 if (SeqIdForSameBioseq(spp->id, target))
3828 {
3829 if (newid != NULL) /* change id? */
3830 {
3831 SeqIdFree(spp->id);
3832 spp->id = SeqIdDup(newid);
3833 }
3834
3835 if (spp->point >= pos)
3836 spp->point += len;
3837 }
3838 break;
3839 case SEQLOC_PACKED_PNT: /* packed pnt */
3840 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
3841 if (SeqIdForSameBioseq(pspp->id, target))
3842 {
3843 if (newid != NULL) /* change id? */
3844 {
3845 SeqIdFree(pspp->id);
3846 pspp->id = SeqIdDup(newid);
3847 }
3848
3849 numpnt = PackSeqPntNum(pspp);
3850 pspp2 = PackSeqPntNew();
3851 head->data.ptrvalue = pspp2;
3852 for (i = 0; i < numpnt; i++)
3853 {
3854 tpos = PackSeqPntGet(pspp, i);
3855 if (tpos >= pos)
3856 tpos += len;
3857 PackSeqPntPut(pspp2, tpos);
3858 }
3859 pspp2->id = pspp->id;
3860 pspp->id = NULL;
3861 pspp2->fuzz = pspp->fuzz;
3862 pspp->fuzz = NULL;
3863 pspp2->strand = pspp->strand;
3864 PackSeqPntFree(pspp);
3865 }
3866 break;
3867 default:
3868 break;
3869 }
3870
3871 if (head == NULL)
3872 ErrPost(CTX_NCBIOBJ, 1, "SeqLocInsert: lost a SeqLoc");
3873
3874 return head;
3875 }
3876
3877 /*****************************************************************************
3878 *
3879 * SeqLocSubtract (SeqLocPtr head, SeqLocPtr piece)
3880 * Deletes piece from head.
3881 * head may be changed.
3882 * returns the changed head.
3883 *
3884 *****************************************************************************/
SeqLocSubtract(SeqLocPtr head,SeqLocPtr piece)3885 NLM_EXTERN SeqLocPtr LIBCALL SeqLocSubtract (SeqLocPtr head, SeqLocPtr piece)
3886 {
3887 SeqLocPtr slp = NULL;
3888 SeqIdPtr sip;
3889 Int4 from, to;
3890 Boolean changed = FALSE;
3891
3892 if ((head == NULL) || (piece == NULL))
3893 return NULL;
3894
3895 while ((slp = SeqLocFindNext(piece, slp)) != NULL)
3896 {
3897 sip = SeqLocId(slp);
3898 from = SeqLocStart(slp);
3899 to = SeqLocStop(slp);
3900 head = SeqLocDelete(head, sip, from, to, FALSE, &changed);
3901 }
3902
3903 return head;
3904 }
3905
3906 /********************************************************************
3907 *
3908 * SeqLocReplaceID
3909 * replaces the Seq-Id in a Seq-Loc (slp) with a new Seq-Id (new_sip)
3910 *
3911 **********************************************************************/
SeqLocReplaceID(SeqLocPtr slp,SeqIdPtr new_sip)3912 NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip)
3913 {
3914 SeqLocPtr curr;
3915 PackSeqPntPtr pspp;
3916 SeqIntPtr target_sit;
3917 SeqBondPtr sbp;
3918 SeqPntPtr spp;
3919
3920 switch (slp->choice) {
3921 case SEQLOC_PACKED_INT :
3922 case SEQLOC_MIX :
3923 case SEQLOC_EQUIV :
3924 curr = NULL;
3925 while ((curr = SeqLocFindNext (slp, curr)) != NULL) {
3926 curr = SeqLocReplaceID (curr, new_sip);
3927 }
3928 break;
3929 case SEQLOC_PACKED_PNT :
3930 pspp = (PackSeqPntPtr) slp->data.ptrvalue;
3931 if (pspp != NULL) {
3932 SeqIdFree (pspp->id);
3933 pspp->id = SeqIdDup (new_sip);
3934 }
3935 break;
3936 case SEQLOC_EMPTY :
3937 case SEQLOC_WHOLE :
3938 SeqIdFree ((SeqIdPtr) slp->data.ptrvalue);
3939 slp->data.ptrvalue = (Pointer) SeqIdDup (new_sip);
3940 break;
3941 case SEQLOC_INT :
3942 target_sit = (SeqIntPtr) slp->data.ptrvalue;
3943 SeqIdFree (target_sit->id);
3944 target_sit->id = SeqIdDup (new_sip);
3945 break;
3946 case SEQLOC_PNT :
3947 spp = (SeqPntPtr) slp->data.ptrvalue;
3948 SeqIdFree(spp->id);
3949 spp->id = SeqIdDup(new_sip);
3950 break;
3951 case SEQLOC_BOND :
3952 sbp = (SeqBondPtr) slp->data.ptrvalue;
3953 if (sbp == NULL || sbp->a == NULL || sbp->b == NULL) break;
3954 /* only do this if both ends bonded to same Seq-id */
3955 if (SeqIdMatch (sbp->a->id, sbp->b->id)) {
3956 spp = sbp->a;
3957 SeqIdFree(spp->id);
3958 spp->id = SeqIdDup(new_sip);
3959 spp = sbp->b;
3960 SeqIdFree(spp->id);
3961 spp->id = SeqIdDup(new_sip);
3962 }
3963 break;
3964 default :
3965 break;
3966 }
3967 return slp;
3968 }
3969
3970 /**********************************************************
3971 *
3972 * NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(range):
3973 *
3974 * Gets the size of gap and constructs SeqLoc block with
3975 * $(seqlitdbtag) value as Dbtag.db and Dbtag.tag.id = 0.
3976 *
3977 **********************************************************/
GapToSeqLoc(Int4 range)3978 NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(Int4 range)
3979 {
3980 SeqLocPtr slp;
3981 SeqIntPtr sip;
3982 SeqIdPtr sidp;
3983 DbtagPtr dp;
3984
3985 if(range < 0)
3986 return(NULL);
3987
3988 slp = ValNodeNew(NULL);
3989 if(range == 0)
3990 {
3991 slp->choice = SEQLOC_NULL;
3992 slp->data.ptrvalue = NULL;
3993 slp->next = NULL;
3994 return(slp);
3995 }
3996
3997 dp = DbtagNew();
3998 dp->db = StringSave(seqlitdbtag);
3999 dp->tag = ObjectIdNew();
4000 dp->tag->id = 0;
4001 dp->tag->str = NULL;
4002
4003 sidp = ValNodeNew(NULL);
4004 sidp->choice = SEQID_GENERAL;
4005 sidp->data.ptrvalue = dp;
4006
4007 sip = SeqIntNew();
4008 sip->from = 0;
4009 sip->to = range - 1;
4010 sip->id = sidp;
4011
4012 slp->choice = SEQLOC_INT;
4013 slp->data.ptrvalue = sip;
4014
4015 return(slp);
4016 }
4017
4018 /**********************************************************
4019 *
4020 * NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLocEx(range, unknown):
4021 *
4022 * Gets the size of gap and constructs SeqLoc block with $(seqlitdbtag)
4023 * (or $(unkseqlitdbtag) if unknown is TRUE) as Dbtag.db and Dbtag.tag.id = 0.
4024 *
4025 **********************************************************/
GapToSeqLocEx(Int4 range,Boolean unknown)4026 NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLocEx(Int4 range, Boolean unknown)
4027 {
4028 SeqLocPtr slp;
4029 SeqIntPtr sip;
4030 SeqIdPtr sidp;
4031 DbtagPtr dp;
4032
4033 if(range < 0)
4034 return(NULL);
4035
4036 slp = ValNodeNew(NULL);
4037 if(range == 0)
4038 {
4039 slp->choice = SEQLOC_NULL;
4040 slp->data.ptrvalue = NULL;
4041 slp->next = NULL;
4042 return(slp);
4043 }
4044
4045 dp = DbtagNew();
4046 if(unknown == FALSE)
4047 dp->db = StringSave(seqlitdbtag);
4048 else
4049 dp->db = StringSave(unkseqlitdbtag);
4050 dp->tag = ObjectIdNew();
4051 dp->tag->id = 0;
4052 dp->tag->str = NULL;
4053
4054 sidp = ValNodeNew(NULL);
4055 sidp->choice = SEQID_GENERAL;
4056 sidp->data.ptrvalue = dp;
4057
4058 sip = SeqIntNew();
4059 sip->from = 0;
4060 sip->to = range - 1;
4061 sip->id = sidp;
4062
4063 slp->choice = SEQLOC_INT;
4064 slp->data.ptrvalue = sip;
4065
4066 return(slp);
4067 }
4068
4069 /**********************************************************
4070 *
4071 * NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(slp):
4072 *
4073 * Looks at a single SeqLoc item. If it has the SeqId
4074 * of type GENERAL with Dbtag.db == $(seqlitdbtag) and
4075 * Dbtag.tag.id == 0, then returns TRUE, otherwise
4076 * returns FALSE.
4077 *
4078 **********************************************************/
ISAGappedSeqLoc(SeqLocPtr slp)4079 NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(SeqLocPtr slp)
4080 {
4081 SeqIdPtr sip;
4082 DbtagPtr dp;
4083
4084 if(slp == NULL)
4085 return(FALSE);
4086
4087 sip = SeqLocId(slp);
4088 if(sip == NULL || sip->choice != SEQID_GENERAL)
4089 return(FALSE);
4090
4091 dp = (DbtagPtr) sip->data.ptrvalue;
4092 if(dp == NULL || dp->db == NULL || dp->tag == NULL)
4093 return(FALSE);
4094
4095 if((StringCmp(seqlitdbtag, dp->db) == 0 ||
4096 StringCmp(unkseqlitdbtag, dp->db) == 0) && dp->tag->id == 0)
4097 return(TRUE);
4098
4099 return(FALSE);
4100 }
4101
4102 /**********************************************************
4103 *
4104 * NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(slp):
4105 *
4106 * This function is meant to be used only when ISAGappedSeqLoc()
4107 * has returned TRUE.
4108 * Converts a chain of SeqLocs into a chain of DeltaSeqs.
4109 * Dbtag'ed (gap) SeqLocs are turned into SeqLits with only the "length"
4110 * element. Regular SeqLocs are saved as they are. Returns
4111 * the obtained DeltaSeq chain.
4112 *
4113 **********************************************************/
GappedSeqLocsToDeltaSeqs(SeqLocPtr slp)4114 NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(SeqLocPtr slp)
4115 {
4116 DeltaSeqPtr res;
4117 DeltaSeqPtr dsp;
4118 SeqIntPtr sip;
4119 SeqLitPtr slip;
4120 SeqIdPtr id;
4121 DbtagPtr dp;
4122
4123 dsp = ValNodeNew(NULL);
4124 dsp->next = NULL;
4125 dsp->choice = 0;
4126 res = dsp;
4127 for(; slp != NULL; slp = slp->next)
4128 {
4129 if(ISAGappedSeqLoc(slp) != FALSE)
4130 {
4131 dsp->next = ValNodeNew(NULL);
4132 dsp = dsp->next;
4133 sip = slp->data.ptrvalue;
4134 slip = SeqLitNew();
4135 slip->length = sip->to - sip->from + 1;
4136 dsp->choice = 2;
4137 dsp->data.ptrvalue = slip;
4138 id = SeqLocId(slp);
4139 if(id != NULL)
4140 {
4141 dp = (DbtagPtr) id->data.ptrvalue;
4142 if(dp != NULL && dp->db != NULL &&
4143 StringCmp(unkseqlitdbtag, dp->db) == 0)
4144 {
4145 slip->fuzz = IntFuzzNew();
4146 slip->fuzz->choice = 4;
4147 }
4148 }
4149 }
4150 else
4151 {
4152 dsp->next = ValNodeNew(NULL);
4153 dsp = dsp->next;
4154 dsp->choice = 1;
4155 dsp->data.ptrvalue = AsnIoMemCopy((Pointer) slp,
4156 (AsnReadFunc) SeqLocAsnRead,
4157 (AsnWriteFunc) SeqLocAsnWrite);
4158 }
4159 }
4160 dsp = res->next;
4161 MemFree(res);
4162 return(dsp);
4163 }
4164
4165 /* This structure and the functions following it are used to track the prior locations
4166 * of features that were affected by the removal of nucleotides, so that they may be
4167 * returned to their original status in an undo.
4168 */
4169 typedef struct affectedfeat /* one journal record pairing two snapshots of a feature */
4170 {
4171 SeqFeatPtr feat_before; /* presumably the feature as it was before the edit; handed to CreateNewFeature by SeqEdRecreateDeletedFeats when feat_after is NULL */
4172 SeqFeatPtr feat_after; /* presumably the feature after the edit; NULL is treated by SeqEdRecreateDeletedFeats as "feature was deleted" */
4173 } AffectedFeatData, PNTR AffectedFeatPtr; /* both members are released with SeqFeatFree in AffectedFeatFree */
4174
AffectedFeatNew(void)4175 static AffectedFeatPtr AffectedFeatNew (void)
4176 {
4177 AffectedFeatPtr afp;
4178
4179 afp = (AffectedFeatPtr) MemNew (sizeof (AffectedFeatData));
4180 if (afp != NULL)
4181 {
4182 afp->feat_before = NULL;
4183 afp->feat_after = NULL;
4184 }
4185 return afp;
4186 }
4187
/* Frees both features held by "afp" and then "afp" itself.
 * Accepts NULL.  Always returns NULL. */
static AffectedFeatPtr AffectedFeatFree (AffectedFeatPtr afp)
{
  if (afp != NULL) {
    afp->feat_before = SeqFeatFree (afp->feat_before);
    afp->feat_after = SeqFeatFree (afp->feat_after);
    MemFree (afp);
  }
  return NULL;
}
4196
/* Frees a chain of ValNodes whose data.ptrvalue each hold an
 * AffectedFeatPtr, releasing the payloads with AffectedFeatFree.
 * Accepts NULL.  Always returns NULL. */
static ValNodePtr SeqEdJournalAffectedFeatsFree (ValNodePtr vnp)
{
  ValNodePtr following;

  while (vnp != NULL) {
    following = vnp->next;
    /* detach the node so ValNodeFree releases just this one */
    vnp->next = NULL;
    vnp->data.ptrvalue = AffectedFeatFree ((AffectedFeatPtr) (vnp->data.ptrvalue));
    ValNodeFree (vnp);
    vnp = following;
  }
  return NULL;
}
4205
SeqEdRecreateDeletedFeats(SeqEdJournalPtr sejp)4206 static Boolean SeqEdRecreateDeletedFeats (SeqEdJournalPtr sejp)
4207 {
4208 ValNodePtr vnp;
4209 AffectedFeatPtr afp = NULL;
4210 Boolean recreated_feats = FALSE;
4211 SeqEntryPtr sep = NULL;
4212 SeqFeatPtr sfp;
4213
4214 for (vnp = sejp->affected_feats; vnp != NULL && afp == NULL; vnp = vnp->next)
4215 {
4216 if (vnp->choice == 1 || vnp->data.ptrvalue == NULL) continue;
4217 afp = (AffectedFeatPtr) vnp->data.ptrvalue;
4218 if (afp->feat_after == NULL && afp->feat_before != NULL)
4219 {
4220 vnp->choice = 1;
4221 if (sep == NULL)
4222 {
4223 sep = SeqMgrGetSeqEntryForData (sejp->bsp);
4224 if (sep == NULL) return FALSE;
4225 }
4226 sfp = CreateNewFeature (sep, NULL, afp->feat_before->data.choice, afp->feat_before);
4227 afp->feat_before = NULL;
4228 recreated_feats = TRUE;
4229 }
4230 }
4231 return recreated_feats;
4232 }
4233
4234
4235 /* This section of code deals with inserting new characters into a Bioseq and adjusting the
4236 * locations of the affected features. It is adapted from code from SeqLocInsert.
4237 */
4238
SeqEdInsertAdjustCdRgn(SeqFeatPtr sfp,BioseqPtr bsp,Int4 insert_pos,Int4 len,Boolean do_split)4239 NLM_EXTERN void SeqEdInsertAdjustCdRgn
4240 (SeqFeatPtr sfp,
4241 BioseqPtr bsp,
4242 Int4 insert_pos,
4243 Int4 len,
4244 Boolean do_split)
4245 {
4246 CdRegionPtr crp;
4247 CodeBreakPtr prevcbp, cbp, nextcbp;
4248
4249 if (sfp == NULL || bsp == NULL) return;
4250 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
4251 if (crp == NULL) return;
4252
4253 prevcbp = NULL;
4254 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
4255 {
4256 nextcbp = cbp->next;
4257 cbp->loc = SeqEdSeqLocInsert (cbp->loc, bsp, insert_pos, len, do_split, NULL);
4258 if (cbp->loc == NULL)
4259 {
4260 if (prevcbp != NULL)
4261 prevcbp->next = nextcbp;
4262 else
4263 crp->code_break = nextcbp;
4264 cbp->next = NULL;
4265 CodeBreakFree (cbp);
4266 }
4267 else
4268 {
4269 prevcbp = cbp;
4270 }
4271 }
4272 }
4273
SeqEdInsertAdjustRNA(SeqFeatPtr sfp,BioseqPtr bsp,Int4 insert_pos,Int4 len,Boolean do_split)4274 NLM_EXTERN void SeqEdInsertAdjustRNA
4275 (SeqFeatPtr sfp,
4276 BioseqPtr bsp,
4277 Int4 insert_pos,
4278 Int4 len,
4279 Boolean do_split)
4280 {
4281 RnaRefPtr rrp;
4282 tRNAPtr trp;
4283
4284 if (sfp == NULL || bsp == NULL) return;
4285 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
4286 if (rrp == NULL) return;
4287 if (rrp->ext.choice == 2) /* tRNA */
4288 {
4289 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
4290 if (trp->anticodon != NULL)
4291 {
4292 trp->anticodon = SeqEdSeqLocInsert (trp->anticodon, bsp, insert_pos, len, do_split, NULL);
4293 }
4294 }
4295 }
4296
4297
4298 static BioseqPtr
GetParentForSegment(BioseqPtr bsp,Int4Ptr p_start,Int4Ptr p_stop)4299 GetParentForSegment
4300 (BioseqPtr bsp, Int4Ptr p_start, Int4Ptr p_stop)
4301 {
4302 BioseqSetPtr parts_bssp, seg_bssp;
4303 BioseqPtr master_bsp, other_part;
4304 SeqEntryPtr sep;
4305 Int4 offset = 0;
4306
4307 if (bsp == NULL || bsp->idx.parentptr == NULL || bsp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
4308
4309 parts_bssp = (BioseqSetPtr) bsp->idx.parentptr;
4310 if (parts_bssp->_class != BioseqseqSet_class_parts
4311 || parts_bssp->idx.parentptr == NULL
4312 || parts_bssp->idx.parenttype != OBJ_BIOSEQSET)
4313 {
4314 return NULL;
4315 }
4316
4317 seg_bssp = (BioseqSetPtr) parts_bssp->idx.parentptr;
4318 if (seg_bssp->_class != BioseqseqSet_class_segset
4319 || seg_bssp->seq_set == NULL
4320 || !IS_Bioseq (seg_bssp->seq_set))
4321 {
4322 return NULL;
4323 }
4324
4325 master_bsp = (BioseqPtr) seg_bssp->seq_set->data.ptrvalue;
4326
4327 if (p_start != NULL || p_stop != NULL)
4328 {
4329 sep = parts_bssp->seq_set;
4330 while (sep != NULL && sep->data.ptrvalue != bsp)
4331 {
4332 if (IS_Bioseq (sep) && sep->data.ptrvalue != NULL)
4333 {
4334 other_part = sep->data.ptrvalue;
4335 offset += other_part->length;
4336 }
4337 sep = sep->next;
4338 }
4339 if (p_start != NULL)
4340 {
4341 *p_start = offset;
4342 }
4343 if (p_stop != NULL)
4344 {
4345 *p_stop = offset + bsp->length - 1;
4346 }
4347 }
4348
4349 return master_bsp;
4350 }
4351
4352
AdjustOffsetsForSegment(SeqIdPtr del_id,SeqIdPtr target_id,Int4Ptr from,Int4Ptr to)4353 static Boolean AdjustOffsetsForSegment (SeqIdPtr del_id, SeqIdPtr target_id, Int4Ptr from, Int4Ptr to)
4354 {
4355 BioseqPtr bsp_del, bsp_target, bsp_master;
4356 Int4 seg_offset = 0, seg_end = 0;
4357 Boolean rval = FALSE;
4358
4359 if (del_id == NULL || target_id == NULL || from == NULL || to == NULL)
4360 {
4361 return FALSE;
4362 }
4363
4364 bsp_del = BioseqFind (del_id);
4365 bsp_target = BioseqFind (target_id);
4366 if (bsp_del == NULL || bsp_target == NULL) return FALSE;
4367
4368 bsp_master = GetParentForSegment (bsp_del, &seg_offset, &seg_end);
4369 if (bsp_master != NULL)
4370 {
4371 if (bsp_master == bsp_target && seg_offset < *to)
4372 {
4373 /* loc to delete is in parent coordinates */
4374 if (*from > seg_end || *to < seg_offset)
4375 {
4376 /* loc to delete is entirely past this segment */
4377 }
4378 else
4379 {
4380 *from = MAX (0, *from - seg_offset);
4381 *to = MIN (bsp_target->length, *to - seg_offset);
4382 rval = TRUE;
4383 }
4384 }
4385 }
4386 return rval;
4387 }
4388
4389
/*
 * SeqEdInsertSeqPnt
 *
 * Shifts the point "spp" by "len" when it lies at or after "pos".
 * The point is considered only if SeqIdIn finds its id in "target_id",
 * or, failing that, if AdjustOffsetsForSegment accepts it (which may
 * rewrite the local copy of "pos").  The id is replaced by a copy of
 * "newid" only in the SeqIdIn case and only when "newid" is not NULL.
 * A NULL "spp" is ignored.
 */
static void SeqEdInsertSeqPnt (SeqPntPtr spp, SeqIdPtr target_id, Int4 pos, Int4 len, SeqIdPtr newid)
{
  Int4    range_end = pos + len;
  Boolean direct_hit;

  if (spp == NULL) return;

  direct_hit = SeqIdIn(spp->id, target_id);
  if (! direct_hit
      && ! AdjustOffsetsForSegment (spp->id, target_id, &pos, &range_end))
  {
    return;   /* point is not on the edited sequence */
  }

  if (direct_hit && newid != NULL)   /* change id? */
  {
    SeqIdFree(spp->id);
    spp->id = SeqIdDup(newid);
  }

  if (spp->point >= pos)
  {
    spp->point += len;
  }
}
4412
4413
4414 static void
SeqEdInsertSeqInt(SeqIntPtr sip,SeqIdPtr target_id,Int4 pos,Int4 len,Boolean split,SeqIdPtr newid,SeqIntPtr PNTR split_end)4415 SeqEdInsertSeqInt
4416 (SeqIntPtr sip,
4417 SeqIdPtr target_id,
4418 Int4 pos,
4419 Int4 len,
4420 Boolean split,
4421 SeqIdPtr newid,
4422 SeqIntPtr PNTR split_end)
4423 {
4424 Int4 to = pos + len;
4425 Boolean id_in_list;
4426 SeqIntPtr sip2;
4427 SeqLocPtr slp;
4428
4429 if (sip == NULL || split_end == NULL) return;
4430
4431 if (!(id_in_list = SeqIdIn(sip->id, target_id))
4432 && ! AdjustOffsetsForSegment(sip->id, target_id, &pos, &to))
4433 {
4434 return;
4435 }
4436
4437 if (newid != NULL && id_in_list) /* change id? */
4438 {
4439 SeqIdFree(sip->id);
4440 sip->id = SeqIdDup(newid);
4441 }
4442
4443 if (sip->to < pos) /* completely before insertion */
4444 {
4445 return;
4446 }
4447
4448 if ((! split) || (sip->from >= pos)) /* interval unbroken */
4449 {
4450 if (sip->from >= pos)
4451 sip->from += len;
4452 sip->to += len;
4453 return;
4454 }
4455 /* split interval */
4456 sip2 = SeqIntNew();
4457 slp = ValNodeNew(NULL);
4458 slp->choice = SEQLOC_INT;
4459 slp->data.ptrvalue = (Pointer)sip2;
4460 sip2->strand = sip->strand;
4461 sip2->id = SeqIdDup(sip->id);
4462
4463 sip2->to = sip->to + len;
4464 sip2->from = pos + len;
4465 sip2->if_to = sip->if_to;
4466 sip->if_to = NULL;
4467 sip->to = pos - 1;
4468
4469 *split_end = sip2;
4470 }
4471
4472
4473 /*****************************************************************************
4474 *
4475 * SeqEdSeqLocInsert()
4476 * alters "head" by inserting "len" residues before "pos" in any SeqLoc
4477 * on the Bioseq "target"
4478 * all SeqLocs not on "target" are unaltered
4479 * for SeqLocs on "target"
4480 * all SeqLocs before "pos" are unaltered
4481 * all SeqLocs >= "pos" are incremented by "len"
4482 * all SeqLocs spanning "pos"
4483 * if "split" == TRUE, are split into two SeqLocs, one to the
4484 * left of the insertion, the other to right
4485 * if "split" != TRUE, the SeqLoc is increased in length to cover
4486 * the insertion
4487 * returns altered head or NULL if nothing left.
4488 * if ("newid" != NULL) replaces "target" with "newid" whether the
4489 * SeqLoc is altered on not.
4490 *
4491 * Usage hints:
4492 * 1) To update a feature location on "target" when 10 residues of
4493 * sequence have been inserted before position 5
4494 * SeqFeatPtr->location = SeqLocInsert ( SeqFeatPtr->location ,
4495 * "target", 5, 10, TRUE, NULL); [for some feature types
4496 * you may want "split" equal FALSE]
4497 * 2) To insert the complete feature table from "source" into a
4498 * different Bioseq "dest" before position 20 in "dest"
4499 * SFP->location = SeqLocInsert(SFP->location, "source", 0, 20,
4500 * FALSE, "dest");
4501 *
4502 *
4503 *****************************************************************************/
SeqEdSeqLocInsert(SeqLocPtr head,BioseqPtr target,Int4 pos,Int4 len,Boolean split,SeqIdPtr newid)4504 NLM_EXTERN SeqLocPtr LIBCALL SeqEdSeqLocInsert (SeqLocPtr head, BioseqPtr target, Int4 pos, Int4 len,
4505 Boolean split, SeqIdPtr newid)
4506 {
4507 SeqIntPtr sip, sip2;
4508 SeqPntPtr spp;
4509 PackSeqPntPtr pspp, pspp2;
4510 SeqBondPtr sbp;
4511 SeqLocPtr slp, tmp, prev, next, thead, tmp2;
4512 Int4 diff, numpnt, i, tpos;
4513 Uint1 oldchoice;
4514 SeqIdPtr sidp;
4515 Boolean id_in_list;
4516
4517 if ((head == NULL) || (target == NULL))
4518 return head;
4519
4520 head->next = NULL; /* caller maintains chains */
4521
4522 diff = len;
4523
4524 switch (head->choice)
4525 {
4526 case SEQLOC_BOND: /* bond -- 2 seqs */
4527 sbp = (SeqBondPtr)(head->data.ptrvalue);
4528 SeqEdInsertSeqPnt (sbp->a, target->id, pos, len, newid);
4529 SeqEdInsertSeqPnt (sbp->b, target->id, pos, len, newid);
4530 break;
4531 case SEQLOC_FEAT: /* feat -- can't track yet */
4532 case SEQLOC_NULL: /* NULL */
4533 break;
4534 case SEQLOC_EMPTY: /* empty */
4535 case SEQLOC_WHOLE: /* whole */
4536 if (newid != NULL)
4537 {
4538 sidp = (SeqIdPtr)(head->data.ptrvalue);
4539 if ( SeqIdIn(sidp, target->id))
4540 {
4541 SeqIdFree(sidp);
4542 sidp = SeqIdDup(newid);
4543 head->data.ptrvalue = (Pointer)sidp;
4544 }
4545 }
4546 break;
4547 case SEQLOC_MIX: /* mix -- more than one seq */
4548 case SEQLOC_EQUIV: /* equiv -- ditto */
4549 case SEQLOC_PACKED_INT: /* packed int */
4550 prev = NULL;
4551 thead = NULL;
4552 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
4553 {
4554 next = slp->next;
4555 oldchoice = slp->choice;
4556 tmp = SeqEdSeqLocInsert(slp, target, pos, len, split, newid);
4557 if (tmp != NULL)
4558 {
4559 if ((head->choice != SEQLOC_EQUIV) &&
4560 (oldchoice != tmp->choice)) /* split interval? */
4561 {
4562 if ((oldchoice == SEQLOC_INT) &&
4563 (tmp->choice == SEQLOC_PACKED_INT))
4564 {
4565 tmp2 = tmp;
4566 tmp = (SeqLocPtr)(tmp2->data.ptrvalue);
4567 MemFree(tmp2);
4568 while (tmp->next != NULL)
4569 {
4570 if (prev != NULL)
4571 prev->next = tmp;
4572 else
4573 thead = tmp;
4574 prev = tmp;
4575 tmp = tmp->next;
4576 }
4577 }
4578 }
4579 if (prev != NULL)
4580 prev->next = tmp;
4581 else
4582 thead = tmp;
4583 prev = tmp;
4584 }
4585 }
4586 head->data.ptrvalue = thead;
4587 if (thead == NULL)
4588 head = SeqLocFree(head);
4589 break;
4590 case SEQLOC_INT: /* int */
4591 sip = (SeqIntPtr)(head->data.ptrvalue);
4592 sip2 = NULL;
4593 SeqEdInsertSeqInt (sip, target->id, pos, len, split, newid, &sip2);
4594 if (sip2 != NULL)
4595 {
4596 thead = head; /* make split interval into PACKED_INT */
4597 head = ValNodeNew (NULL);
4598 head->choice = SEQLOC_PACKED_INT;
4599
4600 slp = ValNodeNew (NULL);
4601 slp->choice = SEQLOC_INT;
4602 slp->data.ptrvalue = sip2;
4603
4604 if (sip->strand == Seq_strand_minus) /* reverse order */
4605 {
4606 head->data.ptrvalue = slp;
4607 slp->next = thead;
4608 }
4609 else
4610 {
4611 head->data.ptrvalue = thead;
4612 thead->next = slp;
4613 }
4614 }
4615 break;
4616 case SEQLOC_PNT: /* pnt */
4617 spp = (SeqPntPtr)(head->data.ptrvalue);
4618 SeqEdInsertSeqPnt (spp, target->id, pos, len, newid);
4619 break;
4620 case SEQLOC_PACKED_PNT: /* packed pnt */
4621 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
4622 if ((id_in_list = SeqIdIn(pspp->id, target->id))
4623 || AdjustOffsetsForSegment(pspp->id, target->id, &pos, NULL))
4624 {
4625 if (id_in_list && newid != NULL) /* change id? */
4626 {
4627 SeqIdFree(pspp->id);
4628 pspp->id = SeqIdDup(newid);
4629 }
4630
4631 numpnt = PackSeqPntNum(pspp);
4632 pspp2 = PackSeqPntNew();
4633 head->data.ptrvalue = pspp2;
4634 for (i = 0; i < numpnt; i++)
4635 {
4636 tpos = PackSeqPntGet(pspp, i);
4637 if (tpos >= pos)
4638 tpos += len;
4639 PackSeqPntPut(pspp2, tpos);
4640 }
4641 pspp2->id = pspp->id;
4642 pspp->id = NULL;
4643 pspp2->fuzz = pspp->fuzz;
4644 pspp->fuzz = NULL;
4645 pspp2->strand = pspp->strand;
4646 PackSeqPntFree(pspp);
4647 }
4648 break;
4649 default:
4650 break;
4651 }
4652
4653 if (head == NULL)
4654 ErrPost(CTX_NCBIOBJ, 1, "SeqEdSeqLocInsert: lost a SeqLoc");
4655
4656 return head;
4657 }
4658
4659
4660 /* return TRUE if spp should be deleted */
SeqEdDeleteFromSeqPnt(SeqPntPtr spp,SeqIdPtr target_id,Int4 from,Int4 to)4661 static Boolean SeqEdDeleteFromSeqPnt (SeqPntPtr spp, SeqIdPtr target_id, Int4 from, Int4 to)
4662 {
4663 Boolean rval = FALSE;
4664 Int4 diff = to - from + 1;
4665
4666 if (spp == NULL) return FALSE;
4667
4668 if (SeqIdIn (spp->id, target_id)
4669 || AdjustOffsetsForSegment (spp->id, target_id, &from, &to))
4670 {
4671 if ((spp->point >= from) && (spp->point <= to))
4672 {
4673 rval = TRUE;
4674 }
4675 else if (spp->point > to)
4676 {
4677 spp->point -= diff;
4678 }
4679 }
4680 return rval;
4681 }
4682
4683
4684 static SeqLocPtr
SeqEdDeleteFromSeqLocBond(SeqLocPtr head,SeqIdPtr target,Int4 from,Int4 to,Boolean merge,BoolPtr changed)4685 SeqEdDeleteFromSeqLocBond (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
4686 {
4687 SeqBondPtr sbp;
4688
4689 if (head == NULL || target == NULL || head->choice != SEQLOC_BOND) return NULL;
4690 sbp = (SeqBondPtr)(head->data.ptrvalue);
4691
4692 if (SeqEdDeleteFromSeqPnt (sbp->a, target, from, to))
4693 {
4694 *changed = TRUE;
4695 sbp->a = SeqPntFree(sbp->a);
4696 }
4697
4698 if (SeqEdDeleteFromSeqPnt (sbp->b, target, from, to))
4699 {
4700 *changed = TRUE;
4701 sbp->b = SeqPntFree(sbp->b);
4702 }
4703
4704 if (sbp->a == NULL)
4705 {
4706 if (sbp->b != NULL) /* only a required */
4707 {
4708 sbp->a = sbp->b;
4709 sbp->b = NULL;
4710 }
4711 else
4712 {
4713 head = SeqLocFree(head);
4714 }
4715 }
4716 return head;
4717 }
4718
/* Applies a deletion of from..to to a SEQLOC_WHOLE that refers to "target".
 *   - deletion of the complete sequence frees the location (*changed set)
 *   - with merge TRUE the location is otherwise left as a WHOLE
 *   - with merge FALSE the location is turned into a SEQLOC_PACKED_INT
 *     holding the part before "from" and/or the part after "to"
 *     (coordinates are not shifted), and *changed is set
 * A WHOLE on some other sequence is returned untouched.
 * Returns NULL when head/target is NULL or head is not a SEQLOC_WHOLE.
 */
static SeqLocPtr DeleteFromSeqLocWhole
(SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
{
  SeqIdPtr   whole_id;
  SeqIntPtr  piece;
  SeqLocPtr  left_loc = NULL, right_loc;
  Int4       last_pos;

  if (head == NULL || target == NULL || head->choice != SEQLOC_WHOLE)
  {
    return NULL;
  }

  whole_id = (SeqIdPtr) head->data.ptrvalue;
  if (!SeqIdIn (whole_id, target->id))
  {
    return head;
  }

  last_pos = target->length - 1;

  if (from == 0 && to >= last_pos)
  {
    /* complete delete */
    head = SeqLocFree (head);
    *changed = TRUE;
    return head;
  }

  if (merge)
  {
    return head;
  }

  /* split it up */
  SeqIdFree (whole_id);
  head->choice = SEQLOC_PACKED_INT;
  head->data.ptrvalue = NULL;

  if (from != 0)
  {
    piece = SeqIntNew ();
    piece->from = 0;
    piece->to = from - 1;
    piece->id = SeqIdDup (target->id);
    left_loc = ValNodeNew (NULL);
    left_loc->choice = SEQLOC_INT;
    left_loc->data.ptrvalue = piece;
    head->data.ptrvalue = left_loc;
    *changed = TRUE;
  }

  if (to < last_pos)
  {
    piece = SeqIntNew ();
    piece->from = to + 1;
    piece->to = last_pos;
    piece->id = SeqIdDup (target->id);
    right_loc = ValNodeNew (NULL);
    right_loc->choice = SEQLOC_INT;
    right_loc->data.ptrvalue = piece;
    if (left_loc == NULL)
      head->data.ptrvalue = right_loc;
    else
      left_loc->next = right_loc;
    *changed = TRUE;
  }

  return head;
}
4776
/* Applies a deletion of from..to to every member of a SEQLOC_MIX,
 * SEQLOC_EQUIV or SEQLOC_PACKED_INT and rebuilds the member list.
 *
 *   - members that vanish are dropped and *changed is set
 *   - with merge TRUE, two consecutive surviving intervals on the same
 *     Bioseq and the same strand sense that have become adjacent are fused
 *     into the earlier one (this alone does not set *changed)
 *   - a leading NULL, a trailing NULL and the second of two consecutive
 *     NULLs are removed (*changed is set)
 *
 * *partial5 receives the 5' partial flag as it stands after the first
 * member has been processed; *partial3 receives the 3' partial flag as it
 * stands after the last member.  Either pointer may be NULL.
 *
 * Returns head, the result of SeqLocFree() if no members remain, or NULL
 * when head/target is NULL or head is not one of the three list types.
 */
static SeqLocPtr SeqEdDeleteFromSeqLocPackedInt
(SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3)
{
  Boolean    part5 = FALSE, part3 = FALSE, first = TRUE;
  SeqLocPtr  slp, tmp, prev = NULL, next, thead = NULL;
  SeqIntPtr  sip, sip2;

  if (head == NULL || target == NULL) return NULL;
  if (head->choice != SEQLOC_MIX && head->choice != SEQLOC_EQUIV && head->choice != SEQLOC_PACKED_INT)
    return NULL;

  for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
  {
    /* SeqEdSeqLocDelete clears slp->next, so remember it first */
    next = slp->next;
    tmp = SeqEdSeqLocDelete (slp, target, from, to, merge, changed, &part5, &part3);
    if (first && partial5 != NULL)
    {
      *partial5 = part5;
    }
    first = FALSE;

    if (tmp == NULL)
    {
      *changed = TRUE;
      continue;
    }

    if (prev == NULL)
    {
      /* a list may not start with a NULL */
      if (tmp->choice == SEQLOC_NULL)
      {
        tmp = SeqLocFree (tmp);
        *changed = TRUE;
      }
    }
    else if (merge && prev->choice == SEQLOC_INT && tmp->choice == SEQLOC_INT)
    {
      sip = (SeqIntPtr)(prev->data.ptrvalue);
      sip2 = (SeqIntPtr)(tmp->data.ptrvalue);

      if (SeqIdForSameBioseq (sip->id, sip2->id))
      {
        /* merge intervals? */
        if ((sip->strand == Seq_strand_minus) &&
            (sip2->strand == Seq_strand_minus))
        {
          if (sip->from == (sip2->to + 1))
          {
            sip->from = sip2->from;
            /* release the fuzz being replaced; it was previously leaked */
            IntFuzzFree (sip->if_from);
            sip->if_from = sip2->if_from;
            sip2->if_from = NULL;
            tmp = SeqLocFree (tmp);
          }
        }
        else if ((sip->strand != Seq_strand_minus) &&
                 (sip2->strand != Seq_strand_minus))
        {
          if (sip->to == (sip2->from - 1))
          {
            sip->to = sip2->to;
            /* release the fuzz being replaced; it was previously leaked */
            IntFuzzFree (sip->if_to);
            sip->if_to = sip2->if_to;
            sip2->if_to = NULL;
            tmp = SeqLocFree (tmp);
          }
        }
      }
    }
    else if ((prev->choice == SEQLOC_NULL) && (tmp->choice == SEQLOC_NULL))
    {
      /* collapse consecutive NULLs */
      tmp = SeqLocFree (tmp);
      *changed = TRUE;
    }

    if (tmp != NULL)   /* still have one? */
    {
      if (prev != NULL)
        prev->next = tmp;
      else
        thead = tmp;
      prev = tmp;
    }
  }

  if (partial3 != NULL)
  {
    *partial3 = part3;
  }

  if (prev != NULL && prev->choice == SEQLOC_NULL)   /* ends with NULL */
  {
    /* find the last node (slp) and the one before it (prev) */
    prev = NULL;
    for (slp = thead; slp->next != NULL; slp = slp->next)
    {
      prev = slp;
    }
    if (prev != NULL)
    {
      prev->next = NULL;
      SeqLocFree (slp);
    }
    else
    {
      thead = SeqLocFree (thead);
    }
    *changed = TRUE;
  }

  head->data.ptrvalue = thead;
  if (thead == NULL)
    head = SeqLocFree (head);
  return head;
}
4896
4897
/* Applies a deletion of from..to to a SEQLOC_INT.
 *
 *   interval entirely before the cut   : unchanged
 *   interval entirely inside the cut   : freed, *changed set
 *   interval entirely after the cut    : both ends moved down by the
 *                                        deleted length
 *   3' end inside the cut              : "to" becomes from - 1, *changed and
 *                                        *partial3 set
 *   5' end inside the cut              : "from" becomes the first position
 *                                        after the cut (in post-deletion
 *                                        coordinates), *changed and
 *                                        *partial5 set
 *   interval spanning the cut          : "to" moved down by the deleted
 *                                        length, i.e. the interval shrinks
 *
 * Intervals whose id neither matches target->id nor is accepted by
 * AdjustOffsetsForSegment() are returned untouched.
 * partial5/partial3 may be NULL.
 * Returns NULL when head/target is NULL or head is not a SEQLOC_INT.
 *
 * NOTE(review): earlier revisions carried code after the 5' adjustment that
 * was meant to split a spanning interval in two when "merge" is FALSE.  It
 * was nested inside the "from inside cut" branch, where the spanning test
 * (sip->from < from) can never be true, so it never ran.  It has been
 * removed; behaviour is unchanged and "merge" is kept only for interface
 * compatibility.  If splitting is really wanted it has to be implemented
 * with post-deletion coordinates -- confirm with the callers first.
 */
static SeqLocPtr SeqEdDeleteFromSeqLocInt (SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3)
{
  Int4       diff;
  SeqIntPtr  sip;

  if (head == NULL || target == NULL || head->choice != SEQLOC_INT) return NULL;

  sip = (SeqIntPtr)(head->data.ptrvalue);
  if (!SeqIdIn (sip->id, target->id)
      && !AdjustOffsetsForSegment (sip->id, target->id, &from, &to))
  {
    return head;
  }

  diff = to - from + 1;

  if (sip->to < from)   /* completely before cut */
  {
    return head;
  }

  /* completely contained in cut */
  if ((sip->from >= from) && (sip->to <= to))
  {
    head = SeqLocFree (head);
    *changed = TRUE;
    return head;
  }

  if (sip->from > to)   /* completely past cut */
  {
    sip->from -= diff;
    sip->to -= diff;
    return head;
  }

  /* overlap here */
  if (sip->to > to)
  {
    sip->to -= diff;
  }
  else                  /* to inside cut, so partial delete */
  {
    sip->to = from - 1;
    *changed = TRUE;
    if (partial3 != NULL)
    {
      *partial3 = TRUE;
    }
  }

  if (sip->from >= from)  /* from inside cut, partial del */
  {
    *changed = TRUE;
    /* first surviving residue: (to + 1) shifted down by diff, i.e. from */
    sip->from = from;
    if (partial5 != NULL)
    {
      *partial5 = TRUE;
    }
  }

  return head;
}
5000
/* Applies a deletion of from..to to a SEQLOC_PACKED_PNT on "target".
 * The point list is rebuilt:
 *   points before "from" are copied unchanged
 *   points inside from..to are dropped and *changed is set
 *   points after "to" are copied, moved down by the deleted length only
 *     when "merge" is TRUE
 * id, fuzz and strand are carried over to the rebuilt list.
 * Returns head, the result of SeqLocFree() if no points remain, or NULL
 * when head/target is NULL or head is not a SEQLOC_PACKED_PNT.
 * A packed point on another sequence is returned untouched.
 */
static SeqLocPtr SeqEdDeleteFromSeqLocPackedPnt
(SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
{
  PackSeqPntPtr  old_pts, new_pts;
  Int4           idx, removed_len, count, coord;

  if (head == NULL || target == NULL || head->choice != SEQLOC_PACKED_PNT)
  {
    return NULL;
  }

  old_pts = (PackSeqPntPtr) head->data.ptrvalue;
  if (!SeqIdIn (old_pts->id, target->id))
  {
    return head;
  }

  removed_len = to - from + 1;

  count = PackSeqPntNum (old_pts);
  new_pts = PackSeqPntNew ();
  head->data.ptrvalue = new_pts;

  for (idx = 0; idx < count; idx++)
  {
    coord = PackSeqPntGet (old_pts, idx);
    if (coord >= from)
    {
      if (coord <= to)
      {
        /* point was deleted */
        *changed = TRUE;
        continue;
      }
      if (merge)
      {
        coord -= removed_len;
      }
    }
    PackSeqPntPut (new_pts, coord);
  }

  /* hand id and fuzz over so that freeing the old list does not free them */
  new_pts->id = old_pts->id;
  old_pts->id = NULL;
  new_pts->fuzz = old_pts->fuzz;
  old_pts->fuzz = NULL;
  new_pts->strand = old_pts->strand;
  PackSeqPntFree (old_pts);

  if (PackSeqPntNum (new_pts) == 0)
  {
    head = SeqLocFree (head);
  }
  return head;
}
5053
5054
5055 /*****************************************************************************
5056 *
5057 * SeqEdSeqLocDelete()
5058 * returns altered head or NULL if nothing left.
5059 * sets changed=TRUE if all or part of loc is deleted
5060 * does NOT set changed if location coordinates are only moved
5061 * if (merge) then corrects coordinates upstream of to
5062 * else
5063 * splits intervals covering from-to, does not correct upstream of to
5064 *
5065 *****************************************************************************/
SeqEdSeqLocDelete(SeqLocPtr head,BioseqPtr target,Int4 from,Int4 to,Boolean merge,BoolPtr changed,BoolPtr partial5,BoolPtr partial3)5066 NLM_EXTERN SeqLocPtr SeqEdSeqLocDelete (SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3)
5067 {
5068 SeqPntPtr spp;
5069 Int4 diff;
5070
5071 if ((head == NULL) || (target == NULL))
5072 return head;
5073
5074 head->next = NULL; /* caller maintains chains */
5075 diff = to - from + 1;
5076
5077 switch (head->choice)
5078 {
5079 case SEQLOC_BOND: /* bond -- 2 seqs */
5080 head = SeqEdDeleteFromSeqLocBond (head, target->id, from, to, merge, changed);
5081 break;
5082 case SEQLOC_FEAT: /* feat -- can't track yet */
5083 case SEQLOC_NULL: /* NULL */
5084 case SEQLOC_EMPTY: /* empty */
5085 break;
5086 case SEQLOC_WHOLE: /* whole */
5087 head = DeleteFromSeqLocWhole (head, target, from, to, merge, changed);
5088 break;
5089 case SEQLOC_MIX: /* mix -- more than one seq */
5090 case SEQLOC_EQUIV: /* equiv -- ditto */
5091 case SEQLOC_PACKED_INT: /* packed int */
5092 head = SeqEdDeleteFromSeqLocPackedInt (head, target, from, to, merge, changed, partial5, partial3);
5093 break;
5094 case SEQLOC_INT: /* int */
5095 head = SeqEdDeleteFromSeqLocInt (head, target, from, to, merge, changed, partial5, partial3);
5096 break;
5097 case SEQLOC_PNT: /* pnt */
5098 spp = (SeqPntPtr)(head->data.ptrvalue);
5099 if (SeqEdDeleteFromSeqPnt (spp, target->id, from, to))
5100 {
5101 head = SeqLocFree(head);
5102 *changed = TRUE;
5103 }
5104 break;
5105 case SEQLOC_PACKED_PNT: /* packed pnt */
5106 head = SeqEdDeleteFromSeqLocPackedPnt (head, target, from, to, merge, changed);
5107 break;
5108 default:
5109 break;
5110 }
5111
5112 return head;
5113 }
5114
5115
5116 NLM_EXTERN SeqFeatPtr
SeqEdGetNextFeature(BioseqPtr bsp,SeqFeatPtr curr,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context,Boolean byLabel,Boolean byLocusTag,Uint2 entityID)5117 SeqEdGetNextFeature
5118 (BioseqPtr bsp,
5119 SeqFeatPtr curr,
5120 Uint1 seqFeatChoice,
5121 Uint1 featDefChoice,
5122 SeqMgrFeatContext PNTR context,
5123 Boolean byLabel,
5124 Boolean byLocusTag,
5125 Uint2 entityID)
5126
5127 {
5128 SMFeatItemPtr PNTR array = NULL;
5129 BioseqExtraPtr bspextra;
5130 Int4 i;
5131 SMFeatItemPtr item;
5132 Int4 num = 0;
5133 ObjMgrDataPtr omdp;
5134 ObjMgrPtr omp;
5135 Uint1 seqfeattype;
5136
5137 if (context == NULL) return NULL;
5138
5139 /* if curr is NULL, initialize context fields (in user's stack) */
5140
5141 if (curr == NULL) {
5142 if (bsp == NULL) return NULL;
5143 omdp = (ObjMgrDataPtr) bsp->omdp;
5144 if (omdp == NULL)
5145 {
5146 omp = ObjMgrWriteLock ();
5147 omdp = ObjMgrFindByData (omp, bsp);
5148 ObjMgrUnlock ();
5149 bsp->omdp = (Pointer) omdp;
5150 }
5151 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
5152
5153 context->omdp = (Pointer) omdp;
5154 context->index = 0;
5155 }
5156
5157 omdp = (ObjMgrDataPtr) context->omdp;
5158 if (omdp == NULL) return NULL;
5159 bspextra = (BioseqExtraPtr) omdp->extradata;
5160 if (bspextra == NULL) return NULL;
5161 if (byLocusTag) {
5162 array = bspextra->genesByLocusTag;
5163 num = bspextra->numgenes;
5164 } else if (byLabel) {
5165 array = bspextra->featsByLabel;
5166 num = bspextra->numfeats;
5167 } else {
5168 array = bspextra->featsByPos;
5169 num = bspextra->numfeats;
5170 }
5171 if (array == NULL || num < 1) return NULL;
5172
5173 i = context->index;
5174
5175 /* now look for next appropriate feature */
5176
5177 while (i < num) {
5178 item = array [i];
5179 if (item != NULL) {
5180 curr = item->sfp;
5181 i++;
5182 if (curr != NULL) {
5183 seqfeattype = curr->data.choice;
5184 if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
5185 (featDefChoice == 0 || item->subtype == featDefChoice) &&
5186 (! item->ignore)) {
5187 context->entityID = entityID;
5188 context->itemID = item->itemID;
5189 context->sfp = curr;
5190 context->sap = item->sap;
5191 context->bsp = item->bsp;
5192 context->label = item->label;
5193 context->left = item->left;
5194 context->right = item->right;
5195 context->dnaStop = item->dnaStop;
5196 context->partialL = item->partialL;
5197 context->partialR = item->partialR;
5198 context->farloc = item->farloc;
5199 context->strand = item->strand;
5200 context->seqfeattype = seqfeattype;
5201 context->featdeftype = item->subtype;
5202 context->numivals = item->numivals;
5203 context->ivals = item->ivals;
5204 context->userdata = NULL;
5205 context->omdp = (Pointer) omdp;
5206 if (byLocusTag) {
5207 context->index = i;
5208 } else if (byLabel) {
5209 context->index = i;
5210 } else {
5211 context->index = item->index + 1;
5212 }
5213 return curr;
5214 }
5215 }
5216 }
5217 }
5218
5219 return NULL;
5220 }
5221
/* For every entry of sejp->affected_feats that is marked (choice == 1) and
 * has a feat_after, looks the live feature up by feat_after's itemID and
 * passes it to SeqEdReindexFeature().
 */
static void ReindexExtendedFeatures (SeqEdJournalPtr sejp)
{
  ValNodePtr         node;
  AffectedFeatPtr    affected;
  SeqFeatPtr         live_sfp;
  SeqMgrFeatContext  fcontext;

  for (node = sejp->affected_feats; node != NULL; node = node->next)
  {
    if (node->choice != 1 || node->data.ptrvalue == NULL)
    {
      continue;
    }

    affected = (AffectedFeatPtr) node->data.ptrvalue;
    if (affected->feat_after == NULL)
    {
      continue;
    }

    live_sfp = SeqMgrGetDesiredFeature (sejp->entityID, sejp->bsp,
                                        affected->feat_after->idx.itemID,
                                        0, NULL, &fcontext);
    SeqEdReindexFeature (live_sfp, sejp->bsp);
  }
}
5243
5244
DoesSeqFeatMatch(SeqFeatPtr a,SeqFeatPtr b)5245 static Boolean DoesSeqFeatMatch (SeqFeatPtr a, SeqFeatPtr b)
5246 {
5247 if (a == b) return TRUE;
5248 if (a == NULL || b == NULL) return FALSE;
5249
5250 if (a->data.choice != b->data.choice) return FALSE;
5251 if (SeqLocCompare (a->location, b->location) != SLC_A_EQ_B)
5252 {
5253 return FALSE;
5254 }
5255 return TRUE;
5256 }
5257
5258
/* Adjusts one feature for the insertion described by the journal entry.
 *
 * If the edited Bioseq has a parent (GetParentForSegment), the parent is
 * used as the target and insert_point is moved by the offset it reports.
 *
 * If the feature matches the feat_after of an entry in
 * sejp->affected_feats, that entry is marked (choice = 1) and the
 * feature's location is swapped with the saved feat_before location;
 * otherwise the location is recomputed with SeqEdSeqLocInsert().
 * Coding regions and RNAs then get their type-specific adjustment.
 */
static void SeqEdInsertAdjustFeat (SeqFeatPtr sfp, SeqEdJournalPtr sejp, Int4 insert_point)
{
  ValNodePtr       vnp;
  AffectedFeatPtr  afp = NULL;
  AffectedFeatPtr  candidate;
  SeqLocPtr        tmp_loc;
  Boolean          split_mode;
  BioseqPtr        bsp;
  Int4             insert_offset;

  if (sfp == NULL || sejp == NULL)
  {
    return;
  }

  bsp = GetParentForSegment (sejp->bsp, &insert_offset, NULL);
  if (bsp == NULL)
  {
    bsp = sejp->bsp;
  }
  else
  {
    insert_point += insert_offset;
  }

  /* stop at the first recorded feature that matches */
  for (vnp = sejp->affected_feats; vnp != NULL && afp == NULL; vnp = vnp->next)
  {
    candidate = (AffectedFeatPtr) vnp->data.ptrvalue;
    if (candidate != NULL && DoesSeqFeatMatch (candidate->feat_after, sfp))
    {
      vnp->choice = 1;
      afp = candidate;
    }
  }

  /* if we're inserting or deleting a gap and the feature is a coding
   * region, need to split location regardless of mode.
   * The gap tests are parenthesized on purpose: without the parentheses
   * "&&" binds tighter than "||" and an inserted gap forced split mode for
   * every feature type, not just coding regions. */
  split_mode = sejp->spliteditmode;
  if ((sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap)
      && sfp->data.choice == SEQFEAT_CDREGION)
  {
    split_mode = TRUE;
  }

  if (afp != NULL)
  {
    tmp_loc = sfp->location;
    sfp->location = afp->feat_before->location;
    afp->feat_before->location = tmp_loc;
  }
  else
  {
    sfp->location = SeqEdSeqLocInsert (sfp->location, bsp, insert_point,
                                       sejp->num_chars, split_mode, NULL);
  }

  switch (sfp->data.choice)
  {
    case SEQFEAT_CDREGION:   /* cdregion */
      SeqEdInsertAdjustCdRgn (sfp, bsp, insert_point, sejp->num_chars,
                              split_mode);
      break;
    case SEQFEAT_RNA:
      SeqEdInsertAdjustRNA (sfp, bsp, insert_point, sejp->num_chars,
                            split_mode);
      break;
    default:
      break;
  }
}
5330
IsDeltaSeqGap(DeltaSeqPtr dsp)5331 extern Boolean IsDeltaSeqGap (DeltaSeqPtr dsp)
5332 {
5333 SeqLitPtr slip;
5334 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5335 {
5336 return FALSE;
5337 }
5338 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5339 if (slip->seq_data == NULL || slip->seq_data_type == Seq_code_gap)
5340 {
5341 return TRUE;
5342 }
5343 else
5344 {
5345 return FALSE;
5346 }
5347 }
5348
IsDeltaSeqUnknownGap(DeltaSeqPtr dsp)5349 extern Boolean IsDeltaSeqUnknownGap (DeltaSeqPtr dsp)
5350 {
5351 SeqLitPtr slip;
5352 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5353 {
5354 return FALSE;
5355 }
5356 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5357 if ((slip->seq_data == NULL || slip->seq_data_type == Seq_code_gap) &&
5358 slip->fuzz != NULL && slip->fuzz->choice == 4)
5359 {
5360 return TRUE;
5361 }
5362 else
5363 {
5364 return FALSE;
5365 }
5366 }
5367
5368
IsDeltaSeqKnownGap(DeltaSeqPtr dsp)5369 extern Boolean IsDeltaSeqKnownGap (DeltaSeqPtr dsp)
5370 {
5371 SeqLitPtr slip;
5372 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5373 {
5374 return FALSE;
5375 }
5376 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5377 if ((slip->seq_data == NULL || slip->seq_data_type == Seq_code_gap) &&
5378 slip->fuzz == NULL)
5379 {
5380 return TRUE;
5381 }
5382 else
5383 {
5384 return FALSE;
5385 }
5386 }
5387
5388
DoesSeqLitHaveGapTypeOrLinkage(SeqLitPtr slip)5389 extern Boolean DoesSeqLitHaveGapTypeOrLinkage (SeqLitPtr slip)
5390 {
5391 if (slip != NULL && slip->seq_data_type == Seq_code_gap) {
5392 return TRUE;
5393 } else {
5394 return FALSE;
5395 }
5396 }
5397
DoesDeltaSeqHaveGapTypeOrLinkage(DeltaSeqPtr dsp)5398 extern Boolean DoesDeltaSeqHaveGapTypeOrLinkage (DeltaSeqPtr dsp)
5399 {
5400 if (dsp != NULL && dsp->choice == 2) {
5401 return DoesSeqLitHaveGapTypeOrLinkage ((SeqLitPtr) dsp->data.ptrvalue);
5402 } else {
5403 return FALSE;
5404 }
5405 }
5406
GetDeltaSeqForOffset(BioseqPtr bsp,Int4 offset,Int4Ptr seqstart)5407 static DeltaSeqPtr GetDeltaSeqForOffset (BioseqPtr bsp, Int4 offset, Int4Ptr seqstart)
5408 {
5409 Int4 curr_pos = 0;
5410 Boolean found = FALSE;
5411 SeqLocPtr slp;
5412 SeqLitPtr slip = NULL;
5413 DeltaSeqPtr dsp;
5414
5415 if (bsp == NULL || bsp->repr != Seq_repr_delta
5416 || bsp->seq_ext_type != 4 || bsp->seq_ext == NULL
5417 || offset < 0)
5418 {
5419 return NULL;
5420 }
5421
5422 if (seqstart != NULL)
5423 {
5424 *seqstart = 0;
5425 }
5426 dsp = (DeltaSeqPtr) bsp->seq_ext;
5427 while (dsp != NULL && !found)
5428 {
5429 if (dsp->data.ptrvalue == NULL) continue;
5430 if (dsp->choice == 1)
5431 { /* SeqLoc */
5432 slp = (SeqLocPtr)(dsp->data.ptrvalue);
5433 curr_pos += SeqLocLen (slp);
5434 }
5435 else if (dsp->choice == 2)
5436 {
5437 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5438 curr_pos += slip->length;
5439 }
5440 if (curr_pos > offset
5441 || (curr_pos == offset
5442 && (dsp->next == NULL || ! IsDeltaSeqGap (dsp))))
5443 {
5444 found = TRUE;
5445 }
5446 else
5447 {
5448 if (seqstart != NULL)
5449 {
5450 *seqstart = curr_pos;
5451 }
5452 dsp=dsp->next;
5453 }
5454 }
5455
5456 return dsp;
5457 }
5458
5459 static Boolean
SeqEdInsertByteStore(ByteStorePtr seq_data,Int4 insert_point,CharPtr char_data,Int4 num_chars,Uint1 moltype)5460 SeqEdInsertByteStore
5461 (ByteStorePtr seq_data,
5462 Int4 insert_point,
5463 CharPtr char_data,
5464 Int4 num_chars,
5465 Uint1 moltype)
5466 {
5467 Char ch;
5468 Int4 i;
5469
5470 if (seq_data == NULL || insert_point < 0 || char_data == NULL || num_chars < 1)
5471 {
5472 return FALSE;
5473 }
5474 BSSeek(seq_data, insert_point, SEEK_SET);
5475 Nlm_BSAdd(seq_data, num_chars, FALSE);
5476 BSSeek(seq_data, insert_point, SEEK_SET);
5477 for (i = 0; i < num_chars; i++)
5478 {
5479 ch = TO_UPPER (char_data [i]);
5480 if ( ISA_na (moltype) ) {
5481 if (ch == 'U') ch = 'T';
5482 if (ch == 'X') ch = 'N';
5483 if ( StringChr ("EFIJLOPQXZ-.*", ch) == NULL ) {
5484 BSPutByte ( seq_data, (Int2) ch );
5485 }
5486 }
5487 else
5488 {
5489 if ( StringChr("JO-.", ch) == NULL ) {
5490 BSPutByte ( seq_data, (Int2) ch );
5491 }
5492 }
5493 }
5494 return TRUE;
5495 }
5496
SeqEdInsertRaw(SeqEdJournalPtr sejp,Int4 insert_point)5497 static Boolean SeqEdInsertRaw (SeqEdJournalPtr sejp, Int4 insert_point)
5498 {
5499 Boolean rval;
5500 BioseqPtr bsp;
5501
5502 if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_raw
5503 || sejp->char_data == NULL || sejp->num_chars == 0 || insert_point < 0)
5504 {
5505 return FALSE;
5506 }
5507 if (sejp->bsp->seq_data_type == Seq_code_gap) return FALSE;
5508
5509 rval = SeqEdInsertByteStore ((ByteStorePtr) sejp->bsp->seq_data, insert_point,
5510 sejp->char_data, sejp->num_chars, sejp->moltype);
5511
5512 if (rval)
5513 {
5514 sejp->bsp->length += sejp->num_chars;
5515 bsp = GetParentForSegment (sejp->bsp, NULL, NULL);
5516 if (bsp != NULL)
5517 {
5518 bsp->length += sejp->num_chars;
5519 }
5520 }
5521 return rval;
5522 }
5523
5524 static Boolean
SeqEdInsertIntoDeltaGap(DeltaSeqPtr dsp,SeqEdJournalPtr sejp,Int4 insert_point)5525 SeqEdInsertIntoDeltaGap
5526 (DeltaSeqPtr dsp,
5527 SeqEdJournalPtr sejp,
5528 Int4 insert_point)
5529 {
5530 SeqLitPtr slip, slip_data, slip_second_gap;
5531 Boolean rval = FALSE;
5532 DeltaSeqPtr dsp_data, dsp_second_gap;
5533 IntFuzzPtr ifp = NULL;
5534
5535 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5536 {
5537 return rval;
5538 }
5539 slip = (SeqLitPtr) dsp->data.ptrvalue;
5540 if (slip->seq_data != NULL && slip->seq_data_type != Seq_code_gap)
5541 {
5542 return rval;
5543 }
5544
5545 if (slip->fuzz != NULL && slip->fuzz->choice == 4)
5546 {
5547 ifp = IntFuzzNew ();
5548 ifp->choice = 4;
5549 }
5550
5551 /* split the gap in two and create a new DeltaSeqPtr in the middle */
5552 slip_data = SeqLitNew ();
5553 slip_data->seq_data_type = Seq_code_iupacna;
5554 slip_data->seq_data = (SeqDataPtr) BSNew (sejp->num_chars);
5555 rval = SeqEdInsertByteStore ((ByteStorePtr) slip_data->seq_data, 0,
5556 sejp->char_data, sejp->num_chars, sejp->moltype);
5557 if (rval)
5558 {
5559 slip_data->length = sejp->num_chars;
5560 /* create second gap */
5561 slip_second_gap = SeqLitNew ();
5562 slip_second_gap->length = slip->length - insert_point;
5563 slip_second_gap->fuzz = ifp;
5564 /* truncate first gap */
5565 slip->length = insert_point;
5566 dsp_data = ValNodeNew (NULL);
5567 dsp_data->choice = 2;
5568 dsp_data->data.ptrvalue = slip_data;
5569 dsp_second_gap = ValNodeNew (NULL);
5570 dsp_second_gap->choice = 2;
5571 dsp_second_gap->data.ptrvalue = slip_second_gap;
5572 dsp_second_gap->next = dsp->next;
5573 dsp_data->next = dsp_second_gap;
5574 dsp->next = dsp_data;
5575 }
5576 return rval;
5577 }
5578
IsInsertAllNs(SeqEdJournalPtr sejp)5579 static Boolean IsInsertAllNs (SeqEdJournalPtr sejp)
5580 {
5581 Int4 k;
5582
5583 if (sejp == NULL || sejp->char_data == NULL || sejp->num_chars < 1)
5584 {
5585 return FALSE;
5586 }
5587
5588 for (k = 0; k < sejp->num_chars; k++)
5589 {
5590 if (TO_LOWER (sejp->char_data [k]) != 'n')
5591 {
5592 return FALSE;
5593 }
5594 }
5595 return TRUE;
5596 }
5597
SeqEdInsertDelta(SeqEdJournalPtr sejp,Int4 insert_point)5598 static Boolean SeqEdInsertDelta (SeqEdJournalPtr sejp, Int4 insert_point)
5599 {
5600 DeltaSeqPtr dsp;
5601 SeqLitPtr slip;
5602 Int4 seqstart = 0;
5603 ByteStorePtr bs_new;
5604 Boolean rval;
5605
5606 if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_delta
5607 || sejp->bsp->seq_ext_type != 4
5608 || sejp->char_data == NULL || sejp->num_chars == 0
5609 || insert_point < 0)
5610 {
5611 return FALSE;
5612 }
5613
5614 dsp = GetDeltaSeqForOffset (sejp->bsp, insert_point, &seqstart);
5615
5616 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5617 {
5618 return FALSE;
5619 }
5620
5621 slip = (SeqLitPtr) dsp->data.ptrvalue;
5622 insert_point -= seqstart;
5623
5624 if (IsDeltaSeqGap (dsp))
5625 {
5626 if (slip->fuzz == NULL && IsInsertAllNs (sejp))
5627 {
5628 slip->length += sejp->num_chars;
5629 rval = TRUE;
5630 }
5631 else
5632 {
5633 rval = SeqEdInsertIntoDeltaGap (dsp, sejp, insert_point);
5634 }
5635 }
5636 else
5637 {
5638 if (slip->seq_data_type != Seq_code_iupacna && slip->seq_data_type != Seq_code_gap)
5639 {
5640 bs_new = BSConvertSeq((ByteStorePtr) slip->seq_data, Seq_code_iupacna,
5641 slip->seq_data_type,
5642 slip->length);
5643 slip->seq_data_type = Seq_code_iupacna;
5644 slip->seq_data = (SeqDataPtr) bs_new;
5645 }
5646
5647 rval = SeqEdInsertByteStore ((ByteStorePtr) slip->seq_data, insert_point,
5648 sejp->char_data, sejp->num_chars,
5649 sejp->moltype);
5650 if (rval)
5651 {
5652 slip->length += sejp->num_chars;
5653 }
5654 }
5655
5656 if (rval)
5657 {
5658 sejp->bsp->length += sejp->num_chars;
5659 }
5660 return rval;
5661 }
5662
5663 static Boolean
SeqEdInsertGap(SeqEdJournalPtr sejp,Int4 insert_point)5664 SeqEdInsertGap (SeqEdJournalPtr sejp, Int4 insert_point)
5665 {
5666 DeltaSeqPtr dsp, dsp_gap, dsp_after;
5667 Int4 seqstart = 0;
5668 SeqLitPtr slip, slip_before, slip_gap, slip_after;
5669 ByteStorePtr bs_new;
5670
5671 if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_delta
5672 || sejp->bsp->seq_ext_type != 4
5673 || sejp->char_data == NULL || sejp->num_chars == 0
5674 || insert_point < 0)
5675 {
5676 return FALSE;
5677 }
5678
5679 dsp = GetDeltaSeqForOffset (sejp->bsp, insert_point, &seqstart);
5680
5681 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5682 {
5683 return FALSE;
5684 }
5685
5686 slip_gap = SeqLitNew ();
5687 slip_gap->seq_data_type = 0;
5688 slip_gap->seq_data = NULL;
5689 slip_gap->length = sejp->num_chars;
5690 if (sejp->unknown_gap)
5691 {
5692 slip_gap->fuzz = IntFuzzNew ();
5693 slip_gap->fuzz->choice = 4;
5694 }
5695
5696 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5697
5698 /* make insert_point relative to start of this SeqLit */
5699 insert_point -= seqstart;
5700
5701 if (insert_point == 0)
5702 {
5703 /* insert gap before */
5704 dsp_after = ValNodeNew (NULL);
5705 dsp_after->choice = 2;
5706 dsp_after->data.ptrvalue = slip;
5707 dsp_after->next = dsp->next;
5708 dsp->next = dsp_after;
5709 dsp->data.ptrvalue = slip_gap;
5710 }
5711 else if (insert_point == slip->length)
5712 {
5713 /* insert gap after */
5714 dsp_after = ValNodeNew (NULL);
5715 dsp_after->choice = 2;
5716 dsp_after->data.ptrvalue = slip_gap;
5717 dsp_after->next = dsp->next;
5718 dsp->next = dsp_after;
5719 }
5720 else if (IsDeltaSeqUnknownGap (dsp))
5721 {
5722 /* can't insert gap inside gap of unknown length */
5723 slip_gap = SeqLitFree (slip_gap);
5724 return FALSE;
5725 }
5726 else if (IsDeltaSeqGap (dsp) && !sejp->unknown_gap)
5727 {
5728 slip_gap = SeqLitFree (slip_gap);
5729 slip->length += sejp->num_chars;
5730 }
5731 else
5732 {
5733 slip_before = SeqLitNew ();
5734 slip_before->seq_data_type = Seq_code_iupacna;
5735 slip_before->length = insert_point;
5736
5737 slip_after = SeqLitNew ();
5738 slip_after->seq_data_type = Seq_code_iupacna;
5739 slip_after->length = slip->length - insert_point;
5740
5741 if (slip->seq_data != NULL && slip->seq_data_type != Seq_code_gap)
5742 {
5743 if (slip->seq_data_type != Seq_code_iupacna && slip->seq_data_type != Seq_code_gap)
5744 {
5745 bs_new = BSConvertSeq((ByteStorePtr) slip->seq_data, Seq_code_iupacna,
5746 slip->seq_data_type,
5747 slip->length);
5748 slip->seq_data_type = Seq_code_iupacna;
5749 slip->seq_data = (SeqDataPtr) bs_new;
5750 }
5751 slip_before->seq_data = (SeqDataPtr) BSNew (slip_before->length);
5752 slip_after->seq_data = (SeqDataPtr) BSNew (slip_after->length);
5753
5754 BSSeek((ByteStorePtr) slip->seq_data, 0, SEEK_SET);
5755 BSInsertFromBS ((ByteStorePtr) slip_before->seq_data, (ByteStorePtr) slip->seq_data, slip_before->length);
5756 BSInsertFromBS ((ByteStorePtr) slip_after->seq_data, (ByteStorePtr) slip->seq_data, slip_after->length);
5757 }
5758
5759 dsp_after = ValNodeNew (NULL);
5760 dsp_after->choice = 2;
5761 dsp_after->data.ptrvalue = slip_after;
5762 dsp_after->next = dsp->next;
5763
5764 dsp_gap = ValNodeNew (NULL);
5765 dsp_gap->choice = 2;
5766 dsp_gap->data.ptrvalue = slip_gap;
5767 dsp_gap->next = dsp_after;
5768
5769 dsp->data.ptrvalue = slip_before;
5770 dsp->next = dsp_gap;
5771 slip = SeqLitFree (slip);
5772 }
5773
5774 sejp->bsp->length += sejp->num_chars;
5775
5776 return TRUE;
5777 }
5778
5779 NLM_EXTERN Boolean
SeqEdInsert(SeqEdJournalPtr sejp)5780 SeqEdInsert (SeqEdJournalPtr sejp)
5781 {
5782 Int4 len;
5783 SeqFeatPtr sfp;
5784 SeqMgrFeatContext fcontext;
5785 ValNodePtr prods, vnp;
5786 BioseqContextPtr bcp;
5787 Int4 insert_point;
5788 Boolean recreated_feats = FALSE;
5789 Boolean rval = FALSE;
5790 BioseqPtr bsp;
5791 Int4 insert_offset = 0;
5792
5793 if (sejp == NULL || sejp->bsp == NULL
5794 || sejp->char_data == NULL || sejp->num_chars == 0)
5795 {
5796 return FALSE;
5797 }
5798
5799 len = BioseqGetLen(sejp->bsp);
5800 insert_point = sejp->offset;
5801
5802 if (insert_point == LAST_RESIDUE)
5803 {
5804 insert_point = len - 1;
5805 }
5806 else if (insert_point == APPEND_RESIDUE)
5807 {
5808 insert_point = len;
5809 }
5810
5811 if ((insert_point < 0) || (insert_point > len)) return FALSE;
5812
5813 if (sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap)
5814 {
5815 rval = SeqEdInsertGap (sejp, insert_point);
5816 }
5817 else if (sejp->bsp->repr == Seq_repr_raw)
5818 {
5819 rval = SeqEdInsertRaw (sejp, insert_point);
5820 }
5821 else if (sejp->bsp->repr == Seq_repr_delta)
5822 {
5823 rval = SeqEdInsertDelta (sejp, insert_point);
5824 }
5825
5826 if (!rval)
5827 {
5828 return FALSE;
5829 }
5830
5831 /* fix features */
5832 if (sejp->entityID > 0 && SeqMgrFeaturesAreIndexed (sejp->entityID))
5833 {
5834 sfp = NULL;
5835 bsp = GetParentForSegment (sejp->bsp, &insert_offset, NULL);
5836 if (bsp == NULL)
5837 {
5838 bsp = sejp->bsp;
5839 }
5840
5841 while ((sfp = SeqEdGetNextFeature (bsp, sfp, 0, 0, &fcontext, FALSE, FALSE, sejp->entityID)) != NULL)
5842 {
5843 SeqEdInsertAdjustFeat (sfp, sejp, insert_point);
5844 }
5845
5846 if (bsp != sejp->bsp)
5847 {
5848 insert_point += insert_offset;
5849 }
5850
5851 /* adjust features pointing by product */
5852 prods = SeqMgrGetSfpProductList (sejp->bsp);
5853 for (vnp = prods; vnp != NULL; vnp = vnp->next)
5854 {
5855 sfp = (SeqFeatPtr) vnp->data.ptrvalue;
5856 if (sfp == NULL) continue;
5857 sfp->product = SeqEdSeqLocInsert (sfp->product, bsp, insert_point, sejp->num_chars, sejp->spliteditmode, NULL);
5858 }
5859 } else {
5860 bcp = BioseqContextNew(sejp->bsp);
5861 sfp = NULL;
5862 /* adjust features pointing by location */
5863 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
5864 {
5865 SeqEdInsertAdjustFeat (sfp, sejp, insert_point);
5866 }
5867 sfp = NULL;
5868 /* adjust features pointing by product */
5869 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL)
5870 {
5871 sfp->product = SeqEdSeqLocInsert (sfp->product, sejp->bsp, insert_point, sejp->num_chars, sejp->spliteditmode, NULL);
5872 }
5873 BioseqContextFree(bcp);
5874 }
5875
5876 recreated_feats = SeqEdRecreateDeletedFeats (sejp);
5877
5878 if (recreated_feats)
5879 {
5880 SeqMgrIndexFeatures (sejp->entityID, NULL);
5881 }
5882 else
5883 {
5884 SeqEdReindexAffectedFeatures (sejp->offset, sejp->num_chars,
5885 sejp->spliteditmode, sejp->bsp);
5886 ReindexExtendedFeatures (sejp);
5887 }
5888 sejp->affected_feats = SeqEdJournalAffectedFeatsFree (sejp->affected_feats);
5889 return TRUE;
5890 }
5891
5892
5893 /* This section contains code for deleting from sequences and feature locations, adapted from
5894 * that found in edutil.c */
5895
5896 /*****************************************************************************
5897 *
5898 * SeqEdSeqFeatDelete()
5899 * 0 = no changes made to location or product
5900 * 1 = changes made but feature still has some location
5901 * 2 = all of sfp->location in deleted interval
5902 *
5903 * if (merge)
5904 * 1) correct numbers > to by subtraction
5905 * 2) do not split intervals spanning the deletion
5906 * else
5907 * 1) do not change numbers > to
5908 * 2) split intervals which span the deletions
5909 *
5910 *****************************************************************************/
SeqEdSeqFeatDelete(SeqFeatPtr sfp,BioseqPtr target,Int4 from,Int4 to,Boolean merge)5911 NLM_EXTERN Int2 LIBCALL SeqEdSeqFeatDelete (SeqFeatPtr sfp, BioseqPtr target, Int4 from, Int4 to, Boolean merge)
5912 {
5913 ValNode vn;
5914 SeqLocPtr tloc;
5915 SeqInt si;
5916 Boolean changed = FALSE, tmpbool = FALSE;
5917 CdRegionPtr crp;
5918 CodeBreakPtr cbp, prevcbp, nextcbp;
5919 RnaRefPtr rrp;
5920 tRNAPtr trp;
5921
5922 tloc = &vn;
5923 MemSet((Pointer)tloc, 0, sizeof(ValNode));
5924 MemSet((Pointer)&si, 0, sizeof(SeqInt));
5925 tloc->choice = SEQLOC_INT;
5926 tloc->data.ptrvalue = (Pointer)(&si);
5927 si.id = target->id;
5928 si.from = from;
5929 si.to = to;
5930
5931 sfp->location = SeqEdSeqLocDelete (sfp->location, target, from, to, merge, &changed, NULL, NULL);
5932 sfp->product = SeqEdSeqLocDelete(sfp->product, target, from, to, merge, &changed, NULL, NULL);
5933
5934 if (sfp->location == NULL)
5935 return 2;
5936
5937 switch (sfp->data.choice)
5938 {
5939 case SEQFEAT_CDREGION: /* cdregion */
5940 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
5941 prevcbp = NULL;
5942 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
5943 {
5944 nextcbp = cbp->next;
5945 cbp->loc = SeqEdSeqLocDelete(cbp->loc, target, from, to, merge, &tmpbool, NULL, NULL);
5946 if (cbp->loc == NULL)
5947 {
5948 if (prevcbp != NULL)
5949 prevcbp->next = nextcbp;
5950 else
5951 crp->code_break = nextcbp;
5952 cbp->next = NULL;
5953 CodeBreakFree(cbp);
5954 }
5955 else
5956 prevcbp = cbp;
5957 }
5958 break;
5959 case SEQFEAT_RNA:
5960 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
5961 if (rrp->ext.choice == 2) /* tRNA */
5962 {
5963 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
5964 if (trp->anticodon != NULL)
5965 {
5966 trp->anticodon = SeqEdSeqLocDelete(trp->anticodon, target, from, to, merge, &tmpbool, NULL, NULL);
5967 }
5968 }
5969 break;
5970 default:
5971 break;
5972 }
5973
5974 if (changed)
5975 {
5976 return 1;
5977 }
5978 else
5979 return 0;
5980 }
5981
5982 /*
5983 static Boolean SeqEdDeleteFromDeltaSeq (DeltaSeqPtr dsp, Int4 from, Int4 to)
5984 {
5985 ByteStorePtr bs_new;
5986 SeqLitPtr slip;
5987
5988 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5989 {
5990 return FALSE;
5991 }
5992
5993 slip = (SeqLitPtr) dsp->data.ptrvalue;
5994
5995 if (from < 0 || to > slip->length)
5996 {
5997 return FALSE;
5998 }
5999 if (to < 0)
6000 {
6001 to = slip->length - 1;
6002 }
6003
6004 if (! IsDeltaSeqGap (dsp))
6005 {
6006 if (slip->seq_data_type != Seq_code_iupacna)
6007 {
6008 bs_new = BSConvertSeq(slip->seq_data, Seq_code_iupacna,
6009 slip->seq_data_type,
6010 slip->length);
6011 slip->seq_data_type = Seq_code_iupacna;
6012 slip->seq_data = bs_new;
6013 }
6014 BSSeek(slip->seq_data, from, SEEK_SET);
6015 Nlm_BSDelete (slip->seq_data, to - from + 1);
6016 }
6017 slip->length -= (to - from + 1);
6018
6019 return TRUE;
6020 }
6021 */
6022
/* Remove positions from..to (inclusive, relative to the literal) from a
 * SeqLit.
 *
 * from is clamped to 0; to is clamped to the last position when it is
 * negative or past the end.  If nothing remains in range after clamping the
 * literal is left untouched.
 *
 * When the literal carries sequence data that is not a gap, the data is
 * converted to Seq_code_iupacna if needed and the residues are deleted from
 * the byte store; in every case slip->length is reduced by the number of
 * positions removed.
 */
static void DeleteFromSeqLit (SeqLitPtr slip, Int4 from, Int4 to)
{
  ByteStorePtr bs_new;
  Int4         count;

  if (slip == NULL)
  {
    return;
  }
  if (from < 0)
  {
    from = 0;
  }

  if (to > slip->length - 1 || to < 0)
  {
    to = slip->length - 1;
  }

  /* An empty or inverted range would produce a zero or negative count,
   * which must not reach Nlm_BSDelete or be subtracted from the length. */
  if (from > to)
  {
    return;
  }
  count = to - from + 1;

  if (slip->seq_data != NULL && slip->seq_data_type != Seq_code_gap)
  {
    if (slip->seq_data_type != Seq_code_iupacna)
    {
      bs_new = BSConvertSeq ((ByteStorePtr) slip->seq_data, Seq_code_iupacna,
                             slip->seq_data_type,
                             slip->length);
      slip->seq_data_type = Seq_code_iupacna;
      slip->seq_data = (SeqDataPtr) bs_new;
    }
    BSSeek ((ByteStorePtr) slip->seq_data, from, SEEK_SET);
    Nlm_BSDelete ((ByteStorePtr) slip->seq_data, count);
  }
  slip->length -= count;
}
6056
SeqEdDeleteFromDeltaBsp(BioseqPtr bsp,Int4 from,Int4 to)6057 static Boolean SeqEdDeleteFromDeltaBsp (BioseqPtr bsp, Int4 from, Int4 to)
6058 {
6059 Boolean retval = FALSE;
6060 DeltaSeqPtr dsp, dsp_next, prev_dsp;
6061 SeqLitPtr slip;
6062 Int4 curr_pos = 0;
6063 Int4 del_to, del_from;
6064 Int4 piece_len;
6065 SeqLocPtr slp;
6066
6067 if (bsp == NULL || bsp->repr != Seq_repr_delta
6068 || bsp->seq_ext_type != 4 || bsp->seq_ext == NULL)
6069 {
6070 return retval;
6071 }
6072
6073 prev_dsp = NULL;
6074 dsp = (DeltaSeqPtr) bsp->seq_ext;
6075 while (dsp != NULL && curr_pos <= to)
6076 {
6077 dsp_next = dsp->next;
6078 piece_len = 0;
6079 /* remove empty dsps */
6080 if (dsp->data.ptrvalue == NULL)
6081 {
6082 /* skip */
6083 prev_dsp = dsp;
6084 }
6085 else if (dsp->choice == 1)
6086 { /* SeqLoc */
6087 slp = (SeqLocPtr)(dsp->data.ptrvalue);
6088 piece_len = SeqLocLen (slp);
6089 prev_dsp = dsp;
6090 }
6091 else if (dsp->choice == 2)
6092 {
6093 slip = (SeqLitPtr) (dsp->data.ptrvalue);
6094 piece_len = slip->length;
6095 if (curr_pos + piece_len > from)
6096 {
6097 if (from > curr_pos)
6098 {
6099 del_from = from - curr_pos;
6100 }
6101 else
6102 {
6103 del_from = 0;
6104 }
6105
6106 if (to - curr_pos < slip->length - 1)
6107 {
6108 del_to = to - curr_pos;
6109 }
6110 else
6111 {
6112 del_to = slip->length - 1;
6113 }
6114 DeleteFromSeqLit (slip, del_from, del_to);
6115
6116 /* remove empty delta seq parts */
6117 if (slip->length == 0)
6118 {
6119 if (prev_dsp == NULL)
6120 {
6121 bsp->seq_ext = dsp->next;
6122 }
6123 else
6124 {
6125 prev_dsp->next = dsp->next;
6126 }
6127 dsp->next = NULL;
6128 slip = SeqLitFree (slip);
6129 dsp = ValNodeFree (dsp);
6130 }
6131 else
6132 {
6133 prev_dsp = dsp;
6134 }
6135 }
6136 else
6137 {
6138 prev_dsp = dsp;
6139 }
6140 }
6141 curr_pos += piece_len;
6142 dsp = dsp_next;
6143 }
6144 return TRUE;
6145 }
6146
SeqEdDeleteFromSegOrDeltaBsp(BioseqPtr bsp,Int4 from,Int4 to)6147 static Boolean SeqEdDeleteFromSegOrDeltaBsp (BioseqPtr bsp, Int4 from, Int4 to)
6148 {
6149 SeqLocPtr tmp, head;
6150 DeltaSeqPtr tdsp = NULL;
6151 SeqLocPtr PNTR newheadptr;
6152 Int4 totlen, templen, tfrom, tto, diff1, diff2;
6153 SeqLocPtr slp, tloc, newhead, prev;
6154 Boolean retval = FALSE;
6155 SeqInt si;
6156 ValNode vn;
6157
6158 if (bsp == NULL) return retval;
6159 if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta) return retval;
6160
6161 head = ValNodeNew(NULL); /* allocate to facilitate SeqLocFree */
6162 head->choice = SEQLOC_MIX; /* make a SeqLoc out of the extension */
6163 if (bsp->repr == Seq_repr_seg)
6164 head->data.ptrvalue = bsp->seq_ext;
6165 else
6166 {
6167 tdsp = (DeltaSeqPtr)(bsp->seq_ext);
6168 head->data.ptrvalue = DeltaSeqsToSeqLocs(tdsp);
6169 }
6170
6171 newhead = NULL;
6172 newheadptr = &newhead;
6173
6174 tloc = &vn;
6175 MemSet((Pointer)tloc, 0, sizeof(ValNode));
6176 MemSet((Pointer)&si, 0, sizeof(SeqInt));
6177 tloc->choice = SEQLOC_INT;
6178 tloc->data.ptrvalue = (Pointer)(&si);
6179
6180 slp = NULL;
6181 totlen = 0;
6182 while ((slp = SeqLocFindNext(head, slp)) != NULL)
6183 {
6184 templen = SeqLocLen(slp);
6185 tfrom = SeqLocStart(slp);
6186 tto = SeqLocStop(slp);
6187
6188 if (((totlen + templen - 1) < from) || /* before cut */
6189 (totlen > to)) /* after cut */
6190 {
6191 tmp = SeqLocAdd(newheadptr, slp, TRUE, TRUE); /* add whole SeqLoc */
6192 }
6193 else
6194 {
6195 retval = TRUE; /* will modify or drop interval */
6196 diff1 = from - totlen; /* partial beginning? */
6197 diff2 = (templen + totlen - 1) - to; /* partial end? */
6198 si.id = SeqLocId(slp);
6199 si.strand = SeqLocStrand(slp);
6200
6201 if (diff1 > 0) /* partial start */
6202 {
6203 if (si.strand != Seq_strand_minus)
6204 {
6205 si.from = tfrom;
6206 si.to = tfrom + diff1 - 1;
6207 }
6208 else
6209 {
6210 si.from = tto - diff1 + 1;
6211 si.to = tto;
6212 }
6213 tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
6214 }
6215
6216 if (diff2 > 0) /* partial end */
6217 {
6218 if (si.strand != Seq_strand_minus)
6219 {
6220 si.from = tto - diff2 + 1;
6221 si.to = tto;
6222 }
6223 else
6224 {
6225 si.from = tfrom;
6226 si.to = tfrom + diff2 - 1;
6227 }
6228 tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
6229 }
6230 }
6231 totlen += templen;
6232 }
6233
6234 prev = NULL;
6235 for (tmp = newhead; tmp != NULL; tmp = tmp->next)
6236 {
6237 if (tmp->next == NULL) /* last one */
6238 {
6239 if (tmp->choice == SEQLOC_NULL)
6240 {
6241 if (prev != NULL)
6242 prev->next = NULL;
6243 else /* only a NULL left */
6244 {
6245 newhead = NULL;
6246 }
6247 MemFree(tmp);
6248 break;
6249 }
6250 }
6251 prev = tmp;
6252 }
6253
6254 if (bsp->repr == Seq_repr_seg)
6255 bsp->seq_ext = newhead;
6256 else
6257 {
6258 bsp->seq_ext = SeqLocsToDeltaSeqs(tdsp, newhead);
6259 DeltaSeqSetFree(tdsp);
6260 SeqLocSetFree(newhead);
6261 }
6262 SeqLocFree(head);
6263 return TRUE;
6264 }
6265
SeqEdDeleteFromMapBioseq(BioseqPtr bsp,Int4 from,Int4 to)6266 static Boolean SeqEdDeleteFromMapBioseq (BioseqPtr bsp, Int4 from, Int4 to)
6267 {
6268 SeqFeatPtr sfpcurr, sfpnext, sfpprev;
6269 Int2 dropped;
6270
6271 if (bsp == NULL || bsp->repr != Seq_repr_map) return FALSE;
6272
6273 sfpprev = NULL;
6274 sfpnext = NULL;
6275 sfpcurr = (SeqFeatPtr)(bsp->seq_ext);
6276 bsp->seq_ext = NULL;
6277 for (; sfpcurr != NULL; sfpcurr = sfpnext)
6278 {
6279 sfpnext = sfpcurr->next;
6280 dropped = SeqEdSeqFeatDelete(sfpcurr, bsp, from, to, TRUE);
6281 if (dropped == 2) /* completely gone */
6282 {
6283 SeqFeatFree(sfpcurr);
6284 }
6285 else
6286 {
6287 if (sfpprev == NULL)
6288 bsp->seq_ext = (Pointer)sfpcurr;
6289 else
6290 sfpprev->next = sfpcurr;
6291 sfpcurr->next = NULL;
6292 sfpprev = sfpcurr;
6293 }
6294 }
6295 return TRUE;
6296 }
6297
/* Free every SeqLoc in a ->next-linked chain.
 *
 * slp - first element of the chain, may be NULL
 *
 * Each node is detached before being handed to SeqLocFree, so the whole list
 * is released regardless of its length.  Always returns NULL so the caller
 * can clear its pointer in the same statement.
 */
static SeqLocPtr FreeSeqLocList (SeqLocPtr slp)
{
  SeqLocPtr following;

  while (slp != NULL)
  {
    following = slp->next;
    slp->next = NULL;
    SeqLocFree (slp);
    slp = following;
  }
  return NULL;
}
6308
ReStitchLocation(Int4 delete_point,SeqFeatPtr sfp)6309 static Boolean ReStitchLocation (Int4 delete_point, SeqFeatPtr sfp)
6310 {
6311 Int4 this_start, this_stop, next_start, next_stop;
6312 SeqLocPtr this_slp, next_slp, loc_list = NULL, tmp_slp, last_slp = NULL, tmp_next;
6313 SeqIdPtr this_id, next_id;
6314 Boolean merged = FALSE;
6315 Uint1 this_strand, next_strand;
6316
6317 if (sfp->location == NULL)
6318 {
6319 return FALSE;
6320 }
6321
6322 this_start = SeqLocStart (sfp->location);
6323 this_stop = SeqLocStop (sfp->location);
6324 if (delete_point <= this_start || delete_point >= this_stop)
6325 {
6326 return FALSE;
6327 }
6328
6329 this_slp = SeqLocFindNext (sfp->location, NULL);
6330 if (this_slp == NULL)
6331 {
6332 return FALSE;
6333 }
6334 next_slp = SeqLocFindNext (sfp->location, this_slp);
6335
6336 while (next_slp != NULL)
6337 {
6338 this_start = SeqLocStart (this_slp);
6339 this_stop = SeqLocStop (this_slp);
6340 this_id = SeqLocId (this_slp);
6341 this_strand = SeqLocStrand (this_slp);
6342 next_start = SeqLocStart (next_slp);
6343 next_stop = SeqLocStop (next_slp);
6344 next_id = SeqLocId (next_slp);
6345 next_strand = SeqLocStrand (next_slp);
6346 if (this_stop + 1 == next_start
6347 && next_start == delete_point
6348 && SeqIdComp (this_id, next_id) == SIC_YES
6349 && this_strand == next_strand)
6350 {
6351 tmp_slp = SeqLocIntNew (this_start, next_stop, this_strand, this_id);
6352 next_slp = SeqLocFindNext (sfp->location, next_slp);
6353 merged = TRUE;
6354 }
6355 else
6356 {
6357 tmp_next = this_slp->next;
6358 this_slp->next = NULL;
6359 tmp_slp = SeqLocCopy (this_slp);
6360 this_slp->next = tmp_next;
6361 }
6362 if (tmp_slp != NULL)
6363 {
6364 if (last_slp == NULL)
6365 {
6366 loc_list = tmp_slp;
6367 }
6368 else
6369 {
6370 last_slp->next = tmp_slp;
6371 }
6372 last_slp = tmp_slp;
6373 }
6374
6375 this_slp = next_slp;
6376 if (this_slp != NULL)
6377 {
6378 next_slp = SeqLocFindNext (sfp->location, this_slp);
6379 }
6380 }
6381 if (merged && loc_list != NULL)
6382 {
6383 if (this_slp != NULL)
6384 {
6385 this_start = SeqLocStart (this_slp);
6386 this_stop = SeqLocStop (this_slp);
6387 tmp_next = this_slp->next;
6388 this_slp->next = NULL;
6389 tmp_slp = SeqLocCopy (this_slp);
6390 this_slp->next = tmp_next;
6391 if (last_slp == NULL)
6392 {
6393 loc_list = tmp_slp;
6394 }
6395 else
6396 {
6397 last_slp->next = tmp_slp;
6398 }
6399 }
6400 if (loc_list->next == NULL)
6401 {
6402 sfp->location = SeqLocFree (sfp->location);
6403 sfp->location = loc_list;
6404 }
6405 else
6406 {
6407 /* already mix, just need to replace list */
6408 sfp->location->data.ptrvalue = FreeSeqLocList (sfp->location->data.ptrvalue);
6409 sfp->location->data.ptrvalue = loc_list;
6410 }
6411 return TRUE;
6412 }
6413 else
6414 {
6415 loc_list = FreeSeqLocList (loc_list);
6416 return FALSE;
6417 }
6418 }
6419
6420 /* ideally, this should take a SeqJournalEntry and perform the deletion.
6421 * We will always be deleting a contiguous section of characters.
6422 * This function will only delete from the specified Bioseq, so there should
6423 * be no need to call BioseqFind (which is expensive).
6424 */
SeqEdDeleteFromBsp(SeqEdJournalPtr sejp,BoolPtr pfeats_deleted)6425 NLM_EXTERN Boolean SeqEdDeleteFromBsp (SeqEdJournalPtr sejp, BoolPtr pfeats_deleted)
6426 {
6427 Boolean retval = FALSE;
6428 Boolean feats_altered = FALSE;
6429 Int4 deleted;
6430 SeqFeatPtr sfp;
6431 SeqMgrFeatContext fcontext;
6432 BioseqContextPtr bcp;
6433 Int2 feat_change;
6434 Boolean feats_deleted = FALSE;
6435 SeqFeatPtr tmp_sfp;
6436 AffectedFeatPtr afp;
6437 Boolean merge_mode;
6438 Boolean location_restitched = FALSE, adjusted_master = FALSE;
6439 BioseqPtr bsp;
6440 Int4 cut_offset = 0, offset = 0;
6441
6442 if (sejp == NULL || sejp->bsp == NULL || sejp->offset < 0 || sejp->offset >= sejp->bsp->length
6443 || sejp->offset + sejp->num_chars + 1 < 0 || sejp->offset + sejp->num_chars > sejp->bsp->length
6444 || sejp->num_chars < 1)
6445 {
6446 return retval;
6447 }
6448
6449 if (sejp->affected_feats != NULL)
6450 {
6451 sejp->affected_feats = SeqEdJournalAffectedFeatsFree (sejp->affected_feats);
6452 }
6453
6454 bsp = GetParentForSegment (sejp->bsp, &offset, NULL);
6455 if (bsp == NULL)
6456 {
6457 bsp = sejp->bsp;
6458 cut_offset = sejp->offset;
6459 }
6460 else
6461 {
6462 cut_offset = sejp->offset + offset;
6463 }
6464
6465 /* fix features */
6466 if (sejp->entityID > 0 && SeqMgrFeaturesAreIndexed (sejp->entityID)) {
6467
6468 sfp = NULL;
6469 while ((sfp = SeqEdGetNextFeature (bsp, sfp, 0, 0, &fcontext, FALSE, FALSE, sejp->entityID)) != NULL)
6470 {
6471 if ((cut_offset <= fcontext.left && cut_offset + sejp->num_chars >= fcontext.left)
6472 || (cut_offset >= fcontext.left && cut_offset + sejp->num_chars <= fcontext.right)
6473 || (cut_offset <= fcontext.right && cut_offset + sejp->num_chars >= fcontext.right))
6474 {
6475 tmp_sfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
6476 }
6477 else
6478 {
6479 tmp_sfp = NULL;
6480 }
6481 /* if we're deleting a gap and the feature is a coding region, merge location
6482 * by default */
6483 merge_mode = sejp->spliteditmode;
6484 if (sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap
6485 && sfp->data.choice == SEQFEAT_CDREGION)
6486 {
6487 merge_mode = TRUE;
6488 }
6489
6490 feat_change = SeqEdSeqFeatDelete (sfp, bsp, cut_offset,
6491 cut_offset + sejp->num_chars - 1,
6492 sejp->spliteditmode);
6493
6494 if (feat_change == 0 || feat_change == 1)
6495 {
6496 if (ReStitchLocation (cut_offset, sfp))
6497 {
6498 feat_change = 1;
6499 location_restitched = TRUE;
6500 }
6501 }
6502
6503 if (feat_change > 0)
6504 {
6505 if (feat_change == 2)
6506 {
6507 /* remove from index and SeqAnnot */
6508 sfp->idx.deleteme = TRUE;
6509 feats_deleted = TRUE;
6510 }
6511
6512 afp = AffectedFeatNew ();
6513 if (afp != NULL)
6514 {
6515 afp->feat_before = tmp_sfp;
6516 if (feat_change != 2)
6517 {
6518 afp->feat_after = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
6519 if (afp->feat_after != NULL)
6520 {
6521 afp->feat_after->idx.itemID = sfp->idx.itemID;
6522 }
6523 }
6524 }
6525 ValNodeAddPointer (&sejp->affected_feats, 0, afp);
6526 feats_altered = TRUE;
6527 }
6528 else
6529 {
6530 SeqFeatFree (tmp_sfp);
6531 }
6532 if (bsp != sejp->bsp)
6533 {
6534 adjusted_master = TRUE;
6535 }
6536 }
6537 } else {
6538 bcp = BioseqContextNew(sejp->bsp);
6539 sfp = NULL;
6540 /* adjust features pointing by location */
6541 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
6542 {
6543 tmp_sfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
6544 /* if we're deleting a gap and the feature is a coding region, merge location
6545 * by default */
6546 merge_mode = sejp->spliteditmode;
6547 if (sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap
6548 && sfp->data.choice == SEQFEAT_CDREGION)
6549 {
6550 merge_mode = TRUE;
6551 }
6552 feat_change = SeqEdSeqFeatDelete (sfp, bsp, cut_offset,
6553 cut_offset + sejp->num_chars - 1,
6554 sejp->spliteditmode);
6555
6556 if (feat_change == 0 || feat_change == 1)
6557 {
6558 if (ReStitchLocation (cut_offset, sfp))
6559 {
6560 feat_change = 1;
6561 location_restitched = TRUE;
6562 }
6563 }
6564
6565 if (feat_change > 0)
6566 {
6567 if (feat_change == 2)
6568 {
6569 /* remove from index and SeqAnnot */
6570 sfp->idx.deleteme = TRUE;
6571 feats_deleted = TRUE;
6572 }
6573 afp = AffectedFeatNew ();
6574 if (afp != NULL)
6575 {
6576 afp->feat_before = tmp_sfp;
6577 afp->feat_after = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
6578 }
6579 ValNodeAddPointer (&sejp->affected_feats, 0, afp);
6580 feats_altered = TRUE;
6581 }
6582 else
6583 {
6584 SeqFeatFree (tmp_sfp);
6585 }
6586 }
6587 BioseqContextFree(bcp);
6588 }
6589
6590 /* now delete nucleotides from bioseq */
6591 switch (sejp->bsp->repr)
6592 {
6593 case Seq_repr_raw:
6594 case Seq_repr_const:
6595 if (sejp->bsp->seq_data_type != Seq_code_gap) {
6596 /* if actual sequence present */
6597 if (ISA_na(sejp->bsp->mol))
6598 {
6599 if (sejp->bsp->seq_data_type != Seq_code_iupacna) /* need 1 byte/base */
6600 BioseqRawConvert(sejp->bsp, Seq_code_iupacna);
6601 }
6602 else
6603 {
6604 if (sejp->bsp->seq_data_type != Seq_code_ncbieaa)
6605 BioseqRawConvert(sejp->bsp, Seq_code_ncbieaa);
6606 }
6607
6608 BSSeek((ByteStorePtr) sejp->bsp->seq_data, sejp->offset, SEEK_SET);
6609 deleted = BSDelete((ByteStorePtr) sejp->bsp->seq_data, sejp->num_chars);
6610 if (deleted != sejp->num_chars) /* error */
6611 ErrPost(CTX_NCBIOBJ, 1, "Delete of %ld residues failed", sejp->num_chars);
6612 else
6613 retval = TRUE;
6614 }
6615 break;
6616 case Seq_repr_seg:
6617 /* update segmented sequence */
6618 retval = SeqEdDeleteFromSegOrDeltaBsp (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1);
6619 break;
6620 case Seq_repr_delta:
6621 /* update delta sequence */
6622 retval = SeqEdDeleteFromDeltaBsp (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1);
6623 break;
6624 case Seq_repr_map:
6625 /* map bioseq */
6626 retval = SeqEdDeleteFromMapBioseq (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1);
6627 break;
6628 case Seq_repr_virtual:
6629 retval = TRUE; /* nothing to do */
6630 break;
6631 }
6632
6633 if (retval)
6634 {
6635 sejp->bsp->length -= sejp->num_chars;
6636 if (bsp != sejp->bsp)
6637 {
6638 bsp->length -= sejp->num_chars;
6639 }
6640 }
6641
6642 if (feats_deleted)
6643 {
6644 DeleteMarkedObjects (sejp->entityID, 0, NULL);
6645 SeqMgrIndexFeatures (sejp->entityID, NULL);
6646 }
6647 else if (location_restitched || adjusted_master)
6648 {
6649 SeqMgrIndexFeatures (sejp->entityID, NULL);
6650 }
6651 else
6652 {
6653 SeqEdReindexAffectedFeatures (sejp->offset, 0 - sejp->num_chars,
6654 sejp->spliteditmode, sejp->bsp);
6655
6656 }
6657
6658 if (pfeats_deleted != NULL)
6659 {
6660 *pfeats_deleted = feats_deleted;
6661 }
6662
6663 return retval;
6664 }
6665
6666 /* this function will indicate whether the interval on the Bioseq specified contains
6667 * any gaps of unknown length.
6668 */
6669
6670 /*
6671 static Boolean DoesIntervalContainUnknownGap (BioseqPtr bsp, Int4 from, Int4 to)
6672 {
6673 DeltaSeqPtr from_dsp, to_dsp, this_dsp;
6674 Int4 from_start = 0, to_start = 0;
6675 Boolean unknown_gap = FALSE;
6676
6677 if (bsp == NULL || from < 0 || from >= bsp->length || to < 0 || to >= bsp->length)
6678 {
6679 return FALSE;
6680 }
6681
6682 from_dsp = GetDeltaSeqForOffset (bsp, from, &from_start);
6683 to_dsp = GetDeltaSeqForOffset (bsp, to, &to_start);
6684
6685 this_dsp = from_dsp;
6686 while (!unknown_gap && this_dsp != NULL && (to_dsp == NULL || this_dsp != to_dsp->next))
6687 {
6688 unknown_gap = IsDeltaSeqUnknownGap (this_dsp);
6689 this_dsp = this_dsp->next;
6690 }
6691
6692 return unknown_gap;
6693 }
6694 */
6695
6696 /* This section of code deals with editing the sequence by inserting and removing characters.
6697 * Functions are needed to change the indices for the affected features so that they will
6698 * display properly.
6699 */
/* Shift the indexed coordinates of features after an edit at shift_start.
 *
 * array       - feature index items to update (NULL entries are skipped)
 * num         - number of entries in array
 * shift_start - position at which residues were inserted or removed
 * shift_amt   - amount added to coordinates; callers in this file pass a
 *               positive value for insertions and a negative one for
 *               deletions
 * split       - when TRUE, an interval straddling shift_start is replaced by
 *               two intervals, one on each side of the edit
 * bsp         - only checked for NULL
 *
 * Items ending before shift_start are untouched.  Items starting after it
 * (or at it, for positive shifts) are moved as a whole, with coordinates
 * clamped at 0.  For the remaining items the right end is moved and each
 * interval is examined individually.
 */
static void SeqEdFixExtraIndex
(SMFeatItemPtr PNTR array,
 Int4               num,
 Int4               shift_start,
 Int4               shift_amt,
 Boolean            split,
 BioseqPtr          bsp)
{
  SMFeatItemPtr feat;
  Int4Ptr       grown;
  Int4          idx, iv, src, dst;
  Int4          a, b;   /* the two stored endpoints of the current interval */

  if (array == NULL || num < 1 || bsp == NULL) return;

  for (idx = 0; idx < num; idx++)
  {
    feat = array [idx];
    if (feat == NULL || feat->right < shift_start)
    {
      continue;
    }

    if (feat->left > shift_start
        || (shift_amt > 0 && feat->left == shift_start))
    {
      /* whole feature lies downstream: move both indexed endpoints ... */
      feat->left += shift_amt;
      if (feat->left < 0)
      {
        feat->left = 0;
      }
      feat->right += shift_amt;
      /* ... and every interval */
      for (iv = 0; iv < feat->numivals; iv++)
      {
        feat->ivals [2 * iv] += shift_amt;
        if (feat->ivals [2 * iv] < 0)
        {
          feat->ivals [2 * iv] = 0;
        }
        feat->ivals [2 * iv + 1] += shift_amt;
        if (feat->ivals [2 * iv + 1] < 0)
        {
          feat->ivals [2 * iv + 1] = 0;
        }
      }
      continue;
    }

    /* feature spans the edit point: right end moves, intervals vary */
    feat->right += shift_amt;
    for (iv = 0; iv < feat->numivals; iv++)
    {
      a = feat->ivals [2 * iv];
      b = feat->ivals [2 * iv + 1];

      if (a < shift_start && b < shift_start)
      {
        /* upstream - we may safely ignore */
        continue;
      }

      if ((a > shift_start && b > shift_start)
          || (shift_amt > 0 && a >= shift_start && b >= shift_start))
      {
        /* downstream - shift both endpoints */
        feat->ivals [2 * iv] += shift_amt;
        feat->ivals [2 * iv + 1] += shift_amt;
        continue;
      }

      if (split)
      {
        /* build a list with room for one more interval */
        grown = (Int4Ptr) MemNew (sizeof (Int4) * (feat->numivals + 1) * 2);

        /* intervals before this one are carried over unchanged */
        for (dst = 0; dst < iv; dst++)
        {
          grown [2 * dst] = feat->ivals [2 * dst];
          grown [2 * dst + 1] = feat->ivals [2 * dst + 1];
        }

        /* this interval becomes two, one on each side of the edit */
        if (a < b)
        {
          /* plus strand */
          grown [2 * dst] = a;
          grown [2 * dst + 1] = shift_start - 1;
          dst++;
          grown [2 * dst] = shift_start + shift_amt;
          grown [2 * dst + 1] = b + shift_amt;
          dst++;
        }
        else
        {
          /* minus strand */
          grown [2 * dst] = a + shift_amt;
          grown [2 * dst + 1] = shift_start + shift_amt;
          dst++;
          grown [2 * dst] = shift_start - 1;
          grown [2 * dst + 1] = b;
          dst++;
        }

        /* later intervals are copied as-is; the enclosing loop shifts them */
        for (src = iv + 1; src < feat->numivals; src++, dst++)
        {
          grown [2 * dst] = feat->ivals [2 * src];
          grown [2 * dst + 1] = feat->ivals [2 * src + 1];
        }

        MemFree (feat->ivals);
        feat->ivals = grown;
        feat->numivals ++;
        /* step over the second half so it is not shifted again */
        iv++;
        continue;
      }

      /* not splitting: move only the endpoint on the downstream side */
      if (a > shift_start
          || (shift_amt > 0 && a == shift_start))
      {
        feat->ivals [2 * iv] += shift_amt;
        if (feat->ivals [2 * iv] < 0)
        {
          feat->ivals [2 * iv] = 0;
        }
      }
      else
      {
        feat->ivals [2 * iv + 1] += shift_amt;
        if (feat->ivals [2 * iv + 1] < 0)
        {
          feat->ivals [2 * iv + 1] = 0;
        }
      }
    }
  }
}
6834
SeqEdReindexAffectedFeatures(Int4 shift_start,Int4 shift_amt,Boolean split,BioseqPtr bsp)6835 NLM_EXTERN void SeqEdReindexAffectedFeatures (Int4 shift_start, Int4 shift_amt,
6836 Boolean split, BioseqPtr bsp)
6837 {
6838 ObjMgrDataPtr omdp;
6839 BioseqExtraPtr bspextra;
6840 ObjMgrPtr omp;
6841
6842 if (bsp == NULL) return;
6843
6844 omdp = (ObjMgrDataPtr) bsp->omdp;
6845 if (omdp == NULL)
6846 {
6847 omp = ObjMgrWriteLock ();
6848 omdp = ObjMgrFindByData (omp, bsp);
6849 ObjMgrUnlock ();
6850 bsp->omdp = (Pointer) omdp;
6851 }
6852 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;
6853
6854 bspextra = (BioseqExtraPtr) omdp->extradata;
6855 if (bspextra == NULL) return;
6856
6857 SeqEdFixExtraIndex (bspextra->featsByPos, bspextra->numfeats,
6858 shift_start, shift_amt, split, bsp);
6859 }
6860
SeqEdReindexFeature(SeqFeatPtr sfp,BioseqPtr bsp)6861 NLM_EXTERN void SeqEdReindexFeature (SeqFeatPtr sfp, BioseqPtr bsp)
6862 {
6863 ObjMgrDataPtr omdp;
6864 BioseqExtraPtr bspextra;
6865 ObjMgrPtr omp;
6866 Int4 i;
6867 SeqLocPtr this_slp;
6868 SMFeatItemPtr item = NULL;
6869 Int4 numivals;
6870 Int4 start, stop;
6871 Int4 left, right;
6872
6873 if (sfp == NULL || bsp == NULL) return;
6874 omdp = (ObjMgrDataPtr) bsp->omdp;
6875 if (omdp == NULL)
6876 {
6877 omp = ObjMgrWriteLock ();
6878 omdp = ObjMgrFindByData (omp, bsp);
6879 ObjMgrUnlock ();
6880 bsp->omdp = (Pointer) omdp;
6881 }
6882 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;
6883
6884 bspextra = (BioseqExtraPtr) omdp->extradata;
6885 if (bspextra == NULL) return;
6886
6887 for (i = 0; i < bspextra->numfeats; i++)
6888 {
6889 item = bspextra->featsByPos [i];
6890 if (item != NULL && item->itemID == sfp->idx.itemID)
6891 {
6892 /* first, find out how many intervals we have, so we can make sure our ivals
6893 * array is the right size */
6894 for (this_slp = SeqLocFindNext (sfp->location, NULL), numivals = 0;
6895 this_slp != NULL;
6896 this_slp = this_slp->next, numivals ++)
6897 {
6898
6899 }
6900 if (numivals != item->numivals)
6901 {
6902 item->ivals = MemFree (item->ivals);
6903 item->ivals = (Int4Ptr) MemNew (2 * numivals * sizeof (Int4));
6904 if (item->ivals == NULL) return;
6905 item->numivals = numivals;
6906 }
6907
6908 /* now populate the ivals */
6909
6910 left = -1;
6911 right = -1;
6912 for (this_slp = SeqLocFindNext (sfp->location, NULL), numivals = 0;
6913 this_slp != NULL;
6914 this_slp = this_slp->next, numivals ++)
6915 {
6916 start = GetOffsetInBioseq (this_slp, bsp, SEQLOC_START);
6917 stop = GetOffsetInBioseq (this_slp, bsp, SEQLOC_STOP);
6918 item->ivals [2 * numivals] = start;
6919 item->ivals [2 * numivals + 1] = stop;
6920 if (left == -1 || start < left)
6921 {
6922 left = start;
6923 }
6924 if (stop < left)
6925 {
6926 left = stop;
6927 }
6928 if (right == -1 || right < start)
6929 {
6930 right = start;
6931 }
6932 if (right < stop)
6933 {
6934 right = stop;
6935 }
6936 }
6937 item->left = left;
6938 item->right = right;
6939 }
6940 else
6941 {
6942 item = NULL;
6943 }
6944 }
6945 }
6946
6947
6948 /* This function will repair any problems with the interval order that
6949 * moving the feature interval around may have caused.
6950 */
SeqEdRepairIntervalOrder(SeqFeatPtr sfp,BioseqPtr bsp)6951 NLM_EXTERN void SeqEdRepairIntervalOrder (SeqFeatPtr sfp, BioseqPtr bsp)
6952 {
6953 Boolean hasNulls;
6954 SeqLocPtr gslp;
6955 Boolean noLeft, noRight;
6956
6957 hasNulls = LocationHasNullsBetween (sfp->location);
6958 gslp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, hasNulls);
6959 if (gslp != NULL)
6960 {
6961 CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
6962 sfp->location = SeqLocFree (sfp->location);
6963 sfp->location = gslp;
6964 if (bsp->repr == Seq_repr_seg)
6965 {
6966 gslp = SegLocToParts (bsp, sfp->location);
6967 sfp->location = SeqLocFree (sfp->location);
6968 sfp->location = gslp;
6969 }
6970 FreeAllFuzz (sfp->location);
6971 SetSeqLocPartial (sfp->location, noLeft, noRight);
6972 }
6973 }
6974
6975 /* This function recursively frees a list of SeqEdJournalPtr, working in the next direction,
6976 * and fixes the prev pointer for the previous entry in the SeqEdJournalPtr list (if there is one).
6977 */
SeqEdJournalFree(SeqEdJournalPtr sejp)6978 NLM_EXTERN void SeqEdJournalFree (SeqEdJournalPtr sejp)
6979 {
6980 SeqEdJournalPtr prev;
6981
6982 if (sejp == NULL) return;
6983 SeqEdJournalFree (sejp->next);
6984 sejp->slp = SeqLocFree (sejp->slp);
6985 MemFree (sejp->char_data);
6986 sejp->affected_feats = SeqEdJournalAffectedFeatsFree (sejp->affected_feats);
6987 prev = sejp->prev;
6988 if (prev != NULL)
6989 prev->next = NULL;
6990 MemFree (sejp);
6991 }
6992
SeqEdJournalNewSeqEdit(ESeqEdJournalAction action,Int4 offset,Int4 num_chars,CharPtr char_data,Boolean spliteditmode,BioseqPtr bsp,Uint1 moltype,Uint2 entityID)6993 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewSeqEdit
6994 (ESeqEdJournalAction action,
6995 Int4 offset,
6996 Int4 num_chars,
6997 CharPtr char_data,
6998 Boolean spliteditmode,
6999 BioseqPtr bsp,
7000 Uint1 moltype,
7001 Uint2 entityID)
7002 {
7003 SeqEdJournalPtr sejp;
7004
7005 if (num_chars == 0) return NULL;
7006 sejp = (SeqEdJournalPtr) MemNew (sizeof (SeqEdJournalData));
7007 if (sejp == NULL) return NULL;
7008 sejp->action = action;
7009 sejp->offset = offset;
7010 sejp->num_chars = num_chars;
7011 sejp->spliteditmode = spliteditmode;
7012 sejp->affected_feats = NULL;
7013 sejp->sfp = NULL;
7014 sejp->slp = NULL;
7015 sejp->bsp = bsp;
7016 sejp->moltype = moltype;
7017 sejp->entityID = entityID;
7018 sejp->char_data = MemNew (sejp->num_chars + 1);
7019 if (char_data != NULL)
7020 {
7021 StringCpy (sejp->char_data, char_data);
7022 }
7023 sejp->prev = NULL;
7024 sejp->next = NULL;
7025 return sejp;
7026 }
7027
SeqEdJournalNewFeatEdit(ESeqEdJournalAction action,SeqFeatPtr sfp,SeqLocPtr slp,BioseqPtr bsp,Uint1 moltype,Uint2 entityID)7028 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewFeatEdit
7029 (ESeqEdJournalAction action,
7030 SeqFeatPtr sfp,
7031 SeqLocPtr slp,
7032 BioseqPtr bsp,
7033 Uint1 moltype,
7034 Uint2 entityID)
7035 {
7036 SeqEdJournalPtr sejp;
7037
7038 if (sfp == NULL || slp == NULL) return NULL;
7039 sejp = (SeqEdJournalPtr) MemNew (sizeof (SeqEdJournalData));
7040 if (sejp == NULL) return NULL;
7041 sejp->action = action;
7042 sejp->offset = 0;
7043 sejp->num_chars = 0;
7044 sejp->spliteditmode = FALSE;
7045 sejp->sfp = sfp;
7046 sejp->slp = slp;
7047 sejp->bsp = bsp;
7048 sejp->affected_feats = NULL;
7049 sejp->moltype = moltype;
7050 sejp->entityID = entityID;
7051 sejp->char_data = NULL;
7052 sejp->prev = NULL;
7053 sejp->next = NULL;
7054 return sejp;
7055 }
7056
7057 /* This section of code contains functions used by the new sequence editor for moving feature
7058 * intervals.
7059 */
SeqEdAdjustFeatureInterval(SeqLocPtr slp,Int4 change,EMoveType move_type,Int4 interval_offset,BioseqPtr bsp)7060 NLM_EXTERN Boolean SeqEdAdjustFeatureInterval
7061 (SeqLocPtr slp, Int4 change, EMoveType move_type, Int4 interval_offset, BioseqPtr bsp)
7062 {
7063 SeqIntPtr sint;
7064 SeqPntPtr spp;
7065 SeqLocPtr this_slp;
7066 Boolean rval = FALSE;
7067
7068 if (slp == NULL || bsp == NULL) return rval;
7069
7070 if (slp->choice == SEQLOC_INT)
7071 {
7072 if (interval_offset != 0)
7073 {
7074 return rval;
7075 }
7076 sint = (SeqIntPtr)slp->data.ptrvalue;
7077 switch (move_type)
7078 {
7079 case eLeftEnd:
7080 if (sint->from + change < sint->to
7081 && sint->from + change > -1
7082 && sint->from + change < bsp->length)
7083 {
7084 sint->from += change;
7085 rval = TRUE;
7086 }
7087 break;
7088 case eRightEnd:
7089 if (sint->to + change > sint->from
7090 && sint->to + change > -1
7091 && sint->to + change < bsp->length)
7092 {
7093 sint->to += change;
7094 rval = TRUE;
7095 }
7096 break;
7097 case eSlide:
7098 if (sint->from + change > -1 && sint->from + change < bsp->length
7099 && sint->to + change > -1 && sint->to + change < bsp->length)
7100 {
7101 sint->from += change;
7102 sint->to += change;
7103 rval = TRUE;
7104 }
7105 }
7106 }
7107 else if (slp->choice == SEQLOC_PNT)
7108 {
7109 if (interval_offset != 0)
7110 {
7111 return rval;
7112 }
7113 spp = (SeqPntPtr)(slp->data.ptrvalue);
7114 if (spp->point + change > -1 && spp->point + change < bsp->length)
7115 {
7116 spp->point += change;
7117 rval = TRUE;
7118 }
7119 }
7120 else
7121 {
7122 for (this_slp = SeqLocFindNext (slp, NULL);
7123 this_slp != NULL && interval_offset > 0;
7124 this_slp = SeqLocFindNext (slp, this_slp), interval_offset --)
7125 {}
7126 if (this_slp != NULL && interval_offset == 0)
7127 {
7128 rval = SeqEdAdjustFeatureInterval (this_slp, change, move_type, interval_offset, bsp);
7129 }
7130 }
7131 return rval;
7132 }
7133
7134
SeqEdGetNthIntervalEndPoints(SeqLocPtr slp,Int4 n,Int4Ptr left,Int4Ptr right)7135 NLM_EXTERN Boolean SeqEdGetNthIntervalEndPoints
7136 (SeqLocPtr slp, Int4 n, Int4Ptr left, Int4Ptr right)
7137 {
7138 Boolean rval = FALSE;
7139 SeqIntPtr sintp;
7140 SeqPntPtr spp;
7141 SeqLocPtr this_slp;
7142
7143 if (slp == NULL || left == NULL || right == NULL || n < 0) return FALSE;
7144 switch (slp->choice)
7145 {
7146 case SEQLOC_INT:
7147 if (n == 0)
7148 {
7149 sintp = (SeqIntPtr) slp->data.ptrvalue;
7150 *left = sintp->from;
7151 *right = sintp->to;
7152 rval = TRUE;
7153 }
7154 break;
7155 case SEQLOC_PNT:
7156 if (n == 0)
7157 {
7158 spp = (SeqPntPtr) slp->data.ptrvalue;
7159 *left = spp->point;
7160 *right = spp->point;
7161 rval = TRUE;
7162 }
7163 break;
7164 default:
7165 for (this_slp = SeqLocFindNext (slp, NULL);
7166 this_slp != NULL && n > 0;
7167 this_slp = SeqLocFindNext (slp, this_slp), n --)
7168 {}
7169 if (this_slp != NULL && n == 0)
7170 {
7171 rval = SeqEdGetNthIntervalEndPoints (this_slp, n, left, right);
7172 }
7173 break;
7174 }
7175 return rval;
7176 }
7177
7178 static void
SeqEdFixFeatureIndexForFeatureLocAdjust(BioseqPtr bsp,SeqFeatPtr sfp,Int4 change,Int4 move_type,Int4 interval_offset)7179 SeqEdFixFeatureIndexForFeatureLocAdjust
7180 (BioseqPtr bsp,
7181 SeqFeatPtr sfp,
7182 Int4 change,
7183 Int4 move_type,
7184 Int4 interval_offset)
7185 {
7186 ObjMgrDataPtr omdp;
7187 BioseqExtraPtr bspextra;
7188 ObjMgrPtr omp;
7189 SMFeatItemPtr item;
7190 Int4 i, j;
7191 Int4 left, right;
7192
7193 if (bsp == NULL || sfp == NULL) return;
7194
7195 omdp = (ObjMgrDataPtr) bsp->omdp;
7196 if (omdp == NULL)
7197 {
7198 omp = ObjMgrWriteLock ();
7199 omdp = ObjMgrFindByData (omp, bsp);
7200 ObjMgrUnlock ();
7201 bsp->omdp = (Pointer) omdp;
7202 }
7203 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;
7204
7205 bspextra = (BioseqExtraPtr) omdp->extradata;
7206 if (bspextra == NULL) return;
7207
7208 if (! SeqEdGetNthIntervalEndPoints (sfp->location, interval_offset, &left, &right))
7209 {
7210 return;
7211 }
7212
7213 i = 0;
7214 while (i < bspextra->numfeats) {
7215 item = bspextra->featsByPos [i];
7216 i++;
7217 if (item != NULL && item->itemID == sfp->idx.itemID)
7218 {
7219 if (interval_offset >= item->numivals || interval_offset < 0) return;
7220 if (item->ivals [ 2 * interval_offset] < item->ivals [2 * interval_offset + 1])
7221 {
7222 item->ivals [2 * interval_offset] = left;
7223 item->ivals [2 * interval_offset + 1] = right;
7224 }
7225 else
7226 {
7227 item->ivals [2 * interval_offset + 1] = left;
7228 item->ivals [2 * interval_offset] = right;
7229 }
7230 /* correct item left and right values */
7231 if (item->ivals [0] > item->ivals [1])
7232 {
7233 item->right = item->ivals [0];
7234 item->left = item->ivals [1];
7235 }
7236 else
7237 {
7238 item->left = item->ivals [0];
7239 item->right = item->ivals [1];
7240 }
7241 for (j = 1; j < item->numivals; j++)
7242 {
7243 if (item->left > item->ivals[2 * j])
7244 {
7245 item->left = item->ivals [2 * j];
7246 }
7247 if (item->left > item->ivals [2 * j + 1])
7248 {
7249 item->left = item->ivals [2 * j + 1];
7250 }
7251 if (item->right < item->ivals [2 * j])
7252 {
7253 item->right = item->ivals [2 * j];
7254 }
7255 if (item->right < item->ivals [2 * j + 1])
7256 {
7257 item->right = item->ivals [ 2 * j + 1];
7258 }
7259 }
7260 }
7261 }
7262 }
7263
7264
SeqEdFeatureAdjust(SeqFeatPtr sfp,SeqLocPtr orig_loc,Int4 change,EMoveType move_type,Int4 interval_offset,BioseqPtr bsp)7265 NLM_EXTERN void SeqEdFeatureAdjust
7266 (SeqFeatPtr sfp,
7267 SeqLocPtr orig_loc,
7268 Int4 change,
7269 EMoveType move_type,
7270 Int4 interval_offset,
7271 BioseqPtr bsp)
7272 {
7273 SeqLocPtr new_loc;
7274 Boolean partial3, partial5;
7275
7276 if (sfp == NULL || bsp == NULL)
7277 {
7278 return;
7279 }
7280
7281 CheckSeqLocForPartial (orig_loc, &partial5, &partial3);
7282 new_loc = SeqLocMerge (bsp, orig_loc, NULL, FALSE, FALSE, FALSE);
7283 if (new_loc == NULL)
7284 {
7285 return;
7286 }
7287 SetSeqLocPartial (new_loc, partial5, partial3);
7288
7289 if (SeqEdAdjustFeatureInterval (new_loc, change, move_type, interval_offset, bsp))
7290 {
7291 SeqLocFree (sfp->location);
7292 sfp->location = new_loc;
7293
7294 /* need to reindex feature */
7295 SeqEdFixFeatureIndexForFeatureLocAdjust (bsp, sfp, change, move_type, interval_offset);
7296 }
7297 }
7298
7299
7300 NLM_EXTERN void
AdjustFeatureForGapChange(SeqFeatPtr sfp,BioseqPtr bsp,Int4 offset,Int4 len_diff)7301 AdjustFeatureForGapChange
7302 (SeqFeatPtr sfp,
7303 BioseqPtr bsp,
7304 Int4 offset,
7305 Int4 len_diff)
7306 {
7307 if (sfp == NULL || bsp == NULL || offset < 0 || len_diff == 0)
7308 {
7309 return;
7310 }
7311
7312 if (len_diff > 0)
7313 {
7314 SeqEdSeqFeatDelete (sfp, bsp, offset, offset + len_diff - 1, TRUE);
7315 }
7316 else
7317 {
7318 sfp->location = SeqEdSeqLocInsert (sfp->location, bsp, offset, -len_diff, FALSE, NULL);
7319 if (sfp->data.choice == SEQFEAT_CDREGION)
7320 {
7321 SeqEdInsertAdjustCdRgn (sfp, bsp, offset, -len_diff, FALSE);
7322 }
7323 else if (sfp->data.choice == SEQFEAT_RNA)
7324 {
7325 SeqEdInsertAdjustRNA (sfp, bsp, offset, -len_diff, FALSE);
7326 }
7327 }
7328 }
7329
7330
7331
7332
7333