1 /*  edutil.h
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  edutil.h
27 *
28 * Author:  James Ostell
29 *
30 * Version Creation Date: 2/2/94
31 *
32 * $Revision: 6.23 $
33 *
34 * File Description:  Sequence editing utilities
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date       Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 * $Log: edutil.h,v $
42 * Revision 6.23  2010/07/12 12:21:49  bollin
43 * Introduced a version of BioseqDelete that uses idx.deleteme to remove features
44 * (instead of freeing them immediately), and fixed bugs in VecScreenTool when
45 * entire Bioseqs are deleted.
46 *
47 * Revision 6.22  2010/06/11 12:03:22  bollin
48 * Added iBOL compliance report, which marks items with low trace as failing.
49 * Also checking in first draft of functions to reverse Quality Scores, not using
50 * until we can verify that they work for float and int graphs.
51 *
52 * Revision 6.21  2009/03/04 16:34:15  bollin
53 * Added function for removing contigs from scaffolds.
54 *
55 * Revision 6.20  2007/05/08 17:18:32  bollin
56 * Added functions for identifying AGP gap DeltaSeqs
57 *
58 * Revision 6.19  2007/05/07 17:43:03  bollin
59 * Made functions IsDeltaSeqGap and IsDeltaSeqUnknownGap extern.
60 *
61 * Revision 6.18  2006/07/13 17:06:38  bollin
62 * use Uint4 instead of Uint2 for itemID values
63 * removed unused variables
64 * resolved compiler warnings
65 *
66 * Revision 6.17  2006/02/07 13:41:29  bollin
67 * added function AdjustFeatureForGapChange, which changes a feature to accommodate
68 * a change in the length of a gap
69 *
70 * Revision 6.16  2005/05/02 14:21:15  bollin
71 * removed function prototypes for PlayJournal and UnplayJournal, since these
72 * functions live in desktop/seqpanel.c and are only used there
73 *
74 * Revision 6.15  2005/04/28 20:10:32  bollin
75 * added new function AdjustFeaturesForInsertion which is called by BioseqInsert
76 * and also by a new function in sequin3.c for converting a raw bioseq to a delta
77 * and inserting gaps
78 *
79 * Revision 6.14  2005/04/06 19:33:15  bollin
80 * made it possible to insert and remove gaps from delta sequences
81 *
82 * Revision 6.13  2004/10/08 16:04:16  bollin
83 * added ability to check when an action will remove a feature
84 *
85 * Revision 6.12  2004/09/29 18:49:57  bollin
86 * fixed bugs in sequence editing, can now undo a nucleotide deletion that
87 * removes an entire feature location (feature will be restored)
88 *
89 * Revision 6.11  2004/09/23 14:59:51  bollin
90 * moved functions that depend on functions that depend on BLAST functions
91 * into seqpanel.c, made function scalled by those functions extern
92 *
93 * Revision 6.10  2004/09/22 18:20:32  bollin
94 * added functions for playing and unplaying a sequence editor action to translate
95 * a CDS
96 *
97 * Revision 6.9  2004/07/30 18:46:55  bollin
98 * added function for reordering intervals after they have been dragged by
99 * the sequence editor
100 *
101 * Revision 6.8  2004/07/28 20:06:19  bollin
102 * added journaling for undo/redo of dragged sequence location changes
103 *
104 * Revision 6.7  2004/07/28 15:22:15  bollin
105 * moved functions for moving feature locations around to edutil.c from
106 * seqpanel.c
107 *
108 * Revision 6.6  2004/07/22 15:34:41  bazhin
109 * Added function prototype GapToSeqLocEx() to handle gaps of unknown
110 * lengths within locations.
111 *
112 * Revision 6.5  2004/07/12 12:29:45  bollin
113 * moved new sequence editor editing functions here
114 *
115 * Revision 6.4  2003/02/10 22:57:45  kans
116 * added BioseqCopyEx, which takes a BioseqPtr instead of a SeqIdPtr for the source
117 *
118 * Revision 6.3  2002/07/02 13:23:43  kans
119 * added SeqLocDeleteEx
120 *
121 * Revision 6.2  1997/11/10 19:50:23  kans
122 * Fixed incorrect comment for ISAGappedSeqLoc() function (SB).
123 *
124 * Revision 6.1  1997/10/24 19:15:01  bazhin
125 * Added descriptors for three easy functions GapToSeqLoc(...),
126 * ISAGappedSeqLoc(...) and GappedSeqLocsToDeltaSeqs(...) for
127 * processing "gap(...)" tokens in CONTIG line.
128 *
129 * Revision 6.0  1997/08/25 18:05:28  madden
130 * Revision changed to 6.0
131 *
132 * Revision 5.6  1997/07/25 20:34:56  kans
133 * added SegLocToPartsEx
134 *
135 * Revision 5.5  1997/06/19 18:37:34  vakatov
136 * [WIN32,MSVC++]  Adopted for the "NCBIOBJ.LIB" DLL'ization
137 *
138 * Revision 5.4  1996/10/09 17:31:37  kans
139 * was missing an ifdef cplusplus at the bottom
140 *
141  * Revision 5.3  1996/10/09  16:34:59  chappey
142  * added SeqLocReplaceID() that replaces the Seq-Id of a Seq-Loc
143  *
144  * Revision 5.2  1996/06/12  18:29:34  epstein
145  * move SeqLocIntNew() and SeqLocPntNew() from edutil to sequtil
146  *
147  * Revision 5.1  1996/06/10  15:07:22  epstein
148  * replace make_seq_loc() with SeqLocIntNew() and make_pnt_loc with SeqLocPntNew()
149  *
150  * Revision 5.0  1996/05/28  13:23:23  ostell
151  * Set to revision 5.0
152  *
153  * Revision 4.7  1996/03/12  22:14:22  ostell
154  * added SeqLocToParts()
155  *
156  * Revision 4.5  1996/01/30  16:24:04  ostell
157  * changed name of SeqLocPack() to SeqLocPackage()
158  *
159  * Revision 4.4  1996/01/29  22:03:52  ostell
160  * revised SeqLocAdd
161  * added SeqLocPack
162  *
163  * Revision 4.3  1996/01/10  22:25:25  ostell
164  * added SeqLocIntNew()
165  *
166  * Revision 4.2  1995/12/29  21:31:44  ostell
167  * added mapping functions between delta seq and seq loc, for editing utilities
168  *
169  * Revision 4.1  1995/11/15  20:40:20  ostell
170  * fixed SeqLocCopyPart so it correctly handles SEQLOC_NULL in segmented
171  * records
172  *
173  * Revision 4.0  1995/07/26  13:49:01  ostell
174  * force revision to 4.0
175  *
176  * Revision 1.6  1995/05/15  21:46:05  ostell
177  * added Log line
178  *
179 *
180 *
181 *
182 * ==========================================================================
183 */
184 #ifndef _NCBI_EdUtil_
185 #define _NCBI_EdUtil_
186 
187 #ifndef _NCBI_SeqPort_
188 #include <seqport.h>           /* other utilities */
189 #endif
190 
191 #undef NLM_EXTERN
192 #ifdef NLM_IMPORT
193 #define NLM_EXTERN NLM_IMPORT
194 #else
195 #define NLM_EXTERN extern
196 #endif
197 
198 #ifdef __cplusplus
199 extern "C" {
200 #endif
201 
202 #include "explore.h"   /* NOTE(review): included inside the extern "C" block and after NLM_EXTERN is defined above; confirm explore.h does not redefine NLM_EXTERN */
203 
204 /*****************************************************************************
205 *
206 *   Sequence Editing Utilities
207 *       High Level Cut, Copy, Paste
208 *
209 *****************************************************************************/
210 
211 
212 /*****************************************************************************
213 *
214 *   BioseqDelete (target, from, to, do_feat, do_split)
215 *      Deletes the region of sequence between from-to, inclusive, on the
216 *        Bioseq whose SeqId is target.
217 *      If do_feat, the feature table is updated using SeqEntryDelFeat()
218 *      If do_split, the features across the deleted region are split into
219 *        two intervals on either side. If not, the feature is just shortened.
220 *   BioseqDeleteEx takes one more flag, mark_deleted_feat. NOTE(review):
221 *      presumably the Revision 6.23 idx.deleteme variant - confirm in edutil.c
222 *****************************************************************************/
223 NLM_EXTERN Boolean LIBCALL BioseqDelete (SeqIdPtr target, Int4 from, Int4 to, Boolean do_feat, Boolean do_split);
224 NLM_EXTERN Boolean LIBCALL BioseqDeleteEx (SeqIdPtr target, Int4 from, Int4 to, Boolean do_feat, Boolean do_split, Boolean mark_deleted_feat);
225 
226 
227 
228 /*****************************************************************************
229 *
230 *   BioseqCopy(newid, sourceid, from, to, strand, do_feat)
231 *      Creates a new Bioseq from sourceid in the range from-to inclusive.
232 *      If strand==Seq_strand_minus, reverse complements the sequence in
233 *        the copy and (if do_feat) corrects the feature table appropriately.
234 *      Names new Bioseq as newid, if not NULL
235 *        else Creates seqid.local = "Clipboard" if newid is NULL
236 *      If do_feat == TRUE copies appropriate region of feature table from
237 *        sourceid to new copy using SeqFeatsCopy().
238 *
239 *
240 *   BioseqCopyEx(newid, oldbsp, from, to, strand, do_feat)
241 *      Internal function called by BioseqCopy that takes BioseqPtr
242 *
243 *****************************************************************************/
244 NLM_EXTERN BioseqPtr LIBCALL BioseqCopy (SeqIdPtr newid, SeqIdPtr sourceid, Int4 from, Int4 to,
245                                Uint1 strand, Boolean do_feat);   /* source Bioseq is named by SeqId */
246 
247 NLM_EXTERN BioseqPtr LIBCALL BioseqCopyEx (SeqIdPtr newid, BioseqPtr oldbsp, Int4 from, Int4 to,
248                                Uint1 strand, Boolean do_feat);   /* source Bioseq is passed directly as a BioseqPtr */
249 
250 /*****************************************************************************
251 *
252 *	SeqLocCopyPart (the_segs, from, to, strand, group, first_segp, last_segp)
253 *      cuts out from the_segs the part from offset from to offset to
254 *      reverse complements resulting seqloc if strand == Seq_strand_minus
255 *      if (group) puts resulting intervals into a new Seq-loc (of type
256 *        PACKED_INT if no SEQLOC_NULL, else SEQLOC_MIX).
257 *      Currently this always makes intervals or nulls. Is really for segmented and
258 *        reference sequence extensions
259 *      If first_segp and last_segp are not NULL, then they are filled in with the
260 *        ordinal number of the source segments that remain in the copy, based
261 *        on SeqLocFindNext, where 1 is the first one. Thus if the third and
262 *        fourth segments were copied, first is 3 and last is 4. If the
263 *        location was reverse complemented, first is 4 and last is 3.
264 *
265 *****************************************************************************/
266 NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyPart PROTO((SeqLocPtr the_segs, Int4 from, Int4 to,
267          Uint1 strand, Boolean group, Int2Ptr first_segp, Int2Ptr last_segp));   /* first_segp / last_segp are only filled in when not NULL */
268 
269 /* This function is used by BioseqInsert to adjust features on a sequence after
270  * an insertion.  pos indicates the start of the insertion, len indicates
271  * the length of the inserted sequence, and do_split indicates whether features
272  * that cover the insertion should be split at either side of the insertion.
273  */
274 extern void            /* NOTE(review): plain extern, not NLM_EXTERN ... LIBCALL as used by most prototypes in this header */
275 AdjustFeaturesForInsertion
276 (BioseqPtr tobsp,      /* presumably the Bioseq that received the insertion - confirm */
277  SeqIdPtr  to_id,      /* presumably the SeqId of that Bioseq - confirm */
278  Int4 pos,
279  Int4 len,
280  Boolean do_split);
281 
282 /*****************************************************************************
283 *
284 * BioseqInsert (from_id, from, to, strand, to_id, pos, from_feat, to_feat,
285 *                                                                     do_split)
286 *       Inserts a copy of the region "from"-"to" on "strand" of the Bioseq
287 *          identified by "from_id" into the Bioseq identified by "to_id"
288 *          before "pos".
289 *          To append to the end of the sequence, use APPEND_RESIDUE for "pos".
290 *       if from_feat = TRUE, copies the feature table from "from" and updates
291 *          the locations to point to the proper residues in "to_id"
292 *       If to_feat = TRUE, updates feature table on "to_id" as well.
293 *          if do_split == TRUE, then splits features in "to_id" (to_feat must
294 *             be TRUE as well). Otherwise expands features at insertion.
295 *
296 *       All operations are copies. "frombsp" is unchanged.
297 *       Insert will only occur between certain Bioseq.repr classes as below
298 *
299 *   From Bioseq.repr                      To Bioseq.repr
300 *
301 *                          virtual       raw      segmented        map
302 *                       +---------------------------------------------------
303 *             virtual   |   length       inst      SeqLoc         length
304 *                       +---------------------------------------------------
305 *                 raw   |   error        copy      SeqLoc         error
306 *                       +---------------------------------------------------
307 *           segmented   |   error        inst      SeqLoc*        error
308 *                       +---------------------------------------------------
309 *                 map   |   error        inst*     SeqLoc         copy
310 *                       +---------------------------------------------------
311 *
312 *   length = changes length of "to" by length of "from"
313 *   error  = insertion not allowed
314 *   inst   = "from" instantiated as residues ("N" or "X" for virtual "from")
315 *   inst*  = as above, but a restriction map can instantiate other bases
316 *            than "N" for known restriction recognition sites.
317 *   copy   = copy of "from" inserted into "to"
318 *   SeqLoc = a SeqLoc added to "to" which points to "from". No copy of residues.
319 *   SeqLoc* = as above, but note that "to" points to "from" directly, not
320 *             what "from" itself may point to.
321 *
322 *****************************************************************************/
323 NLM_EXTERN Boolean LIBCALL BioseqInsert (SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos,
324             Boolean from_feat, Boolean to_feat, Boolean do_split);   /* pos may be APPEND_RESIDUE to append at the end of to_id */
325 
326 
327 /*****************************************************************************
328 *
329 *   BioseqOverwrite (target, pos, residue)
330 *      Overwrites the residue at pos with residue in the
331 *        Bioseq whose SeqId is target.
332 *      residue is iupacna for DNA or ncbieaa for protein
333 *      target MUST be a raw Bioseq right now
334 *      no changes are made to the feature table
335 *      (the prototype takes no seqcode argument; the code is fixed as above)
336 *****************************************************************************/
337 NLM_EXTERN Boolean LIBCALL BioseqOverwrite (SeqIdPtr target, Int4 pos, Uint1 residue);
338 
339 /*****************************************************************************
340 *
341 *       Some defined values are provided for the Int4 values passed as
342 *           from, to, and pos in functions above. Defined in seqport.h
343 *
344 *       FIRST_RESIDUE   0	(first residue of sequence)
345 *       LAST_RESIDUE    -1  (last residue of sequence.. interpreted as
346 *                              length - 1)
347 *       APPEND_RESIDUE  -2  (interpreted as length.. off the end of the
348 *                              sequence. Only for "pos" in BioseqInsert
349 *                              to append to the end of the sequence )
350 *
351 *
352 *****************************************************************************/
353 
354 
355 
356 
357 
358 /*****************************************************************************
359 *
360 *   Sequence Editing Utilities
361 *       Lower Level Support Routines
362 *
363 *****************************************************************************/
364 
365 
366 /*****************************************************************************
367 *
368 *   SeqFeatDelete()
369 *     0 = no changes made to location or product
370 *     1 = changes made but feature still has some location
371 *     2 = all of sfp->location in deleted interval
372 *
373 *   if (merge)
374 *      1) correct numbers > to by subtraction
375 *      2) do not split intervals spanning the deletion
376 *   else
377 *      1) do not change numbers > to
378 *      2) split intervals which span the deletions
379 *
380 *****************************************************************************/
381 NLM_EXTERN Int2 LIBCALL SeqFeatDelete (SeqFeatPtr sfp, SeqIdPtr target, Int4 from, Int4 to, Boolean merge);   /* returns 0, 1 or 2 as listed above */
382 
383 NLM_EXTERN Boolean LIBCALL SeqInsertByLoc (SeqIdPtr target, Int4 offset, SeqLocPtr fragment);   /* NOTE(review): not described in this header; see edutil.c */
384 
385 /*****************************************************************************
386 *
387 *   SeqDeleteByLoc (slp, do_feat, do_split)
388 *   	deletes regions referenced by slp
389 *       if do_feat, deletes features in those regions as well
390 *       if do_split, splits intervals crossing the deletion
391 *         else just shortens them.
392 *
393 *****************************************************************************/
394 NLM_EXTERN Boolean LIBCALL SeqDeleteByLoc (SeqLocPtr slp, Boolean do_feat, Boolean do_split);
395 NLM_EXTERN Boolean LIBCALL SeqDeleteByLocEx (SeqLocPtr slp, Boolean do_feat, Boolean do_split, Boolean mark_deleted_feat);   /* Ex adds mark_deleted_feat, which the comment above does not describe */
396 
397 
398 /*****************************************************************************
399 *
400 *   SeqLocAdd(head, slp, merge, do_copy)
401 *       creates a linked list of SeqLocs.
402 *       returns a pointer to the last SeqLoc in the chain
403 *       if (merge)
404 *         deletes double NULLs or Nulls at start (application must delete at stop)
405 *         merges adjacent intervals on the same strand
406 *       if (do_copy)
407 *         Makes copies of incoming SeqLocs
408 *         if incoming is merged, deletes the incoming SeqLoc
409 *       head is passed by address (SeqLocPtr PNTR)
410 *       call SeqLocPackage(head) to turn into a SeqLoc from a linked list
411 *
412 *****************************************************************************/
413 NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy);
414 
415 /*****************************************************************************
416 *
417 *   SeqLocPackage(head)
418 *     head is a chain of 1 or more SeqLocs connected by slp->next
419 *     Assumes was built by SeqLocAdd to remove redundancy
420 *     Frees the last element if it is a NULL.
421 *     If more than one element left, then packages the chain into a SEQLOC_MIX,
422 *       or SEQLOC_PACKED_INT as appropriate
423 *     returns pointer to the head of the resulting single SeqLoc
424 *
425 *****************************************************************************/
426 NLM_EXTERN SeqLocPtr LIBCALL SeqLocPackage (SeqLocPtr head);   /* head: chain linked through slp->next, assumed built by SeqLocAdd */
427 
428 /*****************************************************************************
429 *
430 *   SegLocToPartsEx(BioseqPtr seg, SeqLocPtr slp, Boolean nullsBetween)
431 *   SegLocToParts(BioseqPtr seg, SeqLocPtr slp)
432 *   	seg must be a segmented Bioseq
433 *       slp must be a SeqLoc on it
434 *       nullsBetween makes order instead of join
435 *       function maps slp to the components of seg
436 *       returns a new SeqLocPtr
437 *       does not delete slp
438 *
439 *****************************************************************************/
440 NLM_EXTERN SeqLocPtr LIBCALL SegLocToPartsEx (BioseqPtr seg, SeqLocPtr slp, Boolean nullsBetween);   /* nullsBetween makes order instead of join */
441 
442 NLM_EXTERN SeqLocPtr LIBCALL SegLocToParts (BioseqPtr seg, SeqLocPtr slp);   /* same arguments without nullsBetween; NOTE(review): its default is not stated here - confirm */
443 
444 
445 
446 /*****************************************************************************
447 *
448 *   SeqLocDelete()
449 *       returns altered head or NULL if nothing left.
450 *   sets changed=TRUE if all or part of loc is deleted
451 *   does NOT set changed if location coordinates are only moved
452 *   if (merge) then corrects coordinates upstream of to
453 *   else
454 *     splits intervals covering from-to, does not correct upstream of to
455 *
456 *****************************************************************************/
457 NLM_EXTERN SeqLocPtr LIBCALL SeqLocDelete (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed);
458 
459 NLM_EXTERN SeqLocPtr LIBCALL SeqLocDeleteEx (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3);   /* Ex adds partial5 / partial3 (BoolPtr), which the comment above does not describe */
460 
461 NLM_EXTERN SeqLocPtr LIBCALL SeqLocSubtract (SeqLocPtr head, SeqLocPtr piece);   /* NOTE(review): not described in this header; see edutil.c */
462 
463 
464 /*****************************************************************************
465 *
466 *   DeltaSeqsToSeqLocs(dsp)
467 *   	converts a chain of delta seqs to seqlocs
468 *   	each SeqLit is converted to SeqLoc of type Int with a SeqId of type
469 *          Dbtag where db="Seq\tLit" and objectId.id which is the index of the
470 *          element in the delta seq chain where 1 is the first one.
471 *   	Returned SeqLoc is of type "mix" and must be freed by caller.
472 *
473 *****************************************************************************/
474 NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp);   /* result is a "mix" SeqLoc that the caller must free */
475 
476 /*****************************************************************************
477 *
478 *   SeqLocsToDeltaSeqs(dsp, slp)
479 *       converts a chain of seqlocs generated by DeltaSeqsToSeqLocs() back into
480 *         delta seqs. dsp is the original chain of DeltaSeqs, which is required
481 *         to convert the delta seqs back.
482 *
483 *****************************************************************************/
484 NLM_EXTERN DeltaSeqPtr LIBCALL SeqLocsToDeltaSeqs (DeltaSeqPtr dsp, SeqLocPtr slp);
485 
486 /*****************************************************************************
487 *
488 *   ISADeltaSeqsToSeqLoc(slp)
489 *   	returns Index (> 0) if this (one) SeqLoc was converted from a Delta Seq by
490 *         DeltaSeqsToSeqLocs() by looking for the special Dbtag name
491 *
492 *****************************************************************************/
493 NLM_EXTERN Int4 LIBCALL ISADeltaSeqsToSeqLoc (SeqLocPtr slp);   /* index > 0 when slp came from DeltaSeqsToSeqLocs(); inspects this one SeqLoc only */
494 
495 
496 /*****************************************************************************
497 *
498 *   SeqEntryDelFeat(sep, sip, from, to, do_split)
499 *       Deletes or truncates features on Bioseq (sip) in the range
500 *       from-to, inclusive
501 *
502 *       Moves features > to left to account for decrease in length
503 *       if do_split, breaks intervals across the deletion
504 *       else just reduces their size
505 *
506 *       If sep == NULL, then calls SeqEntryFind(sip) to set scope to look
507 *       for features.
508 *   SeqEntryDelFeatEx adds mark_deleted_feat (not described here)
509 *****************************************************************************/
510 NLM_EXTERN Boolean	LIBCALL SeqEntryDelFeat PROTO((SeqEntryPtr sep, SeqIdPtr sip, Int4 from, Int4 to, Boolean do_split));
511 NLM_EXTERN Boolean	LIBCALL SeqEntryDelFeatEx PROTO((SeqEntryPtr sep, SeqIdPtr sip, Int4 from, Int4 to, Boolean do_split, Boolean mark_deleted_feat));
512 
513 /*****************************************************************************
514 *
515 *   SeqFeatsCopy(newbsp, oldbsp, from, to, strand)
516 *      used by BioseqCopy (do_feat) to copy the feature table region to the copy
517 *****************************************************************************/
518 NLM_EXTERN Int2 LIBCALL SeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand);
519 
520 NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyRegion(SeqIdPtr newid, SeqLocPtr head, BioseqPtr oldbsp,
521     Int4 from, Int4 to, Uint1 strand, BoolPtr split);   /* NOTE(review): not described in this header; see edutil.c */
522 
523 /*****************************************************************************
524 *
525 *   IntFuzzClip()
526 *       sets TRUE if clipped range values (NOTE(review): prototype is void, so
527 *         presumably reported via *split, not the return value - confirm)
528 *       in all cases, adjusts and/or complements IntFuzz
529 *       Designed for IntFuzz on SeqLocs
530 *****************************************************************************/
531 NLM_EXTERN void LIBCALL IntFuzzClip(IntFuzzPtr ifp, Int4 from, Int4 to, Uint1 strand, BoolPtr split);
532 
533 /*****************************************************************************
534 *
535 *   SeqLocInsert()
536 *       alters "head" by inserting "len" residues before "pos" in any SeqLoc
537 *         on the Bioseq "target"
538 *       all SeqLocs not on "target" are unaltered
539 *       for SeqLocs on "target"
540 *          all SeqLocs before "pos" are unaltered
541 *          all SeqLocs >= "pos" are incremented by "len"
542 *          all SeqLocs spanning "pos"
543 *             if "split" == TRUE, are split into two SeqLocs, one to the
544 *               left of the insertion, the other to right
545 *             if "split" != TRUE, the SeqLoc is increased in length to cover
546 *               the insertion
547 *       returns altered head or NULL if nothing left.
548 *       if ("newid" != NULL) replaces "target" with "newid" whether the
549 *          SeqLoc is altered or not.
550 *
551 *       Usage hints:
552 *          1) To update a feature location on "target" when 10 residues of
553 *               sequence have been inserted before position 5
554 *          SeqFeatPtr->location = SeqLocInsert ( SeqFeatPtr->location ,
555 *                "target", 5, 10, TRUE, NULL);  [for some feature types
556 *                      you may want "split" equal FALSE]
557 *
558 *          2) To insert the complete feature table from "source" into a
559 *                different Bioseq "dest" before position 20 in "dest"
560 *          SFP->location = SeqLocInsert(SFP->location, "source", 0, 20,
561 *                FALSE, "dest");
562 *
563 *
564 *****************************************************************************/
565 NLM_EXTERN SeqLocPtr LIBCALL SeqLocInsert (SeqLocPtr head, SeqIdPtr target, Int4 pos, Int4 len,
566                                                Boolean split, SeqIdPtr newid);   /* returns the altered head, or NULL if nothing is left */
567 
568 
569 /********************************************************************
570 *
571 * SeqLocReplaceID
572 *   replaces the Seq-Id in a Seq-Loc (slp) with a new Seq-Id (new_sip)
573 *
574 **********************************************************************/
575 NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip);   /* NOTE(review): declared without LIBCALL, unlike the prototypes around it */
576 
577 /**********************************************************
578  *
579  *   NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(range):
580  *
581  *      Gets the size of gap and constructs SeqLoc block with
582  *   $(seqlitdbtag) value as Dbtag.db and Dbtag.tag.id = 0.
583  *
584  **********************************************************/
585 NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(Int4 range);   /* range: size of the gap */
586 
587 /**********************************************************
588  *
589  *   NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLocEx(range, unknown):
590  *
591  *      Gets the size of gap and constructs SeqLoc block with
592  *   $(seqlitdbtag) value in case of gap of known length or
593  *   $(unkseqlitdbtag) value if length is unknown as Dbtag.db
594  *   and Dbtag.tag.id = 0.
595  *
596  **********************************************************/
597 NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLocEx(Int4 range, Boolean unknown);   /* unknown: selects the $(unkseqlitdbtag) Dbtag.db for a gap of unknown length */
598 
599 /**********************************************************
600  *
601  *   NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(slp):
602  *
603  *      Looks at a single SeqLoc item. If it has the SeqId
604  *   of type GENERAL with Dbtag.db == $(seqlitdbtag) and
605  *   Dbtag.tag.id == 0, then returns TRUE, otherwise
606  *   returns FALSE.
607  *
608  **********************************************************/
609 NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(SeqLocPtr slp);   /* examines only the single SeqLoc item slp */
610 
611 /**********************************************************
612  *
613  *   NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(slp):
614  *
615  *      This function is to be used only if ISAGappedSeqLoc()
616  *   has returned TRUE.
617  *      Converts a SeqLoc set to a sequence of DeltaSeqs.
618  *   Dbtag'ed SeqLocs are turned into SeqLits carrying only the
619  *   "length" element. Regular SeqLocs are kept as they are.
620  *   Returns the resulting DeltaSeq.
621  *
622  **********************************************************/
623 NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(SeqLocPtr slp);
624 
625 /* the following typedefs and functions are used by the new sequence editor
626  * in desktop/seqpanel.c
627  */
628 typedef enum { eSeqEdInsert, eSeqEdDelete, eSeqEdFeatMove, eSeqEdTranslate,
629                eSeqEdInsertGap, eSeqEdDeleteGap,                              /* gap actions apply to delta sequences */
630                eSeqEdJournalStart, eSeqEdJournalEnd } ESeqEdJournalAction ;   /* kind of edit recorded in SeqEdJournalData.action */
631 
632 typedef struct seqedjournaldata            /* one undo/redo journal entry of the sequence editor */
633 {
634   ESeqEdJournalAction action;              /* indicates action taken */
635   Int4                offset;              /* position to the left of deletion or right of insertion */
636   Int4                num_chars;           /* number of characters inserted or deleted */
637   CharPtr             char_data;           /* characters inserted or removed */
638                                            /* when creating a journal entry for deletion, allocate
639                                             * space for char_data but do not populate it - it will
640                                             * be populated when the journal entry is played */
641   Boolean             spliteditmode;       /* if insertion occurs and spliteditmode is true and
642                                             * a feature overlaps the insertion position, the location
643                                             * of the feature will be discontinuous at the point of
644                                             * insertion. */
645   SeqFeatPtr          sfp;                 /* A feature that was moved - should be NULL
646                                             * unless action == eSeqEdFeatMove */
647   SeqLocPtr           slp;                 /* A location for sfp - if the journal has
648                                             * already been played, this is the previous
649                                             * location, if the journal has been undone,
650                                             * this is the location before the redo. */
651   ValNodePtr          affected_feats;      /* This is a list of features which were shortened by
652                                             * an eSeqEdDelete operation - their locations will
653                                             * need to be reconstructed if the operation is undone. */
654   Boolean             unknown_gap;         /* This is used only when action is eSeqEdInsertGap
655                                             * or eSeqEdDeleteGap.  It indicates whether the gap
656                                             * being inserted (or deleted) from a delta sequence
657                                             * is unknown. */
658   BioseqPtr           bsp;                 /* The Bioseq for which the action is to be/was applied. */
659   Uint1               moltype;             /* Molecule type for bsp.  Stored for convenience. */
660   Uint2               entityID;            /* entityID for bsp.  Stored for convenience. */
661   Pointer             next;                /* Journal entries are a doubly-linked list so that */
662   Pointer             prev;                /* we can traverse the list in both directions for  */
663                                            /* undo and redo.  Both links are typed as generic Pointer. */
664 } SeqEdJournalData, PNTR SeqEdJournalPtr;
665 
666 NLM_EXTERN SeqLocPtr LIBCALL
667 SeqEdSeqLocInsert
668 (SeqLocPtr head,
669  BioseqPtr target,     /* a BioseqPtr here, where SeqLocInsert takes a SeqIdPtr */
670  Int4 pos,
671  Int4 len,
672  Boolean split,
673  SeqIdPtr newid);      /* NOTE(review): other parameters mirror SeqLocInsert; presumably same meaning - confirm */
674 
675 NLM_EXTERN void
676 SeqEdInsertAdjustRNA     /* NOTE(review): not described in this header; see edutil.c */
677 (SeqFeatPtr sfp,
678  BioseqPtr  bsp,
679  Int4       insert_pos,
680  Int4       len,
681  Boolean    do_split);
682 
683 NLM_EXTERN void
684 SeqEdInsertAdjustCdRgn   /* same parameter list as SeqEdInsertAdjustRNA; NOTE(review): also undocumented here */
685 (SeqFeatPtr sfp,
686  BioseqPtr  bsp,
687  Int4       insert_pos,
688  Int4       len,
689  Boolean    do_split);
690 
691 NLM_EXTERN SeqLocPtr     /* NOTE(review): no LIBCALL here, unlike SeqEdSeqLocInsert and SeqEdSeqFeatDelete */
692 SeqEdSeqLocDelete
693 (SeqLocPtr head,
694  BioseqPtr target,       /* a BioseqPtr here, where SeqLocDeleteEx takes a SeqIdPtr */
695  Int4      from,
696  Int4      to,
697  Boolean   merge,
698  BoolPtr   changed,
699  BoolPtr   partial5,
700  BoolPtr   partial3);     /* NOTE(review): other parameters mirror SeqLocDeleteEx; presumably same meaning - confirm */
701 
702 NLM_EXTERN Int2 LIBCALL
703 SeqEdSeqFeatDelete
704 (SeqFeatPtr sfp,
705  BioseqPtr  target,       /* a BioseqPtr here, where SeqFeatDelete takes a SeqIdPtr */
706  Int4       from,
707  Int4       to,
708  Boolean merge);          /* NOTE(review): other parameters mirror SeqFeatDelete; presumably same return codes - confirm */
709 
710 NLM_EXTERN void SeqEdJournalFree (SeqEdJournalPtr sejp);
711 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewSeqEdit    /* parameters carry the names of SeqEdJournalData fields */
712 (ESeqEdJournalAction action,
713  Int4                offset,
714  Int4                num_chars,
715  CharPtr             char_data,
716  Boolean             spliteditmode,
717  BioseqPtr           bsp,
718  Uint1               moltype,
719  Uint2               entityID);
720 
721 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewFeatEdit   /* takes sfp and slp, the feature-move fields of SeqEdJournalData */
722 (ESeqEdJournalAction action,
723  SeqFeatPtr          sfp,
724  SeqLocPtr           slp,
725  BioseqPtr           bsp,
726  Uint1               moltype,
727  Uint2               entityID);
728 
729 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewTranslate  /* no action argument; NOTE(review): presumably creates an eSeqEdTranslate entry - confirm */
730 (SeqFeatPtr sfp,
731  BioseqPtr  bsp,
732  Uint2      entityID);
733 
734 NLM_EXTERN SeqFeatPtr
735 SeqEdGetNextFeature      /* NOTE(review): not described in this header; see edutil.c */
736 (BioseqPtr              bsp,
737  SeqFeatPtr             curr,
738  Uint1                  seqFeatChoice,
739  Uint1                  featDefChoice,
740  SeqMgrFeatContext PNTR context,       /* type is not declared in this file; presumably comes from explore.h - confirm */
741  Boolean                byLabel,
742  Boolean                byLocusTag,
743  Uint2                  entityID);
744 
745 /* this enum describes the kind of motion for feature adjusts; values are 1, 2, 3 (0 is not an enumerator) */
746 typedef enum { eLeftEnd=1, eRightEnd, eSlide } EMoveType;
747 
748 /* this function moves just the location (slp); move_type is one of eLeftEnd, eRightEnd, eSlide */
749 NLM_EXTERN Boolean SeqEdAdjustFeatureInterval
750 (SeqLocPtr slp, Int4 change, EMoveType move_type, Int4 interval_offset, BioseqPtr bsp);
751 
752 /* This function moves a feature location; NOTE(review): role of orig_loc is not stated here - confirm */
753 NLM_EXTERN void SeqEdFeatureAdjust
754 (SeqFeatPtr sfp,
755  SeqLocPtr  orig_loc,
756  Int4       change,
757  EMoveType  move_type,
758  Int4       interval_offset,
759  BioseqPtr  bsp);
760 
761 /* This function locates the endpoints of the Nth interval in a SeqLoc; left/right are presumably outputs - confirm */
762 NLM_EXTERN Boolean SeqEdGetNthIntervalEndPoints
763 (SeqLocPtr slp, Int4 n, Int4Ptr left, Int4Ptr right);
764 
765 /* this function is used to repair the interval order after a feature location
766  * interval has been dragged around.  (Applies to SeqEdRepairIntervalOrder only.)
767  */
768 NLM_EXTERN void SeqEdRepairIntervalOrder (SeqFeatPtr sfp, BioseqPtr bsp);
769 NLM_EXTERN Boolean SeqEdInsert (SeqEdJournalPtr sejp);   /* operates on a journal entry; NOTE(review): not described here */
770 NLM_EXTERN void SeqEdReindexAffectedFeatures (Int4 shift_start, Int4 shift_amt,
771                                           Boolean split, BioseqPtr bsp);
772 NLM_EXTERN void SeqEdReindexFeature (SeqFeatPtr sfp, BioseqPtr bsp);
773 NLM_EXTERN Boolean SeqEdDeleteFromBsp (SeqEdJournalPtr sejp, BoolPtr pfeats_deleted);   /* NOTE(review): pfeats_deleted presumably reports that features were removed (cf. Revision 6.13) - confirm */
774 
775 NLM_EXTERN void
776 AdjustFeatureForGapChange   /* Revision 6.17 log: changes a feature to accommodate a change in the length of a gap */
777 (SeqFeatPtr sfp,
778  BioseqPtr  bsp,
779  Int4       offset,
780  Int4       len_diff);
781 
782 extern Boolean IsDeltaSeqGap (DeltaSeqPtr dsp);          /* made extern in Revision 6.19; plain extern, not NLM_EXTERN */
783 extern Boolean IsDeltaSeqUnknownGap (DeltaSeqPtr dsp);   /* made extern in Revision 6.19 */
784 extern Boolean IsDeltaSeqKnownGap (DeltaSeqPtr dsp);     /* NOTE(review): not described in this header */
785 extern Boolean DoesSeqLitHaveGapTypeOrLinkage (SeqLitPtr slip);       /* NOTE(review): not described in this header */
786 extern Boolean DoesDeltaSeqHaveGapTypeOrLinkage (DeltaSeqPtr dsp);    /* NOTE(review): not described in this header */
787 
788 
789 #ifdef __cplusplus
790 }
791 #endif
792 
793 #undef NLM_EXTERN
794 #ifdef NLM_EXPORT
795 #define NLM_EXTERN NLM_EXPORT
796 #else
797 #define NLM_EXTERN
798 #endif
799 
800 #endif
801