/*   edutil.h
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*            National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act. It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted. This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* File Name: edutil.h
*
* Author: James Ostell
*
* Version Creation Date: 2/2/94
*
* $Revision: 6.23 $
*
* File Description: Sequence editing utilities
*
* Modifications:
* --------------------------------------------------------------------------
* Date     Name        Description of modification
* -------  ----------  -----------------------------------------------------
*
* $Log: edutil.h,v $
* Revision 6.23 2010/07/12 12:21:49 bollin
* Introduced a version of BioseqDelete that uses idx.deleteme to remove features
* (instead of freeing them immediately), and fixed bugs in VecScreenTool when
* entire Bioseqs are deleted.
*
* Revision 6.22 2010/06/11 12:03:22 bollin
* Added iBOL compliance report, which marks items with low trace as failing.
* Also checking in first draft of functions to reverse Quality Scores, not using
* until we can verify that they work for float and int graphs.
*
* Revision 6.21 2009/03/04 16:34:15 bollin
* Added function for removing contigs from scaffolds.
*
* Revision 6.20 2007/05/08 17:18:32 bollin
* Added functions for identifying AGP gap DeltaSeqs
*
* Revision 6.19 2007/05/07 17:43:03 bollin
* Made functions IsDeltaSeqGap and IsDeltaSeqUnknownGap extern.
*
* Revision 6.18 2006/07/13 17:06:38 bollin
* use Uint4 instead of Uint2 for itemID values
* removed unused variables
* resolved compiler warnings
*
* Revision 6.17 2006/02/07 13:41:29 bollin
* added function AdjustFeatureForGapChange, which changes a feature to accommodate
* a change in the length of a gap
*
* Revision 6.16 2005/05/02 14:21:15 bollin
* removed function prototypes for PlayJournal and UnplayJournal, since these
* functions live in desktop/seqpanel.c and are only used there
*
* Revision 6.15 2005/04/28 20:10:32 bollin
* added new function AdjustFeaturesForInsertion which is called by BioseqInsert
* and also by a new function in sequin3.c for converting a raw bioseq to a delta
* and inserting gaps
*
* Revision 6.14 2005/04/06 19:33:15 bollin
* made it possible to insert and remove gaps from delta sequences
*
* Revision 6.13 2004/10/08 16:04:16 bollin
* added ability to check when an action will remove a feature
*
* Revision 6.12 2004/09/29 18:49:57 bollin
* fixed bugs in sequence editing, can now undo a nucleotide deletion that
* removes an entire feature location (feature will be restored)
*
* Revision 6.11 2004/09/23 14:59:51 bollin
* moved functions that depend on functions that depend on BLAST functions
* into seqpanel.c, made functions called by those functions extern
*
* Revision 6.10 2004/09/22 18:20:32 bollin
* added functions for playing and unplaying a sequence editor action to translate
* a CDS
*
* Revision 6.9 2004/07/30 18:46:55 bollin
* added function for reordering intervals after they have been dragged by
* the sequence editor
*
* Revision 6.8 2004/07/28 20:06:19 bollin
* added journaling for undo/redo of dragged sequence location changes
*
* Revision 6.7 2004/07/28 15:22:15 bollin
* moved functions for moving feature locations around to edutil.c from
* seqpanel.c
*
* Revision 6.6 2004/07/22 15:34:41 bazhin
* Added function prototype GapToSeqLocEx() to handle gaps of unknown
* lengths within locations.
*
* Revision 6.5 2004/07/12 12:29:45 bollin
* moved new sequence editor editing functions here
*
* Revision 6.4 2003/02/10 22:57:45 kans
* added BioseqCopyEx, which takes a BioseqPtr instead of a SeqIdPtr for the source
*
* Revision 6.3 2002/07/02 13:23:43 kans
* added SeqLocDeleteEx
*
* Revision 6.2 1997/11/10 19:50:23 kans
* Fixed incorrect comment for ISAGappedSeqLoc() function (SB).
*
* Revision 6.1 1997/10/24 19:15:01 bazhin
* Added descriptors for three easy functions GapToSeqLoc(...),
* ISAGappedSeqLoc(...) and GappedSeqLocsToDeltaSeqs(...) for
* processing "gap(...)" tokens in CONTIG line.
128 * 129 * Revision 6.0 1997/08/25 18:05:28 madden 130 * Revision changed to 6.0 131 * 132 * Revision 5.6 1997/07/25 20:34:56 kans 133 * added SegLocToPartsEx 134 * 135 * Revision 5.5 1997/06/19 18:37:34 vakatov 136 * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization 137 * 138 * Revision 5.4 1996/10/09 17:31:37 kans 139 * was missing an ifdef cplusplus at the bottom 140 * 141 * Revision 5.3 1996/10/09 16:34:59 chappey 142 * added SeqLocReplaceID() that replaces the Seq-Id of a Seq-Loc 143 * 144 * Revision 5.2 1996/06/12 18:29:34 epstein 145 * move SeqLocIntNew() and SeqLocPntNew() from edutil to sequtil 146 * 147 * Revision 5.1 1996/06/10 15:07:22 epstein 148 * replace make_seq_loc() with SeqLocIntNew() and make_pnt_loc with SeqLocPntNew() 149 * 150 * Revision 5.0 1996/05/28 13:23:23 ostell 151 * Set to revision 5.0 152 * 153 * Revision 4.7 1996/03/12 22:14:22 ostell 154 * added SeqLocToParts() 155 * 156 * Revision 4.5 1996/01/30 16:24:04 ostell 157 * changed name of SeqLocPack() to SeqLocPackage() 158 * 159 * Revision 4.4 1996/01/29 22:03:52 ostell 160 * revised SeqLocAdd 161 * added SeqLocPack 162 * 163 * Revision 4.3 1996/01/10 22:25:25 ostell 164 * added SeqLocIntNew() 165 * 166 * Revision 4.2 1995/12/29 21:31:44 ostell 167 * added mapping functions between delta seq and seq loc, for editing utilities 168 * 169 * Revision 4.1 1995/11/15 20:40:20 ostell 170 * fixed SeqLocCopyPart so it correctly handles SEQLOC_NULL in segmented 171 * records 172 * 173 * Revision 4.0 1995/07/26 13:49:01 ostell 174 * force revision to 4.0 175 * 176 * Revision 1.6 1995/05/15 21:46:05 ostell 177 * added Log line 178 * 179 * 180 * 181 * 182 * ========================================================================== 183 */ 184 #ifndef _NCBI_EdUtil_ 185 #define _NCBI_EdUtil_ 186 187 #ifndef _NCBI_SeqPort_ 188 #include <seqport.h> /* other utilities */ 189 #endif 190 191 #undef NLM_EXTERN 192 #ifdef NLM_IMPORT 193 #define NLM_EXTERN NLM_IMPORT 194 #else 195 #define NLM_EXTERN 
extern 196 #endif 197 198 #ifdef __cplusplus 199 extern "C" { 200 #endif 201 202 #include "explore.h" 203 204 /***************************************************************************** 205 * 206 * Sequence Editing Utilties 207 * High Level Cut, Copy, Paste 208 * 209 *****************************************************************************/ 210 211 212 /***************************************************************************** 213 * 214 * BioseqDelete (target, from, to, do_feat, do_split) 215 * Deletes the region of sequence between from-to, inclusive, on the 216 * Bioseq whose SeqId is target. 217 * If do_feat, the feature table is updated to reflect the deletion 218 * using SeqEntryDelFeat() 219 * If do_split, the features across the deleted region are split into 220 * two intervals on either side. If not, the feature is just shortened. 221 * 222 *****************************************************************************/ 223 NLM_EXTERN Boolean LIBCALL BioseqDelete (SeqIdPtr target, Int4 from, Int4 to, Boolean do_feat, Boolean do_split); 224 NLM_EXTERN Boolean LIBCALL BioseqDeleteEx (SeqIdPtr target, Int4 from, Int4 to, Boolean do_feat, Boolean do_split, Boolean mark_deleted_feat); 225 226 227 228 /***************************************************************************** 229 * 230 * BioseqCopy(newid, sourceid, from, to, strand, do_feat) 231 * Creates a new Bioseq from sourceid in the range from-to inclusive. 232 * If strand==Seq_strand_minus, reverse complements the sequence in 233 * the copy and (if do_feat) corrects the feature table appropriately. 234 * Names new Bioseq as newid, if not NULL 235 * else Creates seqid.local = "Clipboard" if newid is NULL 236 * If do_feat == TRUE copies appropriate region of feature table from 237 * sourceid to new copy using SeqFeatsCopy(). 
238 * 239 * 240 * BioseqCopyEx(newid, oldbsp, from, to, strand, do_feat) 241 * Internal function called by BioseqCopy that takes BioseqPtr 242 * 243 *****************************************************************************/ 244 NLM_EXTERN BioseqPtr LIBCALL BioseqCopy (SeqIdPtr newid, SeqIdPtr sourceid, Int4 from, Int4 to, 245 Uint1 strand, Boolean do_feat); 246 247 NLM_EXTERN BioseqPtr LIBCALL BioseqCopyEx (SeqIdPtr newid, BioseqPtr oldbsp, Int4 from, Int4 to, 248 Uint1 strand, Boolean do_feat); 249 250 /***************************************************************************** 251 * 252 * SeqLocCopyPart (the_segs, from, to, strand, group, first_segp, last_segp) 253 * cuts out from the_segs the part from offset from to offset to 254 * reverse complements resulting seqloc if strand == Seq_strand_minus 255 * if (group) puts resulting intervals into a new Seq-loc (of type 256 * PACKED_INT if no SEQLOC_NULL, else SEQLOC_MIX). 257 * Currently this always makes intervals or nulls. Is really for segmented and 258 * reference sequence extensions 259 * If first_segp and last_segp are not NULL, then they are filled in with the 260 * ordinal number of the source segments that remain in the copy, based 261 * on SeqLocFindNext, where 1 is the first one. Thus if the third and 262 * fourth segments were copied, first is 3 and last is 4. If the 263 * location was reverse complemented, first is 4 and last is 3. 264 * 265 *****************************************************************************/ 266 NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyPart PROTO((SeqLocPtr the_segs, Int4 from, Int4 to, 267 Uint1 strand, Boolean group, Int2Ptr first_segp, Int2Ptr last_segp)); 268 269 /* This function is used by BioseqInsert to adjust features on a sequence after 270 * an insertion. 
pos indicates the start of the insertion, len indicates 271 * the length of the inserted sequence, and do_split indicates whether features 272 * that cover the insertion should be split at either side of the insertion. 273 */ 274 extern void 275 AdjustFeaturesForInsertion 276 (BioseqPtr tobsp, 277 SeqIdPtr to_id, 278 Int4 pos, 279 Int4 len, 280 Boolean do_split); 281 282 /***************************************************************************** 283 * 284 * BioseqInsert (from_id, from, to, strand, to_id, pos, from_feat, to_feat, 285 * do_split) 286 * Inserts a copy the region "from"-"to" on "strand" of the Bioseq 287 * identified by "from_id" into the Bioseq identified by "to_id" 288 * before "pos". 289 * To append to the end of the sequence, use APPEND_RESIDUE for "pos". 290 * if from_feat = TRUE, copies the feature table from "from" and updates 291 * to locations to point to the proper residues in "to_id" 292 * If to_feat = TRUE, updates feature table on "to_id" as well. 293 * if do_split == TRUE, then splits features in "to_id" (to_feat must 294 * be TRUE as well). Otherwise expands features at insertion. 295 * 296 * All operations are copies. "frombsp" is unchanged. 
297 * Insert will only occur between certain Bioseq.repr classes as below 298 * 299 * From Bioseq.repr To Bioseq.repr 300 * 301 * virtual raw segmented map 302 * +--------------------------------------------------- 303 * virtual | length inst SeqLoc length 304 * +--------------------------------------------------- 305 * raw | error copy SeqLoc error 306 * +--------------------------------------------------- 307 * segmented | error inst SeqLoc* error 308 * +--------------------------------------------------- 309 * map | error inst* SeqLoc copy 310 * +--------------------------------------------------- 311 * 312 * length = changes length of "to" by length of "from" 313 * error = insertion not allowed 314 * inst = "from" instantiated as residues ("N" or "X" for virtual "from") 315 * inst* = as above, but a restriction map can instantiate other bases 316 * than "N" for known restriction recognition sites. 317 * copy = copy of "from" inserted into "to" 318 * SeqLoc = a SeqLoc added to "to" which points to "from". No copy of residues. 319 * SeqLoc* = as above, but note that "to" points to "from" directly, not 320 * what "from" itself may point to. 321 * 322 *****************************************************************************/ 323 NLM_EXTERN Boolean LIBCALL BioseqInsert (SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, 324 Boolean from_feat, Boolean to_feat, Boolean do_split); 325 326 327 /***************************************************************************** 328 * 329 * BioseqOverwrite (target, pos, residue, seqcode) 330 * Overwrites the residue at pos with residue in the 331 * Bioseq whose SeqId is target. 
332 * residue is iupacna for DNA or ncbieaa for protein 333 * target MUST be a raw Bioseq right now 334 * no changes are made to the feature table 335 * 336 *****************************************************************************/ 337 NLM_EXTERN Boolean LIBCALL BioseqOverwrite (SeqIdPtr target, Int4 pos, Uint1 residue); 338 339 /***************************************************************************** 340 * 341 * Some defined values are provided for the Int4 values passed as 342 * from, to, and pos in functions above. Defined in seqport.h 343 * 344 * FIRST_RESIDUE 0 (first residue of sequence) 345 * LAST_RESIDUE -1 (last residue of sequence.. interpreted as 346 * length - 1) 347 * APPEND_RESIDUE -2 (interpreted as length.. off the end of the 348 * sequence. Only for "pos" in BioseqInsert 349 * to append to the end of the sequence ) 350 * 351 * 352 *****************************************************************************/ 353 354 355 356 357 358 /***************************************************************************** 359 * 360 * Sequence Editing Utilties 361 * Lower Level Support Routines 362 * 363 *****************************************************************************/ 364 365 366 /***************************************************************************** 367 * 368 * SeqFeatDelete() 369 * 0 = no changes made to location or product 370 * 1 = changes made but feature still has some location 371 * 2 = all of sfp->location in deleted interval 372 * 373 * if (merge) 374 * 1) correct numbers > to by subtraction 375 * 2) do not split intervals spanning the deletion 376 * else 377 * 1) do not change numbers > to 378 * 2) split intervals which span the deletions 379 * 380 *****************************************************************************/ 381 NLM_EXTERN Int2 LIBCALL SeqFeatDelete (SeqFeatPtr sfp, SeqIdPtr target, Int4 from, Int4 to, Boolean merge); 382 383 NLM_EXTERN Boolean LIBCALL SeqInsertByLoc (SeqIdPtr target, Int4 offset, 
SeqLocPtr fragment); 384 385 /***************************************************************************** 386 * 387 * SeqDeleteByLoc (slp, do_feat, do_split) 388 * deletes regions referenced by slp 389 * if do_feat, deletes features in those regions as well 390 * if do_split, splits intervals crossing the deletion 391 * else just shortens them. 392 * 393 *****************************************************************************/ 394 NLM_EXTERN Boolean LIBCALL SeqDeleteByLoc (SeqLocPtr slp, Boolean do_feat, Boolean do_split); 395 NLM_EXTERN Boolean LIBCALL SeqDeleteByLocEx (SeqLocPtr slp, Boolean do_feat, Boolean do_split, Boolean mark_deleted_feat); 396 397 398 /***************************************************************************** 399 * 400 * SeqLocAdd(headptr, slp, merge, do_copy) 401 * creates a linked list of SeqLocs. 402 * returns a pointer to the last SeqLoc in the chain 403 * if (merge) 404 * deletes double NULLs or Nulls at start (application must delete at stop) 405 * merges adjacent intervals on the same strand 406 * if (do_copy) 407 * Makes copies of incoming SeqLocs 408 * if incoming is merged, deletes the incoming SeqLoc 409 * 410 * call SeqLocPack(head) to turn into a SeqLoc from a linked list 411 * 412 *****************************************************************************/ 413 NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy); 414 415 /***************************************************************************** 416 * 417 * SeqLocPackage(head) 418 * head is a chain of 1 or more SeqLocs connected by slp->next 419 * Assumes was built by SeqLocAdd to remove redundancy 420 * Frees the last element if it is a NULL. 
421 * If more than one element left, then packages the chain into a SEQLOC_MIX, 422 * or SEQLOC_PACKED_INT as appropriate 423 * returns pointer to the head of the resulting single SeqLoc 424 * 425 *****************************************************************************/ 426 NLM_EXTERN SeqLocPtr LIBCALL SeqLocPackage (SeqLocPtr head); 427 428 /***************************************************************************** 429 * 430 * SegLocToPartsEx(BioseqPtr seg, SeqLocPtr slp, Boolean nullsBetween) 431 * SegLocToParts(BioseqPtr seg, SeqLocPtr slp) 432 * seg must be a segmented Bioseq 433 * slp must be a SeqLoc on it 434 * nullsBetween makes order instead of join 435 * function maps slp to the components of seg 436 * returns a new SeqLocPtr 437 * does not delete slp 438 * 439 *****************************************************************************/ 440 NLM_EXTERN SeqLocPtr LIBCALL SegLocToPartsEx (BioseqPtr seg, SeqLocPtr slp, Boolean nullsBetween); 441 442 NLM_EXTERN SeqLocPtr LIBCALL SegLocToParts (BioseqPtr seg, SeqLocPtr slp); 443 444 445 446 /***************************************************************************** 447 * 448 * SeqLocDelete() 449 * returns altered head or NULL if nothing left. 
450 * sets changed=TRUE if all or part of loc is deleted 451 * does NOT set changed if location coordinates are only moved 452 * if (merge) then corrects coordinates upstream of to 453 * else 454 * splits intervals covering from-to, does not correct upstream of to 455 * 456 *****************************************************************************/ 457 NLM_EXTERN SeqLocPtr LIBCALL SeqLocDelete (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed); 458 459 NLM_EXTERN SeqLocPtr LIBCALL SeqLocDeleteEx (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3); 460 461 NLM_EXTERN SeqLocPtr LIBCALL SeqLocSubtract (SeqLocPtr head, SeqLocPtr piece); 462 463 464 /***************************************************************************** 465 * 466 * DeltaSeqsToSeqLocs(dsp) 467 * converts a chain of delta seqs to seqlocs 468 * each SeqLit is converted to SeqLoc of type Int with a SeqId of type 469 * Dbtag where db="Seq\tLit" and objectId.id which is the index of the 470 * element in the delta seq chain where 1 is the first one. 471 * Returned SeqLoc is of type "mix" and must be freed by caller. 472 * 473 *****************************************************************************/ 474 NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp); 475 476 /***************************************************************************** 477 * 478 * SeqLocsToDeltaSeqs(dsp, slp) 479 * converts a chain of seqlocs generated by DeltaSeqToSeqLocs() back into 480 * delta seqs. dsp is the original chain of DeltaSeqs, which is required 481 * to convert the delta seqs back. 
482 * 483 *****************************************************************************/ 484 NLM_EXTERN DeltaSeqPtr LIBCALL SeqLocsToDeltaSeqs (DeltaSeqPtr dsp, SeqLocPtr slp); 485 486 /***************************************************************************** 487 * 488 * ISADeltaSeqsToSeqLoc(slp) 489 * returns Index (> 0) if this (one) SeqLoc was converted from a Delta Seq by 490 * DeltaSeqsToSeqLocs() by looking for the special Dbtag name 491 * 492 *****************************************************************************/ 493 NLM_EXTERN Int4 LIBCALL ISADeltaSeqsToSeqLoc (SeqLocPtr slp); 494 495 496 /***************************************************************************** 497 * 498 * SeqEntryDelFeat(sep, id, from, to, do_split) 499 * Deletes or truncates features on Bioseq (id) in the range 500 * from-to, inclusive 501 * 502 * Moves features > to left to account for decrease in length 503 * if do_split, breaks intervals across the deletion 504 * else just reduces their size 505 * 506 * If sep == NULL, then calls SeqEntryFind(id) to set scope to look 507 * for features. 
508 * 509 *****************************************************************************/ 510 NLM_EXTERN Boolean LIBCALL SeqEntryDelFeat PROTO((SeqEntryPtr sep, SeqIdPtr sip, Int4 from, Int4 to, Boolean do_split)); 511 NLM_EXTERN Boolean LIBCALL SeqEntryDelFeatEx PROTO((SeqEntryPtr sep, SeqIdPtr sip, Int4 from, Int4 to, Boolean do_split, Boolean mark_deleted_feat)); 512 513 /***************************************************************************** 514 * 515 * SeqFeatCopy(new, old, from, to, strand) 516 * 517 *****************************************************************************/ 518 NLM_EXTERN Int2 LIBCALL SeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand); 519 520 NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyRegion(SeqIdPtr newid, SeqLocPtr head, BioseqPtr oldbsp, 521 Int4 from, Int4 to, Uint1 strand, BoolPtr split); 522 523 /***************************************************************************** 524 * 525 * IntFuzzClip() 526 * returns TRUE if clipped range values 527 * in all cases, adjusts and/or complements IntFuzz 528 * Designed for IntFuzz on SeqLocs 529 * 530 *****************************************************************************/ 531 NLM_EXTERN void LIBCALL IntFuzzClip(IntFuzzPtr ifp, Int4 from, Int4 to, Uint1 strand, BoolPtr split); 532 533 /***************************************************************************** 534 * 535 * SeqLocInsert() 536 * alters "head" by inserting "len" residues before "pos" in any SeqLoc 537 * on the Bioseq "target" 538 * all SeqLocs not on "target" are unaltered 539 * for SeqLocs on "target" 540 * all SeqLocs before "pos" are unaltered 541 * all SeqLocs >= "pos" are incremented by "len" 542 * all SeqLocs spanning "pos" 543 * if "split" == TRUE, are split into two SeqLocs, one to the 544 * left of the insertion, the other to right 545 * if "split" != TRUE, the SeqLoc is increased in length to cover 546 * the insertion 547 * returns altered head or NULL if nothing left. 
548 * if ("newid" != NULL) replaces "target" with "newid" whether the 549 * SeqLoc is altered on not. 550 * 551 * Usage hints: 552 * 1) To update a feature location on "target" when 10 residues of 553 * sequence have been inserted before position 5 554 * SeqFeatPtr->location = SeqLocInsert ( SeqFeatPtr->location , 555 * "target", 5, 10, TRUE, NULL); [for some feature types 556 * you may want "split" equal FALSE] 557 * 558 * 2) To insert the complete feature table from "source" into a 559 * different Bioseq "dest" before position 20 in "dest" 560 * SFP->location = SeqLocInsert(SFP->location, "source", 0, 20, 561 * FALSE, "dest"); 562 * 563 * 564 *****************************************************************************/ 565 NLM_EXTERN SeqLocPtr LIBCALL SeqLocInsert (SeqLocPtr head, SeqIdPtr target, Int4 pos, Int4 len, 566 Boolean split, SeqIdPtr newid); 567 568 569 /******************************************************************** 570 * 571 * SeqLocReplaceID 572 * replaces the Seq-Id in a Seq-Loc (slp) with a new Seq-Id (new_sip) 573 * 574 **********************************************************************/ 575 NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip); 576 577 /********************************************************** 578 * 579 * NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(range): 580 * 581 * Gets the size of gap and constructs SeqLoc block with 582 * $(seqlitdbtag) value as Dbtag.db and Dbtag.tag.id = 0. 583 * 584 **********************************************************/ 585 NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(Int4 range); 586 587 /********************************************************** 588 * 589 * NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLocEx(range, unknown): 590 * 591 * Gets the size of gap and constructs SeqLoc block with 592 * $(seqlitdbtag) value in case of gap of known length or 593 * $(unkseqlitdbtag) value if length is unknown as Dbtag.db 594 * and Dbtag.tag.id = 0. 
595 * 596 **********************************************************/ 597 NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLocEx(Int4 range, Boolean unknown); 598 599 /********************************************************** 600 * 601 * NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(slp): 602 * 603 * Looks at a single SeqLoc item. If it has the SeqId 604 * of type GENERAL with Dbtag.db == $(seqlitdbtag) and 605 * Dbtag.tag.id == 0, then returns TRUE, otherwise 606 * returns FALSE. 607 * 608 **********************************************************/ 609 NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(SeqLocPtr slp); 610 611 /********************************************************** 612 * 613 * NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(slp): 614 * 615 * This functions is used only in the case, if ISAGappedSeqLoc() 616 * has returned TRUE. 617 * Converts SeqLoc set to the sequence of DeltaSeqs. 618 * Gbtag'ed SeqLocs it turns into SeqLits with the only "length" 619 * element. The regular SeqLocs saves as they are. Returns 620 * obtained DeltaSeq. 
621 * 622 **********************************************************/ 623 NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(SeqLocPtr slp); 624 625 /* the following typedefs and functions are used by the new sequence editor 626 * in desktop/seqpanel.c 627 */ 628 typedef enum { eSeqEdInsert, eSeqEdDelete, eSeqEdFeatMove, eSeqEdTranslate, 629 eSeqEdInsertGap, eSeqEdDeleteGap, 630 eSeqEdJournalStart, eSeqEdJournalEnd } ESeqEdJournalAction ; 631 632 typedef struct seqedjournaldata 633 { 634 ESeqEdJournalAction action; /* indicates action taken */ 635 Int4 offset; /* position to the left of deletion or right of insertion */ 636 Int4 num_chars; /* number of characters inserted or deleted */ 637 CharPtr char_data; /* characters inserted or removed */ 638 /* when creating a journal entry for deletion, allocate 639 * space for char_data but do not populate it - it will 640 * be populated when the journal entry is played */ 641 Boolean spliteditmode; /* if insertion occurs and spliteditmode is true and 642 * a feature overlaps the insertion position, the location 643 * of the feature will be discontinuous at the point of 644 * insertion. */ 645 SeqFeatPtr sfp; /* A feature that was moved - should be NULL 646 * unless action == eSeqEdFeatMove */ 647 SeqLocPtr slp; /* A location for sfp - if the journal has 648 * already been played, this is the previous 649 * location, if the journal has been undone, 650 * this is the location before the redo. */ 651 ValNodePtr affected_feats; /* This is a list of features which were shortened by 652 * an eSeqEdDelete operation - their locations will 653 * need to be reconstructed if the operation is undone. */ 654 Boolean unknown_gap; /* This is used only when action is eSeqEdInsertGap 655 * or eSeqEdDeleteGap. It indicates whether the gap 656 * being inserted (or deleted) from a delta sequence 657 * is unknown. */ 658 BioseqPtr bsp; /* The Bioseq for which the action is to be/was applied. 
*/ 659 Uint1 moltype; /* Molecule type for bsp. Stored for convenience. */ 660 Uint2 entityID; /* entityID for bsp. Stored for convenience. */ 661 Pointer next; /* Journal entries are a doubly-linked list so that */ 662 Pointer prev; /* we can traverse the list in both directions for */ 663 /* undo and redo. */ 664 } SeqEdJournalData, PNTR SeqEdJournalPtr; 665 666 NLM_EXTERN SeqLocPtr LIBCALL 667 SeqEdSeqLocInsert 668 (SeqLocPtr head, 669 BioseqPtr target, 670 Int4 pos, 671 Int4 len, 672 Boolean split, 673 SeqIdPtr newid); 674 675 NLM_EXTERN void 676 SeqEdInsertAdjustRNA 677 (SeqFeatPtr sfp, 678 BioseqPtr bsp, 679 Int4 insert_pos, 680 Int4 len, 681 Boolean do_split); 682 683 NLM_EXTERN void 684 SeqEdInsertAdjustCdRgn 685 (SeqFeatPtr sfp, 686 BioseqPtr bsp, 687 Int4 insert_pos, 688 Int4 len, 689 Boolean do_split); 690 691 NLM_EXTERN SeqLocPtr 692 SeqEdSeqLocDelete 693 (SeqLocPtr head, 694 BioseqPtr target, 695 Int4 from, 696 Int4 to, 697 Boolean merge, 698 BoolPtr changed, 699 BoolPtr partial5, 700 BoolPtr partial3); 701 702 NLM_EXTERN Int2 LIBCALL 703 SeqEdSeqFeatDelete 704 (SeqFeatPtr sfp, 705 BioseqPtr target, 706 Int4 from, 707 Int4 to, 708 Boolean merge); 709 710 NLM_EXTERN void SeqEdJournalFree (SeqEdJournalPtr sejp); 711 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewSeqEdit 712 (ESeqEdJournalAction action, 713 Int4 offset, 714 Int4 num_chars, 715 CharPtr char_data, 716 Boolean spliteditmode, 717 BioseqPtr bsp, 718 Uint1 moltype, 719 Uint2 entityID); 720 721 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewFeatEdit 722 (ESeqEdJournalAction action, 723 SeqFeatPtr sfp, 724 SeqLocPtr slp, 725 BioseqPtr bsp, 726 Uint1 moltype, 727 Uint2 entityID); 728 729 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewTranslate 730 (SeqFeatPtr sfp, 731 BioseqPtr bsp, 732 Uint2 entityID); 733 734 NLM_EXTERN SeqFeatPtr 735 SeqEdGetNextFeature 736 (BioseqPtr bsp, 737 SeqFeatPtr curr, 738 Uint1 seqFeatChoice, 739 Uint1 featDefChoice, 740 SeqMgrFeatContext PNTR context, 741 Boolean byLabel, 742 
Boolean byLocusTag, 743 Uint2 entityID); 744 745 /* this enum describes the kind of motion for feature adjusts */ 746 typedef enum { eLeftEnd=1, eRightEnd, eSlide } EMoveType; 747 748 /* this function moves just the location */ 749 NLM_EXTERN Boolean SeqEdAdjustFeatureInterval 750 (SeqLocPtr slp, Int4 change, EMoveType move_type, Int4 interval_offset, BioseqPtr bsp); 751 752 /* This function moves a feature location */ 753 NLM_EXTERN void SeqEdFeatureAdjust 754 (SeqFeatPtr sfp, 755 SeqLocPtr orig_loc, 756 Int4 change, 757 EMoveType move_type, 758 Int4 interval_offset, 759 BioseqPtr bsp); 760 761 /* This function locates the endpoints of the Nth interval in a SeqLoc */ 762 NLM_EXTERN Boolean SeqEdGetNthIntervalEndPoints 763 (SeqLocPtr slp, Int4 n, Int4Ptr left, Int4Ptr right); 764 765 /* this function is used to repair the interval order after a feature location 766 * interval has been dragged around. 767 */ 768 NLM_EXTERN void SeqEdRepairIntervalOrder (SeqFeatPtr sfp, BioseqPtr bsp); 769 NLM_EXTERN Boolean SeqEdInsert (SeqEdJournalPtr sejp); 770 NLM_EXTERN void SeqEdReindexAffectedFeatures (Int4 shift_start, Int4 shift_amt, 771 Boolean split, BioseqPtr bsp); 772 NLM_EXTERN void SeqEdReindexFeature (SeqFeatPtr sfp, BioseqPtr bsp); 773 NLM_EXTERN Boolean SeqEdDeleteFromBsp (SeqEdJournalPtr sejp, BoolPtr pfeats_deleted); 774 775 NLM_EXTERN void 776 AdjustFeatureForGapChange 777 (SeqFeatPtr sfp, 778 BioseqPtr bsp, 779 Int4 offset, 780 Int4 len_diff); 781 782 extern Boolean IsDeltaSeqGap (DeltaSeqPtr dsp); 783 extern Boolean IsDeltaSeqUnknownGap (DeltaSeqPtr dsp); 784 extern Boolean IsDeltaSeqKnownGap (DeltaSeqPtr dsp); 785 extern Boolean DoesSeqLitHaveGapTypeOrLinkage (SeqLitPtr slip); 786 extern Boolean DoesDeltaSeqHaveGapTypeOrLinkage (DeltaSeqPtr dsp); 787 788 789 #ifdef __cplusplus 790 } 791 #endif 792 793 #undef NLM_EXTERN 794 #ifdef NLM_EXPORT 795 #define NLM_EXTERN NLM_EXPORT 796 #else 797 #define NLM_EXTERN 798 #endif 799 800 #endif 801