/*
 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4)
 * Copyright (C) 2021 The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.datamodel;

import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Data structure to hold and manipulate a multiple sequence alignment
 */
public interface AlignmentI extends AnnotatedCollectionI
{
  /**
   * Calculates the number of sequences in an alignment, excluding hidden
   * sequences
   * 
   * @return number of visible sequences in the alignment
   */
  int getHeight();

  /**
   * Calculates the number of sequences in an alignment, including hidden
   * sequences
   * 
   * @return number of sequences in the alignment, hidden ones included
   */
  int getAbsoluteHeight();

  /**
   * Answers the width of the alignment, including gaps, that is, the length of
   * the longest sequence, or -1 if there are no sequences. Avoid calling this
   * method repeatedly where possible, as it has to perform a calculation. Note
   * that this width includes any hidden columns.
   * 
   * @return the alignment width (hidden columns included), or -1 if there are
   *         no sequences
   * @see AlignmentI#getVisibleWidth()
   */
  @Override
  int getWidth();

  /**
   * Answers the visible width of the alignment, including gaps, that is, the
   * length of the longest sequence, excluding any hidden columns. Answers -1 if
   * there are no sequences. Avoid calling this method repeatedly where
   * possible, as it has to perform a calculation.
   * 
   * @return the alignment width with hidden columns excluded, or -1 if there
   *         are no sequences
   * @see AlignmentI#getWidth()
   */
  int getVisibleWidth();

  /**
   * Calculates if this set of sequences (visible and invisible) are all the
   * same length
   * 
   * @return true if all sequences in alignment are the same length
   */
  boolean isAligned();

  /**
   * Calculates if this set of sequences is all the same length
   * 
   * @param includeHidden
   *          if true, hidden sequences are included in the test; if false,
   *          only visible sequences are tested
   * @return true if all (or just visible) sequences are the same length
   */
  boolean isAligned(boolean includeHidden);

  /**
   * Answers if the sequence at alignmentIndex is hidden
   * 
   * @param alignmentIndex
   *          the index to check
   * @return true if the sequence is hidden
   */
  boolean isHidden(int alignmentIndex);

  /**
   * Gets sequences as a Synchronized collection
   * 
   * @return All sequences in alignment.
   */
  @Override
  List<SequenceI> getSequences();

  /**
   * Gets sequences as a SequenceI[]
   * 
   * @return All sequences in alignment.
   */
  SequenceI[] getSequencesArray();

  /**
   * Find a specific sequence in this alignment.
   * 
   * @param i
   *          Index of required sequence.
   * 
   * @return SequenceI at given index.
   */
  SequenceI getSequenceAt(int i);

  /**
   * Find a specific sequence in this alignment, addressed by its position in
   * the full alignment.
   * 
   * @param i
   *          Index of required sequence in full alignment, i.e. as if all
   *          hidden sequences were visible (compare
   *          {@link #getAbsoluteHeight()})
   * 
   * @return SequenceI at given index.
   */
  SequenceI getSequenceAtAbsoluteIndex(int i);

  /**
   * Returns a map of lists of sequences keyed by sequence name.
   * 
   * @return map from sequence name to the list of sequences with that name
   */
  Map<String, List<SequenceI>> getSequencesByName();

  /**
   * Add a new sequence to this alignment.
   * 
   * @param seq
   *          New sequence will be added at end of alignment.
   */
  void addSequence(SequenceI seq);

  /**
   * Used to set a particular index of the alignment with the given sequence.
   * 
   * @param i
   *          Index of sequence to be updated. if i>length, sequence will be
   *          added to end, with no intervening positions.
   * @param seq
   *          New sequence to be inserted. The existing sequence at position i
   *          will be replaced.
   * @return existing sequence (or null if i>current length)
   */
  SequenceI replaceSequenceAt(int i, SequenceI seq);

  /**
   * Deletes a sequence from the alignment. Updates hidden sequences to account
   * for the removed sequence. Do NOT use this method to delete sequences which
   * are just hidden.
   * 
   * @param s
   *          Sequence to be deleted.
   * @see #deleteHiddenSequence(int)
   */
  void deleteSequence(SequenceI s);

  /**
   * Deletes a sequence from the alignment. Updates hidden sequences to account
   * for the removed sequence. Do NOT use this method to delete sequences which
   * are just hidden.
   * 
   * @param i
   *          Index of sequence to be deleted.
   * @see #deleteHiddenSequence(int)
   */
  void deleteSequence(int i);

  /**
   * Deletes a sequence in the alignment which has been hidden.
   * 
   * @param i
   *          Index of sequence to be deleted
   */
  void deleteHiddenSequence(int i);

  /**
   * Finds sequence in alignment using sequence name as query.
   * 
   * @param name
   *          Id of sequence to search for.
   * 
   * @return Sequence matching query, if found. If not found returns null.
   */
  SequenceI findName(String name);

  /**
   * Finds the sequences in the alignment that match the given name.
   * <p>
   * NOTE(review): the matching rule (exact, case-insensitive, pattern) and the
   * result for "no match" (null or empty array) are defined by the
   * implementation and are not specified here - confirm before relying on
   * either.
   * 
   * @param name
   *          name to match sequences against
   * @return array of matching sequences
   */
  SequenceI[] findSequenceMatch(String name);

  /**
   * Finds index of a given sequence in the alignment.
   * 
   * @param s
   *          Sequence to look for.
   * 
   * @return Index of sequence within the alignment or -1 if not found
   */
  int findIndex(SequenceI s);

  /**
   * Returns the first group (in the order in which groups were added) that
   * includes the given sequence instance and aligned position (base 0), or null
   * if none found
   * 
   * @param seq
   *          - must be contained in the alignment (not a dataset sequence)
   * @param position
   *          - aligned position (base 0)
   * 
   * @return the first matching group, or null if none found
   */
  SequenceGroup findGroup(SequenceI seq, int position);

  /**
   * Finds all groups that a given sequence is part of.
   * 
   * @param s
   *          Sequence in alignment.
   * 
   * @return All groups containing given sequence.
   */
  SequenceGroup[] findAllGroups(SequenceI s);

  /**
   * Adds a new SequenceGroup to this alignment.
   * 
   * @param sg
   *          New group to be added.
   */
  void addGroup(SequenceGroup sg);

  /**
   * Deletes a specific SequenceGroup
   * 
   * @param g
   *          Group will be deleted from alignment.
   */
  void deleteGroup(SequenceGroup g);

  /**
   * Get all the groups associated with this alignment.
   * 
   * @return All groups as a list.
   */
  List<SequenceGroup> getGroups();

  /**
   * Deletes all groups from this alignment.
   */
  void deleteAllGroups();

  /**
   * Adds a new AlignmentAnnotation to this alignment
   * <p>
   * Note: care should be taken to ensure that annotation is at least as wide
   * as the longest sequence in the alignment for rendering purposes.
   * 
   * @param aa
   *          the annotation to add
   */
  void addAnnotation(AlignmentAnnotation aa);

  /**
   * moves annotation to a specified index in alignment annotation display stack
   * 
   * @param aa
   *          the annotation object to be moved
   * @param index
   *          the destination position
   */
  void setAnnotationIndex(AlignmentAnnotation aa, int index);

  /**
   * Delete all annotations, including auto-calculated if the flag is set true.
   * Returns true if at least one annotation was deleted, else false.
   * 
   * @param includingAutoCalculated
   *          if true, auto-calculated annotations are deleted as well
   * @return true if at least one annotation was deleted, else false
   */
  boolean deleteAllAnnotations(boolean includingAutoCalculated);

  /**
   * Deletes a specific AlignmentAnnotation from the alignment, and removes its
   * reference from any SequenceI or SequenceGroup object's annotation if and
   * only if aa is contained within the alignment's annotation vector.
   * Otherwise, it will do nothing.
   * 
   * @param aa
   *          the annotation to delete
   * @return true if annotation was deleted from this alignment.
   */
  boolean deleteAnnotation(AlignmentAnnotation aa);

  /**
   * Deletes a specific AlignmentAnnotation from the alignment, and optionally
   * removes any reference from any SequenceI or SequenceGroup object's
   * annotation if and only if aa is contained within the alignment's annotation
   * vector. Otherwise, it will do nothing.
   * 
   * @param aa
   *          the annotation to delete
   * @param unhook
   *          flag indicating if any references should be removed from
   *          annotation - use this if you intend to add the annotation back
   *          into the alignment
   * @return true if annotation was deleted from this alignment.
   */
  boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook);

  /**
   * Get the annotation associated with this alignment (this can be null if no
   * annotation has ever been created on the alignment)
   * 
   * @return array of AlignmentAnnotation objects
   */
  @Override
  AlignmentAnnotation[] getAlignmentAnnotation();

  /**
   * Change the gap character used in this alignment to 'gc'
   * 
   * @param gc
   *          the new gap character.
   */
  void setGapCharacter(char gc);

  /**
   * Get the gap character used in this alignment
   * 
   * @return gap character
   */
  char getGapCharacter();

  /**
   * Test if alignment contains RNA structure
   * 
   * @return true if RNA structure AlignmentAnnotation was added to alignment
   */
  boolean hasRNAStructure();

  /**
   * Get the associated dataset for the alignment.
   * 
   * @return Alignment containing dataset sequences or null if this is a
   *         dataset.
   */
  AlignmentI getDataset();

  /**
   * Set the associated dataset for the alignment, or create one.
   * 
   * @param dataset
   *          The dataset alignment or null to construct one.
   */
  void setDataset(AlignmentI dataset);

  /**
   * pads sequences with gaps (to ensure the set looks like an alignment)
   * 
   * @return boolean true if alignment was modified
   */
  boolean padGaps();

  /**
   * Returns the hidden sequences object of this alignment.
   * <p>
   * NOTE(review): whether this can be null when nothing is hidden is not
   * specified here - confirm against the implementation.
   * 
   * @return the alignment's HiddenSequences
   */
  HiddenSequences getHiddenSequences();

  /**
   * Returns the hidden columns collection of this alignment.
   * 
   * @return the alignment's HiddenColumns
   * @see #setHiddenColumns(HiddenColumns)
   */
  HiddenColumns getHiddenColumns();

  /**
   * Compact representation of alignment
   * 
   * @return CigarArray
   */
  CigarArray getCompactAlignment();

  /**
   * Set an arbitrary key value pair for an alignment. Note: both key and value
   * objects should return a meaningful, human readable response to .toString()
   * 
   * @param key
   *          the property key
   * @param value
   *          the property value
   */
  void setProperty(Object key, Object value);

  /**
   * Get a named property from the alignment.
   * 
   * @param key
   *          the property key
   * @return value of property
   */
  Object getProperty(Object key);

  /**
   * Get the property hashtable.
   * 
   * @return hashtable of alignment properties (or null if none are defined)
   */
  Hashtable getProperties();

  /**
   * add a reference to a frame of aligned codons for this alignment
   * 
   * @param codons
   *          the codon frame to add
   */
  void addCodonFrame(AlignedCodonFrame codons);

  /**
   * remove a particular codon frame reference from this alignment
   * 
   * @param codons
   *          the codon frame to remove
   * @return true if codon frame was removed.
   */
  boolean removeCodonFrame(AlignedCodonFrame codons);

  /**
   * get all codon frames associated with this alignment
   * 
   * @return the codon frames associated with this alignment
   */
  List<AlignedCodonFrame> getCodonFrames();

  /**
   * Set the codon frame mappings (replacing any existing list).
   * 
   * @param acfs
   *          the new list of codon frame mappings
   */
  void setCodonFrames(List<AlignedCodonFrame> acfs);

  /**
   * get codon frames involving the given sequence
   * 
   * @param seq
   *          the sequence of interest
   * @return the codon frames that involve seq
   */
  List<AlignedCodonFrame> getCodonFrame(SequenceI seq);

  /**
   * find sequence with given name in alignment
   * 
   * @param token
   *          name to find
   * @param b
   *          true implies that case insensitive matching will <em>also</em> be
   *          tried
   * @return matched sequence or null
   */
  SequenceI findName(String token, boolean b);

  /**
   * find next sequence with given name in alignment starting after a given
   * sequence
   * 
   * @param startAfter
   *          the sequence after which the search will be started (usually the
   *          result of the last call to findName)
   * @param token
   *          name to find
   * @param b
   *          true implies that case insensitive matching will <em>also</em> be
   *          tried
   * @return matched sequence or null
   */
  SequenceI findName(SequenceI startAfter, String token, boolean b);

  /**
   * find first sequence in alignment which is involved in the given search
   * result object
   * 
   * @param results
   *          the search results to look up
   * @return -1 or index of sequence in alignment
   */
  int findIndex(SearchResultsI results);

  /**
   * append sequences and annotation from another alignment object to this one.
   * Note: this is a straight transfer of object references, and may result in
   * toappend's dependent data being transformed to fit the alignment (changing
   * gap characters, etc...). If you are uncertain, use the Alignment copy
   * constructor to create a new version which can be appended without side
   * effect.
   * 
   * @param toappend
   *          - the alignment to be appended.
   */
  void append(AlignmentI toappend);

  /**
   * Justify the sequences to the left or right by deleting and inserting gaps
   * before the initial residue or after the terminal residue
   * <p>
   * TODO: return undo object
   * 
   * @param right
   *          true if alignment padded to right, false to justify to left
   * @return true if alignment was changed
   */
  boolean justify(boolean right);

  /**
   * add given annotation row at given position (0 is start, -1 is end)
   * 
   * @param consensus
   *          the annotation row to add
   * @param i
   *          the position at which to add the row (0 is start, -1 is end)
   */
  void addAnnotation(AlignmentAnnotation consensus, int i);

  /**
   * search for or create a specific annotation row on the alignment
   * 
   * @param name
   *          name for annotation (must match)
   * @param calcId
   *          calcId for the annotation (null or must match)
   * @param autoCalc
   *          - value of autocalc flag for the annotation
   * @param seqRef
   *          - null or specific sequence reference
   * @param groupRef
   *          - null or specific group reference
   * 
   * @return existing annotation matching the given attributes, or the newly
   *         created annotation row if none was found
   */
  AlignmentAnnotation findOrCreateAnnotation(String name, String calcId,
          boolean autoCalc, SequenceI seqRef, SequenceGroup groupRef);

  /**
   * move the given group up or down in the alignment. Implementor assumes given
   * group is already present on alignment - no recalculations are triggered.
   * <p>
   * NOTE(review): this comment previously described a row-count parameter that
   * is not part of the signature; the method name suggests the move is by one
   * row - confirm against the implementation.
   * 
   * @param sg
   *          the group to move
   * @param map
   *          NOTE(review): presumably the hidden/represented sequences keyed
   *          by their representative sequence - confirm against callers
   * @param up
   *          true to move the group up, false to move it down
   */
  void moveSelectedSequencesByOne(SequenceGroup sg,
          Map<SequenceI, SequenceCollectionI> map, boolean up);

  /**
   * validate annotation after an edit and update any alignment state flags
   * accordingly
   * 
   * @param alignmentAnnotation
   *          the annotation that was edited
   */
  void validateAnnotation(AlignmentAnnotation alignmentAnnotation);

  /**
   * Align this alignment the same as the given one. If both of the same type
   * (nucleotide/protein) then align both identically. If this is nucleotide and
   * the other is protein, make 3 gaps for each gap in the protein sequences. If
   * this is protein and the other is nucleotide, insert a gap for each 3 gaps
   * (or part thereof) between nucleotide bases. Returns the number of mapped
   * sequences that were realigned.
   * 
   * @param al
   *          the alignment whose layout is to be copied
   * @return the number of mapped sequences that were realigned
   */
  int alignAs(AlignmentI al);

  /**
   * Returns the set of distinct sequence names in the alignment.
   * 
   * @return the distinct sequence names
   */
  Set<String> getSequenceNames();

  /**
   * Checks if the alignment has at least one sequence with one non-gapped
   * residue
   * 
   * @return true if at least one sequence has at least one non-gap residue
   */
  boolean hasValidSequence();

  /**
   * Update any mappings to 'virtual' sequences to compatible real ones, if
   * present in the added sequences. Returns a count of mappings updated.
   * 
   * @param seqs
   *          the added sequences to check mappings against
   * @return the number of mappings updated
   */
  int realiseMappings(List<SequenceI> seqs);

  /**
   * Returns the first AlignedCodonFrame that has a mapping between the given
   * dataset sequences
   * 
   * @param mapFrom
   *          dataset sequence the mapping is from
   * @param mapTo
   *          dataset sequence the mapping is to
   * @return the first codon frame holding such a mapping; NOTE(review): the
   *         result when no mapping exists (presumably null) is not specified
   *         here - confirm
   */
  AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo);

  /**
   * Set the hidden columns collection on the alignment. Answers true if the
   * hidden column selection changed, else false.
   * 
   * @param cols
   *          the hidden columns collection to set
   * @return true if the hidden column selection changed, else false
   */
  boolean setHiddenColumns(HiddenColumns cols);

  /**
   * Set the first sequence as representative and hide its insertions. Typically
   * used when loading JPred files.
   */
  void setupJPredAlignment();

  /**
   * Add gaps into the sequences aligned to profileseq under the given
   * AlignmentView
   * 
   * @param profileseq
   *          sequence in al which sequences are aligned to
   * @param input
   *          alignment view where sequence corresponding to profileseq is first
   *          entry
   * @return new HiddenColumns for new alignment view, with insertions into
   *         profileseq marked as hidden.
   */
  HiddenColumns propagateInsertions(SequenceI profileseq,
          AlignmentView input);

}