/*
 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4)
 * Copyright (C) 2021 The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.datamodel;
22 
23 import java.util.Hashtable;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Set;
27 
28 /**
29  * Data structure to hold and manipulate a multiple sequence alignment
30  */
31 public interface AlignmentI extends AnnotatedCollectionI
32 {
33   /**
34    * Calculates the number of sequences in an alignment, excluding hidden
35    * sequences
36    *
37    * @return Number of sequences in alignment
38    */
getHeight()39   int getHeight();
40 
41   /**
42    * Calculates the number of sequences in an alignment, including hidden
43    * sequences
44    *
45    * @return Number of sequences in alignment
46    */
getAbsoluteHeight()47   int getAbsoluteHeight();
48 
49   /**
50    *
51    * Answers the width of the alignment, including gaps, that is, the length of
52    * the longest sequence, or -1 if there are no sequences. Avoid calling this
53    * method repeatedly where possible, as it has to perform a calculation. Note
54    * that this width includes any hidden columns.
55    *
56    * @return
57    * @see AlignmentI#getVisibleWidth()
58    */
59   @Override
getWidth()60   int getWidth();
61 
62   /**
63    *
64    * Answers the visible width of the alignment, including gaps, that is, the
65    * length of the longest sequence, excluding any hidden columns. Answers -1 if
66    * there are no sequences. Avoid calling this method repeatedly where
67    * possible, as it has to perform a calculation.
68    *
69    * @return
70    */
getVisibleWidth()71   int getVisibleWidth();
72 
73   /**
74    * Calculates if this set of sequences (visible and invisible) are all the
75    * same length
76    *
77    * @return true if all sequences in alignment are the same length
78    */
isAligned()79   boolean isAligned();
80 
81   /**
82    * Calculates if this set of sequences is all the same length
83    *
84    * @param includeHidden
85    *          optionally exclude hidden sequences from test
86    * @return true if all (or just visible) sequences are the same length
87    */
isAligned(boolean includeHidden)88   boolean isAligned(boolean includeHidden);
89 
90   /**
91    * Answers if the sequence at alignmentIndex is hidden
92    *
93    * @param alignmentIndex
94    *          the index to check
95    * @return true if the sequence is hidden
96    */
isHidden(int alignmentIndex)97   boolean isHidden(int alignmentIndex);
98 
99   /**
100    * Gets sequences as a Synchronized collection
101    *
102    * @return All sequences in alignment.
103    */
104   @Override
getSequences()105   List<SequenceI> getSequences();
106 
107   /**
108    * Gets sequences as a SequenceI[]
109    *
110    * @return All sequences in alignment.
111    */
getSequencesArray()112   SequenceI[] getSequencesArray();
113 
114   /**
115    * Find a specific sequence in this alignment.
116    *
117    * @param i
118    *          Index of required sequence.
119    *
120    * @return SequenceI at given index.
121    */
getSequenceAt(int i)122   SequenceI getSequenceAt(int i);
123 
124   /**
125    * Find a specific sequence in this alignment.
126    *
127    * @param i
128    *          Index of required sequence in full alignment, i.e. if all columns
129    *          were visible
130    *
131    * @return SequenceI at given index.
132    */
getSequenceAtAbsoluteIndex(int i)133   SequenceI getSequenceAtAbsoluteIndex(int i);
134 
135   /**
136    * Returns a map of lists of sequences keyed by sequence name.
137    *
138    * @return
139    */
getSequencesByName()140   Map<String, List<SequenceI>> getSequencesByName();
141 
142   /**
143    * Add a new sequence to this alignment.
144    *
145    * @param seq
146    *          New sequence will be added at end of alignment.
147    */
addSequence(SequenceI seq)148   void addSequence(SequenceI seq);
149 
150   /**
151    * Used to set a particular index of the alignment with the given sequence.
152    *
153    * @param i
154    *          Index of sequence to be updated. if i>length, sequence will be
155    *          added to end, with no intervening positions.
156    * @param seq
157    *          New sequence to be inserted. The existing sequence at position i
158    *          will be replaced.
159    * @return existing sequence (or null if i>current length)
160    */
replaceSequenceAt(int i, SequenceI seq)161   SequenceI replaceSequenceAt(int i, SequenceI seq);
162 
163   /**
164    * Deletes a sequence from the alignment. Updates hidden sequences to account
165    * for the removed sequence. Do NOT use this method to delete sequences which
166    * are just hidden.
167    *
168    * @param s
169    *          Sequence to be deleted.
170    */
deleteSequence(SequenceI s)171   void deleteSequence(SequenceI s);
172 
173   /**
174    * Deletes a sequence from the alignment. Updates hidden sequences to account
175    * for the removed sequence. Do NOT use this method to delete sequences which
176    * are just hidden.
177    *
178    * @param i
179    *          Index of sequence to be deleted.
180    */
deleteSequence(int i)181   void deleteSequence(int i);
182 
183   /**
184    * Deletes a sequence in the alignment which has been hidden.
185    *
186    * @param i
187    *          Index of sequence to be deleted
188    */
deleteHiddenSequence(int i)189   void deleteHiddenSequence(int i);
190 
191   /**
192    * Finds sequence in alignment using sequence name as query.
193    *
194    * @param name
195    *          Id of sequence to search for.
196    *
197    * @return Sequence matching query, if found. If not found returns null.
198    */
findName(String name)199   SequenceI findName(String name);
200 
findSequenceMatch(String name)201   SequenceI[] findSequenceMatch(String name);
202 
203   /**
204    * Finds index of a given sequence in the alignment.
205    *
206    * @param s
207    *          Sequence to look for.
208    *
209    * @return Index of sequence within the alignment or -1 if not found
210    */
findIndex(SequenceI s)211   int findIndex(SequenceI s);
212 
213   /**
214    * Returns the first group (in the order in which groups were added) that
215    * includes the given sequence instance and aligned position (base 0), or null
216    * if none found
217    *
218    * @param seq
219    *          - must be contained in the alignment (not a dataset sequence)
220    * @param position
221    *
222    * @return
223    */
findGroup(SequenceI seq, int position)224   SequenceGroup findGroup(SequenceI seq, int position);
225 
226   /**
227    * Finds all groups that a given sequence is part of.
228    *
229    * @param s
230    *          Sequence in alignment.
231    *
232    * @return All groups containing given sequence.
233    */
findAllGroups(SequenceI s)234   SequenceGroup[] findAllGroups(SequenceI s);
235 
236   /**
237    * Adds a new SequenceGroup to this alignment.
238    *
239    * @param sg
240    *          New group to be added.
241    */
addGroup(SequenceGroup sg)242   void addGroup(SequenceGroup sg);
243 
244   /**
245    * Deletes a specific SequenceGroup
246    *
247    * @param g
248    *          Group will be deleted from alignment.
249    */
deleteGroup(SequenceGroup g)250   void deleteGroup(SequenceGroup g);
251 
252   /**
253    * Get all the groups associated with this alignment.
254    *
255    * @return All groups as a list.
256    */
getGroups()257   List<SequenceGroup> getGroups();
258 
259   /**
260    * Deletes all groups from this alignment.
261    */
deleteAllGroups()262   void deleteAllGroups();
263 
264   /**
265    * Adds a new AlignmentAnnotation to this alignment
266    *
267    * @note Care should be taken to ensure that annotation is at least as wide as
268    *       the longest sequence in the alignment for rendering purposes.
269    */
addAnnotation(AlignmentAnnotation aa)270   void addAnnotation(AlignmentAnnotation aa);
271 
272   /**
273    * moves annotation to a specified index in alignment annotation display stack
274    *
275    * @param aa
276    *          the annotation object to be moved
277    * @param index
278    *          the destination position
279    */
setAnnotationIndex(AlignmentAnnotation aa, int index)280   void setAnnotationIndex(AlignmentAnnotation aa, int index);
281 
282   /**
283    * Delete all annotations, including auto-calculated if the flag is set true.
284    * Returns true if at least one annotation was deleted, else false.
285    *
286    * @param includingAutoCalculated
287    * @return
288    */
deleteAllAnnotations(boolean includingAutoCalculated)289   boolean deleteAllAnnotations(boolean includingAutoCalculated);
290 
291   /**
292    * Deletes a specific AlignmentAnnotation from the alignment, and removes its
293    * reference from any SequenceI or SequenceGroup object's annotation if and
294    * only if aa is contained within the alignment's annotation vector.
295    * Otherwise, it will do nothing.
296    *
297    * @param aa
298    *          the annotation to delete
299    * @return true if annotation was deleted from this alignment.
300    */
deleteAnnotation(AlignmentAnnotation aa)301   boolean deleteAnnotation(AlignmentAnnotation aa);
302 
303   /**
304    * Deletes a specific AlignmentAnnotation from the alignment, and optionally
305    * removes any reference from any SequenceI or SequenceGroup object's
306    * annotation if and only if aa is contained within the alignment's annotation
307    * vector. Otherwise, it will do nothing.
308    *
309    * @param aa
310    *          the annotation to delete
311    * @param unhook
312    *          flag indicating if any references should be removed from
313    *          annotation - use this if you intend to add the annotation back
314    *          into the alignment
315    * @return true if annotation was deleted from this alignment.
316    */
deleteAnnotation(AlignmentAnnotation aa, boolean unhook)317   boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook);
318 
319   /**
320    * Get the annotation associated with this alignment (this can be null if no
321    * annotation has ever been created on the alignment)
322    *
323    * @return array of AlignmentAnnotation objects
324    */
325   @Override
getAlignmentAnnotation()326   AlignmentAnnotation[] getAlignmentAnnotation();
327 
328   /**
329    * Change the gap character used in this alignment to 'gc'
330    *
331    * @param gc
332    *          the new gap character.
333    */
setGapCharacter(char gc)334   void setGapCharacter(char gc);
335 
336   /**
337    * Get the gap character used in this alignment
338    *
339    * @return gap character
340    */
getGapCharacter()341   char getGapCharacter();
342 
343   /**
344    * Test if alignment contains RNA structure
345    *
346    * @return true if RNA structure AligmnentAnnotation was added to alignment
347    */
hasRNAStructure()348   boolean hasRNAStructure();
349 
350   /**
351    * Get the associated dataset for the alignment.
352    *
353    * @return Alignment containing dataset sequences or null of this is a
354    *         dataset.
355    */
getDataset()356   AlignmentI getDataset();
357 
358   /**
359    * Set the associated dataset for the alignment, or create one.
360    *
361    * @param dataset
362    *          The dataset alignment or null to construct one.
363    */
setDataset(AlignmentI dataset)364   void setDataset(AlignmentI dataset);
365 
366   /**
367    * pads sequences with gaps (to ensure the set looks like an alignment)
368    *
369    * @return boolean true if alignment was modified
370    */
padGaps()371   boolean padGaps();
372 
getHiddenSequences()373   HiddenSequences getHiddenSequences();
374 
getHiddenColumns()375   HiddenColumns getHiddenColumns();
376 
377   /**
378    * Compact representation of alignment
379    *
380    * @return CigarArray
381    */
getCompactAlignment()382   CigarArray getCompactAlignment();
383 
384   /**
385    * Set an arbitrary key value pair for an alignment. Note: both key and value
386    * objects should return a meaningful, human readable response to .toString()
387    *
388    * @param key
389    * @param value
390    */
setProperty(Object key, Object value)391   void setProperty(Object key, Object value);
392 
393   /**
394    * Get a named property from the alignment.
395    *
396    * @param key
397    * @return value of property
398    */
getProperty(Object key)399   Object getProperty(Object key);
400 
401   /**
402    * Get the property hashtable.
403    *
404    * @return hashtable of alignment properties (or null if none are defined)
405    */
getProperties()406   Hashtable getProperties();
407 
408   /**
409    * add a reference to a frame of aligned codons for this alignment
410    *
411    * @param codons
412    */
addCodonFrame(AlignedCodonFrame codons)413   void addCodonFrame(AlignedCodonFrame codons);
414 
415   /**
416    * remove a particular codon frame reference from this alignment
417    *
418    * @param codons
419    * @return true if codon frame was removed.
420    */
removeCodonFrame(AlignedCodonFrame codons)421   boolean removeCodonFrame(AlignedCodonFrame codons);
422 
423   /**
424    * get all codon frames associated with this alignment
425    *
426    * @return
427    */
getCodonFrames()428   List<AlignedCodonFrame> getCodonFrames();
429 
430   /**
431    * Set the codon frame mappings (replacing any existing list).
432    */
setCodonFrames(List<AlignedCodonFrame> acfs)433   void setCodonFrames(List<AlignedCodonFrame> acfs);
434 
435   /**
436    * get codon frames involving sequenceI
437    */
getCodonFrame(SequenceI seq)438   List<AlignedCodonFrame> getCodonFrame(SequenceI seq);
439 
440   /**
441    * find sequence with given name in alignment
442    *
443    * @param token
444    *          name to find
445    * @param b
446    *          true implies that case insensitive matching will <em>also</em> be
447    *          tried
448    * @return matched sequence or null
449    */
findName(String token, boolean b)450   SequenceI findName(String token, boolean b);
451 
452   /**
453    * find next sequence with given name in alignment starting after a given
454    * sequence
455    *
456    * @param startAfter
457    *          the sequence after which the search will be started (usually the
458    *          result of the last call to findName)
459    * @param token
460    *          name to find
461    * @param b
462    *          true implies that case insensitive matching will <em>also</em> be
463    *          tried
464    * @return matched sequence or null
465    */
findName(SequenceI startAfter, String token, boolean b)466   SequenceI findName(SequenceI startAfter, String token, boolean b);
467 
468   /**
469    * find first sequence in alignment which is involved in the given search
470    * result object
471    *
472    * @param results
473    * @return -1 or index of sequence in alignment
474    */
findIndex(SearchResultsI results)475   int findIndex(SearchResultsI results);
476 
477   /**
478    * append sequences and annotation from another alignment object to this one.
479    * Note: this is a straight transfer of object references, and may result in
480    * toappend's dependent data being transformed to fit the alignment (changing
481    * gap characters, etc...). If you are uncertain, use the copy Alignment copy
482    * constructor to create a new version which can be appended without side
483    * effect.
484    *
485    * @param toappend
486    *          - the alignment to be appended.
487    */
append(AlignmentI toappend)488   void append(AlignmentI toappend);
489 
490   /**
491    * Justify the sequences to the left or right by deleting and inserting gaps
492    * before the initial residue or after the terminal residue
493    *
494    * @param right
495    *          true if alignment padded to right, false to justify to left
496    * @return true if alignment was changed TODO: return undo object
497    */
justify(boolean right)498   boolean justify(boolean right);
499 
500   /**
501    * add given annotation row at given position (0 is start, -1 is end)
502    *
503    * @param consensus
504    * @param i
505    */
addAnnotation(AlignmentAnnotation consensus, int i)506   void addAnnotation(AlignmentAnnotation consensus, int i);
507 
508   /**
509    * search for or create a specific annotation row on the alignment
510    *
511    * @param name
512    *          name for annotation (must match)
513    * @param calcId
514    *          calcId for the annotation (null or must match)
515    * @param autoCalc
516    *          - value of autocalc flag for the annotation
517    * @param seqRef
518    *          - null or specific sequence reference
519    * @param groupRef
520    *          - null or specific group reference
521    * @param method
522    *          - CalcId for the annotation (must match)
523    *
524    * @return existing annotation matching the given attributes
525    */
findOrCreateAnnotation(String name, String calcId, boolean autoCalc, SequenceI seqRef, SequenceGroup groupRef)526   AlignmentAnnotation findOrCreateAnnotation(String name, String calcId,
527           boolean autoCalc, SequenceI seqRef, SequenceGroup groupRef);
528 
529   /**
530    * move the given group up or down in the alignment by the given number of
531    * rows. Implementor assumes given group is already present on alignment - no
532    * recalculations are triggered.
533    *
534    * @param sg
535    * @param map
536    * @param up
537    * @param i
538    */
moveSelectedSequencesByOne(SequenceGroup sg, Map<SequenceI, SequenceCollectionI> map, boolean up)539   void moveSelectedSequencesByOne(SequenceGroup sg,
540           Map<SequenceI, SequenceCollectionI> map, boolean up);
541 
542   /**
543    * validate annotation after an edit and update any alignment state flags
544    * accordingly
545    *
546    * @param alignmentAnnotation
547    */
validateAnnotation(AlignmentAnnotation alignmentAnnotation)548   void validateAnnotation(AlignmentAnnotation alignmentAnnotation);
549 
550   /**
551    * Align this alignment the same as the given one. If both of the same type
552    * (nucleotide/protein) then align both identically. If this is nucleotide and
553    * the other is protein, make 3 gaps for each gap in the protein sequences. If
554    * this is protein and the other is nucleotide, insert a gap for each 3 gaps
555    * (or part thereof) between nucleotide bases. Returns the number of mapped
556    * sequences that were realigned .
557    *
558    * @param al
559    * @return
560    */
alignAs(AlignmentI al)561   int alignAs(AlignmentI al);
562 
563   /**
564    * Returns the set of distinct sequence names in the alignment.
565    *
566    * @return
567    */
getSequenceNames()568   Set<String> getSequenceNames();
569 
570   /**
571    * Checks if the alignment has at least one sequence with one non-gaped
572    * residue
573    *
574    * @return
575    */
hasValidSequence()576   public boolean hasValidSequence();
577 
578   /**
579    * Update any mappings to 'virtual' sequences to compatible real ones, if
580    * present in the added sequences. Returns a count of mappings updated.
581    *
582    * @param seqs
583    * @return
584    */
realiseMappings(List<SequenceI> seqs)585   int realiseMappings(List<SequenceI> seqs);
586 
587   /**
588    * Returns the first AlignedCodonFrame that has a mapping between the given
589    * dataset sequences
590    *
591    * @param mapFrom
592    * @param mapTo
593    * @return
594    */
getMapping(SequenceI mapFrom, SequenceI mapTo)595   AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo);
596 
597   /**
598    * Set the hidden columns collection on the alignment. Answers true if the
599    * hidden column selection changed, else false.
600    *
601    * @param cols
602    * @return
603    */
setHiddenColumns(HiddenColumns cols)604   public boolean setHiddenColumns(HiddenColumns cols);
605 
606   /**
607    * Set the first sequence as representative and hide its insertions. Typically
608    * used when loading JPred files.
609    */
setupJPredAlignment()610   public void setupJPredAlignment();
611 
612   /**
613    * Add gaps into the sequences aligned to profileseq under the given
614    * AlignmentView
615    *
616    * @param profileseq
617    *          sequence in al which sequences are aligned to
618    * @param input
619    *          alignment view where sequence corresponding to profileseq is first
620    *          entry
621    * @return new HiddenColumns for new alignment view, with insertions into
622    *         profileseq marked as hidden.
623    */
propagateInsertions(SequenceI profileseq, AlignmentView input)624   public HiddenColumns propagateInsertions(SequenceI profileseq,
625           AlignmentView input);
626 
627 }
628