/*
 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4)
 * Copyright (C) 2021 The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.io;

import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.util.MessageManager;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;

/**
 * Abstract base class for alignment file readers and writers. It holds the
 * sequences, annotations, sequence groups, alignment properties and Newick
 * tree strings gathered while reading a data source, and provides the methods
 * used to transfer them onto an {@link AlignmentI}. Concrete subclasses supply
 * the format specific logic by implementing {@link #parse()}.
 * 
 * @author $author$
 * @version $Revision$
 */
public abstract class AlignFile extends FileParse
        implements AlignmentFileReaderI, AlignmentFileWriterI
{
  int noSeqs = 0;

  int maxLength = 0;

  /**
   * Sequences to be added to form a new alignment. TODO: remove vector in this
   * class
   */
  protected Vector<SequenceI> seqs;

  /**
   * annotation to be added to generated alignment object
   */
  protected Vector<AlignmentAnnotation> annotations;

  /**
   * SequenceGroups to be added to the alignment object
   */
  protected List<SequenceGroup> seqGroups;

  /**
   * Properties to be added to generated alignment object. Created lazily by
   * {@link #setAlignmentProperty(Object, Object)}, so may be null.
   */
  private Hashtable<Object, Object> properties;

  long start;

  long end;

  /**
   * true if parse() has been called
   */
  private boolean parseCalled = false;

  private boolean parseImmediately = true;

  /**
   * @return if doParse() was called at construction time
   */
  protected boolean isParseImmediately()
  {
    return parseImmediately;
  }

  /**
   * Creates a new AlignFile object.
   */
  public AlignFile()
  {
    // Shouldn't we init data structures (JBPNote: not sure - initData is for
    // initialising the structures used for reading from a datasource, and the
    // bare constructor hasn't got any datasource)
    initData();
  }

  /**
   * Creates a new AlignFile object holding the given sequences.
   * 
   * @param seqs
   *          the sequences to store (must not be null)
   */
  public AlignFile(SequenceI[] seqs)
  {
    this();
    setSeqs(seqs);
  }

  /**
   * Constructor which parses the data from a file of some specified type.
   * 
   * @param dataObject
   *          Filename, URL or Pasted String to read from.
   * @param sourceType
   *          What type of file to read from (File, URL, Pasted String)
   * @throws IOException
   */
  public AlignFile(String dataObject, DataSourceType sourceType)
          throws IOException
  {
    this(true, dataObject, sourceType);
  }

  /**
   * Constructor which (optionally delays) parsing of data from a file of some
   * specified type.
   * 
   * @param parseImmediately
   *          if false, need to call 'doParse()' to begin parsing data
   * @param dataObject
   *          Filename, URL or Pasted String to read from.
   * @param sourceType
   *          What type of file to read from (File, URL)
   * @throws IOException
   */
  public AlignFile(boolean parseImmediately, String dataObject,
          DataSourceType sourceType) throws IOException
  {
    super(dataObject, sourceType);
    initData();

    // stash the flag (as the FileParse based constructor does) so that
    // isParseImmediately() reports how this parser was really constructed
    this.parseImmediately = parseImmediately;

    if (parseImmediately)
    {
      doParse();
    }
  }

  /**
   * Attempt to read from the position where some other parsing process left
   * off.
   * 
   * @param source
   * @throws IOException
   */
  public AlignFile(FileParse source) throws IOException
  {
    this(true, source);
  }

  /**
   * Construct a new parser to read from the position where some other parsing
   * process left
   * 
   * @param parseImmediately
   *          if false, need to call 'doParse()' to begin parsing data
   * @param source
   * @throws IOException
   */
  public AlignFile(boolean parseImmediately, FileParse source)
          throws IOException
  {
    super(source);
    initData();

    // stash flag in case parse needs to know if it has to autoconfigure or was
    // configured after construction
    this.parseImmediately = parseImmediately;

    if (parseImmediately)
    {
      doParse();
    }
  }

  /**
   * called if parsing was delayed till after parser was constructed
   * 
   * @throws IOException
   *           if parsing fails, or if this method has already been called
   *           since the last call to initData()
   */
  public void doParse() throws IOException
  {
    if (parseCalled)
    {
      throw new IOException(
              "Implementation error: Parser called twice for same data.\n"
                      + "Need to call initData() again before parsing can be reattempted.");
    }
    // flag is set before parsing, so a failed parse also counts as 'called'
    parseCalled = true;
    parse();
  }

  /**
   * Return the seqs Vector
   */
  public Vector<SequenceI> getSeqs()
  {
    return seqs;
  }

  /**
   * Return the list of sequence groups held by this object
   */
  public List<SequenceGroup> getSeqGroups()
  {
    return seqGroups;
  }

  /**
   * Return the Sequences in the seqs Vector as an array of Sequences
   */
  @Override
  public SequenceI[] getSeqsAsArray()
  {
    return seqs.toArray(new SequenceI[seqs.size()]);
  }

  /**
   * called by AppletFormatAdapter to generate an annotated alignment, rather
   * than bare sequences. Alignment properties are transferred first (see
   * {@link #addProperties(AlignmentI)}), then each stored annotation is
   * validated and added to the alignment.
   * 
   * @param al
   *          the alignment to add properties and annotations to
   */
  @Override
  public void addAnnotations(AlignmentI al)
  {
    addProperties(al);
    for (AlignmentAnnotation an : annotations)
    {
      // detect if the annotation is rna secondary structure
      // if so then do:
      //
      // SequenceFeature[] pairArray =
      // Rna.GetBasePairsFromAlignmentAnnotation(an);
      // Rna.HelixMap(pairArray);
      an.validateRangeAndDisplay();
      al.addAnnotation(an);
    }
  }

  /**
   * register sequence groups on the alignment for **output**. Note that this
   * stores the list returned by the alignment's getGroups() directly, without
   * copying it.
   * 
   * @param al
   *          the alignment whose groups are to be recorded
   */
  public void addSeqGroups(AlignmentI al)
  {
    this.seqGroups = al.getGroups();
  }

  /**
   * Add any additional information extracted from the file to the alignment
   * properties.
   * 
   * @note implicitly called by addAnnotations()
   * @param al
   *          the alignment to set properties on
   */
  public void addProperties(AlignmentI al)
  {
    if (properties == null || properties.isEmpty())
    {
      return;
    }
    // look each value up by its key, rather than relying on keys() and
    // elements() enumerating the table in the same order
    Enumeration<Object> keys = properties.keys();
    while (keys.hasMoreElements())
    {
      Object key = keys.nextElement();
      al.setProperty(key, properties.get(key));
    }
  }

  /**
   * Store a non-null key-value pair in a hashtable used to set alignment
   * properties note: null keys will raise an error, null values will result in
   * the key/value pair being silently ignored.
   * 
   * @param key
   *          - non-null key object
   * @param value
   *          - non-null value
   */
  protected void setAlignmentProperty(Object key, Object value)
  {
    if (key == null)
    {
      throw new Error(MessageManager.getString(
              "error.implementation_error_cannot_have_null_alignment"));
    }
    if (value == null)
    {
      return; // null properties are ignored.
    }
    if (properties == null)
    {
      properties = new Hashtable<Object, Object>();
    }
    properties.put(key, value);
  }

  /**
   * Look up a property previously stored with
   * {@link #setAlignmentProperty(Object, Object)}.
   * 
   * @param key
   *          the property key
   * @return the stored value, or null if the key is null, no properties have
   *         been stored, or there is no value for the key
   */
  protected Object getAlignmentProperty(Object key)
  {
    if (properties != null && key != null)
    {
      return properties.get(key);
    }
    return null;
  }

  /**
   * Initialise objects to store sequence data in. Also clears the flag which
   * records that parsing has been done, so that doParse() may be called again.
   */
  protected void initData()
  {
    seqs = new Vector<SequenceI>();
    annotations = new Vector<AlignmentAnnotation>();
    seqGroups = new ArrayList<SequenceGroup>();
    parseCalled = false;
  }

  /**
   * Replace any sequences currently held with the contents of the given array
   * (in array order).
   * 
   * @param s
   *          the sequences to store (must not be null)
   */
  @Override
  public void setSeqs(SequenceI[] s)
  {
    seqs = new Vector<SequenceI>();

    for (SequenceI seq : s)
    {
      seqs.addElement(seq);
    }
  }

  /**
   * This method must be implemented to parse the contents of the file.
   */
  public abstract void parse() throws IOException;

  /**
   * A general parser for ids. The id is trimmed; if it then contains a space,
   * the text before the first space becomes the sequence name and the text
   * after it becomes the sequence description. The sequence is created with an
   * empty sequence string.
   * 
   * @param id
   *          Id to be parsed (must not be null)
   * @return a new Sequence with name (and description, if any) set
   */
  Sequence parseId(String id)
  {
    Sequence seq = null;
    id = id.trim();
    int space = id.indexOf(" ");
    if (space > -1)
    {
      seq = new Sequence(id.substring(0, space), "");
      String desc = id.substring(space + 1);
      seq.setDescription(desc);

      /*
       * it is tempting to parse Ensembl style gene description e.g.
       * chromosome:GRCh38:7:140696688:140721955:1 and set the
       * start position of the sequence, but this causes much confusion
       * for reverse strand feature locations
       */
    }
    else
    {
      seq = new Sequence(id, "");
    }

    return seq;
  }

  /**
   * Creates the output id. Adds prefix Uniprot format source|id and optionally
   * suffix Jalview /start-end. Delegates to the sequence's getDisplayId().
   * 
   * @param seq
   *          the sequence whose id is to be output
   * @param jvsuffix
   *          whether to add the Jalview /start-end suffix
   * @return the id to output for the sequence
   */
  String printId(SequenceI seq, boolean jvsuffix)
  {
    return seq.getDisplayId(jvsuffix);
  }

  /**
   * Creates the output id for the sequence, including the Jalview /start-end
   * suffix.
   * 
   * @param seq
   *          the sequence whose id is to be output
   * @return the id to output for the sequence
   */
  String printId(SequenceI seq)
  {
    return printId(seq, true);
  }

  /**
   * vector of String[] treeName, newickString pairs
   */
  Vector<String[]> newickStrings = null;

  /**
   * Record a named Newick tree string.
   * 
   * @param treeName
   *          the name of the tree
   * @param newickString
   *          the tree in Newick format
   */
  protected void addNewickTree(String treeName, String newickString)
  {
    if (newickStrings == null)
    {
      newickStrings = new Vector<String[]>();
    }
    newickStrings.addElement(new String[] { treeName, newickString });
  }

  /**
   * @return the number of Newick trees recorded so far
   */
  protected int getTreeCount()
  {
    return newickStrings == null ? 0 : newickStrings.size();
  }

  /**
   * Add each of the stored sequence groups to the given alignment.
   * 
   * @param al
   *          the alignment to add groups to
   */
  @Override
  public void addGroups(AlignmentI al)
  {
    for (SequenceGroup sg : getSeqGroups())
    {
      al.addGroup(sg);
    }
  }

  /**
   * Append a sequence to those held by this object.
   * 
   * @param seq
   *          the sequence to add
   */
  protected void addSequence(SequenceI seq)
  {
    seqs.add(seq);
  }
}