1 /* 2 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4) 3 * Copyright (C) 2021 The Jalview Authors 4 * 5 * This file is part of Jalview. 6 * 7 * Jalview is free software: you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation, either version 3 10 * of the License, or (at your option) any later version. 11 * 12 * Jalview is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty 14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 15 * PURPOSE. See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>. 19 * The Jalview Authors are detailed in the 'AUTHORS' file. 20 */ 21 package jalview.gui; 22 23 import jalview.analysis.AlignmentUtils; 24 import jalview.analysis.CrossRef; 25 import jalview.api.AlignmentViewPanel; 26 import jalview.api.FeatureSettingsModelI; 27 import jalview.bin.Cache; 28 import jalview.datamodel.Alignment; 29 import jalview.datamodel.AlignmentI; 30 import jalview.datamodel.DBRefEntry; 31 import jalview.datamodel.DBRefSource; 32 import jalview.datamodel.GeneLociI; 33 import jalview.datamodel.SequenceI; 34 import jalview.ext.ensembl.EnsemblInfo; 35 import jalview.ext.ensembl.EnsemblMap; 36 import jalview.io.gff.SequenceOntologyI; 37 import jalview.structure.StructureSelectionManager; 38 import jalview.util.DBRefUtils; 39 import jalview.util.MapList; 40 import jalview.util.MappingUtils; 41 import jalview.util.MessageManager; 42 import jalview.viewmodel.seqfeatures.FeatureRendererModel; 43 import jalview.ws.SequenceFetcher; 44 45 import java.util.ArrayList; 46 import java.util.HashMap; 47 import java.util.List; 48 import java.util.Map; 49 import java.util.Set; 50 51 /** 52 * Factory constructor and runnable for discovering and displaying 53 * cross-references for a set of aligned sequences 54 * 55 * @author jprocter 56 * 57 */ 58 public class CrossRefAction implements Runnable 59 { 60 private AlignFrame alignFrame; 61 62 private SequenceI[] sel; 63 64 private final boolean _odna; 65 66 private String source; 67 68 List<AlignmentViewPanel> xrefViews = new ArrayList<>(); 69 getXrefViews()70 List<AlignmentViewPanel> getXrefViews() 71 { 72 return xrefViews; 73 } 74 75 @Override run()76 public void run() 77 { 78 final long sttime = System.currentTimeMillis(); 79 alignFrame.setProgressBar(MessageManager.formatMessage( 80 "status.searching_for_sequences_from", new Object[] 81 { source }), sttime); 82 try 83 { 84 AlignmentI alignment = alignFrame.getViewport().getAlignment(); 85 AlignmentI dataset = alignment.getDataset() == null ? alignment 86 : alignment.getDataset(); 87 boolean dna = alignment.isNucleotide(); 88 if (_odna != dna) 89 { 90 System.err 91 .println("Conflict: showProducts for alignment originally " 92 + "thought to be " + (_odna ? "DNA" : "Protein") 93 + " now searching for " + (dna ? "DNA" : "Protein") 94 + " Context."); 95 } 96 AlignmentI xrefs = new CrossRef(sel, dataset) 97 .findXrefSequences(source, dna); 98 if (xrefs == null) 99 { 100 return; 101 } 102 103 /* 104 * try to look up chromosomal coordinates for nucleotide 105 * sequences (if not already retrieved) 106 */ 107 findGeneLoci(xrefs.getSequences()); 108 109 /* 110 * get display scheme (if any) to apply to features 111 */ 112 FeatureSettingsModelI featureColourScheme = new SequenceFetcher() 113 .getFeatureColourScheme(source); 114 115 if (dna && AlignmentUtils.looksLikeEnsembl(alignment)) 116 { 117 // override default featureColourScheme so products have Ensembl variant colours 118 featureColourScheme = new SequenceFetcher() 119 .getFeatureColourScheme(DBRefSource.ENSEMBL); 120 } 121 122 AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset, 123 xrefs); 124 if (!dna) 125 { 126 xrefsAlignment = AlignmentUtils.makeCdsAlignment( 127 xrefsAlignment.getSequencesArray(), dataset, sel); 128 xrefsAlignment.alignAs(alignment); 129 } 130 131 /* 132 * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset 133 * sequences). If we are DNA, drop introns and update mappings 134 */ 135 AlignmentI copyAlignment = null; 136 137 if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) 138 { 139 copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna, 140 xrefs, xrefsAlignment); 141 if (copyAlignment == null) 142 { 143 return; // failed 144 } 145 } 146 147 /* 148 * build AlignFrame(s) according to available alignment data 149 */ 150 AlignFrame newFrame = new AlignFrame(xrefsAlignment, 151 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); 152 if (Cache.getDefault("HIDE_INTRONS", true)) 153 { 154 newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); 155 } 156 String newtitle = String.format("%s %s %s", 157 dna ? MessageManager.getString("label.proteins") 158 : MessageManager.getString("label.nucleotides"), 159 MessageManager.getString("label.for"), alignFrame.getTitle()); 160 newFrame.setTitle(newtitle); 161 162 if (copyAlignment == null) 163 { 164 /* 165 * split frame display is turned off in preferences file 166 */ 167 Desktop.addInternalFrame(newFrame, newtitle, 168 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); 169 xrefViews.add(newFrame.alignPanel); 170 return; // via finally clause 171 } 172 173 AlignFrame copyThis = new AlignFrame(copyAlignment, 174 AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); 175 copyThis.setTitle(alignFrame.getTitle()); 176 177 boolean showSequenceFeatures = alignFrame.getViewport() 178 .isShowSequenceFeatures(); 179 newFrame.setShowSeqFeatures(showSequenceFeatures); 180 copyThis.setShowSeqFeatures(showSequenceFeatures); 181 FeatureRendererModel myFeatureStyling = alignFrame.alignPanel 182 .getSeqPanel().seqCanvas.getFeatureRenderer(); 183 184 /* 185 * copy feature rendering settings to split frame 186 */ 187 FeatureRendererModel fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas 188 .getFeatureRenderer(); 189 fr1.transferSettings(myFeatureStyling); 190 fr1.findAllFeatures(true); 191 FeatureRendererModel fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas 192 .getFeatureRenderer(); 193 fr2.transferSettings(myFeatureStyling); 194 fr2.findAllFeatures(true); 195 196 /* 197 * apply 'database source' feature configuration 198 * if any - first to the new splitframe view about to be displayed 199 */ 200 201 newFrame.getViewport().applyFeaturesStyle(featureColourScheme); 202 copyThis.getViewport().applyFeaturesStyle(featureColourScheme); 203 204 /* 205 * and for JAL-3330 also to original alignFrame view(s) 206 * this currently trashes any original settings. 207 */ 208 for (AlignmentViewPanel origpanel: alignFrame.getAlignPanels()) { 209 origpanel.getAlignViewport() 210 .mergeFeaturesStyle(featureColourScheme); 211 } 212 213 SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, 214 dna ? newFrame : copyThis); 215 216 newFrame.setVisible(true); 217 copyThis.setVisible(true); 218 String linkedTitle = MessageManager 219 .getString("label.linked_view_title"); 220 Desktop.addInternalFrame(sf, linkedTitle, -1, -1); 221 sf.adjustInitialLayout(); 222 223 // finally add the top, then bottom frame to the view list 224 xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel); 225 xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel); 226 227 } catch (OutOfMemoryError e) 228 { 229 new OOMWarning("whilst fetching crossreferences", e); 230 } catch (Throwable e) 231 { 232 Cache.log.error("Error when finding crossreferences", e); 233 } finally 234 { 235 alignFrame.setProgressBar(MessageManager.formatMessage( 236 "status.finished_searching_for_sequences_from", new Object[] 237 { source }), sttime); 238 } 239 } 240 241 /** 242 * Tries to add chromosomal coordinates to any nucleotide sequence which does 243 * not already have them. Coordinates are retrieved from Ensembl given an 244 * Ensembl identifier, either on the sequence itself or on a peptide sequence 245 * it has a reference to. 246 * 247 * <pre> 248 * Example (human): 249 * - fetch EMBLCDS cross-references for Uniprot entry P30419 250 * - the EMBL sequences do not have xrefs to Ensembl 251 * - the Uniprot entry has xrefs to 252 * ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782 253 * - either of the transcript ids can be used to retrieve gene loci e.g. 254 * http://rest.ensembl.org/map/cds/ENST00000592782/1..100000 255 * Example (invertebrate): 256 * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC) 257 * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1 258 * - can retrieve gene loci with 259 * http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000 260 * </pre> 261 * 262 * @param sequences 263 */ findGeneLoci(List<SequenceI> sequences)264 public static void findGeneLoci(List<SequenceI> sequences) 265 { 266 Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>(); 267 for (SequenceI seq : sequences) 268 { 269 findGeneLoci(seq, retrievedLoci); 270 } 271 } 272 273 /** 274 * Tres to find chromosomal coordinates for the sequence, by searching its 275 * direct and indirect cross-references for Ensembl. If the loci have already 276 * been retrieved, just reads them out of the map of retrievedLoci; this is 277 * the case of an alternative transcript for the same protein. Otherwise calls 278 * a REST service to retrieve the loci, and if successful, adds them to the 279 * sequence and to the retrievedLoci. 280 * 281 * @param seq 282 * @param retrievedLoci 283 */ findGeneLoci(SequenceI seq, Map<DBRefEntry, GeneLociI> retrievedLoci)284 static void findGeneLoci(SequenceI seq, 285 Map<DBRefEntry, GeneLociI> retrievedLoci) 286 { 287 /* 288 * don't replace any existing chromosomal coordinates 289 */ 290 if (seq == null || seq.isProtein() || seq.getGeneLoci() != null 291 || seq.getDBRefs() == null) 292 { 293 return; 294 } 295 296 Set<String> ensemblDivisions = new EnsemblInfo().getDivisions(); 297 298 /* 299 * first look for direct dbrefs from sequence to Ensembl 300 */ 301 String[] divisionsArray = ensemblDivisions 302 .toArray(new String[ensemblDivisions.size()]); 303 DBRefEntry[] seqRefs = seq.getDBRefs(); 304 DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs, 305 divisionsArray); 306 if (directEnsemblRefs != null) 307 { 308 for (DBRefEntry ensemblRef : directEnsemblRefs) 309 { 310 if (fetchGeneLoci(seq, ensemblRef, retrievedLoci)) 311 { 312 return; 313 } 314 } 315 } 316 317 /* 318 * else look for indirect dbrefs from sequence to Ensembl 319 */ 320 for (DBRefEntry dbref : seq.getDBRefs()) 321 { 322 if (dbref.getMap() != null && dbref.getMap().getTo() != null) 323 { 324 DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs(); 325 DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs, 326 divisionsArray); 327 if (indirectEnsemblRefs != null) 328 { 329 for (DBRefEntry ensemblRef : indirectEnsemblRefs) 330 { 331 if (fetchGeneLoci(seq, ensemblRef, retrievedLoci)) 332 { 333 return; 334 } 335 } 336 } 337 } 338 } 339 } 340 341 /** 342 * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes) 343 * identifier in dbref. If successful, and the sequence length matches gene 344 * loci length, then add it to the sequence, and to the retrievedLoci map. 345 * Answers true if successful, else false. 346 * 347 * @param seq 348 * @param dbref 349 * @param retrievedLoci 350 * @return 351 */ fetchGeneLoci(SequenceI seq, DBRefEntry dbref, Map<DBRefEntry, GeneLociI> retrievedLoci)352 static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref, 353 Map<DBRefEntry, GeneLociI> retrievedLoci) 354 { 355 String accession = dbref.getAccessionId(); 356 String division = dbref.getSource(); 357 358 /* 359 * hack: ignore cross-references to Ensembl protein ids 360 * (or use map/translation perhaps?) 361 * todo: is there an equivalent in EnsemblGenomes? 362 */ 363 if (accession.startsWith("ENSP")) 364 { 365 return false; 366 } 367 EnsemblMap mapper = new EnsemblMap(); 368 369 /* 370 * try CDS mapping first 371 */ 372 GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1, 373 seq.getLength()); 374 if (geneLoci != null) 375 { 376 MapList map = geneLoci.getMapping(); 377 int mappedFromLength = MappingUtils.getLength(map.getFromRanges()); 378 if (mappedFromLength == seq.getLength()) 379 { 380 seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), 381 geneLoci.getChromosomeId(), map); 382 retrievedLoci.put(dbref, geneLoci); 383 return true; 384 } 385 } 386 387 /* 388 * else try CDNA mapping 389 */ 390 geneLoci = mapper.getCdnaMapping(division, accession, 1, 391 seq.getLength()); 392 if (geneLoci != null) 393 { 394 MapList map = geneLoci.getMapping(); 395 int mappedFromLength = MappingUtils.getLength(map.getFromRanges()); 396 if (mappedFromLength == seq.getLength()) 397 { 398 seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), 399 geneLoci.getChromosomeId(), map); 400 retrievedLoci.put(dbref, geneLoci); 401 return true; 402 } 403 } 404 405 return false; 406 } 407 408 /** 409 * @param alignment 410 * @param dataset 411 * @param dna 412 * @param xrefs 413 * @param xrefsAlignment 414 * @return 415 */ copyAlignmentForSplitFrame(AlignmentI alignment, AlignmentI dataset, boolean dna, AlignmentI xrefs, AlignmentI xrefsAlignment)416 protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment, 417 AlignmentI dataset, boolean dna, AlignmentI xrefs, 418 AlignmentI xrefsAlignment) 419 { 420 AlignmentI copyAlignment; 421 boolean copyAlignmentIsAligned = false; 422 if (dna) 423 { 424 copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, 425 xrefsAlignment.getSequencesArray()); 426 if (copyAlignment.getHeight() == 0) 427 { 428 JvOptionPane.showMessageDialog(alignFrame, 429 MessageManager.getString("label.cant_map_cds"), 430 MessageManager.getString("label.operation_failed"), 431 JvOptionPane.OK_OPTION); 432 System.err.println("Failed to make CDS alignment"); 433 return null; 434 } 435 436 /* 437 * pending getting Embl transcripts to 'align', 438 * we are only doing this for Ensembl 439 */ 440 // TODO proper criteria for 'can align as cdna' 441 if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) 442 || AlignmentUtils.looksLikeEnsembl(alignment)) 443 { 444 copyAlignment.alignAs(alignment); 445 copyAlignmentIsAligned = true; 446 } 447 } 448 else 449 { 450 copyAlignment = AlignmentUtils.makeCopyAlignment(sel, 451 xrefs.getSequencesArray(), dataset); 452 } 453 copyAlignment 454 .setGapCharacter(alignFrame.viewport.getGapCharacter()); 455 456 StructureSelectionManager ssm = StructureSelectionManager 457 .getStructureSelectionManager(Desktop.instance); 458 459 /* 460 * register any new mappings for sequence mouseover etc 461 * (will not duplicate any previously registered mappings) 462 */ 463 ssm.registerMappings(dataset.getCodonFrames()); 464 465 if (copyAlignment.getHeight() <= 0) 466 { 467 System.err.println( 468 "No Sequences generated for xRef type " + source); 469 return null; 470 } 471 472 /* 473 * align protein to dna 474 */ 475 if (dna && copyAlignmentIsAligned) 476 { 477 xrefsAlignment.alignAs(copyAlignment); 478 } 479 else 480 { 481 /* 482 * align cdna to protein - currently only if 483 * fetching and aligning Ensembl transcripts! 484 */ 485 // TODO: generalise for other sources of locus/transcript/cds data 486 if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) 487 { 488 copyAlignment.alignAs(xrefsAlignment); 489 } 490 } 491 492 return copyAlignment; 493 } 494 495 /** 496 * Makes an alignment containing the given sequences, and adds them to the 497 * given dataset, which is also set as the dataset for the new alignment 498 * 499 * TODO: refactor to DatasetI method 500 * 501 * @param dataset 502 * @param seqs 503 * @return 504 */ makeCrossReferencesAlignment(AlignmentI dataset, AlignmentI seqs)505 protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset, 506 AlignmentI seqs) 507 { 508 SequenceI[] sprods = new SequenceI[seqs.getHeight()]; 509 for (int s = 0; s < sprods.length; s++) 510 { 511 sprods[s] = (seqs.getSequenceAt(s)).deriveSequence(); 512 if (dataset.getSequences() == null || !dataset.getSequences() 513 .contains(sprods[s].getDatasetSequence())) 514 { 515 dataset.addSequence(sprods[s].getDatasetSequence()); 516 } 517 sprods[s].updatePDBIds(); 518 } 519 Alignment al = new Alignment(sprods); 520 al.setDataset(dataset); 521 return al; 522 } 523 524 /** 525 * Constructor 526 * 527 * @param af 528 * @param seqs 529 * @param fromDna 530 * @param dbSource 531 */ CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna, String dbSource)532 CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna, 533 String dbSource) 534 { 535 this.alignFrame = af; 536 this.sel = seqs; 537 this._odna = fromDna; 538 this.source = dbSource; 539 } 540 getHandlerFor(final SequenceI[] sel, final boolean fromDna, final String source, final AlignFrame alignFrame)541 public static CrossRefAction getHandlerFor(final SequenceI[] sel, 542 final boolean fromDna, final String source, 543 final AlignFrame alignFrame) 544 { 545 return new CrossRefAction(alignFrame, sel, fromDna, source); 546 } 547 548 } 549