1 /* 2 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4) 3 * Copyright (C) 2021 The Jalview Authors 4 * 5 * This file is part of Jalview. 6 * 7 * Jalview is free software: you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation, either version 3 10 * of the License, or (at your option) any later version. 11 * 12 * Jalview is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty 14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 15 * PURPOSE. See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>. 19 * The Jalview Authors are detailed in the 'AUTHORS' file. 20 */ 21 package jalview.datamodel; 22 23 import java.util.Comparator; 24 import java.util.LinkedHashMap; 25 import java.util.Map; 26 import java.util.Map.Entry; 27 import java.util.SortedMap; 28 import java.util.TreeMap; 29 import java.util.Vector; 30 31 import jalview.datamodel.features.FeatureAttributeType; 32 import jalview.datamodel.features.FeatureAttributes; 33 import jalview.datamodel.features.FeatureLocationI; 34 import jalview.datamodel.features.FeatureSourceI; 35 import jalview.datamodel.features.FeatureSources; 36 import jalview.util.StringUtils; 37 38 /** 39 * A class that models a single contiguous feature on a sequence. If flag 40 * 'contactFeature' is true, the start and end positions are interpreted instead 41 * as two contact points. 42 */ 43 public class SequenceFeature implements FeatureLocationI 44 { 45 /* 46 * score value if none is set; preferably Float.Nan, but see 47 * JAL-2060 and JAL-2554 for a couple of blockers to that 48 */ 49 private static final float NO_SCORE = 0f; 50 51 private static final String STATUS = "status"; 52 53 public static final String STRAND = "STRAND"; 54 55 // key for Phase designed not to conflict with real GFF data 56 public static final String PHASE = "!Phase"; 57 58 // private key for ENA location designed not to conflict with real GFF data 59 private static final String LOCATION = "!Location"; 60 61 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>"; 62 63 /* 64 * type, begin, end, featureGroup, score and contactFeature are final 65 * to ensure that the integrity of SequenceFeatures data store 66 * can't be broken by direct update of these fields 67 */ 68 public final String type; 69 70 public final int begin; 71 72 public final int end; 73 74 public final String featureGroup; 75 76 public final float score; 77 78 private final boolean contactFeature; 79 80 public String description; 81 82 /* 83 * a map of key-value pairs; may be populated from GFF 'column 9' data, 84 * other data sources (e.g. GenBank file), or programmatically 85 */ 86 public Map<String, Object> otherDetails; 87 88 public Vector<String> links; 89 90 /* 91 * the identifier (if known) for the FeatureSource held in FeatureSources, 92 * as a provider of metadata about feature attributes 93 */ 94 private String source; 95 96 /** 97 * Constructs a duplicate feature. Note: Uses makes a shallow copy of the 98 * otherDetails map, so the new and original SequenceFeature may reference the 99 * same objects in the map. 100 * 101 * @param cpy 102 */ SequenceFeature(SequenceFeature cpy)103 public SequenceFeature(SequenceFeature cpy) 104 { 105 this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(), cpy 106 .getScore()); 107 } 108 109 /** 110 * Constructor 111 * 112 * @param theType 113 * @param theDesc 114 * @param theBegin 115 * @param theEnd 116 * @param group 117 */ SequenceFeature(String theType, String theDesc, int theBegin, int theEnd, String group)118 public SequenceFeature(String theType, String theDesc, int theBegin, 119 int theEnd, String group) 120 { 121 this(theType, theDesc, theBegin, theEnd, NO_SCORE, group); 122 } 123 124 /** 125 * Constructor including a score value 126 * 127 * @param theType 128 * @param theDesc 129 * @param theBegin 130 * @param theEnd 131 * @param theScore 132 * @param group 133 */ SequenceFeature(String theType, String theDesc, int theBegin, int theEnd, float theScore, String group)134 public SequenceFeature(String theType, String theDesc, int theBegin, 135 int theEnd, float theScore, String group) 136 { 137 this.type = theType; 138 this.description = theDesc; 139 this.begin = theBegin; 140 this.end = theEnd; 141 this.featureGroup = group; 142 this.score = theScore; 143 144 /* 145 * for now, only "Disulfide/disulphide bond" is treated as a contact feature 146 */ 147 this.contactFeature = "disulfide bond".equalsIgnoreCase(type) 148 || "disulphide bond".equalsIgnoreCase(type); 149 } 150 151 /** 152 * A copy constructor that allows the value of final fields to be 'modified' 153 * 154 * @param sf 155 * @param newType 156 * @param newBegin 157 * @param newEnd 158 * @param newGroup 159 * @param newScore 160 */ SequenceFeature(SequenceFeature sf, String newType, int newBegin, int newEnd, String newGroup, float newScore)161 public SequenceFeature(SequenceFeature sf, String newType, int newBegin, 162 int newEnd, String newGroup, float newScore) 163 { 164 this(newType, sf.getDescription(), newBegin, newEnd, newScore, 165 newGroup); 166 167 this.source = sf.source; 168 169 if (sf.otherDetails != null) 170 { 171 otherDetails = new LinkedHashMap<>(); 172 otherDetails.putAll(sf.otherDetails); 173 } 174 if (sf.links != null && sf.links.size() > 0) 175 { 176 links = new Vector<>(); 177 links.addAll(sf.links); 178 } 179 } 180 181 /** 182 * A copy constructor that allows the value of final fields to be 'modified' 183 * 184 * @param sf 185 * @param newBegin 186 * @param newEnd 187 * @param newGroup 188 * @param newScore 189 */ SequenceFeature(SequenceFeature sf, int newBegin, int newEnd, String newGroup, float newScore)190 public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd, 191 String newGroup, float newScore) 192 { 193 this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore); 194 } 195 196 /** 197 * Two features are considered equal if they have the same type, group, 198 * description, start, end, phase, strand, and (if present) 'Name', ID' and 199 * 'Parent' attributes. 200 * 201 * Note we need to check Parent to distinguish the same exon occurring in 202 * different transcripts (in Ensembl GFF). This allows assembly of transcript 203 * sequences from their component exon regions. 204 */ 205 @Override equals(Object o)206 public boolean equals(Object o) 207 { 208 return equals(o, false); 209 } 210 211 /** 212 * Overloaded method allows the equality test to optionally ignore the 213 * 'Parent' attribute of a feature. This supports avoiding adding many 214 * superficially duplicate 'exon' or CDS features to genomic or protein 215 * sequence. 216 * 217 * @param o 218 * @param ignoreParent 219 * @return 220 */ equals(Object o, boolean ignoreParent)221 public boolean equals(Object o, boolean ignoreParent) 222 { 223 if (o == null || !(o instanceof SequenceFeature)) 224 { 225 return false; 226 } 227 228 SequenceFeature sf = (SequenceFeature) o; 229 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score) 230 : score == sf.score; 231 if (begin != sf.begin || end != sf.end || !sameScore) 232 { 233 return false; 234 } 235 236 if (getStrand() != sf.getStrand()) 237 { 238 return false; 239 } 240 241 if (!(type + description + featureGroup + getPhase()).equals( 242 sf.type + sf.description + sf.featureGroup + sf.getPhase())) 243 { 244 return false; 245 } 246 if (!equalAttribute(getValue("ID"), sf.getValue("ID"))) 247 { 248 return false; 249 } 250 if (!equalAttribute(getValue("Name"), sf.getValue("Name"))) 251 { 252 return false; 253 } 254 if (!ignoreParent) 255 { 256 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent"))) 257 { 258 return false; 259 } 260 } 261 return true; 262 } 263 264 /** 265 * Returns true if both values are null, are both non-null and equal 266 * 267 * @param att1 268 * @param att2 269 * @return 270 */ equalAttribute(Object att1, Object att2)271 protected static boolean equalAttribute(Object att1, Object att2) 272 { 273 if (att1 == null && att2 == null) 274 { 275 return true; 276 } 277 if (att1 != null) 278 { 279 return att1.equals(att2); 280 } 281 return att2.equals(att1); 282 } 283 284 /** 285 * DOCUMENT ME! 286 * 287 * @return DOCUMENT ME! 288 */ 289 @Override getBegin()290 public int getBegin() 291 { 292 return begin; 293 } 294 295 /** 296 * DOCUMENT ME! 297 * 298 * @return DOCUMENT ME! 299 */ 300 @Override getEnd()301 public int getEnd() 302 { 303 return end; 304 } 305 306 /** 307 * DOCUMENT ME! 308 * 309 * @return DOCUMENT ME! 310 */ getType()311 public String getType() 312 { 313 return type; 314 } 315 316 /** 317 * DOCUMENT ME! 318 * 319 * @return DOCUMENT ME! 320 */ getDescription()321 public String getDescription() 322 { 323 return description; 324 } 325 setDescription(String desc)326 public void setDescription(String desc) 327 { 328 description = desc; 329 } 330 getFeatureGroup()331 public String getFeatureGroup() 332 { 333 return featureGroup; 334 } 335 336 /** 337 * Adds a hyperlink for the feature. This should have the format label|url. 338 * 339 * @param labelLink 340 */ addLink(String labelLink)341 public void addLink(String labelLink) 342 { 343 if (links == null) 344 { 345 links = new Vector<>(); 346 } 347 348 if (!links.contains(labelLink)) 349 { 350 links.insertElementAt(labelLink, 0); 351 } 352 } 353 getScore()354 public float getScore() 355 { 356 return score; 357 } 358 359 /** 360 * Used for getting values which are not in the basic set. eg STRAND, PHASE 361 * for GFF file 362 * 363 * @param key 364 * String 365 */ getValue(String key)366 public Object getValue(String key) 367 { 368 if (otherDetails == null) 369 { 370 return null; 371 } 372 else 373 { 374 return otherDetails.get(key); 375 } 376 } 377 378 /** 379 * Answers the value of the specified attribute as string, or null if no such 380 * value. If more than one attribute name is provided, tries to resolve as keys 381 * to nested maps. For example, if attribute "CSQ" holds a map of key-value 382 * pairs, then getValueAsString("CSQ", "Allele") returns the value of "Allele" 383 * in that map. 384 * 385 * @param key 386 * @return 387 */ getValueAsString(String... key)388 public String getValueAsString(String... key) 389 { 390 if (otherDetails == null) 391 { 392 return null; 393 } 394 Object value = otherDetails.get(key[0]); 395 if (key.length > 1 && value instanceof Map<?, ?>) 396 { 397 value = ((Map) value).get(key[1]); 398 } 399 return value == null ? null : value.toString(); 400 } 401 402 /** 403 * Returns a property value for the given key if known, else the specified 404 * default value 405 * 406 * @param key 407 * @param defaultValue 408 * @return 409 */ getValue(String key, Object defaultValue)410 public Object getValue(String key, Object defaultValue) 411 { 412 Object value = getValue(key); 413 return value == null ? defaultValue : value; 414 } 415 416 /** 417 * Used for setting values which are not in the basic set. eg STRAND, FRAME 418 * for GFF file 419 * 420 * @param key 421 * eg STRAND 422 * @param value 423 * eg + 424 */ setValue(String key, Object value)425 public void setValue(String key, Object value) 426 { 427 if (value != null) 428 { 429 if (otherDetails == null) 430 { 431 /* 432 * LinkedHashMap preserves insertion order of attributes 433 */ 434 otherDetails = new LinkedHashMap<>(); 435 } 436 437 otherDetails.put(key, value); 438 recordAttribute(key, value); 439 } 440 } 441 442 /** 443 * Notifies the addition of a feature attribute. This lets us keep track of 444 * which attributes are present on each feature type, and also the range of 445 * numerical-valued attributes. 446 * 447 * @param key 448 * @param value 449 */ recordAttribute(String key, Object value)450 protected void recordAttribute(String key, Object value) 451 { 452 String attDesc = null; 453 if (source != null) 454 { 455 attDesc = FeatureSources.getInstance().getSource(source) 456 .getAttributeName(key); 457 } 458 459 FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value, 460 key); 461 } 462 463 /* 464 * The following methods are added to maintain the castor Uniprot mapping file 465 * for the moment. 466 */ setStatus(String status)467 public void setStatus(String status) 468 { 469 setValue(STATUS, status); 470 } 471 getStatus()472 public String getStatus() 473 { 474 return (String) getValue(STATUS); 475 } 476 477 /** 478 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in 479 * GFF), and 0 for unknown or not (validly) specified 480 * 481 * @return 482 */ getStrand()483 public int getStrand() 484 { 485 int strand = 0; 486 if (otherDetails != null) 487 { 488 Object str = otherDetails.get(STRAND); 489 if ("-".equals(str)) 490 { 491 strand = -1; 492 } 493 else if ("+".equals(str)) 494 { 495 strand = 1; 496 } 497 } 498 return strand; 499 } 500 501 /** 502 * Set the value of strand 503 * 504 * @param strand 505 * should be "+" for forward, or "-" for reverse 506 */ setStrand(String strand)507 public void setStrand(String strand) 508 { 509 setValue(STRAND, strand); 510 } 511 setPhase(String phase)512 public void setPhase(String phase) 513 { 514 setValue(PHASE, phase); 515 } 516 getPhase()517 public String getPhase() 518 { 519 return (String) getValue(PHASE); 520 } 521 522 /** 523 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121) 524 * 525 * @param loc 526 */ setEnaLocation(String loc)527 public void setEnaLocation(String loc) 528 { 529 setValue(LOCATION, loc); 530 } 531 532 /** 533 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121) 534 * 535 * @param loc 536 */ getEnaLocation()537 public String getEnaLocation() 538 { 539 return (String) getValue(LOCATION); 540 } 541 542 /** 543 * Readable representation, for debug only, not guaranteed not to change 544 * between versions 545 */ 546 @Override toString()547 public String toString() 548 { 549 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(), 550 getDescription()); 551 } 552 553 /** 554 * Overridden to ensure that whenever two objects are equal, they have the 555 * same hashCode 556 */ 557 @Override hashCode()558 public int hashCode() 559 { 560 String s = getType() + getDescription() + getFeatureGroup() 561 + getValue("ID") + getValue("Name") + getValue("Parent") 562 + getPhase(); 563 return s.hashCode() + getBegin() + getEnd() + (int) getScore() 564 + getStrand(); 565 } 566 567 /** 568 * Answers true if the feature's start/end values represent two related 569 * positions, rather than ends of a range. Such features may be visualised or 570 * reported differently to features on a range. 571 */ 572 @Override isContactFeature()573 public boolean isContactFeature() 574 { 575 return contactFeature; 576 } 577 578 /** 579 * Answers true if the sequence has zero start and end position 580 * 581 * @return 582 */ isNonPositional()583 public boolean isNonPositional() 584 { 585 return begin == 0 && end == 0; 586 } 587 588 /** 589 * Answers an html-formatted report of feature details. If parameter 590 * {@code mf} is not null, the feature is a virtual linked feature, and 591 * details included both the original location and the mapped location 592 * (CDS/peptide). 593 * 594 * @param seqName 595 * @param mf 596 * 597 * @return 598 */ getDetailsReport(String seqName, MappedFeatures mf)599 public String getDetailsReport(String seqName, MappedFeatures mf) 600 { 601 FeatureSourceI metadata = FeatureSources.getInstance() 602 .getSource(source); 603 604 StringBuilder sb = new StringBuilder(128); 605 sb.append("<br>"); 606 sb.append("<table>"); 607 String name = mf == null ? seqName : mf.getLinkedSequenceName(); 608 sb.append(String.format(ROW_DATA, "Location", name, 609 begin == end ? begin 610 : begin + (isContactFeature() ? ":" : "-") + end)); 611 612 String consequence = ""; 613 if (mf != null) 614 { 615 int[] localRange = mf.getMappedPositions(begin, end); 616 int from = localRange[0]; 617 int to = localRange[localRange.length - 1]; 618 String s = mf.isFromCds() ? "Peptide Location" : "Coding location"; 619 sb.append(String.format(ROW_DATA, s, seqName, from == to ? from 620 : from + (isContactFeature() ? ":" : "-") + to)); 621 if (mf.isFromCds()) 622 { 623 consequence = mf.findProteinVariants(this); 624 } 625 } 626 sb.append(String.format(ROW_DATA, "Type", type, "")); 627 String desc = StringUtils.stripHtmlTags(description); 628 sb.append(String.format(ROW_DATA, "Description", desc, "")); 629 if (!Float.isNaN(score) && score != 0f) 630 { 631 sb.append(String.format(ROW_DATA, "Score", score, "")); 632 } 633 if (featureGroup != null) 634 { 635 sb.append(String.format(ROW_DATA, "Group", featureGroup, "")); 636 } 637 638 if (!consequence.isEmpty()) 639 { 640 sb.append(String.format(ROW_DATA, "Consequence", 641 "<i>Translated by Jalview</i>", consequence)); 642 } 643 644 if (otherDetails != null) 645 { 646 TreeMap<String, Object> ordered = new TreeMap<>( 647 String.CASE_INSENSITIVE_ORDER); 648 ordered.putAll(otherDetails); 649 650 for (Entry<String, Object> entry : ordered.entrySet()) 651 { 652 String key = entry.getKey(); 653 654 Object value = entry.getValue(); 655 if (value instanceof Map<?, ?>) 656 { 657 /* 658 * expand values in a Map attribute across separate lines 659 * copy to a TreeMap for alphabetical ordering 660 */ 661 Map<String, Object> values = (Map<String, Object>) value; 662 SortedMap<String, Object> sm = new TreeMap<>( 663 String.CASE_INSENSITIVE_ORDER); 664 sm.putAll(values); 665 for (Entry<?, ?> e : sm.entrySet()) 666 { 667 sb.append(String.format(ROW_DATA, key, e.getKey().toString(), e 668 .getValue().toString())); 669 } 670 } 671 else 672 { 673 // tried <td title="key"> but it failed to provide a tooltip :-( 674 String attDesc = null; 675 if (metadata != null) 676 { 677 attDesc = metadata.getAttributeName(key); 678 } 679 String s = entry.getValue().toString(); 680 if (isValueInteresting(key, s, metadata)) 681 { 682 sb.append(String.format(ROW_DATA, key, attDesc == null ? "" 683 : attDesc, s)); 684 } 685 } 686 } 687 } 688 sb.append("</table>"); 689 690 String text = sb.toString(); 691 return text; 692 } 693 694 /** 695 * Answers true if we judge the value is worth displaying, by some heuristic 696 * rules, else false 697 * 698 * @param key 699 * @param value 700 * @param metadata 701 * @return 702 */ isValueInteresting(String key, String value, FeatureSourceI metadata)703 boolean isValueInteresting(String key, String value, 704 FeatureSourceI metadata) 705 { 706 /* 707 * currently suppressing zero values as well as null or empty 708 */ 709 if (value == null || "".equals(value) || ".".equals(value) 710 || "0".equals(value)) 711 { 712 return false; 713 } 714 715 if (metadata == null) 716 { 717 return true; 718 } 719 720 FeatureAttributeType attType = metadata.getAttributeType(key); 721 if (attType != null 722 && (attType == FeatureAttributeType.Float || attType 723 .equals(FeatureAttributeType.Integer))) 724 { 725 try 726 { 727 float fval = Float.valueOf(value); 728 if (fval == 0f) 729 { 730 return false; 731 } 732 } catch (NumberFormatException e) 733 { 734 // ignore 735 } 736 } 737 738 return true; // default to interesting 739 } 740 741 /** 742 * Sets the feature source identifier 743 * 744 * @param theSource 745 */ setSource(String theSource)746 public void setSource(String theSource) 747 { 748 source = theSource; 749 } 750 } 751 752 class SFSortByEnd implements Comparator<SequenceFeature> 753 { 754 @Override compare(SequenceFeature a, SequenceFeature b)755 public int compare(SequenceFeature a, SequenceFeature b) 756 { 757 return a.getEnd() - b.getEnd(); 758 } 759 } 760 761 class SFSortByBegin implements Comparator<SequenceFeature> 762 { 763 @Override compare(SequenceFeature a, SequenceFeature b)764 public int compare(SequenceFeature a, SequenceFeature b) 765 { 766 return a.getBegin() - b.getBegin(); 767 } 768 } 769