1 /* 2 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4) 3 * Copyright (C) 2021 The Jalview Authors 4 * 5 * This file is part of Jalview. 6 * 7 * Jalview is free software: you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation, either version 3 10 * of the License, or (at your option) any later version. 11 * 12 * Jalview is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty 14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 15 * PURPOSE. See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>. 19 * The Jalview Authors are detailed in the 'AUTHORS' file. 20 */ 21 package jalview.ws.sifts; 22 23 import java.io.File; 24 import java.io.FileInputStream; 25 import java.io.FileOutputStream; 26 import java.io.IOException; 27 import java.io.InputStream; 28 import java.io.PrintStream; 29 import java.net.URL; 30 import java.net.URLConnection; 31 import java.nio.file.Files; 32 import java.nio.file.Path; 33 import java.nio.file.attribute.BasicFileAttributes; 34 import java.util.ArrayList; 35 import java.util.Arrays; 36 import java.util.Collection; 37 import java.util.Collections; 38 import java.util.Date; 39 import java.util.HashMap; 40 import java.util.HashSet; 41 import java.util.List; 42 import java.util.Map; 43 import java.util.Set; 44 import java.util.TreeMap; 45 import java.util.zip.GZIPInputStream; 46 47 import javax.xml.bind.JAXBContext; 48 import javax.xml.bind.Unmarshaller; 49 import javax.xml.stream.XMLInputFactory; 50 import javax.xml.stream.XMLStreamReader; 51 52 import MCview.Atom; 53 import MCview.PDBChain; 54 import jalview.analysis.AlignSeq; 55 import jalview.analysis.scoremodels.ScoreMatrix; 56 import jalview.analysis.scoremodels.ScoreModels; 57 import jalview.api.DBRefEntryI; 58 import jalview.api.SiftsClientI; 59 import jalview.datamodel.DBRefEntry; 60 import jalview.datamodel.DBRefSource; 61 import jalview.datamodel.SequenceI; 62 import jalview.io.BackupFiles; 63 import jalview.io.StructureFile; 64 import jalview.schemes.ResidueProperties; 65 import jalview.structure.StructureMapping; 66 import jalview.util.Comparison; 67 import jalview.util.DBRefUtils; 68 import jalview.util.Format; 69 import jalview.xml.binding.sifts.Entry; 70 import jalview.xml.binding.sifts.Entry.Entity; 71 import jalview.xml.binding.sifts.Entry.Entity.Segment; 72 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; 73 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; 74 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; 75 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; 76 77 public class SiftsClient implements SiftsClientI 78 { 79 /* 80 * for use in mocking out file fetch for tests only 81 * - reset to null after testing! 82 */ 83 private static File mockSiftsFile; 84 85 private Entry siftsEntry; 86 87 private StructureFile pdb; 88 89 private String pdbId; 90 91 private String structId; 92 93 private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; 94 95 /** 96 * PDB sequence position to sequence coordinate mapping as derived from SIFTS 97 * record for the identified SeqCoordSys Used for lift-over from sequence 98 * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence 99 * being annotated with PDB data 100 */ 101 private jalview.datamodel.Mapping seqFromPdbMapping; 102 103 private static final int BUFFER_SIZE = 4096; 104 105 public static final int UNASSIGNED = Integer.MIN_VALUE; 106 107 private static final int PDB_RES_POS = 0; 108 109 private static final int PDB_ATOM_POS = 1; 110 111 private static final int PDBE_POS = 2; 112 113 private static final String NOT_OBSERVED = "Not_Observed"; 114 115 private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; 116 117 private final static String NEWLINE = System.lineSeparator(); 118 119 private String curSourceDBRef; 120 121 private HashSet<String> curDBRefAccessionIdsString; 122 123 private enum CoordinateSys 124 { 125 UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); 126 127 private String name; 128 CoordinateSys(String name)129 private CoordinateSys(String name) 130 { 131 this.name = name; 132 } 133 getName()134 public String getName() 135 { 136 return name; 137 } 138 }; 139 140 private enum ResidueDetailType 141 { 142 NAME_SEC_STRUCTURE("nameSecondaryStructure"), 143 CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation"); 144 145 private String code; 146 ResidueDetailType(String code)147 private ResidueDetailType(String code) 148 { 149 this.code = code; 150 } 151 getCode()152 public String getCode() 153 { 154 return code; 155 } 156 }; 157 158 /** 159 * Fetch SIFTs file for the given PDBfile and construct an instance of 160 * SiftsClient 161 * 162 * @param pdbId 163 * @throws SiftsException 164 */ SiftsClient(StructureFile pdb)165 public SiftsClient(StructureFile pdb) throws SiftsException 166 { 167 this.pdb = pdb; 168 this.pdbId = pdb.getId(); 169 File siftsFile = getSiftsFile(pdbId); 170 siftsEntry = parseSIFTs(siftsFile); 171 } 172 173 /** 174 * Parse the given SIFTs File and return a JAXB POJO of parsed data 175 * 176 * @param siftFile 177 * - the GZipped SIFTs XML file to parse 178 * @return 179 * @throws Exception 180 * if a problem occurs while parsing the SIFTs XML 181 */ parseSIFTs(File siftFile)182 private Entry parseSIFTs(File siftFile) throws SiftsException 183 { 184 try (InputStream in = new FileInputStream(siftFile); 185 GZIPInputStream gzis = new GZIPInputStream(in);) 186 { 187 // System.out.println("File : " + siftFile.getAbsolutePath()); 188 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); 189 XMLStreamReader streamReader = XMLInputFactory.newInstance() 190 .createXMLStreamReader(gzis); 191 Unmarshaller um = jc.createUnmarshaller(); 192 return (Entry) um.unmarshal(streamReader); 193 } catch (Exception e) 194 { 195 e.printStackTrace(); 196 throw new SiftsException(e.getMessage()); 197 } 198 } 199 200 /** 201 * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP 202 * repository if not found in cache 203 * 204 * @param pdbId 205 * @return SIFTs XML file 206 * @throws SiftsException 207 */ getSiftsFile(String pdbId)208 public static File getSiftsFile(String pdbId) throws SiftsException 209 { 210 /* 211 * return mocked file if it has been set 212 */ 213 if (mockSiftsFile != null) 214 { 215 return mockSiftsFile; 216 } 217 218 String siftsFileName = SiftsSettings.getSiftDownloadDirectory() 219 + pdbId.toLowerCase() + ".xml.gz"; 220 File siftsFile = new File(siftsFileName); 221 if (siftsFile.exists()) 222 { 223 // The line below is required for unit testing... don't comment it out!!! 224 System.out.println(">>> SIFTS File already downloaded for " + pdbId); 225 226 if (isFileOlderThanThreshold(siftsFile, 227 SiftsSettings.getCacheThresholdInDays())) 228 { 229 File oldSiftsFile = new File(siftsFileName + "_old"); 230 BackupFiles.moveFileToFile(siftsFile, oldSiftsFile); 231 try 232 { 233 siftsFile = downloadSiftsFile(pdbId.toLowerCase()); 234 oldSiftsFile.delete(); 235 return siftsFile; 236 } catch (IOException e) 237 { 238 e.printStackTrace(); 239 BackupFiles.moveFileToFile(oldSiftsFile, siftsFile); 240 return new File(siftsFileName); 241 } 242 } 243 else 244 { 245 return siftsFile; 246 } 247 } 248 try 249 { 250 siftsFile = downloadSiftsFile(pdbId.toLowerCase()); 251 } catch (IOException e) 252 { 253 throw new SiftsException(e.getMessage()); 254 } 255 return siftsFile; 256 } 257 258 /** 259 * This method enables checking if a cached file has exceeded a certain 260 * threshold(in days) 261 * 262 * @param file 263 * the cached file 264 * @param noOfDays 265 * the threshold in days 266 * @return 267 */ isFileOlderThanThreshold(File file, int noOfDays)268 public static boolean isFileOlderThanThreshold(File file, int noOfDays) 269 { 270 Path filePath = file.toPath(); 271 BasicFileAttributes attr; 272 int diffInDays = 0; 273 try 274 { 275 attr = Files.readAttributes(filePath, BasicFileAttributes.class); 276 diffInDays = (int) ((new Date().getTime() 277 - attr.lastModifiedTime().toMillis()) 278 / (1000 * 60 * 60 * 24)); 279 // System.out.println("Diff in days : " + diffInDays); 280 } catch (IOException e) 281 { 282 e.printStackTrace(); 283 } 284 return noOfDays <= diffInDays; 285 } 286 287 /** 288 * Download a SIFTs XML file for a given PDB Id from an FTP repository 289 * 290 * @param pdbId 291 * @return downloaded SIFTs XML file 292 * @throws SiftsException 293 * @throws IOException 294 */ downloadSiftsFile(String pdbId)295 public static File downloadSiftsFile(String pdbId) 296 throws SiftsException, IOException 297 { 298 if (pdbId.contains(".cif")) 299 { 300 pdbId = pdbId.replace(".cif", ""); 301 } 302 String siftFile = pdbId + ".xml.gz"; 303 String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; 304 String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() 305 + siftFile; 306 File siftsDownloadDir = new File( 307 SiftsSettings.getSiftDownloadDirectory()); 308 if (!siftsDownloadDir.exists()) 309 { 310 siftsDownloadDir.mkdirs(); 311 } 312 // System.out.println(">> Download ftp url : " + siftsFileFTPURL); 313 // long now = System.currentTimeMillis(); 314 URL url = new URL(siftsFileFTPURL); 315 URLConnection conn = url.openConnection(); 316 InputStream inputStream = conn.getInputStream(); 317 FileOutputStream outputStream = new FileOutputStream( 318 downloadedSiftsFile); 319 byte[] buffer = new byte[BUFFER_SIZE]; 320 int bytesRead = -1; 321 while ((bytesRead = inputStream.read(buffer)) != -1) 322 { 323 outputStream.write(buffer, 0, bytesRead); 324 } 325 outputStream.close(); 326 inputStream.close(); 327 // System.out.println(">>> File downloaded : " + downloadedSiftsFile 328 // + " took " + (System.currentTimeMillis() - now) + "ms"); 329 return new File(downloadedSiftsFile); 330 } 331 332 /** 333 * Delete the SIFTs file for the given PDB Id in the local SIFTs download 334 * directory 335 * 336 * @param pdbId 337 * @return true if the file was deleted or doesn't exist 338 */ deleteSiftsFileByPDBId(String pdbId)339 public static boolean deleteSiftsFileByPDBId(String pdbId) 340 { 341 File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() 342 + pdbId.toLowerCase() + ".xml.gz"); 343 if (siftsFile.exists()) 344 { 345 return siftsFile.delete(); 346 } 347 return true; 348 } 349 350 /** 351 * Get a valid SIFTs DBRef for the given sequence current SIFTs entry 352 * 353 * @param seq 354 * - the target sequence for the operation 355 * @return a valid DBRefEntry that is SIFTs compatible 356 * @throws Exception 357 * if no valid source DBRefEntry was found for the given sequences 358 */ getValidSourceDBRef(SequenceI seq)359 public DBRefEntryI getValidSourceDBRef(SequenceI seq) 360 throws SiftsException 361 { 362 List<DBRefEntry> dbRefs = seq.getPrimaryDBRefs(); 363 if (dbRefs == null || dbRefs.size() < 1) 364 { 365 throw new SiftsException( 366 "Source DBRef could not be determined. DBRefs might not have been retrieved."); 367 } 368 369 for (DBRefEntry dbRef : dbRefs) 370 { 371 if (dbRef == null || dbRef.getAccessionId() == null 372 || dbRef.getSource() == null) 373 { 374 continue; 375 } 376 String canonicalSource = DBRefUtils 377 .getCanonicalName(dbRef.getSource()); 378 if (isValidDBRefEntry(dbRef) 379 && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT) 380 || canonicalSource.equalsIgnoreCase(DBRefSource.PDB))) 381 { 382 return dbRef; 383 } 384 } 385 throw new SiftsException("Could not get source DB Ref"); 386 } 387 388 /** 389 * Check that the DBRef Entry is properly populated and is available in this 390 * SiftClient instance 391 * 392 * @param entry 393 * - DBRefEntry to validate 394 * @return true validation is successful otherwise false is returned. 395 */ isValidDBRefEntry(DBRefEntryI entry)396 boolean isValidDBRefEntry(DBRefEntryI entry) 397 { 398 return entry != null && entry.getAccessionId() != null 399 && isFoundInSiftsEntry(entry.getAccessionId()); 400 } 401 402 @Override getAllMappingAccession()403 public HashSet<String> getAllMappingAccession() 404 { 405 HashSet<String> accessions = new HashSet<String>(); 406 List<Entity> entities = siftsEntry.getEntity(); 407 for (Entity entity : entities) 408 { 409 List<Segment> segments = entity.getSegment(); 410 for (Segment segment : segments) 411 { 412 List<MapRegion> mapRegions = segment.getListMapRegion() 413 .getMapRegion(); 414 for (MapRegion mapRegion : mapRegions) 415 { 416 accessions 417 .add(mapRegion.getDb().getDbAccessionId().toLowerCase()); 418 } 419 } 420 } 421 return accessions; 422 } 423 424 @Override getSiftsStructureMapping(SequenceI seq, String pdbFile, String chain)425 public StructureMapping getSiftsStructureMapping(SequenceI seq, 426 String pdbFile, String chain) throws SiftsException 427 { 428 SequenceI aseq = seq; 429 while (seq.getDatasetSequence() != null) 430 { 431 seq = seq.getDatasetSequence(); 432 } 433 structId = (chain == null) ? pdbId : pdbId + "|" + chain; 434 System.out.println("Getting SIFTS mapping for " + structId + ": seq " 435 + seq.getName()); 436 437 final StringBuilder mappingDetails = new StringBuilder(128); 438 PrintStream ps = new PrintStream(System.out) 439 { 440 @Override 441 public void print(String x) 442 { 443 mappingDetails.append(x); 444 } 445 446 @Override 447 public void println() 448 { 449 mappingDetails.append(NEWLINE); 450 } 451 }; 452 HashMap<Integer, int[]> mapping = getGreedyMapping(chain, seq, ps); 453 454 String mappingOutput = mappingDetails.toString(); 455 StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile, 456 pdbId, chain, mapping, mappingOutput, seqFromPdbMapping); 457 458 return siftsMapping; 459 } 460 461 @Override getGreedyMapping(String entityId, SequenceI seq, java.io.PrintStream os)462 public HashMap<Integer, int[]> getGreedyMapping(String entityId, 463 SequenceI seq, java.io.PrintStream os) throws SiftsException 464 { 465 List<Integer> omitNonObserved = new ArrayList<>(); 466 int nonObservedShiftIndex = 0, pdbeNonObserved = 0; 467 // System.out.println("Generating mappings for : " + entityId); 468 Entity entity = null; 469 entity = getEntityById(entityId); 470 String originalSeq = AlignSeq.extractGaps( 471 jalview.util.Comparison.GapChars, seq.getSequenceAsString()); 472 HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>(); 473 DBRefEntryI sourceDBRef; 474 sourceDBRef = getValidSourceDBRef(seq); 475 // TODO ensure sequence start/end is in the same coordinate system and 476 // consistent with the choosen sourceDBRef 477 478 // set sequence coordinate system - default value is UniProt 479 if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) 480 { 481 seqCoordSys = CoordinateSys.PDB; 482 } 483 484 HashSet<String> dbRefAccessionIdsString = new HashSet<String>(); 485 for (DBRefEntry dbref : seq.getDBRefs()) 486 { 487 dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase()); 488 } 489 dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase()); 490 491 curDBRefAccessionIdsString = dbRefAccessionIdsString; 492 curSourceDBRef = sourceDBRef.getAccessionId(); 493 494 TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>(); 495 List<Segment> segments = entity.getSegment(); 496 SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, 497 omitNonObserved, nonObservedShiftIndex, pdbeNonObserved); 498 processSegments(segments, shp); 499 try 500 { 501 populateAtomPositions(entityId, mapping); 502 } catch (Exception e) 503 { 504 e.printStackTrace(); 505 } 506 if (seqCoordSys == CoordinateSys.UNIPROT) 507 { 508 padWithGaps(resNumMap, omitNonObserved); 509 } 510 int seqStart = UNASSIGNED; 511 int seqEnd = UNASSIGNED; 512 int pdbStart = UNASSIGNED; 513 int pdbEnd = UNASSIGNED; 514 515 if (mapping.isEmpty()) 516 { 517 throw new SiftsException("SIFTS mapping failed"); 518 } 519 // also construct a mapping object between the seq-coord sys and the PDB 520 // seq's coord sys 521 522 Integer[] keys = mapping.keySet().toArray(new Integer[0]); 523 Arrays.sort(keys); 524 seqStart = keys[0]; 525 seqEnd = keys[keys.length - 1]; 526 List<int[]> from = new ArrayList<>(), to = new ArrayList<>(); 527 int[] _cfrom = null, _cto = null; 528 String matchedSeq = originalSeq; 529 if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb 530 // sequence that starts <-1 531 { 532 for (int seqps : keys) 533 { 534 int pdbpos = mapping.get(seqps)[PDBE_POS]; 535 if (pdbpos == UNASSIGNED) 536 { 537 // not correct - pdbpos might be -1, but leave it for now 538 continue; 539 } 540 if (_cfrom == null || seqps != _cfrom[1] + 1) 541 { 542 _cfrom = new int[] { seqps, seqps }; 543 from.add(_cfrom); 544 _cto = null; // discontinuity 545 } 546 else 547 { 548 _cfrom[1] = seqps; 549 } 550 if (_cto == null || pdbpos != 1 + _cto[1]) 551 { 552 _cto = new int[] { pdbpos, pdbpos }; 553 to.add(_cto); 554 } 555 else 556 { 557 _cto[1] = pdbpos; 558 } 559 } 560 _cfrom = new int[from.size() * 2]; 561 _cto = new int[to.size() * 2]; 562 int p = 0; 563 for (int[] range : from) 564 { 565 _cfrom[p++] = range[0]; 566 _cfrom[p++] = range[1]; 567 } 568 ; 569 p = 0; 570 for (int[] range : to) 571 { 572 _cto[p++] = range[0]; 573 _cto[p++] = range[1]; 574 } 575 ; 576 577 seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom, 578 1, 1); 579 pdbStart = mapping.get(seqStart)[PDB_RES_POS]; 580 pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; 581 int orignalSeqStart = seq.getStart(); 582 if (orignalSeqStart >= 1) 583 { 584 int subSeqStart = (seqStart >= orignalSeqStart) 585 ? seqStart - orignalSeqStart 586 : 0; 587 int subSeqEnd = seqEnd - (orignalSeqStart - 1); 588 subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() 589 : subSeqEnd; 590 matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); 591 } 592 else 593 { 594 matchedSeq = originalSeq.substring(1, originalSeq.length()); 595 } 596 } 597 598 StringBuilder targetStrucSeqs = new StringBuilder(); 599 for (String res : resNumMap.values()) 600 { 601 targetStrucSeqs.append(res); 602 } 603 604 if (os != null) 605 { 606 MappingOutputPojo mop = new MappingOutputPojo(); 607 mop.setSeqStart(seqStart); 608 mop.setSeqEnd(seqEnd); 609 mop.setSeqName(seq.getName()); 610 mop.setSeqResidue(matchedSeq); 611 612 mop.setStrStart(pdbStart); 613 mop.setStrEnd(pdbEnd); 614 mop.setStrName(structId); 615 mop.setStrResidue(targetStrucSeqs.toString()); 616 617 mop.setType("pep"); 618 os.print(getMappingOutput(mop).toString()); 619 os.println(); 620 } 621 return mapping; 622 } 623 624 void processSegments(List<Segment> segments, SegmentHelperPojo shp) 625 { 626 SequenceI seq = shp.getSeq(); 627 HashMap<Integer, int[]> mapping = shp.getMapping(); 628 TreeMap<Integer, String> resNumMap = shp.getResNumMap(); 629 List<Integer> omitNonObserved = shp.getOmitNonObserved(); 630 int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); 631 int pdbeNonObservedCount = shp.getPdbeNonObserved(); 632 int firstPDBResNum = UNASSIGNED; 633 for (Segment segment : segments) 634 { 635 // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s 636 // + segStartEnd); 637 List<Residue> residues = segment.getListResidue().getResidue(); 638 for (Residue residue : residues) 639 { 640 boolean isObserved = isResidueObserved(residue); 641 int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(), 642 UNASSIGNED); 643 int currSeqIndex = UNASSIGNED; 644 List<CrossRefDb> cRefDbs = residue.getCrossRefDb(); 645 CrossRefDb pdbRefDb = null; 646 for (CrossRefDb cRefDb : cRefDbs) 647 { 648 if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) 649 { 650 pdbRefDb = cRefDb; 651 if (firstPDBResNum == UNASSIGNED) 652 { 653 firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(), 654 UNASSIGNED); 655 } 656 else 657 { 658 if (isObserved) 659 { 660 // after we find the first observed residue we just increment 661 firstPDBResNum++; 662 } 663 } 664 } 665 if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) 666 && isAccessionMatched(cRefDb.getDbAccessionId())) 667 { 668 currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(), 669 UNASSIGNED); 670 if (pdbRefDb != null) 671 { 672 break;// exit loop if pdb and uniprot are already found 673 } 674 } 675 } 676 if (!isObserved) 677 { 678 ++pdbeNonObservedCount; 679 } 680 if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe 681 // ??? 682 { 683 // if the sequence has a primary reference to the PDB, then we are 684 // dealing with a sequence extracted directly from the PDB. In that 685 // case, numbering is PDBe - non-observed residues 686 currSeqIndex = seq.getStart() - 1 + pdbeIndex; 687 } 688 if (!isObserved) 689 { 690 if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only 691 // here 692 { 693 // mapping to PDB or PDBe so we need to bookkeep for the 694 // non-observed 695 // SEQRES positions 696 omitNonObserved.add(currSeqIndex); 697 ++nonObservedShiftIndex; 698 } 699 } 700 if (currSeqIndex == UNASSIGNED) 701 { 702 // change in logic - unobserved residues with no currSeqIndex 703 // corresponding are still counted in both nonObservedShiftIndex and 704 // pdbeIndex... 705 continue; 706 } 707 // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) // 708 // true 709 // numbering 710 // is 711 // not 712 // up 713 // to 714 // seq.getEnd() 715 { 716 717 int resNum = (pdbRefDb == null) 718 ? getLeadingIntegerValue(residue.getDbResNum(), 719 UNASSIGNED) 720 : getLeadingIntegerValue(pdbRefDb.getDbResNum(), 721 UNASSIGNED); 722 723 if (isObserved) 724 { 725 char resCharCode = ResidueProperties 726 .getSingleCharacterCode(ResidueProperties 727 .getCanonicalAminoAcid(residue.getDbResName())); 728 resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); 729 730 int[] mappingcols = new int[] { Integer.valueOf(resNum), 731 UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED }; 732 733 mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols); 734 } 735 } 736 } 737 } 738 } 739 740 /** 741 * Get the leading integer part of a string that begins with an integer. 742 * 743 * @param input 744 * - the string input to process 745 * @param failValue 746 * - value returned if unsuccessful 747 * @return 748 */ 749 static int getLeadingIntegerValue(String input, int failValue) 750 { 751 if (input == null) 752 { 753 return failValue; 754 } 755 String[] parts = input.split("(?=\\D)(?<=\\d)"); 756 if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) 757 { 758 return Integer.valueOf(parts[0]); 759 } 760 return failValue; 761 } 762 763 /** 764 * 765 * @param chainId 766 * Target chain to populate mapping of its atom positions. 767 * @param mapping 768 * Two dimension array of residue index versus atom position 769 * @throws IllegalArgumentException 770 * Thrown if chainId or mapping is null 771 * @throws SiftsException 772 */ populateAtomPositions(String chainId, Map<Integer, int[]> mapping)773 void populateAtomPositions(String chainId, Map<Integer, int[]> mapping) 774 throws IllegalArgumentException, SiftsException 775 { 776 try 777 { 778 PDBChain chain = pdb.findChain(chainId); 779 780 if (chain == null || mapping == null) 781 { 782 throw new IllegalArgumentException( 783 "Chain id or mapping must not be null."); 784 } 785 for (int[] map : mapping.values()) 786 { 787 if (map[PDB_RES_POS] != UNASSIGNED) 788 { 789 map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); 790 } 791 } 792 } catch (NullPointerException e) 793 { 794 throw new SiftsException(e.getMessage()); 795 } catch (Exception e) 796 { 797 throw new SiftsException(e.getMessage()); 798 } 799 } 800 801 /** 802 * 803 * @param residueIndex 804 * The residue index used for the search 805 * @param atoms 806 * A collection of Atom to search 807 * @return atom position for the given residue index 808 */ getAtomIndex(int residueIndex, Collection<Atom> atoms)809 int getAtomIndex(int residueIndex, Collection<Atom> atoms) 810 { 811 if (atoms == null) 812 { 813 throw new IllegalArgumentException( 814 "atoms collection must not be null!"); 815 } 816 for (Atom atom : atoms) 817 { 818 if (atom.resNumber == residueIndex) 819 { 820 return atom.atomIndex; 821 } 822 } 823 return UNASSIGNED; 824 } 825 826 /** 827 * Checks if the residue instance is marked 'Not_observed' or not 828 * 829 * @param residue 830 * @return 831 */ isResidueObserved(Residue residue)832 private boolean isResidueObserved(Residue residue) 833 { 834 Set<String> annotations = getResidueAnnotaitons(residue, 835 ResidueDetailType.ANNOTATION); 836 if (annotations == null || annotations.isEmpty()) 837 { 838 return true; 839 } 840 for (String annotation : annotations) 841 { 842 if (annotation.equalsIgnoreCase(NOT_OBSERVED)) 843 { 844 return false; 845 } 846 } 847 return true; 848 } 849 850 /** 851 * Get annotation String for a given residue and annotation type 852 * 853 * @param residue 854 * @param type 855 * @return 856 */ getResidueAnnotaitons(Residue residue, ResidueDetailType type)857 private Set<String> getResidueAnnotaitons(Residue residue, 858 ResidueDetailType type) 859 { 860 HashSet<String> foundAnnotations = new HashSet<String>(); 861 List<ResidueDetail> resDetails = residue.getResidueDetail(); 862 for (ResidueDetail resDetail : resDetails) 863 { 864 if (resDetail.getProperty().equalsIgnoreCase(type.getCode())) 865 { 866 foundAnnotations.add(resDetail.getContent()); 867 } 868 } 869 return foundAnnotations; 870 } 871 872 @Override isAccessionMatched(String accession)873 public boolean isAccessionMatched(String accession) 874 { 875 boolean isStrictMatch = true; 876 return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) 877 : curDBRefAccessionIdsString.contains(accession.toLowerCase()); 878 } 879 isFoundInSiftsEntry(String accessionId)880 private boolean isFoundInSiftsEntry(String accessionId) 881 { 882 Set<String> siftsDBRefs = getAllMappingAccession(); 883 return accessionId != null 884 && siftsDBRefs.contains(accessionId.toLowerCase()); 885 } 886 887 /** 888 * Pad omitted residue positions in PDB sequence with gaps 889 * 890 * @param resNumMap 891 */ padWithGaps(Map<Integer, String> resNumMap, List<Integer> omitNonObserved)892 void padWithGaps(Map<Integer, String> resNumMap, 893 List<Integer> omitNonObserved) 894 { 895 if (resNumMap == null || resNumMap.isEmpty()) 896 { 897 return; 898 } 899 Integer[] keys = resNumMap.keySet().toArray(new Integer[0]); 900 // Arrays.sort(keys); 901 int firstIndex = keys[0]; 902 int lastIndex = keys[keys.length - 1]; 903 // System.out.println("Min value " + firstIndex); 904 // System.out.println("Max value " + lastIndex); 905 for (int x = firstIndex; x <= lastIndex; x++) 906 { 907 if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x)) 908 { 909 resNumMap.put(x, "-"); 910 } 911 } 912 } 913 914 @Override getEntityById(String id)915 public Entity getEntityById(String id) throws SiftsException 916 { 917 // Determines an entity to process by performing a heuristic matching of all 918 // Entities with the given chainId and choosing the best matching Entity 919 Entity entity = getEntityByMostOptimalMatchedId(id); 920 if (entity != null) 921 { 922 return entity; 923 } 924 throw new SiftsException("Entity " + id + " not found"); 925 } 926 927 /** 928 * This method was added because EntityId is NOT always equal to ChainId. 929 * Hence, it provides the logic to greedily detect the "true" Entity for a 930 * given chainId where discrepancies exist. 931 * 932 * @param chainId 933 * @return 934 */ getEntityByMostOptimalMatchedId(String chainId)935 public Entity getEntityByMostOptimalMatchedId(String chainId) 936 { 937 // System.out.println("---> advanced greedy entityId matching block 938 // entered.."); 939 List<Entity> entities = siftsEntry.getEntity(); 940 SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; 941 int count = 0; 942 for (Entity entity : entities) 943 { 944 sPojo[count] = new SiftsEntitySortPojo(); 945 sPojo[count].entityId = entity.getEntityId(); 946 947 List<Segment> segments = entity.getSegment(); 948 for (Segment segment : segments) 949 { 950 List<Residue> residues = segment.getListResidue().getResidue(); 951 for (Residue residue : residues) 952 { 953 List<CrossRefDb> cRefDbs = residue.getCrossRefDb(); 954 for (CrossRefDb cRefDb : cRefDbs) 955 { 956 if (!cRefDb.getDbSource().equalsIgnoreCase("PDB")) 957 { 958 continue; 959 } 960 ++sPojo[count].resCount; 961 if (cRefDb.getDbChainId().equalsIgnoreCase(chainId)) 962 { 963 ++sPojo[count].chainIdFreq; 964 } 965 } 966 } 967 } 968 sPojo[count].pid = (100 * sPojo[count].chainIdFreq) 969 / sPojo[count].resCount; 970 ++count; 971 } 972 Arrays.sort(sPojo, Collections.reverseOrder()); 973 // System.out.println("highest matched entity : " + sPojo[0].entityId); 974 // System.out.println("highest matched pid : " + sPojo[0].pid); 975 976 if (sPojo[0].entityId != null) 977 { 978 if (sPojo[0].pid < 1) 979 { 980 return null; 981 } 982 for (Entity entity : entities) 983 { 984 if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) 985 { 986 continue; 987 } 988 return entity; 989 } 990 } 991 return null; 992 } 993 994 private class SiftsEntitySortPojo 995 implements Comparable<SiftsEntitySortPojo> 996 { 997 public String entityId; 998 999 public int chainIdFreq; 1000 1001 public int pid; 1002 1003 public int resCount; 1004 1005 @Override compareTo(SiftsEntitySortPojo o)1006 public int compareTo(SiftsEntitySortPojo o) 1007 { 1008 return this.pid - o.pid; 1009 } 1010 } 1011 1012 private class SegmentHelperPojo 1013 { 1014 private SequenceI seq; 1015 1016 private HashMap<Integer, int[]> mapping; 1017 1018 private TreeMap<Integer, String> resNumMap; 1019 1020 private List<Integer> omitNonObserved; 1021 1022 private int nonObservedShiftIndex; 1023 1024 /** 1025 * count of number of 'not observed' positions in the PDB record's SEQRES 1026 * (total number of residues with coordinates == length(SEQRES) - 1027 * pdbeNonObserved 1028 */ 1029 private int pdbeNonObserved; 1030 SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping, TreeMap<Integer, String> resNumMap, List<Integer> omitNonObserved, int nonObservedShiftIndex, int pdbeNonObserved)1031 public SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping, 1032 TreeMap<Integer, String> resNumMap, 1033 List<Integer> omitNonObserved, int nonObservedShiftIndex, 1034 int pdbeNonObserved) 1035 { 1036 setSeq(seq); 1037 setMapping(mapping); 1038 setResNumMap(resNumMap); 1039 setOmitNonObserved(omitNonObserved); 1040 setNonObservedShiftIndex(nonObservedShiftIndex); 1041 setPdbeNonObserved(pdbeNonObserved); 1042 1043 } 1044 setPdbeNonObserved(int pdbeNonObserved2)1045 public void setPdbeNonObserved(int pdbeNonObserved2) 1046 { 1047 this.pdbeNonObserved = pdbeNonObserved2; 1048 } 1049 getPdbeNonObserved()1050 public int getPdbeNonObserved() 1051 { 1052 return pdbeNonObserved; 1053 } 1054 getSeq()1055 public SequenceI getSeq() 1056 { 1057 return seq; 1058 } 1059 setSeq(SequenceI seq)1060 public void setSeq(SequenceI seq) 1061 { 1062 this.seq = seq; 1063 } 1064 getMapping()1065 public HashMap<Integer, int[]> getMapping() 1066 { 1067 return mapping; 1068 } 1069 setMapping(HashMap<Integer, int[]> mapping)1070 public void setMapping(HashMap<Integer, int[]> mapping) 1071 { 1072 this.mapping = mapping; 1073 } 1074 getResNumMap()1075 public TreeMap<Integer, String> getResNumMap() 1076 { 1077 return resNumMap; 1078 } 1079 setResNumMap(TreeMap<Integer, String> resNumMap)1080 public void setResNumMap(TreeMap<Integer, String> resNumMap) 1081 { 1082 this.resNumMap = resNumMap; 1083 } 1084 getOmitNonObserved()1085 public List<Integer> getOmitNonObserved() 1086 { 1087 return omitNonObserved; 1088 } 1089 setOmitNonObserved(List<Integer> omitNonObserved)1090 public void setOmitNonObserved(List<Integer> omitNonObserved) 1091 { 1092 this.omitNonObserved = omitNonObserved; 1093 } 1094 getNonObservedShiftIndex()1095 public int getNonObservedShiftIndex() 1096 { 1097 return nonObservedShiftIndex; 1098 } 1099 setNonObservedShiftIndex(int nonObservedShiftIndex)1100 public void setNonObservedShiftIndex(int nonObservedShiftIndex) 1101 { 1102 this.nonObservedShiftIndex = nonObservedShiftIndex; 1103 } 1104 1105 } 1106 1107 @Override getMappingOutput(MappingOutputPojo mp)1108 public StringBuilder getMappingOutput(MappingOutputPojo mp) 1109 throws SiftsException 1110 { 1111 String seqRes = mp.getSeqResidue(); 1112 String seqName = mp.getSeqName(); 1113 int sStart = mp.getSeqStart(); 1114 int sEnd = mp.getSeqEnd(); 1115 1116 String strRes = mp.getStrResidue(); 1117 String strName = mp.getStrName(); 1118 int pdbStart = mp.getStrStart(); 1119 int pdbEnd = mp.getStrEnd(); 1120 1121 String type = mp.getType(); 1122 1123 int maxid = (seqName.length() >= strName.length()) ? seqName.length() 1124 : strName.length(); 1125 int len = 72 - maxid - 1; 1126 1127 int nochunks = ((seqRes.length()) / len) 1128 + ((seqRes.length()) % len > 0 ? 1 : 0); 1129 // output mappings 1130 StringBuilder output = new StringBuilder(512); 1131 output.append(NEWLINE); 1132 output.append("Sequence \u27f7 Structure mapping details") 1133 .append(NEWLINE); 1134 output.append("Method: SIFTS"); 1135 output.append(NEWLINE).append(NEWLINE); 1136 1137 output.append(new Format("%" + maxid + "s").form(seqName)); 1138 output.append(" : "); 1139 output.append(String.valueOf(sStart)); 1140 output.append(" - "); 1141 output.append(String.valueOf(sEnd)); 1142 output.append(" Maps to "); 1143 output.append(NEWLINE); 1144 output.append(new Format("%" + maxid + "s").form(structId)); 1145 output.append(" : "); 1146 output.append(String.valueOf(pdbStart)); 1147 output.append(" - "); 1148 output.append(String.valueOf(pdbEnd)); 1149 output.append(NEWLINE).append(NEWLINE); 1150 1151 ScoreMatrix pam250 = ScoreModels.getInstance().getPam250(); 1152 int matchedSeqCount = 0; 1153 for (int j = 0; j < nochunks; j++) 1154 { 1155 // Print the first aligned sequence 1156 output.append(new Format("%" + (maxid) + "s").form(seqName)) 1157 .append(" "); 1158 1159 for (int i = 0; i < len; i++) 1160 { 1161 if ((i + (j * len)) < seqRes.length()) 1162 { 1163 output.append(seqRes.charAt(i + (j * len))); 1164 } 1165 } 1166 1167 output.append(NEWLINE); 1168 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); 1169 1170 /* 1171 * Print out the match symbols: 1172 * | for exact match (ignoring case) 1173 * . if PAM250 score is positive 1174 * else a space 1175 */ 1176 for (int i = 0; i < len; i++) 1177 { 1178 try 1179 { 1180 if ((i + (j * len)) < seqRes.length()) 1181 { 1182 char c1 = seqRes.charAt(i + (j * len)); 1183 char c2 = strRes.charAt(i + (j * len)); 1184 boolean sameChar = Comparison.isSameResidue(c1, c2, false); 1185 if (sameChar && !Comparison.isGap(c1)) 1186 { 1187 matchedSeqCount++; 1188 output.append("|"); 1189 } 1190 else if (type.equals("pep")) 1191 { 1192 if (pam250.getPairwiseScore(c1, c2) > 0) 1193 { 1194 output.append("."); 1195 } 1196 else 1197 { 1198 output.append(" "); 1199 } 1200 } 1201 else 1202 { 1203 output.append(" "); 1204 } 1205 } 1206 } catch (IndexOutOfBoundsException e) 1207 { 1208 continue; 1209 } 1210 } 1211 // Now print the second aligned sequence 1212 output = output.append(NEWLINE); 1213 output = output.append(new Format("%" + (maxid) + "s").form(strName)) 1214 .append(" "); 1215 for (int i = 0; i < len; i++) 1216 { 1217 if ((i + (j * len)) < strRes.length()) 1218 { 1219 output.append(strRes.charAt(i + (j * len))); 1220 } 1221 } 1222 output.append(NEWLINE).append(NEWLINE); 1223 } 1224 float pid = (float) matchedSeqCount / seqRes.length() * 100; 1225 if (pid < SiftsSettings.getFailSafePIDThreshold()) 1226 { 1227 throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); 1228 } 1229 output.append("Length of alignment = " + seqRes.length()) 1230 .append(NEWLINE); 1231 output.append(new Format("Percentage ID = %2.2f").form(pid)); 1232 return output; 1233 } 1234 1235 @Override getEntityCount()1236 public int getEntityCount() 1237 { 1238 return siftsEntry.getEntity().size(); 1239 } 1240 1241 @Override getDbAccessionId()1242 public String getDbAccessionId() 1243 { 1244 return siftsEntry.getDbAccessionId(); 1245 } 1246 1247 @Override getDbCoordSys()1248 public String getDbCoordSys() 1249 { 1250 return siftsEntry.getDbCoordSys(); 1251 } 1252 1253 @Override getDbSource()1254 public String getDbSource() 1255 { 1256 return siftsEntry.getDbSource(); 1257 } 1258 1259 @Override getDbVersion()1260 public String getDbVersion() 1261 { 1262 return siftsEntry.getDbVersion(); 1263 } 1264 setMockSiftsFile(File file)1265 public static void setMockSiftsFile(File file) 1266 { 1267 mockSiftsFile = file; 1268 } 1269 1270 } 1271