1 /* Copyright (C) 1997-2007 The Chemistry Development Kit (CDK) project 2 * 2014 Mark B Vine (orcid:0000-0002-7794-0426) 3 * 4 * Contact: cdk-devel@lists.sourceforge.net 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public License 8 * as published by the Free Software Foundation; either version 2.1 9 * of the License, or (at your option) any later version. 10 * All we ask is that proper credit is given for our work, which includes 11 * - but is not limited to - adding the above copyright notice to the beginning 12 * of your source code files, and to any copyright notice that you may distribute 13 * with programs based on this work. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 23 * */ 24 package org.openscience.cdk.io; 25 26 import java.io.BufferedReader; 27 import java.io.IOException; 28 import java.io.InputStream; 29 import java.io.InputStreamReader; 30 import java.io.Reader; 31 import java.io.StringReader; 32 import java.util.ArrayList; 33 import java.util.HashMap; 34 import java.util.Hashtable; 35 import java.util.List; 36 import java.util.Map; 37 38 import javax.vecmath.Point3d; 39 40 import org.openscience.cdk.CDKConstants; 41 import org.openscience.cdk.DefaultChemObjectBuilder; 42 import org.openscience.cdk.config.AtomTypeFactory; 43 import org.openscience.cdk.exception.CDKException; 44 import org.openscience.cdk.graph.rebond.RebondTool; 45 import org.openscience.cdk.interfaces.IAtom; 46 import org.openscience.cdk.interfaces.IAtomContainer; 47 import org.openscience.cdk.interfaces.IAtomContainerSet; 48 import org.openscience.cdk.interfaces.IAtomType; 49 import org.openscience.cdk.interfaces.IBond; 50 import org.openscience.cdk.interfaces.IChemFile; 51 import org.openscience.cdk.interfaces.IChemModel; 52 import org.openscience.cdk.interfaces.IChemObject; 53 import org.openscience.cdk.interfaces.IChemSequence; 54 import org.openscience.cdk.interfaces.IMonomer; 55 import org.openscience.cdk.interfaces.IStrand; 56 import org.openscience.cdk.io.formats.IResourceFormat; 57 import org.openscience.cdk.io.formats.PDBFormat; 58 import org.openscience.cdk.io.setting.BooleanIOSetting; 59 import org.openscience.cdk.io.setting.IOSetting; 60 import org.openscience.cdk.protein.data.PDBAtom; 61 import org.openscience.cdk.protein.data.PDBMonomer; 62 import org.openscience.cdk.protein.data.PDBPolymer; 63 import org.openscience.cdk.protein.data.PDBStrand; 64 import org.openscience.cdk.protein.data.PDBStructure; 65 import org.openscience.cdk.tools.ILoggingTool; 66 import org.openscience.cdk.tools.LoggingToolFactory; 67 import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; 68 69 /** 70 * Reads the contents of a PDBFile. 71 * 72 * <p>A description can be found at <a href="http://www.rcsb.org/pdb/static.do?p=file_formats/pdb/index.html"> 73 * http://www.rcsb.org/pdb/static.do?p=file_formats/pdb/index.html</a>. 74 * 75 * @cdk.module pdb 76 * @cdk.githash 77 * @cdk.iooptions 78 * 79 * @author Edgar Luttmann 80 * @author Bradley Smith <bradley@baysmith.com> 81 * @author Martin Eklund <martin.eklund@farmbio.uu.se> 82 * @author Ola Spjuth <ola.spjuth@farmbio.uu.se> 83 * @author Gilleain Torrance <gilleain.torrance@gmail.com> 84 * @cdk.created 2001-08-06 85 * @cdk.keyword file format, PDB 86 * @cdk.bug 1714141 87 * @cdk.bug 1794439 88 */ 89 public class PDBReader extends DefaultChemObjectReader { 90 91 private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(PDBReader.class); 92 private BufferedReader _oInput; // The internal used BufferedReader 93 private BooleanIOSetting useRebondTool; 94 private BooleanIOSetting readConnect; 95 private BooleanIOSetting useHetDictionary; 96 97 private Map<Integer, IAtom> atomNumberMap; 98 99 /* 100 * This is a temporary store for bonds from CONNECT records. As CONNECT is 101 * deliberately fully redundant (a->b and b->a) we need to use this to weed 102 * out the duplicates. 103 */ 104 private List<IBond> bondsFromConnectRecords; 105 106 /** 107 * A mapping between HETATM 3-letter codes + atomNames to CDK atom type 108 * names; for example "RFB.N13" maps to "N.planar3". 109 */ 110 private Map<String, String> hetDictionary; 111 112 private AtomTypeFactory cdkAtomTypeFactory; 113 114 private static final String hetDictionaryPath = "type_map.txt"; 115 116 /** 117 * 118 * Constructs a new PDBReader that can read Molecules from a given 119 * InputStream. 120 * 121 * @param oIn The InputStream to read from 122 * 123 */ PDBReader(InputStream oIn)124 public PDBReader(InputStream oIn) { 125 this(new InputStreamReader(oIn)); 126 } 127 128 /** 129 * 130 * Constructs a new PDBReader that can read Molecules from a given 131 * Reader. 132 * 133 * @param oIn The Reader to read from 134 * 135 */ PDBReader(Reader oIn)136 public PDBReader(Reader oIn) { 137 _oInput = new BufferedReader(oIn); 138 initIOSettings(); 139 hetDictionary = null; 140 cdkAtomTypeFactory = null; 141 } 142 PDBReader()143 public PDBReader() { 144 this(new StringReader("")); 145 } 146 147 @Override getFormat()148 public IResourceFormat getFormat() { 149 return PDBFormat.getInstance(); 150 } 151 152 @Override setReader(Reader input)153 public void setReader(Reader input) throws CDKException { 154 if (input instanceof BufferedReader) { 155 this._oInput = (BufferedReader) input; 156 } else { 157 this._oInput = new BufferedReader(input); 158 } 159 } 160 161 @Override setReader(InputStream input)162 public void setReader(InputStream input) throws CDKException { 163 setReader(new InputStreamReader(input)); 164 } 165 166 @Override accepts(Class<? extends IChemObject> classObject)167 public boolean accepts(Class<? extends IChemObject> classObject) { 168 Class<?>[] interfaces = classObject.getInterfaces(); 169 for (int i = 0; i < interfaces.length; i++) { 170 if (IChemFile.class.equals(interfaces[i])) return true; 171 } 172 if (IChemFile.class.equals(classObject)) return true; 173 Class superClass = classObject.getSuperclass(); 174 if (superClass != null) return this.accepts(superClass); 175 return false; 176 } 177 178 /** 179 * 180 * Takes an object which subclasses IChemObject, e.g. Molecule, and will 181 * read this (from file, database, internet etc). If the specific 182 * implementation does not support a specific IChemObject it will throw 183 * an Exception. 184 * 185 * @param oObj The object that subclasses IChemObject 186 * @return The IChemObject read 187 * @exception CDKException 188 * 189 */ 190 @Override read(T oObj)191 public <T extends IChemObject> T read(T oObj) throws CDKException { 192 if (oObj instanceof IChemFile) { 193 return (T) readChemFile((IChemFile) oObj); 194 } else { 195 throw new CDKException("Only supported is reading of ChemFile objects."); 196 } 197 } 198 199 /** 200 * Read a <code>ChemFile</code> from a file in PDB format. The molecules 201 * in the file are stored as <code>BioPolymer</code>s in the 202 * <code>ChemFile</code>. The residues are the monomers of the 203 * <code>BioPolymer</code>, and their names are the concatenation of the 204 * residue, chain id, and the sequence number. Separate chains (denoted by 205 * TER records) are stored as separate <code>BioPolymer</code> molecules. 206 * 207 * <p>Connectivity information is not currently read. 208 * 209 * @return The ChemFile that was read from the PDB file. 210 */ readChemFile(IChemFile oFile)211 private IChemFile readChemFile(IChemFile oFile) { 212 // initialize all containers 213 IChemSequence oSeq = oFile.getBuilder().newInstance(IChemSequence.class); 214 IChemModel oModel = oFile.getBuilder().newInstance(IChemModel.class); 215 IAtomContainerSet oSet = oFile.getBuilder().newInstance(IAtomContainerSet.class); 216 217 // some variables needed 218 String cCol; 219 PDBAtom oAtom; 220 PDBPolymer oBP = new PDBPolymer(); 221 IAtomContainer molecularStructure = oFile.getBuilder().newInstance(IAtomContainer.class); 222 StringBuffer cResidue; 223 String oObj; 224 IMonomer oMonomer; 225 String cRead = ""; 226 char chain = 'A'; // To ensure stringent name giving of monomers 227 IStrand oStrand; 228 int lineLength = 0; 229 230 boolean isProteinStructure = false; 231 232 atomNumberMap = new Hashtable<Integer, IAtom>(); 233 if (readConnect.isSet()) { 234 bondsFromConnectRecords = new ArrayList<IBond>(); 235 } 236 237 // do the reading of the Input 238 try { 239 do { 240 cRead = _oInput.readLine(); 241 logger.debug("Read line: ", cRead); 242 if (cRead != null) { 243 lineLength = cRead.length(); 244 245 // make sure the record name is 6 characters long 246 if (lineLength < 6) { 247 cRead = cRead + " "; 248 } 249 // check the first column to decide what to do 250 cCol = cRead.substring(0, 6); 251 if ("SEQRES".equalsIgnoreCase(cCol)) { 252 isProteinStructure = true; 253 } else if ("ATOM ".equalsIgnoreCase(cCol)) { 254 // read an atom record 255 oAtom = readAtom(cRead, lineLength); 256 257 if (isProteinStructure) { 258 // construct a string describing the residue 259 cResidue = new StringBuffer(8); 260 oObj = oAtom.getResName(); 261 if (oObj != null) { 262 cResidue = cResidue.append(oObj.trim()); 263 } 264 oObj = oAtom.getChainID(); 265 if (oObj != null) { 266 // cResidue = cResidue.append(((String)oObj).trim()); 267 cResidue = cResidue.append(String.valueOf(chain)); 268 } 269 oObj = oAtom.getResSeq(); 270 if (oObj != null) { 271 cResidue = cResidue.append(oObj.trim()); 272 } 273 274 // search for an existing strand or create a new one. 275 String strandName = oAtom.getChainID(); 276 if (strandName == null || strandName.length() == 0) { 277 strandName = String.valueOf(chain); 278 } 279 oStrand = oBP.getStrand(strandName); 280 if (oStrand == null) { 281 oStrand = new PDBStrand(); 282 oStrand.setStrandName(strandName); 283 oStrand.setID(String.valueOf(chain)); 284 } 285 286 // search for an existing monomer or create a new one. 287 oMonomer = oBP.getMonomer(cResidue.toString(), String.valueOf(chain)); 288 if (oMonomer == null) { 289 PDBMonomer monomer = new PDBMonomer(); 290 monomer.setMonomerName(cResidue.toString()); 291 monomer.setMonomerType(oAtom.getResName()); 292 monomer.setChainID(oAtom.getChainID()); 293 monomer.setICode(oAtom.getICode()); 294 monomer.setResSeq(oAtom.getResSeq()); 295 oMonomer = monomer; 296 } 297 298 // add the atom 299 oBP.addAtom(oAtom, oMonomer, oStrand); 300 } else { 301 molecularStructure.addAtom(oAtom); 302 } 303 304 if (readConnect.isSet() && atomNumberMap.put(oAtom.getSerial(), oAtom) != null) { 305 logger.warn("Duplicate serial ID found for atom: ", oAtom); 306 } 307 logger.debug("Added ATOM: ", oAtom); 308 309 /** As HETATMs cannot be considered to either belong to a certain monomer or strand, 310 * they are dealt with seperately.*/ 311 } else if ("HETATM".equalsIgnoreCase(cCol)) { 312 // read an atom record 313 oAtom = readAtom(cRead, lineLength); 314 oAtom.setHetAtom(true); 315 if (isProteinStructure) { 316 oBP.addAtom(oAtom); 317 } else { 318 molecularStructure.addAtom(oAtom); 319 } 320 if (atomNumberMap.put(oAtom.getSerial(), oAtom) != null) { 321 logger.warn("Duplicate serial ID found for atom: ", oAtom); 322 } 323 logger.debug("Added HETATM: ", oAtom); 324 } else if ("TER ".equalsIgnoreCase(cCol)) { 325 // start new strand 326 chain++; 327 oStrand = new PDBStrand(); 328 oStrand.setStrandName(String.valueOf(chain)); 329 logger.debug("Added new STRAND"); 330 } else if ("END ".equalsIgnoreCase(cCol)) { 331 atomNumberMap.clear(); 332 if (isProteinStructure) { 333 // create bonds and finish the molecule 334 oSet.addAtomContainer(oBP); 335 if (useRebondTool.isSet()) { 336 try { 337 if (!createBondsWithRebondTool(oBP)) { 338 // Get rid of all potentially created bonds. 339 logger.info("Bonds could not be created using the RebondTool when PDB file was read."); 340 oBP.removeAllBonds(); 341 } 342 } catch (Exception exception) { 343 logger.info("Bonds could not be created when PDB file was read."); 344 logger.debug(exception); 345 } 346 } 347 } else { 348 if (useRebondTool.isSet()) createBondsWithRebondTool(molecularStructure); 349 oSet.addAtomContainer(molecularStructure); 350 } 351 352 } else if (cCol.equals("MODEL ")) { 353 // OK, start a new model and save the current one first *if* it contains atoms 354 if (isProteinStructure) { 355 if (oBP.getAtomCount() > 0) { 356 // save the model 357 oSet.addAtomContainer(oBP); 358 oModel.setMoleculeSet(oSet); 359 oSeq.addChemModel(oModel); 360 // setup a new one 361 oBP = new PDBPolymer(); 362 oModel = oFile.getBuilder().newInstance(IChemModel.class); 363 oSet = oFile.getBuilder().newInstance(IAtomContainerSet.class); 364 // avoid duplicate atom warnings 365 atomNumberMap.clear(); 366 } 367 } else { 368 if (molecularStructure.getAtomCount() > 0) { 369 // save the model 370 oSet.addAtomContainer(molecularStructure); 371 oModel.setMoleculeSet(oSet); 372 oSeq.addChemModel(oModel); 373 // setup a new one 374 molecularStructure = oFile.getBuilder().newInstance(IAtomContainer.class); 375 oModel = oFile.getBuilder().newInstance(IChemModel.class); 376 oSet = oFile.getBuilder().newInstance(IAtomContainerSet.class); 377 } 378 } 379 } else if ("REMARK".equalsIgnoreCase(cCol)) { 380 Object comment = oFile.getProperty(CDKConstants.COMMENT); 381 if (comment == null) { 382 comment = ""; 383 } 384 if (lineLength > 12) { 385 comment = comment.toString() + cRead.substring(11).trim() 386 + "\n"; 387 oFile.setProperty(CDKConstants.COMMENT, comment); 388 } else { 389 logger.warn("REMARK line found without any comment!"); 390 } 391 } else if ("COMPND".equalsIgnoreCase(cCol)) { 392 String title = cRead.substring(10).trim(); 393 oFile.setProperty(CDKConstants.TITLE, title); 394 } 395 396 /* *********************************************************** 397 * Read connectivity information from CONECT records. Only 398 * covalent bonds are dealt with. Perhaps salt bridges 399 * should be dealt with in the same way..? 400 */ 401 else if (readConnect.isSet() && "CONECT".equalsIgnoreCase(cCol)) { 402 cRead.trim(); 403 if (cRead.length() < 16) { 404 logger.debug("Skipping unexpected empty CONECT line! : ", cRead); 405 } else { 406 int lineIndex = 6; 407 int atomFromNumber = -1; 408 int atomToNumber = -1; 409 IAtomContainer molecule = (isProteinStructure) ? oBP : molecularStructure; 410 while (lineIndex + 5 <= cRead.length()) { 411 String part = cRead.substring(lineIndex, lineIndex + 5).trim(); 412 if (atomFromNumber == -1) { 413 try { 414 atomFromNumber = Integer.parseInt(part); 415 } catch (NumberFormatException nfe) { 416 } 417 } else { 418 try { 419 atomToNumber = Integer.parseInt(part); 420 } catch (NumberFormatException nfe) { 421 atomToNumber = -1; 422 } 423 if (atomFromNumber != -1 && atomToNumber != -1) { 424 addBond(molecule, atomFromNumber, atomToNumber); 425 logger.debug("Bonded " + atomFromNumber + " with " + atomToNumber); 426 } 427 } 428 lineIndex += 5; 429 } 430 } 431 } 432 /* ********************************************************** */ 433 434 else if ("HELIX ".equalsIgnoreCase(cCol)) { 435 // HELIX 1 H1A CYS A 11 LYS A 18 1 RESIDUE 18 HAS POSITIVE PHI 1D66 72 436 // 1 2 3 4 5 6 7 437 // 01234567890123456789012345678901234567890123456789012345678901234567890123456789 438 PDBStructure structure = new PDBStructure(); 439 structure.setStructureType(PDBStructure.HELIX); 440 structure.setStartChainID(cRead.charAt(19)); 441 structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(21, 25).trim())); 442 structure.setStartInsertionCode(cRead.charAt(25)); 443 structure.setEndChainID(cRead.charAt(31)); 444 structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(33, 37).trim())); 445 structure.setEndInsertionCode(cRead.charAt(37)); 446 oBP.addStructure(structure); 447 } else if ("SHEET ".equalsIgnoreCase(cCol)) { 448 PDBStructure structure = new PDBStructure(); 449 structure.setStructureType(PDBStructure.SHEET); 450 structure.setStartChainID(cRead.charAt(21)); 451 structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(22, 26).trim())); 452 structure.setStartInsertionCode(cRead.charAt(26)); 453 structure.setEndChainID(cRead.charAt(32)); 454 structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(33, 37).trim())); 455 structure.setEndInsertionCode(cRead.charAt(37)); 456 oBP.addStructure(structure); 457 } else if ("TURN ".equalsIgnoreCase(cCol)) { 458 PDBStructure structure = new PDBStructure(); 459 structure.setStructureType(PDBStructure.TURN); 460 structure.setStartChainID(cRead.charAt(19)); 461 structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(20, 24).trim())); 462 structure.setStartInsertionCode(cRead.charAt(24)); 463 structure.setEndChainID(cRead.charAt(30)); 464 structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(31, 35).trim())); 465 structure.setEndInsertionCode(cRead.charAt(35)); 466 oBP.addStructure(structure); 467 } // ignore all other commands 468 } 469 } while (_oInput.ready() && (cRead != null)); 470 } catch (IOException | IllegalArgumentException | CDKException e) { 471 logger.error("Found a problem at line:"); 472 logger.error(cRead); 473 logger.error("01234567890123456789012345678901234567890123456789012345678901234567890123456789"); 474 logger.error(" 1 2 3 4 5 6 7 "); 475 logger.error(" error: " + e.getMessage()); 476 logger.debug(e); 477 e.printStackTrace(); 478 } 479 480 // try to close the Input 481 try { 482 _oInput.close(); 483 } catch (Exception e) { 484 logger.debug(e); 485 } 486 487 // Set all the dependencies 488 oModel.setMoleculeSet(oSet); 489 oSeq.addChemModel(oModel); 490 oFile.addChemSequence(oSeq); 491 492 return oFile; 493 } 494 addBond(IAtomContainer molecule, int bondAtomNo, int bondedAtomNo)495 private void addBond(IAtomContainer molecule, int bondAtomNo, int bondedAtomNo) { 496 IAtom firstAtom = atomNumberMap.get(bondAtomNo); 497 IAtom secondAtom = atomNumberMap.get(bondedAtomNo); 498 if (firstAtom == null) { 499 logger.error("Could not find bond start atom in map with serial id: ", bondAtomNo); 500 } 501 if (secondAtom == null) { 502 logger.error("Could not find bond target atom in map with serial id: ", bondAtomNo); 503 } 504 IBond bond = firstAtom.getBuilder().newInstance(IBond.class, firstAtom, secondAtom, IBond.Order.SINGLE); 505 for (int i = 0; i < bondsFromConnectRecords.size(); i++) { 506 IBond existingBond = (IBond) bondsFromConnectRecords.get(i); 507 IAtom a = existingBond.getBegin(); 508 IAtom b = existingBond.getEnd(); 509 if ((a.equals(firstAtom) && b.equals(secondAtom)) || (b.equals(firstAtom) && a.equals(secondAtom))) { 510 // already stored 511 return; 512 } 513 } 514 bondsFromConnectRecords.add(bond); 515 molecule.addBond(bond); 516 } 517 createBondsWithRebondTool(IAtomContainer molecule)518 private boolean createBondsWithRebondTool(IAtomContainer molecule) { 519 RebondTool tool = new RebondTool(2.0, 0.5, 0.5); 520 try { 521 // configure atoms 522 AtomTypeFactory factory = AtomTypeFactory.getInstance("org/openscience/cdk/config/data/jmol_atomtypes.txt", 523 molecule.getBuilder()); 524 for (IAtom atom : molecule.atoms()) { 525 try { 526 IAtomType[] types = factory.getAtomTypes(atom.getSymbol()); 527 if (types.length > 0) { 528 // just pick the first one 529 AtomTypeManipulator.configure(atom, types[0]); 530 } else { 531 logger.warn("Could not configure atom with symbol: " + atom.getSymbol()); 532 } 533 } catch (Exception e) { 534 logger.warn("Could not configure atom (but don't care): " + e.getMessage()); 535 logger.debug(e); 536 } 537 } 538 tool.rebond(molecule); 539 } catch (Exception e) { 540 logger.error("Could not rebond the polymer: " + e.getMessage()); 541 logger.debug(e); 542 } 543 return true; 544 } 545 isUpper(char c)546 private static boolean isUpper(char c) { 547 return c >= 'A' && c <= 'Z'; 548 } isLower(char c)549 private static boolean isLower(char c) { 550 return c >= 'a' && c <= 'z'; 551 } isDigit(char c)552 private static boolean isDigit(char c) { 553 return c >= '0' && c <= '9'; 554 } 555 parseAtomSymbol(String str)556 private String parseAtomSymbol(String str) { 557 558 if (str == null || str.isEmpty()) 559 return null; 560 561 final int len = str.length(); 562 563 StringBuilder sym = new StringBuilder(); 564 565 // try grabbing from end of line 566 567 if (len > 76 && isUpper(str.charAt(76))) { 568 sym.append(str.charAt(76)); 569 if (len > 77 && isUpper(str.charAt(77))) 570 sym.append(Character.toLowerCase(str.charAt(77))); 571 else if (len > 77 && isLower(str.charAt(77))) 572 sym.append(Character.toLowerCase(str.charAt(77))); 573 } else if (len > 76 &&str.charAt(76) == ' ') { 574 if (len > 77 && isUpper(str.charAt(77))) 575 sym.append(str.charAt(77)); 576 } 577 578 if (sym.length() > 0) 579 return sym.toString(); 580 581 // try getting from PDB atom name 582 if (len > 13 && isUpper(str.charAt(13))) { 583 if (str.charAt(12) == ' ') { 584 sym.append(str.charAt(13)); 585 if (isLower(str.charAt(14))) 586 sym.append(str.charAt(14)); 587 } else if (isUpper(str.charAt(12))) { 588 if (str.charAt(0) == 'A' && str.charAt(12) == 'H') { 589 sym.append('H'); // ATOM record H is always H 590 } else { 591 sym.append(str.charAt(12)); 592 sym.append(Character.toLowerCase(str.charAt(13))); 593 } 594 } else if (isDigit(str.charAt(12))) { 595 sym.append(str.charAt(13)); 596 } 597 } 598 599 if (sym.length() > 0) 600 return sym.toString(); 601 602 return null; 603 } 604 605 /** 606 * Creates an <code>Atom</code> and sets properties to their values from 607 * the ATOM or HETATM record. If the line is shorter than 80 characters, the 608 * information past 59 characters is treated as optional. If the line is 609 * shorter than 59 characters, a <code>RuntimeException</code> is thrown. 610 * 611 * @param cLine the PDB ATOM or HEATATM record. 612 * @return the <code>Atom</code> created from the record. 613 * @throws RuntimeException if the line is too short (less than 59 characters). 614 */ readAtom(String cLine, int lineLength)615 private PDBAtom readAtom(String cLine, int lineLength) throws CDKException { 616 // a line can look like (two in old format, then two in new format): 617 // 618 // 1 2 3 4 5 6 7 619 // 01234567890123456789012345678901234567890123456789012345678901234567890123456789 620 // ATOM 1 O5* C A 1 20.662 36.632 23.475 1.00 10.00 114D 45 621 // ATOM 1186 1H ALA 1 10.105 5.945 -6.630 1.00 0.00 1ALE1288 622 // ATOM 31 CA SER A 3 -0.891 17.523 51.925 1.00 28.64 C 623 // HETATM 3486 MG MG A 302 24.885 14.008 59.194 1.00 29.42 MG+2 624 // 625 // note: the first two examples have the PDBID in col 72-75 626 // note: the last two examples have the element symbol in col 76-77 627 // note: the last (Mg hetatm) has a charge in col 78-79 628 629 if (lineLength < 59) { 630 throw new RuntimeException("PDBReader error during readAtom(): line too short"); 631 } 632 633 boolean isHetatm = cLine.substring(0, 6).equals("HETATM"); 634 String atomName = cLine.substring(12, 16).trim(); 635 String resName = cLine.substring(17, 20).trim(); 636 String symbol = parseAtomSymbol(cLine); 637 638 if (symbol == null) 639 handleError("Cannot parse symbol from " + atomName); 640 641 PDBAtom oAtom = new PDBAtom(symbol, new Point3d(Double.parseDouble(cLine.substring(30, 38)), 642 Double.parseDouble(cLine.substring(38, 46)), Double.parseDouble(cLine.substring(46, 54)))); 643 if (useHetDictionary.isSet() && isHetatm) { 644 String cdkType = typeHetatm(resName, atomName); 645 oAtom.setAtomTypeName(cdkType); 646 if (cdkType != null) { 647 try { 648 cdkAtomTypeFactory.configure(oAtom); 649 } catch (CDKException cdke) { 650 logger.warn("Could not configure", resName, " ", atomName); 651 } 652 } 653 } 654 655 oAtom.setRecord(cLine); 656 oAtom.setSerial(Integer.parseInt(cLine.substring(6, 11).trim())); 657 oAtom.setName(atomName.trim()); 658 oAtom.setAltLoc(cLine.substring(16, 17).trim()); 659 oAtom.setResName(resName); 660 oAtom.setChainID(cLine.substring(21, 22).trim()); 661 oAtom.setResSeq(cLine.substring(22, 26).trim()); 662 oAtom.setICode(cLine.substring(26, 27).trim()); 663 if (useHetDictionary.isSet() && isHetatm) { 664 oAtom.setID(oAtom.getResName() + "." + atomName); 665 } else { 666 oAtom.setAtomTypeName(oAtom.getResName() + "." + atomName); 667 } 668 if (lineLength >= 59) { 669 String frag = cLine.substring(54, Math.min(lineLength, 60)).trim(); 670 if (frag.length() > 0) { 671 oAtom.setOccupancy(Double.parseDouble(frag)); 672 } 673 } 674 if (lineLength >= 65) { 675 String frag = cLine.substring(60, Math.min(lineLength, 66)).trim(); 676 if (frag.length() > 0) { 677 oAtom.setTempFactor(Double.parseDouble(frag)); 678 } 679 } 680 if (lineLength >= 75) { 681 oAtom.setSegID(cLine.substring(72, Math.min(lineLength, 76)).trim()); 682 } 683 // if (lineLength >= 78) { 684 // oAtom.setSymbol((new String(cLine.substring(76, 78))).trim()); 685 // } 686 if (lineLength >= 79) { 687 String frag; 688 if (lineLength >= 80) { 689 frag = cLine.substring(78, 80).trim(); 690 } else { 691 frag = cLine.substring(78); 692 } 693 if (frag.length() > 0) { 694 // see Format_v33_A4.pdf, p. 178 695 if (frag.endsWith("-") || frag.endsWith("+")) { 696 oAtom.setCharge(Double.parseDouble(new StringBuilder(frag).reverse().toString())); 697 } else { 698 oAtom.setCharge(Double.parseDouble(frag)); 699 } 700 } 701 } 702 703 /* *********************************************************************************** 704 * It sets a flag in the property content of an atom, which is used when 705 * bonds are created to check if the atom is an OXT-record => needs 706 * special treatment. 707 */ 708 String oxt = cLine.substring(13, 16).trim(); 709 710 if (oxt.equals("OXT")) { 711 oAtom.setOxt(true); 712 } else { 713 oAtom.setOxt(false); 714 } 715 /* ********************************************************************************** */ 716 717 return oAtom; 718 } 719 typeHetatm(String resName, String atomName)720 private String typeHetatm(String resName, String atomName) { 721 if (hetDictionary == null) { 722 readHetDictionary(); 723 cdkAtomTypeFactory = AtomTypeFactory.getInstance("org/openscience/cdk/dict/data/cdk-atom-types.owl", 724 DefaultChemObjectBuilder.getInstance()); 725 } 726 String key = resName + "." + atomName; 727 if (hetDictionary.containsKey(key)) { 728 return hetDictionary.get(key); 729 } 730 return null; 731 } 732 readHetDictionary()733 private void readHetDictionary() { 734 try { 735 InputStream ins = getClass().getResourceAsStream(hetDictionaryPath); 736 BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(ins)); 737 hetDictionary = new HashMap<String, String>(); 738 String line; 739 while ((line = bufferedReader.readLine()) != null) { 740 int colonIndex = line.indexOf(':'); 741 if (colonIndex == -1) continue; 742 String typeKey = line.substring(0, colonIndex); 743 String typeValue = line.substring(colonIndex + 1); 744 if (typeValue.equals("null")) { 745 hetDictionary.put(typeKey, null); 746 } else { 747 hetDictionary.put(typeKey, typeValue); 748 } 749 } 750 bufferedReader.close(); 751 } catch (IOException ioe) { 752 logger.error(ioe.getMessage()); 753 } 754 } 755 756 @Override close()757 public void close() throws IOException { 758 _oInput.close(); 759 } 760 initIOSettings()761 private void initIOSettings() { 762 useRebondTool = addSetting(new BooleanIOSetting("UseRebondTool", IOSetting.Importance.LOW, 763 "Should the PDBReader deduce bonding patterns?", "false")); 764 readConnect = addSetting(new BooleanIOSetting("ReadConnectSection", IOSetting.Importance.LOW, 765 "Should the CONECT be read?", "true")); 766 useHetDictionary = addSetting(new BooleanIOSetting("UseHetDictionary", IOSetting.Importance.LOW, 767 "Should the PDBReader use the HETATM dictionary for atom types?", "false")); 768 } 769 customizeJob()770 public void customizeJob() { 771 for (IOSetting setting : getSettings()) { 772 fireIOSettingQuestion(setting); 773 } 774 } 775 776 } 777