1 /* Copyright (C) 1997-2007  The Chemistry Development Kit (CDK) project
2  *                    2014  Mark B Vine (orcid:0000-0002-7794-0426)
3  *
4  * Contact: cdk-devel@lists.sourceforge.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public License
8  * as published by the Free Software Foundation; either version 2.1
9  * of the License, or (at your option) any later version.
10  * All we ask is that proper credit is given for our work, which includes
11  * - but is not limited to - adding the above copyright notice to the beginning
12  * of your source code files, and to any copyright notice that you may distribute
13  * with programs based on this work.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
23  *  */
24 package org.openscience.cdk.io;
25 
26 import java.io.BufferedReader;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.InputStreamReader;
30 import java.io.Reader;
31 import java.io.StringReader;
32 import java.util.ArrayList;
33 import java.util.HashMap;
34 import java.util.Hashtable;
35 import java.util.List;
36 import java.util.Map;
37 
38 import javax.vecmath.Point3d;
39 
40 import org.openscience.cdk.CDKConstants;
41 import org.openscience.cdk.DefaultChemObjectBuilder;
42 import org.openscience.cdk.config.AtomTypeFactory;
43 import org.openscience.cdk.exception.CDKException;
44 import org.openscience.cdk.graph.rebond.RebondTool;
45 import org.openscience.cdk.interfaces.IAtom;
46 import org.openscience.cdk.interfaces.IAtomContainer;
47 import org.openscience.cdk.interfaces.IAtomContainerSet;
48 import org.openscience.cdk.interfaces.IAtomType;
49 import org.openscience.cdk.interfaces.IBond;
50 import org.openscience.cdk.interfaces.IChemFile;
51 import org.openscience.cdk.interfaces.IChemModel;
52 import org.openscience.cdk.interfaces.IChemObject;
53 import org.openscience.cdk.interfaces.IChemSequence;
54 import org.openscience.cdk.interfaces.IMonomer;
55 import org.openscience.cdk.interfaces.IStrand;
56 import org.openscience.cdk.io.formats.IResourceFormat;
57 import org.openscience.cdk.io.formats.PDBFormat;
58 import org.openscience.cdk.io.setting.BooleanIOSetting;
59 import org.openscience.cdk.io.setting.IOSetting;
60 import org.openscience.cdk.protein.data.PDBAtom;
61 import org.openscience.cdk.protein.data.PDBMonomer;
62 import org.openscience.cdk.protein.data.PDBPolymer;
63 import org.openscience.cdk.protein.data.PDBStrand;
64 import org.openscience.cdk.protein.data.PDBStructure;
65 import org.openscience.cdk.tools.ILoggingTool;
66 import org.openscience.cdk.tools.LoggingToolFactory;
67 import org.openscience.cdk.tools.manipulator.AtomTypeManipulator;
68 
69 /**
70  * Reads the contents of a PDBFile.
71  *
72  * <p>A description can be found at <a href="http://www.rcsb.org/pdb/static.do?p=file_formats/pdb/index.html">
73  * http://www.rcsb.org/pdb/static.do?p=file_formats/pdb/index.html</a>.
74  *
75  * @cdk.module  pdb
76  * @cdk.githash
77  * @cdk.iooptions
78  *
79  * @author      Edgar Luttmann
80  * @author Bradley Smith &lt;bradley@baysmith.com&gt;
81  * @author Martin Eklund &lt;martin.eklund@farmbio.uu.se&gt;
82  * @author Ola Spjuth &lt;ola.spjuth@farmbio.uu.se&gt;
83  * @author Gilleain Torrance &lt;gilleain.torrance@gmail.com&gt;
84  * @cdk.created 2001-08-06
85  * @cdk.keyword file format, PDB
86  * @cdk.bug     1714141
87  * @cdk.bug     1794439
88  */
89 public class PDBReader extends DefaultChemObjectReader {
90 
91     private static ILoggingTool    logger            = LoggingToolFactory.createLoggingTool(PDBReader.class);
92     private BufferedReader         _oInput;                                                                  // The internal used BufferedReader
93     private BooleanIOSetting       useRebondTool;
94     private BooleanIOSetting       readConnect;
95     private BooleanIOSetting       useHetDictionary;
96 
97     private Map<Integer, IAtom>    atomNumberMap;
98 
99     /*
100      * This is a temporary store for bonds from CONNECT records. As CONNECT is
101      * deliberately fully redundant (a->b and b->a) we need to use this to weed
102      * out the duplicates.
103      */
104     private List<IBond>            bondsFromConnectRecords;
105 
106     /**
107      * A mapping between HETATM 3-letter codes + atomNames to CDK atom type
108      * names; for example "RFB.N13" maps to "N.planar3".
109      */
110     private Map<String, String>    hetDictionary;
111 
112     private AtomTypeFactory        cdkAtomTypeFactory;
113 
114     private static final String    hetDictionaryPath = "type_map.txt";
115 
116     /**
117      *
118      * Constructs a new PDBReader that can read Molecules from a given
119      * InputStream.
120      *
121      * @param oIn  The InputStream to read from
122      *
123      */
PDBReader(InputStream oIn)124     public PDBReader(InputStream oIn) {
125         this(new InputStreamReader(oIn));
126     }
127 
128     /**
129      *
130      * Constructs a new PDBReader that can read Molecules from a given
131      * Reader.
132      *
133      * @param oIn  The Reader to read from
134      *
135      */
PDBReader(Reader oIn)136     public PDBReader(Reader oIn) {
137         _oInput = new BufferedReader(oIn);
138         initIOSettings();
139         hetDictionary = null;
140         cdkAtomTypeFactory = null;
141     }
142 
PDBReader()143     public PDBReader() {
144         this(new StringReader(""));
145     }
146 
147     @Override
getFormat()148     public IResourceFormat getFormat() {
149         return PDBFormat.getInstance();
150     }
151 
152     @Override
setReader(Reader input)153     public void setReader(Reader input) throws CDKException {
154         if (input instanceof BufferedReader) {
155             this._oInput = (BufferedReader) input;
156         } else {
157             this._oInput = new BufferedReader(input);
158         }
159     }
160 
161     @Override
setReader(InputStream input)162     public void setReader(InputStream input) throws CDKException {
163         setReader(new InputStreamReader(input));
164     }
165 
166     @Override
accepts(Class<? extends IChemObject> classObject)167     public boolean accepts(Class<? extends IChemObject> classObject) {
168         Class<?>[] interfaces = classObject.getInterfaces();
169         for (int i = 0; i < interfaces.length; i++) {
170             if (IChemFile.class.equals(interfaces[i])) return true;
171         }
172         if (IChemFile.class.equals(classObject)) return true;
173         Class superClass = classObject.getSuperclass();
174         if (superClass != null) return this.accepts(superClass);
175         return false;
176     }
177 
178     /**
179      *
180      * Takes an object which subclasses IChemObject, e.g. Molecule, and will
181      * read this (from file, database, internet etc). If the specific
182      * implementation does not support a specific IChemObject it will throw
183      * an Exception.
184      *
185      * @param oObj  The object that subclasses IChemObject
186      * @return      The IChemObject read
187      * @exception   CDKException
188      *
189      */
190     @Override
read(T oObj)191     public <T extends IChemObject> T read(T oObj) throws CDKException {
192         if (oObj instanceof IChemFile) {
193             return (T) readChemFile((IChemFile) oObj);
194         } else {
195             throw new CDKException("Only supported is reading of ChemFile objects.");
196         }
197     }
198 
199     /**
200      * Read a <code>ChemFile</code> from a file in PDB format. The molecules
201      * in the file are stored as <code>BioPolymer</code>s in the
202      * <code>ChemFile</code>. The residues are the monomers of the
203      * <code>BioPolymer</code>, and their names are the concatenation of the
204      * residue, chain id, and the sequence number. Separate chains (denoted by
205      * TER records) are stored as separate <code>BioPolymer</code> molecules.
206      *
207      * <p>Connectivity information is not currently read.
208      *
209      * @return The ChemFile that was read from the PDB file.
210      */
readChemFile(IChemFile oFile)211     private IChemFile readChemFile(IChemFile oFile) {
212         // initialize all containers
213         IChemSequence oSeq = oFile.getBuilder().newInstance(IChemSequence.class);
214         IChemModel oModel = oFile.getBuilder().newInstance(IChemModel.class);
215         IAtomContainerSet oSet = oFile.getBuilder().newInstance(IAtomContainerSet.class);
216 
217         // some variables needed
218         String cCol;
219         PDBAtom oAtom;
220         PDBPolymer oBP = new PDBPolymer();
221         IAtomContainer molecularStructure = oFile.getBuilder().newInstance(IAtomContainer.class);
222         StringBuffer cResidue;
223         String oObj;
224         IMonomer oMonomer;
225         String cRead = "";
226         char chain = 'A'; // To ensure stringent name giving of monomers
227         IStrand oStrand;
228         int lineLength = 0;
229 
230         boolean isProteinStructure = false;
231 
232         atomNumberMap = new Hashtable<Integer, IAtom>();
233         if (readConnect.isSet()) {
234             bondsFromConnectRecords = new ArrayList<IBond>();
235         }
236 
237         // do the reading of the Input
238         try {
239             do {
240                 cRead = _oInput.readLine();
241                 logger.debug("Read line: ", cRead);
242                 if (cRead != null) {
243                     lineLength = cRead.length();
244 
245                     // make sure the record name is 6 characters long
246                     if (lineLength < 6) {
247                         cRead = cRead + "      ";
248                     }
249                     // check the first column to decide what to do
250                     cCol = cRead.substring(0, 6);
251                     if ("SEQRES".equalsIgnoreCase(cCol)) {
252                         isProteinStructure = true;
253                     } else if ("ATOM  ".equalsIgnoreCase(cCol)) {
254                         // read an atom record
255                         oAtom = readAtom(cRead, lineLength);
256 
257                         if (isProteinStructure) {
258                             // construct a string describing the residue
259                             cResidue = new StringBuffer(8);
260                             oObj = oAtom.getResName();
261                             if (oObj != null) {
262                                 cResidue = cResidue.append(oObj.trim());
263                             }
264                             oObj = oAtom.getChainID();
265                             if (oObj != null) {
266                                 // cResidue = cResidue.append(((String)oObj).trim());
267                                 cResidue = cResidue.append(String.valueOf(chain));
268                             }
269                             oObj = oAtom.getResSeq();
270                             if (oObj != null) {
271                                 cResidue = cResidue.append(oObj.trim());
272                             }
273 
274                             // search for an existing strand or create a new one.
275                             String strandName = oAtom.getChainID();
276                             if (strandName == null || strandName.length() == 0) {
277                                 strandName = String.valueOf(chain);
278                             }
279                             oStrand = oBP.getStrand(strandName);
280                             if (oStrand == null) {
281                                 oStrand = new PDBStrand();
282                                 oStrand.setStrandName(strandName);
283                                 oStrand.setID(String.valueOf(chain));
284                             }
285 
286                             // search for an existing monomer or create a new one.
287                             oMonomer = oBP.getMonomer(cResidue.toString(), String.valueOf(chain));
288                             if (oMonomer == null) {
289                                 PDBMonomer monomer = new PDBMonomer();
290                                 monomer.setMonomerName(cResidue.toString());
291                                 monomer.setMonomerType(oAtom.getResName());
292                                 monomer.setChainID(oAtom.getChainID());
293                                 monomer.setICode(oAtom.getICode());
294                                 monomer.setResSeq(oAtom.getResSeq());
295                                 oMonomer = monomer;
296                             }
297 
298                             // add the atom
299                             oBP.addAtom(oAtom, oMonomer, oStrand);
300                         } else {
301                             molecularStructure.addAtom(oAtom);
302                         }
303 
304                         if (readConnect.isSet() && atomNumberMap.put(oAtom.getSerial(), oAtom) != null) {
305                             logger.warn("Duplicate serial ID found for atom: ", oAtom);
306                         }
307                         logger.debug("Added ATOM: ", oAtom);
308 
309                         /** As HETATMs cannot be considered to either belong to a certain monomer or strand,
310                          * they are dealt with seperately.*/
311                     } else if ("HETATM".equalsIgnoreCase(cCol)) {
312                         // read an atom record
313                         oAtom = readAtom(cRead, lineLength);
314                         oAtom.setHetAtom(true);
315                         if (isProteinStructure) {
316                             oBP.addAtom(oAtom);
317                         } else {
318                             molecularStructure.addAtom(oAtom);
319                         }
320                         if (atomNumberMap.put(oAtom.getSerial(), oAtom) != null) {
321                             logger.warn("Duplicate serial ID found for atom: ", oAtom);
322                         }
323                         logger.debug("Added HETATM: ", oAtom);
324                     } else if ("TER   ".equalsIgnoreCase(cCol)) {
325                         // start new strand
326                         chain++;
327                         oStrand = new PDBStrand();
328                         oStrand.setStrandName(String.valueOf(chain));
329                         logger.debug("Added new STRAND");
330                     } else if ("END   ".equalsIgnoreCase(cCol)) {
331                         atomNumberMap.clear();
332                         if (isProteinStructure) {
333                             // create bonds and finish the molecule
334                             oSet.addAtomContainer(oBP);
335                             if (useRebondTool.isSet()) {
336                                 try {
337                                     if (!createBondsWithRebondTool(oBP)) {
338                                         // Get rid of all potentially created bonds.
339                                         logger.info("Bonds could not be created using the RebondTool when PDB file was read.");
340                                         oBP.removeAllBonds();
341                                     }
342                                 } catch (Exception exception) {
343                                     logger.info("Bonds could not be created when PDB file was read.");
344                                     logger.debug(exception);
345                                 }
346                             }
347                         } else {
348                             if (useRebondTool.isSet()) createBondsWithRebondTool(molecularStructure);
349                             oSet.addAtomContainer(molecularStructure);
350                         }
351 
352                     } else if (cCol.equals("MODEL ")) {
353                         // OK, start a new model and save the current one first *if* it contains atoms
354                         if (isProteinStructure) {
355                             if (oBP.getAtomCount() > 0) {
356                                 // save the model
357                                 oSet.addAtomContainer(oBP);
358                                 oModel.setMoleculeSet(oSet);
359                                 oSeq.addChemModel(oModel);
360                                 // setup a new one
361                                 oBP = new PDBPolymer();
362                                 oModel = oFile.getBuilder().newInstance(IChemModel.class);
363                                 oSet = oFile.getBuilder().newInstance(IAtomContainerSet.class);
364                                 // avoid duplicate atom warnings
365                                 atomNumberMap.clear();
366                             }
367                         } else {
368                             if (molecularStructure.getAtomCount() > 0) {
369                                 //								 save the model
370                                 oSet.addAtomContainer(molecularStructure);
371                                 oModel.setMoleculeSet(oSet);
372                                 oSeq.addChemModel(oModel);
373                                 // setup a new one
374                                 molecularStructure = oFile.getBuilder().newInstance(IAtomContainer.class);
375                                 oModel = oFile.getBuilder().newInstance(IChemModel.class);
376                                 oSet = oFile.getBuilder().newInstance(IAtomContainerSet.class);
377                             }
378                         }
379                     } else if ("REMARK".equalsIgnoreCase(cCol)) {
380                         Object comment = oFile.getProperty(CDKConstants.COMMENT);
381                         if (comment == null) {
382                             comment = "";
383                         }
384                         if (lineLength > 12) {
385                             comment = comment.toString() + cRead.substring(11).trim()
386                                     + "\n";
387                             oFile.setProperty(CDKConstants.COMMENT, comment);
388                         } else {
389                             logger.warn("REMARK line found without any comment!");
390                         }
391                     } else if ("COMPND".equalsIgnoreCase(cCol)) {
392                         String title = cRead.substring(10).trim();
393                         oFile.setProperty(CDKConstants.TITLE, title);
394                     }
395 
396                     /* ***********************************************************
397                      * Read connectivity information from CONECT records. Only
398                      * covalent bonds are dealt with. Perhaps salt bridges
399                      * should be dealt with in the same way..?
400                      */
401                     else if (readConnect.isSet() && "CONECT".equalsIgnoreCase(cCol)) {
402                         cRead.trim();
403                         if (cRead.length() < 16) {
404                             logger.debug("Skipping unexpected empty CONECT line! : ", cRead);
405                         } else {
406                             int lineIndex = 6;
407                             int atomFromNumber = -1;
408                             int atomToNumber = -1;
409                             IAtomContainer molecule = (isProteinStructure) ? oBP : molecularStructure;
410                             while (lineIndex + 5 <= cRead.length()) {
411                                 String part = cRead.substring(lineIndex, lineIndex + 5).trim();
412                                 if (atomFromNumber == -1) {
413                                     try {
414                                         atomFromNumber = Integer.parseInt(part);
415                                     } catch (NumberFormatException nfe) {
416                                     }
417                                 } else {
418                                     try {
419                                         atomToNumber = Integer.parseInt(part);
420                                     } catch (NumberFormatException nfe) {
421                                         atomToNumber = -1;
422                                     }
423                                     if (atomFromNumber != -1 && atomToNumber != -1) {
424                                         addBond(molecule, atomFromNumber, atomToNumber);
425                                         logger.debug("Bonded " + atomFromNumber + " with " + atomToNumber);
426                                     }
427                                 }
428                                 lineIndex += 5;
429                             }
430                         }
431                     }
432                     /* ********************************************************** */
433 
434                     else if ("HELIX ".equalsIgnoreCase(cCol)) {
435                         //						HELIX    1 H1A CYS A   11  LYS A   18  1 RESIDUE 18 HAS POSITIVE PHI    1D66  72
436                         //						          1         2         3         4         5         6         7
437                         //						01234567890123456789012345678901234567890123456789012345678901234567890123456789
438                         PDBStructure structure = new PDBStructure();
439                         structure.setStructureType(PDBStructure.HELIX);
440                         structure.setStartChainID(cRead.charAt(19));
441                         structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(21, 25).trim()));
442                         structure.setStartInsertionCode(cRead.charAt(25));
443                         structure.setEndChainID(cRead.charAt(31));
444                         structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(33, 37).trim()));
445                         structure.setEndInsertionCode(cRead.charAt(37));
446                         oBP.addStructure(structure);
447                     } else if ("SHEET ".equalsIgnoreCase(cCol)) {
448                         PDBStructure structure = new PDBStructure();
449                         structure.setStructureType(PDBStructure.SHEET);
450                         structure.setStartChainID(cRead.charAt(21));
451                         structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(22, 26).trim()));
452                         structure.setStartInsertionCode(cRead.charAt(26));
453                         structure.setEndChainID(cRead.charAt(32));
454                         structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(33, 37).trim()));
455                         structure.setEndInsertionCode(cRead.charAt(37));
456                         oBP.addStructure(structure);
457                     } else if ("TURN  ".equalsIgnoreCase(cCol)) {
458                         PDBStructure structure = new PDBStructure();
459                         structure.setStructureType(PDBStructure.TURN);
460                         structure.setStartChainID(cRead.charAt(19));
461                         structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(20, 24).trim()));
462                         structure.setStartInsertionCode(cRead.charAt(24));
463                         structure.setEndChainID(cRead.charAt(30));
464                         structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(31, 35).trim()));
465                         structure.setEndInsertionCode(cRead.charAt(35));
466                         oBP.addStructure(structure);
467                     } // ignore all other commands
468                 }
469             } while (_oInput.ready() && (cRead != null));
470         } catch (IOException | IllegalArgumentException | CDKException e) {
471             logger.error("Found a problem at line:");
472             logger.error(cRead);
473             logger.error("01234567890123456789012345678901234567890123456789012345678901234567890123456789");
474             logger.error("          1         2         3         4         5         6         7         ");
475             logger.error("  error: " + e.getMessage());
476             logger.debug(e);
477             e.printStackTrace();
478         }
479 
480         // try to close the Input
481         try {
482             _oInput.close();
483         } catch (Exception e) {
484             logger.debug(e);
485         }
486 
487         // Set all the dependencies
488         oModel.setMoleculeSet(oSet);
489         oSeq.addChemModel(oModel);
490         oFile.addChemSequence(oSeq);
491 
492         return oFile;
493     }
494 
addBond(IAtomContainer molecule, int bondAtomNo, int bondedAtomNo)495     private void addBond(IAtomContainer molecule, int bondAtomNo, int bondedAtomNo) {
496         IAtom firstAtom = atomNumberMap.get(bondAtomNo);
497         IAtom secondAtom = atomNumberMap.get(bondedAtomNo);
498         if (firstAtom == null) {
499             logger.error("Could not find bond start atom in map with serial id: ", bondAtomNo);
500         }
501         if (secondAtom == null) {
502             logger.error("Could not find bond target atom in map with serial id: ", bondAtomNo);
503         }
504         IBond bond = firstAtom.getBuilder().newInstance(IBond.class, firstAtom, secondAtom, IBond.Order.SINGLE);
505         for (int i = 0; i < bondsFromConnectRecords.size(); i++) {
506             IBond existingBond = (IBond) bondsFromConnectRecords.get(i);
507             IAtom a = existingBond.getBegin();
508             IAtom b = existingBond.getEnd();
509             if ((a.equals(firstAtom) && b.equals(secondAtom)) || (b.equals(firstAtom) && a.equals(secondAtom))) {
510                 // already stored
511                 return;
512             }
513         }
514         bondsFromConnectRecords.add(bond);
515         molecule.addBond(bond);
516     }
517 
createBondsWithRebondTool(IAtomContainer molecule)518     private boolean createBondsWithRebondTool(IAtomContainer molecule) {
519         RebondTool tool = new RebondTool(2.0, 0.5, 0.5);
520         try {
521             //			 configure atoms
522             AtomTypeFactory factory = AtomTypeFactory.getInstance("org/openscience/cdk/config/data/jmol_atomtypes.txt",
523                     molecule.getBuilder());
524             for (IAtom atom : molecule.atoms()) {
525                 try {
526                     IAtomType[] types = factory.getAtomTypes(atom.getSymbol());
527                     if (types.length > 0) {
528                         // just pick the first one
529                         AtomTypeManipulator.configure(atom, types[0]);
530                     } else {
531                         logger.warn("Could not configure atom with symbol: " + atom.getSymbol());
532                     }
533                 } catch (Exception e) {
534                     logger.warn("Could not configure atom (but don't care): " + e.getMessage());
535                     logger.debug(e);
536                 }
537             }
538             tool.rebond(molecule);
539         } catch (Exception e) {
540             logger.error("Could not rebond the polymer: " + e.getMessage());
541             logger.debug(e);
542         }
543         return true;
544     }
545 
isUpper(char c)546     private static boolean isUpper(char c) {
547         return c >= 'A' && c <= 'Z';
548     }
isLower(char c)549     private static boolean isLower(char c) {
550         return c >= 'a' && c <= 'z';
551     }
isDigit(char c)552     private static boolean isDigit(char c) {
553         return c >= '0' && c <= '9';
554     }
555 
parseAtomSymbol(String str)556     private String parseAtomSymbol(String str) {
557 
558         if (str == null || str.isEmpty())
559             return null;
560 
561         final int len = str.length();
562 
563         StringBuilder sym = new StringBuilder();
564 
565         // try grabbing from end of line
566 
567         if (len > 76 && isUpper(str.charAt(76))) {
568             sym.append(str.charAt(76));
569             if (len > 77 && isUpper(str.charAt(77)))
570                 sym.append(Character.toLowerCase(str.charAt(77)));
571             else if (len > 77 && isLower(str.charAt(77)))
572                 sym.append(Character.toLowerCase(str.charAt(77)));
573         } else if (len > 76 &&str.charAt(76) == ' ') {
574             if (len > 77 && isUpper(str.charAt(77)))
575                 sym.append(str.charAt(77));
576         }
577 
578         if (sym.length() > 0)
579             return sym.toString();
580 
581         // try getting from PDB atom name
582         if (len > 13 && isUpper(str.charAt(13))) {
583             if (str.charAt(12) == ' ') {
584                 sym.append(str.charAt(13));
585                 if (isLower(str.charAt(14)))
586                     sym.append(str.charAt(14));
587             } else if (isUpper(str.charAt(12))) {
588                 if (str.charAt(0) == 'A' && str.charAt(12) == 'H') {
589                     sym.append('H'); // ATOM record H is always H
590                 } else {
591                     sym.append(str.charAt(12));
592                     sym.append(Character.toLowerCase(str.charAt(13)));
593                 }
594             } else if (isDigit(str.charAt(12))) {
595                 sym.append(str.charAt(13));
596             }
597         }
598 
599         if (sym.length() > 0)
600             return sym.toString();
601 
602         return null;
603     }
604 
605     /**
606      * Creates an <code>Atom</code> and sets properties to their values from
607      * the ATOM or HETATM record. If the line is shorter than 80 characters, the
608      * information past 59 characters is treated as optional. If the line is
609      * shorter than 59 characters, a <code>RuntimeException</code> is thrown.
610      *
611      * @param cLine  the PDB ATOM or HEATATM record.
612      * @return the <code>Atom</code> created from the record.
613      * @throws RuntimeException if the line is too short (less than 59 characters).
614      */
readAtom(String cLine, int lineLength)615     private PDBAtom readAtom(String cLine, int lineLength) throws CDKException {
616         // a line can look like (two in old format, then two in new format):
617         //
618         //           1         2         3         4         5         6         7
619         // 01234567890123456789012345678901234567890123456789012345678901234567890123456789
620         // ATOM      1  O5*   C A   1      20.662  36.632  23.475  1.00 10.00      114D  45
621         // ATOM   1186 1H   ALA     1      10.105   5.945  -6.630  1.00  0.00      1ALE1288
622         // ATOM     31  CA  SER A   3      -0.891  17.523  51.925  1.00 28.64           C
623         // HETATM 3486 MG    MG A 302      24.885  14.008  59.194  1.00 29.42          MG+2
624         //
625         // note: the first two examples have the PDBID in col 72-75
626         // note: the last two examples have the element symbol in col 76-77
627         // note: the last (Mg hetatm) has a charge in col 78-79
628 
629         if (lineLength < 59) {
630             throw new RuntimeException("PDBReader error during readAtom(): line too short");
631         }
632 
633         boolean isHetatm = cLine.substring(0, 6).equals("HETATM");
634         String  atomName = cLine.substring(12, 16).trim();
635         String  resName  = cLine.substring(17, 20).trim();
636         String  symbol   = parseAtomSymbol(cLine);
637 
638         if (symbol == null)
639             handleError("Cannot parse symbol from " + atomName);
640 
641         PDBAtom oAtom = new PDBAtom(symbol, new Point3d(Double.parseDouble(cLine.substring(30, 38)),
642                 Double.parseDouble(cLine.substring(38, 46)), Double.parseDouble(cLine.substring(46, 54))));
643         if (useHetDictionary.isSet() && isHetatm) {
644             String cdkType = typeHetatm(resName, atomName);
645             oAtom.setAtomTypeName(cdkType);
646             if (cdkType != null) {
647                 try {
648                     cdkAtomTypeFactory.configure(oAtom);
649                 } catch (CDKException cdke) {
650                     logger.warn("Could not configure", resName, " ", atomName);
651                 }
652             }
653         }
654 
655         oAtom.setRecord(cLine);
656         oAtom.setSerial(Integer.parseInt(cLine.substring(6, 11).trim()));
657         oAtom.setName(atomName.trim());
658         oAtom.setAltLoc(cLine.substring(16, 17).trim());
659         oAtom.setResName(resName);
660         oAtom.setChainID(cLine.substring(21, 22).trim());
661         oAtom.setResSeq(cLine.substring(22, 26).trim());
662         oAtom.setICode(cLine.substring(26, 27).trim());
663         if (useHetDictionary.isSet() && isHetatm) {
664             oAtom.setID(oAtom.getResName() + "." + atomName);
665         } else {
666             oAtom.setAtomTypeName(oAtom.getResName() + "." + atomName);
667         }
668         if (lineLength >= 59) {
669             String frag = cLine.substring(54, Math.min(lineLength, 60)).trim();
670             if (frag.length() > 0) {
671                 oAtom.setOccupancy(Double.parseDouble(frag));
672             }
673         }
674         if (lineLength >= 65) {
675             String frag = cLine.substring(60, Math.min(lineLength, 66)).trim();
676             if (frag.length() > 0) {
677                 oAtom.setTempFactor(Double.parseDouble(frag));
678             }
679         }
680         if (lineLength >= 75) {
681             oAtom.setSegID(cLine.substring(72, Math.min(lineLength, 76)).trim());
682         }
683         //		if (lineLength >= 78) {
684         //            oAtom.setSymbol((new String(cLine.substring(76, 78))).trim());
685         //		}
686         if (lineLength >= 79) {
687             String frag;
688             if (lineLength >= 80) {
689                 frag = cLine.substring(78, 80).trim();
690             } else {
691                 frag = cLine.substring(78);
692             }
693             if (frag.length() > 0) {
694                 // see Format_v33_A4.pdf, p. 178
695                 if (frag.endsWith("-") || frag.endsWith("+")) {
696                     oAtom.setCharge(Double.parseDouble(new StringBuilder(frag).reverse().toString()));
697                 } else {
698                     oAtom.setCharge(Double.parseDouble(frag));
699                 }
700             }
701         }
702 
703         /* ***********************************************************************************
704          * It sets a flag in the property content of an atom, which is used when
705          * bonds are created to check if the atom is an OXT-record => needs
706          * special treatment.
707          */
708         String oxt = cLine.substring(13, 16).trim();
709 
710         if (oxt.equals("OXT")) {
711             oAtom.setOxt(true);
712         } else {
713             oAtom.setOxt(false);
714         }
715         /* ********************************************************************************** */
716 
717         return oAtom;
718     }
719 
typeHetatm(String resName, String atomName)720     private String typeHetatm(String resName, String atomName) {
721         if (hetDictionary == null) {
722             readHetDictionary();
723             cdkAtomTypeFactory = AtomTypeFactory.getInstance("org/openscience/cdk/dict/data/cdk-atom-types.owl",
724                     DefaultChemObjectBuilder.getInstance());
725         }
726         String key = resName + "." + atomName;
727         if (hetDictionary.containsKey(key)) {
728             return hetDictionary.get(key);
729         }
730         return null;
731     }
732 
readHetDictionary()733     private void readHetDictionary() {
734         try {
735             InputStream ins = getClass().getResourceAsStream(hetDictionaryPath);
736             BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(ins));
737             hetDictionary = new HashMap<String, String>();
738             String line;
739             while ((line = bufferedReader.readLine()) != null) {
740                 int colonIndex = line.indexOf(':');
741                 if (colonIndex == -1) continue;
742                 String typeKey = line.substring(0, colonIndex);
743                 String typeValue = line.substring(colonIndex + 1);
744                 if (typeValue.equals("null")) {
745                     hetDictionary.put(typeKey, null);
746                 } else {
747                     hetDictionary.put(typeKey, typeValue);
748                 }
749             }
750             bufferedReader.close();
751         } catch (IOException ioe) {
752             logger.error(ioe.getMessage());
753         }
754     }
755 
756     @Override
close()757     public void close() throws IOException {
758         _oInput.close();
759     }
760 
initIOSettings()761     private void initIOSettings() {
762         useRebondTool = addSetting(new BooleanIOSetting("UseRebondTool", IOSetting.Importance.LOW,
763                 "Should the PDBReader deduce bonding patterns?", "false"));
764         readConnect = addSetting(new BooleanIOSetting("ReadConnectSection", IOSetting.Importance.LOW,
765                 "Should the CONECT be read?", "true"));
766         useHetDictionary = addSetting(new BooleanIOSetting("UseHetDictionary", IOSetting.Importance.LOW,
767                 "Should the PDBReader use the HETATM dictionary for atom types?", "false"));
768     }
769 
customizeJob()770     public void customizeJob() {
771         for (IOSetting setting : getSettings()) {
772             fireIOSettingQuestion(setting);
773         }
774     }
775 
776 }
777