1 //
2 //  Copyright (C) 2013-2014 Greg Landrum and NextMove Software
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <cstring>
11 #include <cstdio>
12 #include <string>
13 #include <iostream>
14 #include <fstream>
15 #include <RDGeneral/BoostStartInclude.h>
16 #include <boost/lexical_cast.hpp>
17 #include <boost/algorithm/string.hpp>
18 #include <RDGeneral/BoostEndInclude.h>
19 #include <RDGeneral/BadFileException.h>
20 #include <RDGeneral/FileParseException.h>
21 #include <GraphMol/FileParsers/FileParsers.h>
22 #include <GraphMol/FileParsers/FileParserUtils.h>
23 #include <RDGeneral/LocaleSwitcher.h>
24 #include "ProximityBonds.h"
25 
26 #include <GraphMol/MonomerInfo.h>
27 
28 // PDBWriter support multiple "flavors" of PDB output
29 // flavor & 1 : Ignore atoms in alternate conformations and dummy atoms
30 // flavor & 2 : Read each MODEL into a separate molecule.
31 namespace RDKit {
32 
33 namespace {
34 
BCNAM(char A,char B,char C)35 constexpr int BCNAM(char A, char B, char C) { return (A << 16) | (B << 8) | C; }
36 
PDBAtomFromSymbol(const char * symb)37 Atom *PDBAtomFromSymbol(const char *symb) {
38   PRECONDITION(symb, "bad char ptr");
39   if (symb[0] == 'D' && !symb[1]) {
40     auto *result = new Atom(1);
41     result->setIsotope(2);
42     return result;
43   } else if (symb[0] == 'T' && !symb[1]) {
44     auto *result = new Atom(1);
45     result->setIsotope(3);
46     return result;
47   }
48   int elemno = PeriodicTable::getTable()->getAtomicNumber(symb);
49   return elemno > 0 ? new Atom(elemno) : (Atom *)nullptr;
50 }
51 
PDBAtomLine(RWMol * mol,const char * ptr,unsigned int len,unsigned int flavor,std::map<int,Atom * > & amap)52 void PDBAtomLine(RWMol *mol, const char *ptr, unsigned int len,
53                  unsigned int flavor, std::map<int, Atom *> &amap) {
54   PRECONDITION(mol, "bad mol");
55   PRECONDITION(ptr, "bad char ptr");
56   std::string tmp;
57 
58   if (len < 16) {
59     return;
60   }
61 
62   if ((flavor & 1) == 0) {
63     // Ignore alternate locations of atoms.
64     if (len >= 17 && ptr[16] != ' ' && ptr[16] != 'A' && ptr[16] != '1') {
65       return;
66     }
67     // Ignore XPLOR pseudo atoms
68     if (len >= 54 && !memcmp(ptr + 30, "9999.0009999.0009999.000", 24)) {
69       return;
70     }
71     // Ignore NMR pseudo atoms
72     if (ptr[12] == ' ' && ptr[13] == 'Q') {
73       return;
74     }
75     // Ignore PDB dummy residues
76     if (len >= 20 && !memcmp(ptr + 18, "DUM", 3)) {
77       return;
78     }
79   }
80 
81   int serialno;
82   tmp = std::string(ptr + 6, 5);
83   try {
84     serialno = FileParserUtils::toInt(tmp);
85   } catch (boost::bad_lexical_cast &) {
86     std::ostringstream errout;
87     errout << "Non-integer PDB serial number " << tmp;
88     throw FileParseException(errout.str());
89   }
90 
91   Atom *atom = (Atom *)nullptr;
92   char symb[3];
93 
94   // Attempt #1:  Atomic Symbol in columns 76 and 77
95   if (len >= 78) {
96     if (ptr[76] >= 'A' && ptr[76] <= 'Z') {
97       symb[0] = ptr[76];
98       if (ptr[77] >= 'A' && ptr[77] <= 'Z') {
99         symb[1] = ptr[77] + 32;  // tolower
100         symb[2] = '\0';
101       } else if (ptr[77] >= 'a' && ptr[77] <= 'z') {
102         symb[1] = ptr[77];
103         symb[2] = '\0';
104       } else {
105         symb[1] = '\0';
106       }
107     } else if (ptr[76] == ' ' && ptr[77] >= 'A' && ptr[77] <= 'Z') {
108       symb[0] = ptr[77];
109       symb[1] = '\0';
110     } else {
111       symb[0] = '\0';
112     }
113   } else if (len == 77) {
114     if (ptr[76] >= 'A' && ptr[76] <= 'Z') {
115       symb[0] = ptr[76];
116       symb[1] = '\0';
117     } else {
118       symb[0] = '\0';
119     }
120   } else {
121     symb[0] = '\0';
122   }
123 
124   if (symb[0]) {
125     atom = PDBAtomFromSymbol(symb);
126   }
127 
128   if (!atom) {
129     // Attempt #2: Atomic Symbol from PDB atom name
130     if (ptr[13] >= 'A' && ptr[13] <= 'Z') {
131       if (ptr[12] == ' ') {
132         symb[0] = ptr[13];
133         if (ptr[14] >= 'a' && ptr[14] <= 'z') {
134           symb[1] = ptr[14];
135           symb[2] = '\0';
136         } else {
137           symb[1] = '\0';
138         }
139       } else if (ptr[12] >= 'A' && ptr[12] <= 'Z') {
140         symb[0] = ptr[12];
141         symb[1] = ptr[13] + 32;  // tolower
142         symb[2] = '\0';
143         if (ptr[12] == 'H' && ptr[0] == 'A') {
144           // No He, Hf, Hg, Ho or Hs in ATOM records
145           symb[0] = 'H';
146           symb[1] = '\0';
147         }
148       } else if (ptr[12] >= '0' && ptr[12] <= '9') {
149         symb[0] = ptr[13];
150         symb[1] = '\0';
151       } else {
152         symb[0] = '\0';
153       }
154     } else {
155       symb[0] = '\0';
156     }
157 
158     if (symb[0]) {
159       atom = PDBAtomFromSymbol(symb);
160     }
161   }
162 
163   if (!atom) {
164     std::ostringstream errout;
165     errout << "Cannot determine element for PDB atom #" << serialno;
166     throw FileParseException(errout.str());
167   }
168 
169   mol->addAtom(atom, true, true);
170   amap[serialno] = atom;
171 
172   if (len >= 38) {
173     RDGeom::Point3D pos;
174     try {
175       pos.x = FileParserUtils::toDouble(std::string(ptr + 30, 8));
176       if (len >= 46) {
177         pos.y = FileParserUtils::toDouble(std::string(ptr + 38, 8));
178       }
179       if (len >= 54) {
180         pos.z = FileParserUtils::toDouble(std::string(ptr + 46, 8));
181       }
182     } catch (boost::bad_lexical_cast &) {
183       std::ostringstream errout;
184       errout << "Problem with coordinates for PDB atom #" << serialno;
185       throw FileParseException(errout.str());
186     }
187 
188     Conformer *conf;
189     if (!mol->getNumConformers()) {
190       conf = new RDKit::Conformer(mol->getNumAtoms());
191       conf->set3D(pos.z != 0.0);
192       conf->setId(0);
193       mol->addConformer(conf, false);
194     } else {
195       conf = &mol->getConformer();
196       if (pos.z != 0.0) {
197         conf->set3D(true);
198       }
199     }
200     conf->setAtomPos(atom->getIdx(), pos);
201   }
202 
203   if (len >= 79) {
204     int charge = 0;
205     if (ptr[78] >= '1' && ptr[78] <= '9') {
206       if (ptr[79] == '-') {
207         charge = -(ptr[78] - '0');
208       } else if (ptr[79] == '+' || ptr[79] == ' ' || !ptr[79]) {
209         charge = ptr[78] - '0';
210       }
211     } else if (ptr[78] == '+') {
212       if (ptr[79] >= '1' && ptr[79] <= '9') {
213         charge = ptr[79] - '0';
214       } else if (ptr[79] == '+') {
215         charge = 2;
216       } else if (ptr[79] != '0') {
217         charge = 1;
218       }
219     } else if (ptr[78] == '-') {
220       if (ptr[79] >= '1' && ptr[79] <= '9') {
221         charge = ptr[79] - '0';
222       } else if (ptr[79] == '-') {
223         charge = -2;
224       } else if (ptr[79] != '0') {
225         charge = -1;
226       }
227     } else if (ptr[78] == ' ') {
228       if (ptr[79] >= '1' && ptr[79] <= '9') {
229         charge = ptr[79] - '0';
230       } else if (ptr[79] == '+') {
231         charge = 1;
232       } else if (ptr[79] == '-') {
233         charge = -1;
234       }
235     }
236     if (charge != 0) {
237       atom->setFormalCharge(charge);
238     }
239   }
240 
241   tmp = std::string(ptr + 12, 4);
242   AtomPDBResidueInfo *info = new AtomPDBResidueInfo(tmp, serialno);
243   atom->setMonomerInfo(info);
244 
245   if (len >= 20) {
246     tmp = std::string(ptr + 17, 3);
247     // boost::trim(tmp);
248   } else {
249     tmp = "UNL";
250   }
251   info->setResidueName(tmp);
252   if (ptr[0] == 'H') {
253     info->setIsHeteroAtom(true);
254   }
255   if (len >= 17) {
256     tmp = std::string(ptr + 16, 1);
257   } else {
258     tmp = " ";
259   }
260   info->setAltLoc(tmp);
261   if (len >= 22) {
262     tmp = std::string(ptr + 21, 1);
263   } else {
264     tmp = " ";
265   }
266   info->setChainId(tmp);
267   if (len >= 27) {
268     tmp = std::string(ptr + 26, 1);
269   } else {
270     tmp = " ";
271   }
272   info->setInsertionCode(tmp);
273 
274   int resno = 1;
275   if (len >= 26) {
276     try {
277       resno = FileParserUtils::toInt(std::string(ptr + 22, 4));
278     } catch (boost::bad_lexical_cast &) {
279       std::ostringstream errout;
280       errout << "Problem with residue number for PDB atom #" << serialno;
281       throw FileParseException(errout.str());
282     }
283   }
284   info->setResidueNumber(resno);
285 
286   double occup = 1.0;
287   if (len >= 60) {
288     try {
289       occup = FileParserUtils::toDouble(std::string(ptr + 54, 6));
290     } catch (boost::bad_lexical_cast &) {
291       std::ostringstream errout;
292       errout << "Problem with occupancy for PDB atom #" << serialno;
293       throw FileParseException(errout.str());
294     }
295   }
296   info->setOccupancy(occup);
297 
298   double bfactor = 0.0;
299   if (len >= 66) {
300     try {
301       bfactor = FileParserUtils::toDouble(std::string(ptr + 60, 6));
302     } catch (boost::bad_lexical_cast &) {
303       std::ostringstream errout;
304       errout << "Problem with temperature factor for PDB atom #" << serialno;
305       throw FileParseException(errout.str());
306     }
307   }
308   info->setTempFactor(bfactor);
309 }
310 
PDBBondLine(RWMol * mol,const char * ptr,unsigned int len,std::map<int,Atom * > & amap,std::map<Bond *,int> & bmap)311 void PDBBondLine(RWMol *mol, const char *ptr, unsigned int len,
312                  std::map<int, Atom *> &amap, std::map<Bond *, int> &bmap) {
313   PRECONDITION(mol, "bad mol");
314   PRECONDITION(ptr, "bad char ptr");
315 
316   if (len < 16) {
317     return;
318   }
319 
320   std::string tmp(ptr + 6, 5);
321   bool fail = false;
322   int src, dst;
323 
324   try {
325     src = FileParserUtils::toInt(tmp);
326     if (amap.find(src) == amap.end()) {
327       return;
328     }
329   } catch (boost::bad_lexical_cast &) {
330     fail = true;
331   }
332 
333   if (!fail) {
334     if (len > 41) {
335       len = 41;
336     }
337     for (unsigned int pos = 11; pos + 5 <= len; pos += 5) {
338       if (!memcmp(ptr + pos, "     ", 5)) {
339         break;
340       }
341       try {
342         dst = FileParserUtils::toInt(std::string(ptr + pos, 5));
343         if (dst == src || amap.find(dst) == amap.end()) {
344           continue;
345         }
346       } catch (boost::bad_lexical_cast &) {
347         fail = true;
348       }
349 
350       if (!fail) {
351         Bond *bond =
352             mol->getBondBetweenAtoms(amap[src]->getIdx(), amap[dst]->getIdx());
353         if (bond && bond->getBondType() != Bond::ZERO) {
354           // Here we use a single byte bitmap to count duplicates
355           // Low nibble counts src < dst, high nibble for src > dst
356           int seen = bmap[bond];
357           if (src < dst) {
358             if ((seen & 0x0f) == 0x01) {
359               bmap[bond] = seen | 0x02;
360               if ((seen & 0x20) == 0) {
361                 bond->setBondType(Bond::DOUBLE);
362               }
363             } else if ((seen & 0x0f) == 0x03) {
364               bmap[bond] = seen | 0x04;
365               if ((seen & 0x40) == 0) {
366                 bond->setBondType(Bond::TRIPLE);
367               }
368             } else if ((seen & 0x0f) == 0x07) {
369               bmap[bond] = seen | 0x08;
370               if ((seen & 0x80) == 0) {
371                 bond->setBondType(Bond::QUADRUPLE);
372               }
373             }
374           } else /* src < dst */ {
375             if ((seen & 0xf0) == 0x10) {
376               bmap[bond] = seen | 0x20;
377               if ((seen & 0x02) == 0) {
378                 bond->setBondType(Bond::DOUBLE);
379               }
380             } else if ((seen & 0xf0) == 0x30) {
381               bmap[bond] = seen | 0x40;
382               if ((seen & 0x04) == 0) {
383                 bond->setBondType(Bond::TRIPLE);
384               }
385             } else if ((seen & 0xf0) == 0x70) {
386               bmap[bond] = seen | 0x80;
387               if ((seen & 0x08) == 0) {
388                 bond->setBondType(Bond::QUADRUPLE);
389               }
390             }
391           }
392         } else if (!bond) {
393           // Bonds in PDB file are explicit
394           // if they are not sanitize friendly, set their order to zero
395           if (IsBlacklistedPair(amap[src], amap[dst])) {
396             bond = new Bond(Bond::ZERO);
397           } else {
398             bond = new Bond(Bond::SINGLE);
399           }
400           bond->setOwningMol(mol);
401           bond->setBeginAtom(amap[src]);
402           bond->setEndAtom(amap[dst]);
403           mol->addBond(bond, true);
404           bmap[bond] = (src < dst) ? 0x01 : 0x10;
405         } else {
406           break;
407         }
408       } else {
409         break;
410       }
411     }
412   }
413 
414   if (fail) {
415     std::ostringstream errout;
416     errout << "Problem with CONECT record for PDB atom #" << tmp;
417     throw FileParseException(errout.str());
418   }
419 }
420 
PDBTitleLine(RWMol * mol,const char * ptr,unsigned int len)421 void PDBTitleLine(RWMol *mol, const char *ptr, unsigned int len) {
422   PRECONDITION(mol, "bad mol");
423   PRECONDITION(ptr, "bad char ptr");
424   std::string title;
425   while (ptr[len - 1] == ' ') {
426     len--;
427   }
428   if (ptr[len - 1] == ';') {
429     len--;
430   }
431   if (len > 21 && !strncmp(ptr + 10, " MOLECULE: ", 11)) {
432     title = std::string(ptr + 21, len - 21);
433   } else if (len > 10) {
434     title = std::string(ptr + 10, len - 10);
435   }
436   if (!title.empty()) {
437     mol->setProp(common_properties::_Name, title);
438   }
439 }
440 
PDBConformerLine(RWMol * mol,const char * ptr,unsigned int len,Conformer * & conf,int & conformer_atmidx)441 void PDBConformerLine(RWMol *mol, const char *ptr, unsigned int len,
442                       Conformer *&conf, int &conformer_atmidx) {
443   PRECONDITION(mol, "bad mol");
444   PRECONDITION(ptr, "bad char ptr");
445 
446   if (len >= 38) {
447     RDGeom::Point3D pos;
448     try {
449       pos.x = FileParserUtils::toDouble(std::string(ptr + 30, 8));
450       if (len >= 46) {
451         pos.y = FileParserUtils::toDouble(std::string(ptr + 38, 8));
452       }
453       if (len >= 54) {
454         pos.z = FileParserUtils::toDouble(std::string(ptr + 46, 8));
455       }
456     } catch (boost::bad_lexical_cast &) {
457       std::ostringstream errout;
458       errout << "Problem with multi-conformer coordinates";
459       throw FileParseException(errout.str());
460     }
461 
462     if (conformer_atmidx == 0) {
463       conf = new RDKit::Conformer(mol->getNumAtoms());
464       conf->setId(mol->getNumConformers());
465       conf->set3D(pos.z != 0.0);
466       mol->addConformer(conf, false);
467     } else if (pos.z != 0.0) {
468       conf->set3D(true);
469     }
470 
471     if (conformer_atmidx < rdcast<int>(mol->getNumAtoms())) {
472       conf->setAtomPos(conformer_atmidx, pos);
473       conformer_atmidx++;
474     }
475   }
476 }
477 
478 // This function determines whether a standard atom name in
479 // in a recognized PDB amino acid should be chiral or not.
480 // This is used to avoid chirality on VAL.CG and LEU.CG.
StandardPDBChiralAtom(const char * resnam,const char * atmnam)481 bool StandardPDBChiralAtom(const char *resnam, const char *atmnam) {
482   switch (BCNAM(resnam[0], resnam[1], resnam[2])) {
483     case BCNAM('G', 'L', 'Y'):
484       return false;
485     case BCNAM('I', 'L', 'E'):
486     case BCNAM('T', 'H', 'R'):
487       // Alpha and beta carbons (" CA " and " CB ").
488       return atmnam[0] == ' ' && atmnam[1] == 'C' &&
489              (atmnam[2] == 'A' || atmnam[2] == 'B') && atmnam[3] == ' ';
490     case BCNAM('A', 'L', 'A'):
491     case BCNAM('A', 'R', 'G'):
492     case BCNAM('A', 'S', 'N'):
493     case BCNAM('A', 'S', 'P'):
494     case BCNAM('C', 'Y', 'S'):
495     case BCNAM('G', 'L', 'N'):
496     case BCNAM('G', 'L', 'U'):
497     case BCNAM('H', 'I', 'S'):
498     case BCNAM('L', 'E', 'U'):
499     case BCNAM('L', 'Y', 'S'):
500     case BCNAM('M', 'E', 'T'):
501     case BCNAM('P', 'H', 'E'):
502     case BCNAM('P', 'R', 'O'):
503     case BCNAM('S', 'E', 'R'):
504     case BCNAM('T', 'R', 'P'):
505     case BCNAM('T', 'Y', 'R'):
506     case BCNAM('V', 'A', 'L'):
507       return atmnam[0] == ' ' && atmnam[1] == 'C' && atmnam[2] == 'A' &&
508              atmnam[3] == ' ';
509   }
510   return false;
511 }
512 
StandardPDBResidueChirality(RWMol * mol)513 void StandardPDBResidueChirality(RWMol *mol) {
514   for (ROMol::AtomIterator atomIt = mol->beginAtoms();
515        atomIt != mol->endAtoms(); ++atomIt) {
516     Atom *atom = *atomIt;
517     if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
518       auto *info = (AtomPDBResidueInfo *)atom->getMonomerInfo();
519       if (info && info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE &&
520           !info->getIsHeteroAtom() &&
521           !StandardPDBChiralAtom(info->getResidueName().c_str(),
522                                  info->getName().c_str())) {
523         atom->setChiralTag(Atom::CHI_UNSPECIFIED);
524         if (atom->hasProp(common_properties::_CIPCode)) {
525           atom->clearProp(common_properties::_CIPCode);
526         }
527       }
528     }
529   }
530 }
531 
BasicPDBCleanup(RWMol & mol)532 void BasicPDBCleanup(RWMol &mol) {
533   ROMol::VERTEX_ITER atBegin, atEnd;
534   boost::tie(atBegin, atEnd) = mol.getVertices();
535   while (atBegin != atEnd) {
536     Atom *atom = mol[*atBegin];
537     atom->calcExplicitValence(false);
538 
539     // correct four-valent neutral N -> N+
540     // This was github #1029
541     if (atom->getAtomicNum() == 7 && atom->getFormalCharge() == 0 &&
542         atom->getExplicitValence() == 4) {
543       atom->setFormalCharge(1);
544     }
545     ++atBegin;
546   }
547 }
548 
parsePdbBlock(RWMol * & mol,const char * str,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)549 void parsePdbBlock(RWMol *&mol, const char *str, bool sanitize, bool removeHs,
550                    unsigned int flavor, bool proximityBonding) {
551   PRECONDITION(str, "bad char ptr");
552   std::map<int, Atom *> amap;
553   std::map<Bond *, int> bmap;
554   Utils::LocaleSwitcher ls;
555   bool multi_conformer = false;
556   int conformer_atmidx = 0;
557   Conformer *conf = nullptr;
558 
559   while (*str) {
560     unsigned int len;
561     const char *next = str + 1;
562     for (;;) {
563       if (*next == '\r') {
564         len = (unsigned int)(next - str);
565         if (next[1] == '\n') {
566           next += 2;
567         } else {
568           next++;
569         }
570         break;
571       } else if (*next == '\n') {
572         len = (unsigned int)(next - str);
573         next++;
574         break;
575       } else if (*next == '\0') {
576         len = (unsigned int)(next - str);
577         break;
578       }
579       next++;
580     }
581 
582     // ATOM records
583     if (str[0] == 'A' && str[1] == 'T' && str[2] == 'O' && str[3] == 'M' &&
584         str[4] == ' ' && str[5] == ' ') {
585       if (!multi_conformer) {
586         if (!mol) {
587           mol = new RWMol();
588         }
589         PDBAtomLine(mol, str, len, flavor, amap);
590       } else {
591         PDBConformerLine(mol, str, len, conf, conformer_atmidx);
592       }
593       // HETATM records
594     } else if (str[0] == 'H' && str[1] == 'E' && str[2] == 'T' &&
595                str[3] == 'A' && str[4] == 'T' && str[5] == 'M') {
596       if (!multi_conformer) {
597         if (!mol) {
598           mol = new RWMol();
599         }
600         PDBAtomLine(mol, str, len, flavor, amap);
601       } else {
602         PDBConformerLine(mol, str, len, conf, conformer_atmidx);
603       }
604       // CONECT records
605     } else if (str[0] == 'C' && str[1] == 'O' && str[2] == 'N' &&
606                str[3] == 'E' && str[4] == 'C' && str[5] == 'T') {
607       if (mol && !multi_conformer) {
608         PDBBondLine(mol, str, len, amap, bmap);
609       }
610       // COMPND records
611     } else if (str[0] == 'C' && str[1] == 'O' && str[2] == 'M' &&
612                str[3] == 'P' && str[4] == 'N' && str[5] == 'D') {
613       if (!mol) {
614         mol = new RWMol();
615       }
616       if (len > 10 &&
617           (str[9] == ' ' || !strncmp(str + 9, "2 MOLECULE: ", 12))) {
618         PDBTitleLine(mol, str, len);
619       }
620       // HEADER records
621     } else if (str[0] == 'H' && str[1] == 'E' && str[2] == 'A' &&
622                str[3] == 'D' && str[4] == 'E' && str[5] == 'R') {
623       if (!mol) {
624         mol = new RWMol();
625       }
626       PDBTitleLine(mol, str, len < 50 ? len : 50);
627       // ENDMDL records
628     } else if (str[0] == 'E' && str[1] == 'N' && str[2] == 'D' &&
629                str[3] == 'M' && str[4] == 'D' && str[5] == 'L') {
630       if (!mol) {
631         break;
632       }
633       multi_conformer = true;
634       conformer_atmidx = 0;
635       conf = nullptr;
636     }
637     str = next;
638   }
639 
640   if (!mol) {
641     return;
642   }
643 
644   if (proximityBonding) {
645     ConnectTheDots(mol, ctdIGNORE_H_H_CONTACTS);
646   }
647   // flavor & 8 doesn't encode double bonds
648   if (proximityBonding || flavor & 8) {
649     StandardPDBResidueBondOrders(mol);
650   }
651 
652   BasicPDBCleanup(*mol);
653 
654   if (sanitize) {
655     if (removeHs) {
656       MolOps::removeHs(*mol, false, false);
657     } else {
658       MolOps::sanitizeMol(*mol);
659     }
660   } else {
661     // we need some properties for the chiral setup
662     mol->updatePropertyCache(false);
663   }
664 
665   /* Set tetrahedral chirality from 3D co-ordinates */
666   MolOps::assignChiralTypesFrom3D(*mol);
667   StandardPDBResidueChirality(mol);
668 }
669 }  // namespace
670 
PDBBlockToMol(const char * str,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)671 RWMol *PDBBlockToMol(const char *str, bool sanitize, bool removeHs,
672                      unsigned int flavor, bool proximityBonding) {
673   RWMol *mol = nullptr;
674   try {
675     parsePdbBlock(mol, str, sanitize, removeHs, flavor, proximityBonding);
676   } catch (...) {
677     delete mol;
678     throw;
679   }
680 
681   return mol;
682 }
683 
PDBBlockToMol(const std::string & str,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)684 RWMol *PDBBlockToMol(const std::string &str, bool sanitize, bool removeHs,
685                      unsigned int flavor, bool proximityBonding) {
686   return PDBBlockToMol(str.c_str(), sanitize, removeHs, flavor,
687                        proximityBonding);
688 }
689 
PDBDataStreamToMol(std::istream * inStream,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)690 RWMol *PDBDataStreamToMol(std::istream *inStream, bool sanitize, bool removeHs,
691                           unsigned int flavor, bool proximityBonding) {
692   PRECONDITION(inStream, "bad stream");
693   std::string buffer;
694   while (!inStream->eof() && !inStream->fail()) {
695     std::string line;
696     std::getline(*inStream, line);
697     buffer += line;
698     buffer += '\n';
699     auto ptr = line.c_str();
700     // Check for END
701     if (ptr[0] == 'E' && ptr[1] == 'N' && ptr[2] == 'D' &&
702         (ptr[3] == ' ' || ptr[3] == '\r' || ptr[3] == '\n' || !ptr[3])) {
703       break;
704     }
705     // Check for ENDMDL
706     if ((flavor & 2) != 0 && ptr[0] == 'E' && ptr[1] == 'N' && ptr[2] == 'D' &&
707         ptr[3] == 'M' && ptr[4] == 'D' && ptr[5] == 'L') {
708       break;
709     }
710   }
711   return PDBBlockToMol(buffer.c_str(), sanitize, removeHs, flavor,
712                        proximityBonding);
713 }
PDBDataStreamToMol(std::istream & inStream,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)714 RWMol *PDBDataStreamToMol(std::istream &inStream, bool sanitize, bool removeHs,
715                           unsigned int flavor, bool proximityBonding) {
716   return PDBDataStreamToMol(&inStream, sanitize, removeHs, flavor,
717                             proximityBonding);
718 }
719 
PDBFileToMol(const std::string & fileName,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)720 RWMol *PDBFileToMol(const std::string &fileName, bool sanitize, bool removeHs,
721                     unsigned int flavor, bool proximityBonding) {
722   std::ifstream ifs(fileName.c_str(), std::ios_base::binary);
723   if (!ifs || ifs.bad()) {
724     std::ostringstream errout;
725     errout << "Bad input file " << fileName;
726     throw BadFileException(errout.str());
727   }
728   return PDBDataStreamToMol(static_cast<std::istream *>(&ifs), sanitize,
729                             removeHs, flavor, proximityBonding);
730 }
731 }  // namespace RDKit
732