1 //
2 // Copyright (C) 2013-2014 Greg Landrum and NextMove Software
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <cstring>
11 #include <cstdio>
12 #include <string>
13 #include <iostream>
14 #include <fstream>
15 #include <RDGeneral/BoostStartInclude.h>
16 #include <boost/lexical_cast.hpp>
17 #include <boost/algorithm/string.hpp>
18 #include <RDGeneral/BoostEndInclude.h>
19 #include <RDGeneral/BadFileException.h>
20 #include <RDGeneral/FileParseException.h>
21 #include <GraphMol/FileParsers/FileParsers.h>
22 #include <GraphMol/FileParsers/FileParserUtils.h>
23 #include <RDGeneral/LocaleSwitcher.h>
24 #include "ProximityBonds.h"
25
26 #include <GraphMol/MonomerInfo.h>
27
28 // PDBWriter support multiple "flavors" of PDB output
29 // flavor & 1 : Ignore atoms in alternate conformations and dummy atoms
30 // flavor & 2 : Read each MODEL into a separate molecule.
31 namespace RDKit {
32
33 namespace {
34
BCNAM(char A,char B,char C)35 constexpr int BCNAM(char A, char B, char C) { return (A << 16) | (B << 8) | C; }
36
PDBAtomFromSymbol(const char * symb)37 Atom *PDBAtomFromSymbol(const char *symb) {
38 PRECONDITION(symb, "bad char ptr");
39 if (symb[0] == 'D' && !symb[1]) {
40 auto *result = new Atom(1);
41 result->setIsotope(2);
42 return result;
43 } else if (symb[0] == 'T' && !symb[1]) {
44 auto *result = new Atom(1);
45 result->setIsotope(3);
46 return result;
47 }
48 int elemno = PeriodicTable::getTable()->getAtomicNumber(symb);
49 return elemno > 0 ? new Atom(elemno) : (Atom *)nullptr;
50 }
51
PDBAtomLine(RWMol * mol,const char * ptr,unsigned int len,unsigned int flavor,std::map<int,Atom * > & amap)52 void PDBAtomLine(RWMol *mol, const char *ptr, unsigned int len,
53 unsigned int flavor, std::map<int, Atom *> &amap) {
54 PRECONDITION(mol, "bad mol");
55 PRECONDITION(ptr, "bad char ptr");
56 std::string tmp;
57
58 if (len < 16) {
59 return;
60 }
61
62 if ((flavor & 1) == 0) {
63 // Ignore alternate locations of atoms.
64 if (len >= 17 && ptr[16] != ' ' && ptr[16] != 'A' && ptr[16] != '1') {
65 return;
66 }
67 // Ignore XPLOR pseudo atoms
68 if (len >= 54 && !memcmp(ptr + 30, "9999.0009999.0009999.000", 24)) {
69 return;
70 }
71 // Ignore NMR pseudo atoms
72 if (ptr[12] == ' ' && ptr[13] == 'Q') {
73 return;
74 }
75 // Ignore PDB dummy residues
76 if (len >= 20 && !memcmp(ptr + 18, "DUM", 3)) {
77 return;
78 }
79 }
80
81 int serialno;
82 tmp = std::string(ptr + 6, 5);
83 try {
84 serialno = FileParserUtils::toInt(tmp);
85 } catch (boost::bad_lexical_cast &) {
86 std::ostringstream errout;
87 errout << "Non-integer PDB serial number " << tmp;
88 throw FileParseException(errout.str());
89 }
90
91 Atom *atom = (Atom *)nullptr;
92 char symb[3];
93
94 // Attempt #1: Atomic Symbol in columns 76 and 77
95 if (len >= 78) {
96 if (ptr[76] >= 'A' && ptr[76] <= 'Z') {
97 symb[0] = ptr[76];
98 if (ptr[77] >= 'A' && ptr[77] <= 'Z') {
99 symb[1] = ptr[77] + 32; // tolower
100 symb[2] = '\0';
101 } else if (ptr[77] >= 'a' && ptr[77] <= 'z') {
102 symb[1] = ptr[77];
103 symb[2] = '\0';
104 } else {
105 symb[1] = '\0';
106 }
107 } else if (ptr[76] == ' ' && ptr[77] >= 'A' && ptr[77] <= 'Z') {
108 symb[0] = ptr[77];
109 symb[1] = '\0';
110 } else {
111 symb[0] = '\0';
112 }
113 } else if (len == 77) {
114 if (ptr[76] >= 'A' && ptr[76] <= 'Z') {
115 symb[0] = ptr[76];
116 symb[1] = '\0';
117 } else {
118 symb[0] = '\0';
119 }
120 } else {
121 symb[0] = '\0';
122 }
123
124 if (symb[0]) {
125 atom = PDBAtomFromSymbol(symb);
126 }
127
128 if (!atom) {
129 // Attempt #2: Atomic Symbol from PDB atom name
130 if (ptr[13] >= 'A' && ptr[13] <= 'Z') {
131 if (ptr[12] == ' ') {
132 symb[0] = ptr[13];
133 if (ptr[14] >= 'a' && ptr[14] <= 'z') {
134 symb[1] = ptr[14];
135 symb[2] = '\0';
136 } else {
137 symb[1] = '\0';
138 }
139 } else if (ptr[12] >= 'A' && ptr[12] <= 'Z') {
140 symb[0] = ptr[12];
141 symb[1] = ptr[13] + 32; // tolower
142 symb[2] = '\0';
143 if (ptr[12] == 'H' && ptr[0] == 'A') {
144 // No He, Hf, Hg, Ho or Hs in ATOM records
145 symb[0] = 'H';
146 symb[1] = '\0';
147 }
148 } else if (ptr[12] >= '0' && ptr[12] <= '9') {
149 symb[0] = ptr[13];
150 symb[1] = '\0';
151 } else {
152 symb[0] = '\0';
153 }
154 } else {
155 symb[0] = '\0';
156 }
157
158 if (symb[0]) {
159 atom = PDBAtomFromSymbol(symb);
160 }
161 }
162
163 if (!atom) {
164 std::ostringstream errout;
165 errout << "Cannot determine element for PDB atom #" << serialno;
166 throw FileParseException(errout.str());
167 }
168
169 mol->addAtom(atom, true, true);
170 amap[serialno] = atom;
171
172 if (len >= 38) {
173 RDGeom::Point3D pos;
174 try {
175 pos.x = FileParserUtils::toDouble(std::string(ptr + 30, 8));
176 if (len >= 46) {
177 pos.y = FileParserUtils::toDouble(std::string(ptr + 38, 8));
178 }
179 if (len >= 54) {
180 pos.z = FileParserUtils::toDouble(std::string(ptr + 46, 8));
181 }
182 } catch (boost::bad_lexical_cast &) {
183 std::ostringstream errout;
184 errout << "Problem with coordinates for PDB atom #" << serialno;
185 throw FileParseException(errout.str());
186 }
187
188 Conformer *conf;
189 if (!mol->getNumConformers()) {
190 conf = new RDKit::Conformer(mol->getNumAtoms());
191 conf->set3D(pos.z != 0.0);
192 conf->setId(0);
193 mol->addConformer(conf, false);
194 } else {
195 conf = &mol->getConformer();
196 if (pos.z != 0.0) {
197 conf->set3D(true);
198 }
199 }
200 conf->setAtomPos(atom->getIdx(), pos);
201 }
202
203 if (len >= 79) {
204 int charge = 0;
205 if (ptr[78] >= '1' && ptr[78] <= '9') {
206 if (ptr[79] == '-') {
207 charge = -(ptr[78] - '0');
208 } else if (ptr[79] == '+' || ptr[79] == ' ' || !ptr[79]) {
209 charge = ptr[78] - '0';
210 }
211 } else if (ptr[78] == '+') {
212 if (ptr[79] >= '1' && ptr[79] <= '9') {
213 charge = ptr[79] - '0';
214 } else if (ptr[79] == '+') {
215 charge = 2;
216 } else if (ptr[79] != '0') {
217 charge = 1;
218 }
219 } else if (ptr[78] == '-') {
220 if (ptr[79] >= '1' && ptr[79] <= '9') {
221 charge = ptr[79] - '0';
222 } else if (ptr[79] == '-') {
223 charge = -2;
224 } else if (ptr[79] != '0') {
225 charge = -1;
226 }
227 } else if (ptr[78] == ' ') {
228 if (ptr[79] >= '1' && ptr[79] <= '9') {
229 charge = ptr[79] - '0';
230 } else if (ptr[79] == '+') {
231 charge = 1;
232 } else if (ptr[79] == '-') {
233 charge = -1;
234 }
235 }
236 if (charge != 0) {
237 atom->setFormalCharge(charge);
238 }
239 }
240
241 tmp = std::string(ptr + 12, 4);
242 AtomPDBResidueInfo *info = new AtomPDBResidueInfo(tmp, serialno);
243 atom->setMonomerInfo(info);
244
245 if (len >= 20) {
246 tmp = std::string(ptr + 17, 3);
247 // boost::trim(tmp);
248 } else {
249 tmp = "UNL";
250 }
251 info->setResidueName(tmp);
252 if (ptr[0] == 'H') {
253 info->setIsHeteroAtom(true);
254 }
255 if (len >= 17) {
256 tmp = std::string(ptr + 16, 1);
257 } else {
258 tmp = " ";
259 }
260 info->setAltLoc(tmp);
261 if (len >= 22) {
262 tmp = std::string(ptr + 21, 1);
263 } else {
264 tmp = " ";
265 }
266 info->setChainId(tmp);
267 if (len >= 27) {
268 tmp = std::string(ptr + 26, 1);
269 } else {
270 tmp = " ";
271 }
272 info->setInsertionCode(tmp);
273
274 int resno = 1;
275 if (len >= 26) {
276 try {
277 resno = FileParserUtils::toInt(std::string(ptr + 22, 4));
278 } catch (boost::bad_lexical_cast &) {
279 std::ostringstream errout;
280 errout << "Problem with residue number for PDB atom #" << serialno;
281 throw FileParseException(errout.str());
282 }
283 }
284 info->setResidueNumber(resno);
285
286 double occup = 1.0;
287 if (len >= 60) {
288 try {
289 occup = FileParserUtils::toDouble(std::string(ptr + 54, 6));
290 } catch (boost::bad_lexical_cast &) {
291 std::ostringstream errout;
292 errout << "Problem with occupancy for PDB atom #" << serialno;
293 throw FileParseException(errout.str());
294 }
295 }
296 info->setOccupancy(occup);
297
298 double bfactor = 0.0;
299 if (len >= 66) {
300 try {
301 bfactor = FileParserUtils::toDouble(std::string(ptr + 60, 6));
302 } catch (boost::bad_lexical_cast &) {
303 std::ostringstream errout;
304 errout << "Problem with temperature factor for PDB atom #" << serialno;
305 throw FileParseException(errout.str());
306 }
307 }
308 info->setTempFactor(bfactor);
309 }
310
PDBBondLine(RWMol * mol,const char * ptr,unsigned int len,std::map<int,Atom * > & amap,std::map<Bond *,int> & bmap)311 void PDBBondLine(RWMol *mol, const char *ptr, unsigned int len,
312 std::map<int, Atom *> &amap, std::map<Bond *, int> &bmap) {
313 PRECONDITION(mol, "bad mol");
314 PRECONDITION(ptr, "bad char ptr");
315
316 if (len < 16) {
317 return;
318 }
319
320 std::string tmp(ptr + 6, 5);
321 bool fail = false;
322 int src, dst;
323
324 try {
325 src = FileParserUtils::toInt(tmp);
326 if (amap.find(src) == amap.end()) {
327 return;
328 }
329 } catch (boost::bad_lexical_cast &) {
330 fail = true;
331 }
332
333 if (!fail) {
334 if (len > 41) {
335 len = 41;
336 }
337 for (unsigned int pos = 11; pos + 5 <= len; pos += 5) {
338 if (!memcmp(ptr + pos, " ", 5)) {
339 break;
340 }
341 try {
342 dst = FileParserUtils::toInt(std::string(ptr + pos, 5));
343 if (dst == src || amap.find(dst) == amap.end()) {
344 continue;
345 }
346 } catch (boost::bad_lexical_cast &) {
347 fail = true;
348 }
349
350 if (!fail) {
351 Bond *bond =
352 mol->getBondBetweenAtoms(amap[src]->getIdx(), amap[dst]->getIdx());
353 if (bond && bond->getBondType() != Bond::ZERO) {
354 // Here we use a single byte bitmap to count duplicates
355 // Low nibble counts src < dst, high nibble for src > dst
356 int seen = bmap[bond];
357 if (src < dst) {
358 if ((seen & 0x0f) == 0x01) {
359 bmap[bond] = seen | 0x02;
360 if ((seen & 0x20) == 0) {
361 bond->setBondType(Bond::DOUBLE);
362 }
363 } else if ((seen & 0x0f) == 0x03) {
364 bmap[bond] = seen | 0x04;
365 if ((seen & 0x40) == 0) {
366 bond->setBondType(Bond::TRIPLE);
367 }
368 } else if ((seen & 0x0f) == 0x07) {
369 bmap[bond] = seen | 0x08;
370 if ((seen & 0x80) == 0) {
371 bond->setBondType(Bond::QUADRUPLE);
372 }
373 }
374 } else /* src < dst */ {
375 if ((seen & 0xf0) == 0x10) {
376 bmap[bond] = seen | 0x20;
377 if ((seen & 0x02) == 0) {
378 bond->setBondType(Bond::DOUBLE);
379 }
380 } else if ((seen & 0xf0) == 0x30) {
381 bmap[bond] = seen | 0x40;
382 if ((seen & 0x04) == 0) {
383 bond->setBondType(Bond::TRIPLE);
384 }
385 } else if ((seen & 0xf0) == 0x70) {
386 bmap[bond] = seen | 0x80;
387 if ((seen & 0x08) == 0) {
388 bond->setBondType(Bond::QUADRUPLE);
389 }
390 }
391 }
392 } else if (!bond) {
393 // Bonds in PDB file are explicit
394 // if they are not sanitize friendly, set their order to zero
395 if (IsBlacklistedPair(amap[src], amap[dst])) {
396 bond = new Bond(Bond::ZERO);
397 } else {
398 bond = new Bond(Bond::SINGLE);
399 }
400 bond->setOwningMol(mol);
401 bond->setBeginAtom(amap[src]);
402 bond->setEndAtom(amap[dst]);
403 mol->addBond(bond, true);
404 bmap[bond] = (src < dst) ? 0x01 : 0x10;
405 } else {
406 break;
407 }
408 } else {
409 break;
410 }
411 }
412 }
413
414 if (fail) {
415 std::ostringstream errout;
416 errout << "Problem with CONECT record for PDB atom #" << tmp;
417 throw FileParseException(errout.str());
418 }
419 }
420
PDBTitleLine(RWMol * mol,const char * ptr,unsigned int len)421 void PDBTitleLine(RWMol *mol, const char *ptr, unsigned int len) {
422 PRECONDITION(mol, "bad mol");
423 PRECONDITION(ptr, "bad char ptr");
424 std::string title;
425 while (ptr[len - 1] == ' ') {
426 len--;
427 }
428 if (ptr[len - 1] == ';') {
429 len--;
430 }
431 if (len > 21 && !strncmp(ptr + 10, " MOLECULE: ", 11)) {
432 title = std::string(ptr + 21, len - 21);
433 } else if (len > 10) {
434 title = std::string(ptr + 10, len - 10);
435 }
436 if (!title.empty()) {
437 mol->setProp(common_properties::_Name, title);
438 }
439 }
440
PDBConformerLine(RWMol * mol,const char * ptr,unsigned int len,Conformer * & conf,int & conformer_atmidx)441 void PDBConformerLine(RWMol *mol, const char *ptr, unsigned int len,
442 Conformer *&conf, int &conformer_atmidx) {
443 PRECONDITION(mol, "bad mol");
444 PRECONDITION(ptr, "bad char ptr");
445
446 if (len >= 38) {
447 RDGeom::Point3D pos;
448 try {
449 pos.x = FileParserUtils::toDouble(std::string(ptr + 30, 8));
450 if (len >= 46) {
451 pos.y = FileParserUtils::toDouble(std::string(ptr + 38, 8));
452 }
453 if (len >= 54) {
454 pos.z = FileParserUtils::toDouble(std::string(ptr + 46, 8));
455 }
456 } catch (boost::bad_lexical_cast &) {
457 std::ostringstream errout;
458 errout << "Problem with multi-conformer coordinates";
459 throw FileParseException(errout.str());
460 }
461
462 if (conformer_atmidx == 0) {
463 conf = new RDKit::Conformer(mol->getNumAtoms());
464 conf->setId(mol->getNumConformers());
465 conf->set3D(pos.z != 0.0);
466 mol->addConformer(conf, false);
467 } else if (pos.z != 0.0) {
468 conf->set3D(true);
469 }
470
471 if (conformer_atmidx < rdcast<int>(mol->getNumAtoms())) {
472 conf->setAtomPos(conformer_atmidx, pos);
473 conformer_atmidx++;
474 }
475 }
476 }
477
478 // This function determines whether a standard atom name in
479 // in a recognized PDB amino acid should be chiral or not.
480 // This is used to avoid chirality on VAL.CG and LEU.CG.
StandardPDBChiralAtom(const char * resnam,const char * atmnam)481 bool StandardPDBChiralAtom(const char *resnam, const char *atmnam) {
482 switch (BCNAM(resnam[0], resnam[1], resnam[2])) {
483 case BCNAM('G', 'L', 'Y'):
484 return false;
485 case BCNAM('I', 'L', 'E'):
486 case BCNAM('T', 'H', 'R'):
487 // Alpha and beta carbons (" CA " and " CB ").
488 return atmnam[0] == ' ' && atmnam[1] == 'C' &&
489 (atmnam[2] == 'A' || atmnam[2] == 'B') && atmnam[3] == ' ';
490 case BCNAM('A', 'L', 'A'):
491 case BCNAM('A', 'R', 'G'):
492 case BCNAM('A', 'S', 'N'):
493 case BCNAM('A', 'S', 'P'):
494 case BCNAM('C', 'Y', 'S'):
495 case BCNAM('G', 'L', 'N'):
496 case BCNAM('G', 'L', 'U'):
497 case BCNAM('H', 'I', 'S'):
498 case BCNAM('L', 'E', 'U'):
499 case BCNAM('L', 'Y', 'S'):
500 case BCNAM('M', 'E', 'T'):
501 case BCNAM('P', 'H', 'E'):
502 case BCNAM('P', 'R', 'O'):
503 case BCNAM('S', 'E', 'R'):
504 case BCNAM('T', 'R', 'P'):
505 case BCNAM('T', 'Y', 'R'):
506 case BCNAM('V', 'A', 'L'):
507 return atmnam[0] == ' ' && atmnam[1] == 'C' && atmnam[2] == 'A' &&
508 atmnam[3] == ' ';
509 }
510 return false;
511 }
512
StandardPDBResidueChirality(RWMol * mol)513 void StandardPDBResidueChirality(RWMol *mol) {
514 for (ROMol::AtomIterator atomIt = mol->beginAtoms();
515 atomIt != mol->endAtoms(); ++atomIt) {
516 Atom *atom = *atomIt;
517 if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
518 auto *info = (AtomPDBResidueInfo *)atom->getMonomerInfo();
519 if (info && info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE &&
520 !info->getIsHeteroAtom() &&
521 !StandardPDBChiralAtom(info->getResidueName().c_str(),
522 info->getName().c_str())) {
523 atom->setChiralTag(Atom::CHI_UNSPECIFIED);
524 if (atom->hasProp(common_properties::_CIPCode)) {
525 atom->clearProp(common_properties::_CIPCode);
526 }
527 }
528 }
529 }
530 }
531
BasicPDBCleanup(RWMol & mol)532 void BasicPDBCleanup(RWMol &mol) {
533 ROMol::VERTEX_ITER atBegin, atEnd;
534 boost::tie(atBegin, atEnd) = mol.getVertices();
535 while (atBegin != atEnd) {
536 Atom *atom = mol[*atBegin];
537 atom->calcExplicitValence(false);
538
539 // correct four-valent neutral N -> N+
540 // This was github #1029
541 if (atom->getAtomicNum() == 7 && atom->getFormalCharge() == 0 &&
542 atom->getExplicitValence() == 4) {
543 atom->setFormalCharge(1);
544 }
545 ++atBegin;
546 }
547 }
548
parsePdbBlock(RWMol * & mol,const char * str,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)549 void parsePdbBlock(RWMol *&mol, const char *str, bool sanitize, bool removeHs,
550 unsigned int flavor, bool proximityBonding) {
551 PRECONDITION(str, "bad char ptr");
552 std::map<int, Atom *> amap;
553 std::map<Bond *, int> bmap;
554 Utils::LocaleSwitcher ls;
555 bool multi_conformer = false;
556 int conformer_atmidx = 0;
557 Conformer *conf = nullptr;
558
559 while (*str) {
560 unsigned int len;
561 const char *next = str + 1;
562 for (;;) {
563 if (*next == '\r') {
564 len = (unsigned int)(next - str);
565 if (next[1] == '\n') {
566 next += 2;
567 } else {
568 next++;
569 }
570 break;
571 } else if (*next == '\n') {
572 len = (unsigned int)(next - str);
573 next++;
574 break;
575 } else if (*next == '\0') {
576 len = (unsigned int)(next - str);
577 break;
578 }
579 next++;
580 }
581
582 // ATOM records
583 if (str[0] == 'A' && str[1] == 'T' && str[2] == 'O' && str[3] == 'M' &&
584 str[4] == ' ' && str[5] == ' ') {
585 if (!multi_conformer) {
586 if (!mol) {
587 mol = new RWMol();
588 }
589 PDBAtomLine(mol, str, len, flavor, amap);
590 } else {
591 PDBConformerLine(mol, str, len, conf, conformer_atmidx);
592 }
593 // HETATM records
594 } else if (str[0] == 'H' && str[1] == 'E' && str[2] == 'T' &&
595 str[3] == 'A' && str[4] == 'T' && str[5] == 'M') {
596 if (!multi_conformer) {
597 if (!mol) {
598 mol = new RWMol();
599 }
600 PDBAtomLine(mol, str, len, flavor, amap);
601 } else {
602 PDBConformerLine(mol, str, len, conf, conformer_atmidx);
603 }
604 // CONECT records
605 } else if (str[0] == 'C' && str[1] == 'O' && str[2] == 'N' &&
606 str[3] == 'E' && str[4] == 'C' && str[5] == 'T') {
607 if (mol && !multi_conformer) {
608 PDBBondLine(mol, str, len, amap, bmap);
609 }
610 // COMPND records
611 } else if (str[0] == 'C' && str[1] == 'O' && str[2] == 'M' &&
612 str[3] == 'P' && str[4] == 'N' && str[5] == 'D') {
613 if (!mol) {
614 mol = new RWMol();
615 }
616 if (len > 10 &&
617 (str[9] == ' ' || !strncmp(str + 9, "2 MOLECULE: ", 12))) {
618 PDBTitleLine(mol, str, len);
619 }
620 // HEADER records
621 } else if (str[0] == 'H' && str[1] == 'E' && str[2] == 'A' &&
622 str[3] == 'D' && str[4] == 'E' && str[5] == 'R') {
623 if (!mol) {
624 mol = new RWMol();
625 }
626 PDBTitleLine(mol, str, len < 50 ? len : 50);
627 // ENDMDL records
628 } else if (str[0] == 'E' && str[1] == 'N' && str[2] == 'D' &&
629 str[3] == 'M' && str[4] == 'D' && str[5] == 'L') {
630 if (!mol) {
631 break;
632 }
633 multi_conformer = true;
634 conformer_atmidx = 0;
635 conf = nullptr;
636 }
637 str = next;
638 }
639
640 if (!mol) {
641 return;
642 }
643
644 if (proximityBonding) {
645 ConnectTheDots(mol, ctdIGNORE_H_H_CONTACTS);
646 }
647 // flavor & 8 doesn't encode double bonds
648 if (proximityBonding || flavor & 8) {
649 StandardPDBResidueBondOrders(mol);
650 }
651
652 BasicPDBCleanup(*mol);
653
654 if (sanitize) {
655 if (removeHs) {
656 MolOps::removeHs(*mol, false, false);
657 } else {
658 MolOps::sanitizeMol(*mol);
659 }
660 } else {
661 // we need some properties for the chiral setup
662 mol->updatePropertyCache(false);
663 }
664
665 /* Set tetrahedral chirality from 3D co-ordinates */
666 MolOps::assignChiralTypesFrom3D(*mol);
667 StandardPDBResidueChirality(mol);
668 }
669 } // namespace
670
PDBBlockToMol(const char * str,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)671 RWMol *PDBBlockToMol(const char *str, bool sanitize, bool removeHs,
672 unsigned int flavor, bool proximityBonding) {
673 RWMol *mol = nullptr;
674 try {
675 parsePdbBlock(mol, str, sanitize, removeHs, flavor, proximityBonding);
676 } catch (...) {
677 delete mol;
678 throw;
679 }
680
681 return mol;
682 }
683
PDBBlockToMol(const std::string & str,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)684 RWMol *PDBBlockToMol(const std::string &str, bool sanitize, bool removeHs,
685 unsigned int flavor, bool proximityBonding) {
686 return PDBBlockToMol(str.c_str(), sanitize, removeHs, flavor,
687 proximityBonding);
688 }
689
PDBDataStreamToMol(std::istream * inStream,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)690 RWMol *PDBDataStreamToMol(std::istream *inStream, bool sanitize, bool removeHs,
691 unsigned int flavor, bool proximityBonding) {
692 PRECONDITION(inStream, "bad stream");
693 std::string buffer;
694 while (!inStream->eof() && !inStream->fail()) {
695 std::string line;
696 std::getline(*inStream, line);
697 buffer += line;
698 buffer += '\n';
699 auto ptr = line.c_str();
700 // Check for END
701 if (ptr[0] == 'E' && ptr[1] == 'N' && ptr[2] == 'D' &&
702 (ptr[3] == ' ' || ptr[3] == '\r' || ptr[3] == '\n' || !ptr[3])) {
703 break;
704 }
705 // Check for ENDMDL
706 if ((flavor & 2) != 0 && ptr[0] == 'E' && ptr[1] == 'N' && ptr[2] == 'D' &&
707 ptr[3] == 'M' && ptr[4] == 'D' && ptr[5] == 'L') {
708 break;
709 }
710 }
711 return PDBBlockToMol(buffer.c_str(), sanitize, removeHs, flavor,
712 proximityBonding);
713 }
PDBDataStreamToMol(std::istream & inStream,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)714 RWMol *PDBDataStreamToMol(std::istream &inStream, bool sanitize, bool removeHs,
715 unsigned int flavor, bool proximityBonding) {
716 return PDBDataStreamToMol(&inStream, sanitize, removeHs, flavor,
717 proximityBonding);
718 }
719
PDBFileToMol(const std::string & fileName,bool sanitize,bool removeHs,unsigned int flavor,bool proximityBonding)720 RWMol *PDBFileToMol(const std::string &fileName, bool sanitize, bool removeHs,
721 unsigned int flavor, bool proximityBonding) {
722 std::ifstream ifs(fileName.c_str(), std::ios_base::binary);
723 if (!ifs || ifs.bad()) {
724 std::ostringstream errout;
725 errout << "Bad input file " << fileName;
726 throw BadFileException(errout.str());
727 }
728 return PDBDataStreamToMol(static_cast<std::istream *>(&ifs), sanitize,
729 removeHs, flavor, proximityBonding);
730 }
731 } // namespace RDKit
732