1""" 2 Routines for PDB2PQR 3 4 This module contains the protein object used in PDB2PQR and methods 5 used to correct, analyze, and optimize that protein. 6 7 ---------------------------- 8 9 PDB2PQR -- An automated pipeline for the setup, execution, and analysis of 10 Poisson-Boltzmann electrostatics calculations 11 12 Copyright (c) 2002-2011, Jens Erik Nielsen, University College Dublin; 13 Nathan A. Baker, Battelle Memorial Institute, Developed at the Pacific 14 Northwest National Laboratory, operated by Battelle Memorial Institute, 15 Pacific Northwest Division for the U.S. Department Energy.; 16 Paul Czodrowski & Gerhard Klebe, University of Marburg. 17 18 All rights reserved. 19 20 Redistribution and use in source and binary forms, with or without modification, 21 are permitted provided that the following conditions are met: 22 23 * Redistributions of source code must retain the above copyright notice, 24 this list of conditions and the following disclaimer. 25 * Redistributions in binary form must reproduce the above copyright notice, 26 this list of conditions and the following disclaimer in the documentation 27 and/or other materials provided with the distribution. 28 * Neither the names of University College Dublin, Battelle Memorial Institute, 29 Pacific Northwest National Laboratory, US Department of Energy, or University 30 of Marburg nor the names of its contributors may be used to endorse or promote 31 products derived from this software without specific prior written permission. 32 33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 36 IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 37 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 38 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 39 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 40 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 41 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 42 OF THE POSSIBILITY OF SUCH DAMAGE. 43 44 ---------------------------- 45 46""" 47 48__date__ = "1 August 2008" 49__author__ = "Jens Erik Nielsen, Todd Dolinsky, Yong Huang" 50 51CELL_SIZE = 2 52BUMP_DIST = 2.0 53BUMP_HDIST = 1.5 54BUMP_HYDROGEN_SIZE = 0.5 55BUMP_HEAVY_SIZE = 1.0 56BONDED_SS_LIMIT = 2.5 57PEPTIDE_DIST = 1.7 58REPAIR_LIMIT = 10 59AAS = ["ALA", "ARG", "ASH", "ASN", "ASP", "CYS", "CYM", "GLN", "GLU", "GLH", "GLY", \ 60 "HIS", "HID", "HIE", "HIP", "HSD", "HSE", "HSP", "ILE", "LEU", "LYS", "LYN", \ 61 "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "TYM", "VAL"] 62NAS = ["A", "A5", "A3", "C", "C5", "C3", "G", "G5", "G3", "T", "T5", "T3", "U", \ 63 "U5", "U3", "RA", "RG", "RC", "RU", "DA", "DG", "DC", "DT"] 64 65import math 66import copy 67from pdb import * 68from utilities import * 69from quatfit import * 70from forcefield import * 71from structures import * 72from protein import * 73from definitions import * 74from StringIO import StringIO 75from errors import PDBInputError, PDBInternalError, PDB2PKAError 76from pprint import pformat 77 78 79class Routines: 80 def __init__(self, protein, verbose, definition=None): 81 """ 82 Initialize the Routines class. The class contains most 83 of the main routines that run PDB2PQR 84 85 Parameters 86 protein: The protein to run PDB2PQR on (Protein) 87 verbose: A flag to determine whether to write to 88 stdout 89 """ 90 self.protein = protein 91 self.definition = definition 92 self.aadef = None 93 self.verbose = verbose 94 self.warnings = [] 95 self.cells = {} 96 if definition != None: 97 self.aadef = definition.getAA() 98 self.nadef = definition.getNA() 99 100 101 def write(self, message, indent=0): 102 """ 103 Write a message to stdout for debugging if verbose 104 105 Parameters 106 message: The message to write (string) 107 indent : The indent level (int, default=0) 108 """ 109 out = "" 110 if self.verbose: 111 for i in range(indent): 112 out += "\t" 113 out += message 114 sys.stdout.write(out) 115 116 def getWarnings(self): 117 """ 118 Get all warnings generated from routines 119 """ 120 return self.warnings 121 122 def applyNameScheme(self, forcefield): 123 """ 124 Apply the naming scheme of the given forcefield to the atoms 125 within the protein 126 127 Parameters 128 forcefield: The forcefield object (forcefield) 129 130 """ 131 self.write("Applying the naming scheme to the protein...") 132 for residue in self.protein.getResidues(): 133 if isinstance(residue, (Amino, WAT, Nucleic)): 134 resname = residue.ffname 135 else: 136 resname = residue.name 137 138 for atom in residue.getAtoms(): 139 rname, aname = forcefield.getNames(resname, atom.name) 140 if resname not in ['LIG', 'WAT', 'ACE', 'NME'] and rname != None: 141 try: 142 if (residue.isNterm or residue.isCterm) and rname != residue.name: 143 rname = residue.name 144 except AttributeError: 145 pass 146 if aname != None and rname != None: 147 atom.resName = rname 148 atom.name = aname 149 150 self.write("Done.\n") 151 152 def applyForcefield(self, forcefield): 153 """ 154 Apply the forcefield to the atoms within the protein 155 156 Parameters 157 forcefield: The forcefield object (forcefield) 158 Returns 159 hitlist: A list of atoms that were found in 160 the forcefield (list) 161 misslist: A list of atoms that were not found in 162 the forcefield (list) 163 """ 164 self.write("Applying the forcefield to the protein...") 165 misslist = [] 166 hitlist = [] 167 for residue in self.protein.getResidues(): 168 if isinstance(residue, (Amino, WAT, Nucleic)): 169 resname = residue.ffname 170 else: 171 resname = residue.name 172 173 # Apply the parameters 174 175 for atom in residue.getAtoms(): 176 atomname = atom.get("name") 177 charge, radius = forcefield.getParams(resname, atomname) 178 if charge != None and radius != None: 179 atom.set("ffcharge", charge) 180 atom.set("radius", radius) 181 hitlist.append(atom) 182 else: 183 misslist.append(atom) 184 185 self.write("Done.\n") 186 return hitlist, misslist 187 188 def updateResidueTypes(self): 189 """ 190 Find the type of residue as notated in the Amino Acid definition 191 """ 192 self.write("Updating Residue Types... ") 193 for chain in self.protein.getChains(): 194 for residue in chain.get("residues"): 195 name = residue.get("name") 196 if name in AAS: 197 residue.set("type", 1) 198 elif name == "WAT": 199 residue.set("type", 3) 200 elif name in NAS: 201 residue.set("type", 4) 202 else: # Residue is a ligand or unknown 203 residue.set("type", 2) 204 205 self.write("Done\n") 206 207 def updateSSbridges(self): 208 """ 209 Check for SS-bridge partners, and if present, set appropriate 210 partners 211 """ 212 self.write("Updating SS bridges...\n") 213 SGpartners = {} 214 for residue in self.protein.getResidues(): 215 if isinstance(residue, CYS): 216 atom = residue.getAtom("SG") 217 if atom != None: 218 SGpartners[atom] = [] 219 220 for atom in SGpartners: 221 for partner in SGpartners: 222 if atom == partner or SGpartners[atom] != []: continue 223 dist = distance(atom.getCoords(), partner.getCoords()) 224 if dist < BONDED_SS_LIMIT: 225 SGpartners[atom].append(partner) 226 SGpartners[partner].append(atom) 227 228 for atom in SGpartners: 229 res1 = atom.get("residue") 230 numpartners = len(SGpartners[atom]) 231 if numpartners == 1: 232 partner = SGpartners[atom][0] 233 res2 = partner.get("residue") 234 res1.set("SSbonded", 1) 235 res1.set("SSbondedpartner", partner) 236 self.applyPatch("CYX", res1) 237 self.write("%s - %s\n" % (res1, res2), 1) 238 elif numpartners > 1: 239 error = "WARNING: %s has multiple potential " % res1 240 error += "SS-bridge partners\n" 241 self.write(error, 1) 242 self.warnings.append(error) 243 elif numpartners == 0: 244 self.write("%s is a free cysteine\n" % res1, 1) 245 self.write("Done.\n") 246 247 def updateInternalBonds(self): 248 """ 249 Update the internal bonding network using the reference 250 objects in each atom. 251 """ 252 for residue in self.protein.getResidues(): 253 if isinstance(residue, (Amino, WAT, Nucleic)): 254 for atom in residue.getAtoms(): 255 if not atom.hasReference(): continue 256 for bond in atom.reference.bonds: 257 if not residue.hasAtom(bond): continue 258 bondatom = residue.getAtom(bond) 259 if bondatom not in atom.bonds: 260 atom.addBond(bondatom) 261 262 def updateBonds(self): 263 """ 264 Update the bonding network of the protein. This happens 265 in 3 steps: 266 1. Applying the PEPTIDE patch to all Amino residues 267 so as to add reference for the N(i+1) and C(i-1) 268 atoms 269 2. UpdateInternalBonds for inter-residue linking 270 3. Set the links to the N(i+1) and C(i-1) atoms 271 """ 272 273 # Apply the peptide patch 274 275 for residue in self.protein.getResidues(): 276 if isinstance(residue, Amino): 277 if residue.isNterm or residue.isCterm: 278 continue 279 else: 280 self.applyPatch("PEPTIDE", residue) 281 282 # Update all internal bonds 283 284 self.updateInternalBonds() 285 286 # Set the peptide bond pointers 287 288 for chain in self.protein.getChains(): 289 for i in range(chain.numResidues() - 1): 290 res1 = chain.residues[i] 291 res2 = chain.residues[i + 1] 292 if not isinstance(res1, Amino) or not isinstance(res2, Amino): 293 continue 294 atom1 = res1.getAtom("C") 295 atom2 = res2.getAtom("N") 296 297 if atom1 != None: 298 res2.peptideC = atom1 299 if atom2 != None: 300 res1.peptideN = atom2 301 if atom1 == None or atom2 == None: 302 continue 303 304 if distance(atom1.getCoords(), atom2.getCoords()) > PEPTIDE_DIST: 305 text = "Gap in backbone detected between %s and %s!\n" % \ 306 (res1, res2) 307 self.write(text, 1) 308 self.warnings.append(text) 309 res2.peptideC = None 310 res1.peptideN = None 311 312 def applyPatch(self, patchname, residue): 313 """ 314 Apply a patch to the given residue. This is one of the key 315 functions in PDB2PQR. A similar function appears in 316 definitions.py - that version is needed for residue level 317 subtitutions so certain protonation states (i.e. CYM, HSE) 318 are detectatble on input. 319 320 This version looks up the particular patch name in the 321 patchmap stored in the protein, and then applies the 322 various commands to the reference and actual residue 323 structures. 324 325 See the inline comments for a more detailed explanation. 326 327 Parameters 328 patchname: The name of the patch (string) 329 residue: The residue to apply the patch to (residue) 330 """ 331 if patchname not in self.protein.patchmap: 332 raise PDBInternalError("Unable to find patch %s!" % patchname) 333 334 self.write('PATCH INFO: %s patched with %s\n' % (residue,patchname),1) 335 336 # Make a copy of the reference, i.e. a new reference for 337 # this patch. Two examples: 338 # PEPTIDE is a special case, as it applies to 339 # every residue. 340 # CTERM only applies to one specific residue, so a 341 # deep copy is used. 342 343 if patchname == "PEPTIDE": 344 newreference = residue.reference 345 else: 346 newreference = copy.deepcopy(residue.reference) 347 348 patch = self.protein.patchmap[patchname] 349 350 # Add atoms from patch 351 352 for atomname in patch.map: 353 newreference.map[atomname] = patch.map[atomname] 354 for bond in patch.map[atomname].bonds: 355 if bond not in newreference.map: continue 356 if atomname not in newreference.map[bond].bonds: 357 newreference.map[bond].bonds.append(atomname) 358 359 # Remove atoms as directed by patch 360 361 for remove in patch.remove: 362 if remove in residue.map: residue.removeAtom(remove) 363 if remove not in newreference.map: continue 364 removebonds = newreference.map[remove].bonds 365 del newreference.map[remove] 366 for bond in removebonds: 367 index = newreference.map[bond].bonds.index(remove) 368 del newreference.map[bond].bonds[index] 369 370 # Add the new dihedrals 371 372 for dihedral in patch.dihedrals: 373 newreference.dihedrals.append(dihedral) 374 375 # Point at the new reference 376 377 residue.reference = newreference 378 residue.patches.append(patchname) 379 380 # Rename atoms as directed by patch 381 382 for atom in residue.getAtoms(): 383 if atom.name in patch.altnames: 384 residue.renameAtom(atom.name, patch.altnames[atom.name]) 385 386 # Replace each atom's reference with the new one 387 388 for atomname in residue.map: 389 if newreference.hasAtom(atomname): 390 atom = residue.getAtom(atomname) 391 atom.reference = newreference.map[atomname] 392 393 def setStates(self): 394 """ 395 Set the state of each residue. This is the last step 396 before assigning the forcefield, but is necessary so 397 as to distinguish between various protonation states. 398 399 See aa.py for residue-specific functions. 400 """ 401 for residue in self.protein.getResidues(): 402 if isinstance(residue, (Amino, Nucleic)): 403 residue.setState() 404 405 def assignTermini(self, chain, neutraln=False, neutralc=False): 406 """ 407 Assign the termini for the given chain by looking at 408 the start and end residues. 409 """ 410 411 if len(chain.residues) == 0: 412 text = "Error: chain \"%s\" has 0 residues!" % chain.chainID 413 raise PDBInputError(text) 414 415 # Set the N-Terminus/ 5' Terminus 416 417 res0 = chain.residues[0] 418 if isinstance(res0, Amino): 419 res0.set("isNterm", 1) 420 if isinstance(res0, PRO): 421 self.applyPatch("NEUTRAL-NTERM", res0) 422 elif neutraln: 423 self.applyPatch("NEUTRAL-NTERM", res0) 424 else: 425 self.applyPatch("NTERM", res0) 426 elif isinstance(res0, Nucleic): 427 res0.set("is5term", 1) 428 self.applyPatch("5TERM", res0) 429 430 # Set the C-Terminus/ 3' Terminus 431 432 reslast = chain.residues[-1] 433 if isinstance(reslast, Amino): 434 reslast.set("isCterm", 1) 435 if neutralc: 436 self.applyPatch("NEUTRAL-CTERM", reslast) 437 else: 438 self.applyPatch("CTERM", reslast) 439 elif isinstance(reslast, Nucleic): 440 reslast.set("is3term", 1) 441 self.applyPatch("3TERM", reslast) 442 else: 443 for i in range(len(chain.residues)): 444 resthis = chain.residues[-1 - i] 445 if isinstance(resthis, Amino): 446 resthis.set("isCterm", 1) 447 if neutralc: 448 self.applyPatch("NEUTRAL-CTERM", resthis) 449 else: 450 self.applyPatch("CTERM", resthis) 451 break 452 elif resthis.name in ["NH2", "NME"]: break 453 elif isinstance(resthis, Nucleic): 454 resthis.set("is3term", 1) 455 self.applyPatch("3TERM", resthis) 456 break 457 458 def setTermini(self, neutraln=False, neutralc=False): 459 """ 460 Set the termini for the protein. First set all known 461 termini by looking at the ends of the chain. Then 462 examine each residue, looking for internal chain breaks. 463 """ 464 465 self.write("Setting the termini... \n") 466 467 # First assign the known termini 468 469 for chain in self.protein.getChains(): 470 self.assignTermini(chain, neutraln, neutralc) 471 472 # Now determine if there are any hidden chains 473 474 letters = string.ascii_uppercase + string.ascii_lowercase 475 c = 0 476 477 while c < len(self.protein.getChains()): 478 chain = self.protein.chains[c] 479 reslist = [] 480 481 origlist = [] 482 483 # origlist holds the original residue list for the chain 484 485 for residue in chain.getResidues(): 486 origlist.append(residue) 487 488 for residue in origlist: 489 reslist.append(residue) 490 oldid = residue.chainID 491 492 # Look for ending termini 493 494 fixflag = 0 495 if isinstance(residue, Amino): 496 if (residue.hasAtom("OXT") and not residue.isCterm): 497 fixflag = 1 498 499 elif isinstance(residue, Nucleic): 500 if ((residue.hasAtom("H3T") or residue.name.endswith("3"))\ 501 and not residue.is3term): 502 fixflag = 1 503 504 if fixflag: 505 506 # Get an available chain ID 507 chainid = letters[0] 508 id = 0 509 idLength = 1 510 while chainid in self.protein.chainmap: 511 id += 1 512 if id >= len(letters): 513 idLength += 1 514 id = 0 515 chainid = letters[id] * idLength 516 517 if(idLength > 1): 518 message = 'Warning: Reusing chain id: ' + chainid[0] + '\n' 519 self.write(message) 520 521 # Make a new chain with these residues 522 newchain = Chain(chainid[0]) 523 524 self.protein.chainmap[chainid] = newchain 525 self.protein.chains.insert(c, newchain) 526 527 for res in reslist: 528 newchain.addResidue(res) 529 chain.residues.remove(res) 530 res.setChainID(chainid[0]) 531 532 self.assignTermini(chain, neutraln, neutralc) 533 self.assignTermini(newchain, neutraln, neutralc) 534 535 reslist = [] 536 c += 1 537 538 c += 1 539 540 # Update the final chain's chainID if it is "" unless it's all water 541 542 if "" in self.protein.chainmap: 543 544 notwat = 0 545 for res in chain.residues: 546 if not isinstance(res, WAT): 547 notwat = 1 548 break 549 550 if notwat == 0: 551 self.write("Done.\n") 552 return 553 554 chain = self.protein.chainmap[""] 555 chainid = letters[0] 556 id = 0 557 idLength = 1 558 while chainid in self.protein.chainmap: 559 id += 1 560 if id >= len(letters): 561 idLength += 1 562 id = 0 563 chainid = letters[id] * idLength 564 565 if(idLength > 1): 566 message = 'Warning: Reusing chain id: ' + chainid[0] + '\n' 567 self.write(message) 568 569 # Use the new chainID 570 571 self.protein.chainmap[chainid] = chain 572 del self.protein.chainmap[""] 573 574 for res in chain.residues: 575 res.setChainID(chainid[0]) 576 577 self.write("Done.\n") 578 579 580 def findMissingHeavy(self): 581 """ 582 Repair residues that contain missing heavy (non-Hydrogen) atoms 583 """ 584 self.write("Checking for missing heavy atoms... \n") 585 misscount = 0 586 heavycount = 0 587 for residue in self.protein.getResidues(): 588 if not isinstance(residue, (Amino, Nucleic)): continue 589 590 # Check for Missing Heavy Atoms 591 592 for refatomname in residue.reference.map: 593 if refatomname.startswith("H"): continue 594 if refatomname in ["N+1", "C-1"]: continue 595 if refatomname in ["O1P", "O2P"]: 596 if residue.hasAtom("OP1") and residue.hasAtom("OP2"): continue 597 heavycount += 1 598 if not residue.hasAtom(refatomname): 599 self.write("Missing %s in %s\n" % \ 600 (refatomname, residue), 1) 601 misscount += 1 602 residue.addMissing(refatomname) 603 604 # Check for Extra Atoms 605 606 atomlist = [] 607 for atom in residue.get("atoms"): 608 atomlist.append(atom) 609 610 for atom in atomlist: 611 atomname = atom.get("name") 612 if atomname in ["OP1", "OP2"] and residue.reference.hasAtom("O1P") \ 613 and residue.reference.hasAtom("O2P"): continue 614 if not residue.reference.hasAtom(atomname): 615 self.write("Extra atom %s in %s! - " % \ 616 (atomname, residue), 1) 617 residue.removeAtom(atomname) 618 self.write("Deleted this atom.\n") 619 620 if heavycount == 0: 621 raise PDBInputError("No heavy atoms found. " + 622 "You may also see this message if PDB2PQR does not have parameters for any residue in your protein.") 623 624 misspct = 100.0 * float(misscount) / heavycount 625 if misspct > REPAIR_LIMIT: 626 error = "This PDB file is missing too many (%i out of " % misscount 627 error += "%i, %.2f%%) heavy atoms to accurately repair the file. " % \ 628 (heavycount, misspct) 629 error += "The current repair limit is set at %i%%. " % REPAIR_LIMIT 630 error += "You may also see this message if PDB2PQR does not have parameters for enough residues in your protein." 631 raise PDBInputError(error) 632 elif misscount > 0: 633 self.write("Missing %i out of %i heavy atoms (%.2f percent) - " % \ 634 (misscount, heavycount, misspct)) 635 self.write("Will attempt to repair.\n") 636 self.repairHeavy() 637 else: 638 self.write("No heavy atoms found missing - Done.\n") 639 640 @staticmethod 641 def rebuildTetrahedral(residue, atomname): 642 """ 643 Rebuild a tetrahedral hydrogen group. This is necessary 644 due to the shortcomings of the quatfit routine - given a 645 tetrahedral geometry and two existing hydrogens, the 646 quatfit routines have two potential solutions. This function 647 uses basic tetrahedral geometry to fix this issue. 648 649 Parameters 650 residue: The residue in question (residue) 651 atomname: The atomname to add (string) 652 Returns 653 1 if successful, 0 otherwise 654 """ 655 656 hcount = 0 657 nextatomname = None 658 659 atomref = residue.reference.map.get(atomname) 660 if atomref is None: 661 return False 662 bondname = atomref.bonds[0] 663 664 # Return if the bonded atom does not exist 665 666 if not residue.hasAtom(bondname): 667 return False 668 669 # This group is tetrahedral if bondatom has 4 bonds, 670 # 3 of which are hydrogens 671 672 for bond in residue.reference.map[bondname].bonds: 673 if bond.startswith("H"): 674 hcount += 1 675 elif bond != 'C-1' and bond != 'N+1': 676 nextatomname = bond 677 678 # Check if this is a tetrahedral group 679 680 if hcount != 3 or nextatomname == None: 681 return False 682 683 # Now rebuild according to the tetrahedral geometry 684 685 bondatom = residue.getAtom(bondname) 686 nextatom = residue.getAtom(nextatomname) 687 numbonds = len(bondatom.bonds) 688 689 if numbonds == 1: 690 691 # Place according to two atoms 692 693 coords = [bondatom.getCoords(), nextatom.getCoords()] 694 refcoords = [residue.reference.map[bondname].getCoords(), \ 695 residue.reference.map[nextatomname].getCoords()] 696 refatomcoords = atomref.getCoords() 697 newcoords = findCoordinates(2, coords, refcoords, refatomcoords) 698 residue.createAtom(atomname, newcoords) 699 700 # For LEU and ILE residues only: make sure the Hydrogens are in staggered conformation instead of eclipsed. 701 702 if isinstance(residue, LEU): 703 hcoords = newcoords 704 cbatom = residue.getAtom('CB') 705 ang = getDihedral(cbatom.getCoords(), nextatom.getCoords(), bondatom.getCoords(), hcoords) 706 diffangle = 60 - ang 707 residue.rotateTetrahedral(nextatom, bondatom, diffangle) 708 709 elif isinstance(residue, ILE): 710 hcoords = newcoords 711 cg1atom = residue.getAtom('CG1') 712 cbatom = residue.getAtom('CB') 713 if bondatom.name == 'CD1': 714 ang = getDihedral(cbatom.getCoords(), nextatom.getCoords(), bondatom.getCoords(), hcoords) 715 elif bondatom.name == 'CG2': 716 ang = getDihedral(cg1atom.getCoords(), nextatom.getCoords(), bondatom.getCoords(), hcoords) 717 else: 718 ang = getDihedral(cbatom.getCoords(), nextatom.getCoords(), bondatom.getCoords(), hcoords) 719 720 diffangle = 60 - ang 721 residue.rotateTetrahedral(nextatom, bondatom, diffangle) 722 723 return 1 724 725 elif numbonds == 2: 726 727 # Get the single hydrogen coordinates 728 729 hatom = None 730 for bond in bondatom.reference.bonds: 731 if residue.hasAtom(bond) and bond.startswith("H"): 732 hatom = residue.getAtom(bond) 733 break 734 735 # Use the existing hydrogen and rotate about the bond 736 737 residue.rotateTetrahedral(nextatom, bondatom, 120) 738 newcoords = hatom.getCoords() 739 residue.rotateTetrahedral(nextatom, bondatom, -120) 740 residue.createAtom(atomname, newcoords) 741 742 return 1 743 744 elif numbonds == 3: 745 746 # Find the one spot the atom can be 747 748 hatoms = [] 749 for bond in bondatom.reference.bonds: 750 if residue.hasAtom(bond) and bond.startswith("H"): 751 hatoms.append(residue.getAtom(bond)) 752 753 # If this is more than two something is wrong 754 755 if len(hatoms) != 2: return 0 756 757 # Use the existing hydrogen and rotate about the bond 758 759 residue.rotateTetrahedral(nextatom, bondatom, 120) 760 newcoords1 = hatoms[0].getCoords() 761 residue.rotateTetrahedral(nextatom, bondatom, 120) 762 newcoords2 = hatoms[0].getCoords() 763 residue.rotateTetrahedral(nextatom, bondatom, 120) 764 765 # Determine which one hatoms[1] is not in 766 767 if distance(hatoms[1].getCoords(), newcoords1) > 0.1: 768 residue.createAtom(atomname, newcoords1) 769 else: 770 residue.createAtom(atomname, newcoords2) 771 772 return 1 773 774 def addHydrogens(self): 775 """ 776 Add the hydrogens to the protein. This requires either 777 the rebuildTetrahedral function for tetrahedral geometries 778 or the standard quatfit methods. These methods use three 779 nearby bonds to rebuild the atom; the closer the bonds, the 780 more accurate the results. As such the peptide bonds are 781 used when available. 782 """ 783 count = 0 784 self.write("Adding hydrogens to the protein...\n") 785 for residue in self.protein.getResidues(): 786 if not isinstance(residue, (Amino, Nucleic)): 787 continue 788 for atomname in residue.reference.map: 789 if not atomname.startswith("H"): 790 continue 791 if residue.hasAtom(atomname): 792 continue 793 if isinstance(residue, CYS) and residue.SSbonded and atomname == "HG": 794 continue 795 796 # If this hydrogen is part of a tetrahedral group, 797 # follow a different codepath 798 799 if Routines.rebuildTetrahedral(residue, atomname): 800 count += 1 801 continue 802 803 # Otherwise use the standard quatfit methods 804 805 coords = [] 806 refcoords = [] 807 808 refatomcoords = residue.reference.map[atomname].getCoords() 809 bondlist = residue.reference.getNearestBonds(atomname) 810 811 for bond in bondlist: 812 if bond == "N+1": 813 atom = residue.peptideN 814 elif bond == "C-1": 815 atom = residue.peptideC 816 else: 817 atom = residue.getAtom(bond) 818 819 if atom == None: 820 continue 821 822 # Get coordinates, reference coordinates 823 824 coords.append(atom.getCoords()) 825 refcoords.append(residue.reference.map[bond].getCoords()) 826 827 # Exit if we have enough atoms 828 829 if len(coords) == 3: 830 break 831 832 if len(coords) == 3: 833 newcoords = findCoordinates(3, coords, refcoords, refatomcoords) 834 residue.createAtom(atomname, newcoords) 835 count += 1 836 else: 837 self.write("Couldn't rebuild %s in %s!\n" % (atomname, residue), 1) 838 839 self.write(" Added %i hydrogen atoms.\n" % count) 840 841 def removeHydrogens(self): 842 self.write("Stripping hydrogens from the protein...\n") 843 844 for residue in self.protein.getResidues(): 845 if not isinstance(residue, (Amino, Nucleic)): 846 continue 847 for atom in residue.atoms[:]: 848 if atom.isHydrogen(): 849 residue.removeAtom(atom.name) 850 851 def repairHeavy(self): 852 """ 853 Repair all heavy atoms. Unfortunately the first time we 854 get to an atom we might not be able to rebuild it - it 855 might depend on other atoms to be rebuild first (think side 856 chains). As such a 'seenmap' is used to keep track of what 857 we've already seen and subsequent attempts to rebuild the 858 atom. 859 """ 860 self.write("Rebuilding missing heavy atoms... \n") 861 for residue in self.protein.getResidues(): 862 if not isinstance(residue, (Amino, Nucleic)): 863 continue 864 missing = residue.get("missing") 865 if missing == []: continue 866 867 # Initialize some variables 868 869 seenmap = {} 870 nummissing = len(missing) 871 872 while len(missing) > 0: 873 coords = [] 874 refcoords = [] 875 876 atomname = missing.pop(0) 877 refatomcoords = residue.reference.map[atomname].getCoords() 878 bondlist = residue.reference.getNearestBonds(atomname) 879 880 for bond in bondlist: 881 if bond == "N+1": atom = residue.peptideN 882 elif bond == "C-1": atom = residue.peptideC 883 else: atom = residue.getAtom(bond) 884 885 if atom == None: continue 886 887 # Get coordinates, reference coordinates 888 889 coords.append(atom.getCoords()) 890 refcoords.append(residue.reference.map[bond].getCoords()) 891 892 # Exit if we have enough atoms 893 894 if len(coords) == 3: break 895 896 # We might need other atoms to be rebuilt first 897 898 if len(coords) < 3: 899 try: 900 seenmap[atomname] += 1 901 except KeyError: 902 seenmap[atomname] = 1 903 904 missing.append(atomname) 905 if seenmap[atomname] > nummissing: 906 text = "Too few atoms present to reconstruct or cap residue %s in structure!\n" % (residue) 907 text += "This error is generally caused by missing backbone atoms in this protein;\n" 908 text += "you must use an external program to complete gaps in the protein backbone.\n" 909 text += "Heavy atoms missing from %s: " % (residue) 910 text += ' '.join(missing) 911 raise PDBInputError(text) 912 913 else: # Rebuild the atom 914 newcoords = findCoordinates(3, coords, refcoords, refatomcoords) 915 residue.createAtom(atomname, newcoords) 916 self.write("Added %s to %s at coordinates" % (atomname, residue), 1) 917 self.write(" %.3f %.3f %.3f\n" % \ 918 (newcoords[0], newcoords[1], newcoords[2])) 919 920 self.write("Done.\n") 921 922 def setReferenceDistance(self): 923 """ 924 Set the distance to the CA atom in the residue. 925 This is necessary for determining which atoms are 926 allowed to move during rotations. Uses the 927 shortestPath algorithm found in utilities.py. 928 """ 929 for residue in self.protein.getResidues(): 930 if not isinstance(residue, Amino): continue 931 932 # Initialize some variables 933 934 map = {} 935 caatom = residue.getAtom("CA") 936 937 if caatom == None: 938 text = "Cannot set references to %s without CA atom!\n" 939 raise PDBInputError(text) 940 941 # Set up the linked map 942 943 for atom in residue.getAtoms(): 944 map[atom] = atom.bonds 945 946 # Run the algorithm 947 948 for atom in residue.getAtoms(): 949 if atom.isBackbone(): 950 atom.refdistance = -1 951 elif residue.isCterm and atom.name == "HO": # special case for HO in Cterm 952 atom.refdistance = 3 953 elif residue.isNterm and (atom.name == "H3" or atom.name == "H2"): # special case for H2 or H3 in Nterm 954 atom.refdistance = 2 955 else: 956 atom.refdistance = len(shortestPath(map, atom, caatom)) - 1 957 958 def getbumpscore(self, residue): 959 """Get an bump score for the current structure""" 960 961 # Do some setup 962 963 self.cells = Cells(CELL_SIZE) 964 self.cells.assignCells(self.protein) 965 966 self.calculateDihedralAngles() 967 self.setDonorsAndAcceptors() 968 self.updateInternalBonds() 969 self.setReferenceDistance() 970 bumpscore = 0.0 971 #for residue in self.protein.getResidues(): 972 if not isinstance(residue, Amino): return 0.0 973 # Initialize variables 974 975 conflictnames = [] 976 977 for atom in residue.getAtoms(): 978 atomname = atom.name 979 #if not atom.added: continue 980 if atomname[0] != "H": continue 981 #if atom.optimizeable: continue 982 #print atomname,atom.optimizeable,atom.added 983 bumpscore = bumpscore + self.getbumpscore_atom(atom) 984 return bumpscore 985 986 987 def getbumpscore_atom(self, atom): 988 """ 989 Find nearby atoms for conflict-checking. Uses 990 neighboring cells to compare atoms rather than an all 991 versus all O(n^2) algorithm, which saves a great deal 992 of time. There are several instances where we ignore 993 potential conflicts; these include donor/acceptor pairs, 994 atoms in the same residue, and bonded CYS bridges. 995 996 Parameters 997 atom: Find nearby atoms to this atom (Atom) 998 Returns 999 bumpscore: a bump score sum((dist-cutoff)**20 for all near atoms 1000 1001 Jens rewrote this function from findNearbyAtoms to 1002 be usable for detecting bumps for optimzable hydrogens 1003 """ 1004 # Initialize some variables 1005 nearatoms = {} 1006 residue = atom.residue 1007 #print 'Cutoff distance',cutoff 1008 atom_size = BUMP_HYDROGEN_SIZE if atom.isHydrogen() else BUMP_HEAVY_SIZE 1009 1010 # Get atoms from nearby cells 1011 1012 closeatoms = self.cells.getNearCells(atom) 1013 1014 # Loop through and see if any are within the cutoff 1015 1016 bumpscore = 0.0 1017 for closeatom in closeatoms: 1018 closeresidue = closeatom.residue 1019 if closeresidue == residue and (closeatom in atom.bonds or atom in closeatom.bonds): 1020 continue 1021 1022 if not isinstance(closeresidue, Amino): 1023 continue 1024 if isinstance(residue, CYS): 1025 if residue.SSbondedpartner == closeatom: continue 1026 1027 # Also ignore if this is a donor/acceptor pair 1028 pair_ignored = False 1029 if atom.isHydrogen() and len(atom.bonds) != 0 and atom.bonds[0].hdonor \ 1030 and closeatom.hacceptor: 1031 #pair_ignored = True 1032 continue 1033 if closeatom.isHydrogen() and len(closeatom.bonds) != 0 and closeatom.bonds[0].hdonor \ 1034 and atom.hacceptor: 1035 #pair_ignored = True 1036 continue 1037 1038 dist = distance(atom.getCoords(), closeatom.getCoords()) 1039 other_size = BUMP_HYDROGEN_SIZE if closeatom.isHydrogen() else BUMP_HEAVY_SIZE 1040 cutoff = atom_size + other_size 1041 if dist < cutoff: 1042 bumpscore = bumpscore + 1000.0 1043 if pair_ignored: 1044 self.write('This bump is a donor/acceptor pair.\n') 1045# if heavy_not_ignored: 1046# print 'Bumped {0} against {1} within residue'.format(atom.name, closeatom.name) 1047 #nearatoms[closeatom] = (dist-cutoff)**2 1048 self.write('BUMPSCORE ' + str(bumpscore) + '\n') 1049 return bumpscore 1050 1051 1052 def debumpProtein(self): 1053 """ 1054 Make sure that none of the added atoms were rebuilt 1055 on top of existing atoms. See each called function 1056 for more information. 1057 """ 1058 1059 self.write("Checking if we must debump any residues... \n") 1060 1061 # Do some setup 1062 1063 self.cells = Cells(CELL_SIZE) 1064 self.cells.assignCells(self.protein) 1065 1066 self.calculateDihedralAngles() 1067 self.setDonorsAndAcceptors() 1068 self.updateInternalBonds() 1069 self.setReferenceDistance() 1070 1071 # Determine which residues to debump 1072 1073 for residue in self.protein.getResidues(): 1074 if not isinstance(residue, Amino): continue 1075 1076 # Initialize variables 1077 1078 conflictnames = self.findResidueConflicts(residue, True) 1079 1080 if not conflictnames: 1081 continue 1082 1083 # Otherwise debump the residue 1084 1085 self.write("Starting to debump %s...\n" % residue, 1) 1086 self.write("Debumping cutoffs: %2.1f for heavy-heavy, %2.1f for hydrogen-heavy, and %2.1f for hydrogen-hydrogen.\n" % 1087 (BUMP_HEAVY_SIZE*2, 1088 BUMP_HYDROGEN_SIZE+BUMP_HEAVY_SIZE, 1089 BUMP_HYDROGEN_SIZE*2), 1) 1090 if self.debumpResidue(residue, conflictnames): 1091 self.write("Debumping Successful!\n\n", 1) 1092 else: 1093 text = "WARNING: Unable to debump %s\n" % residue 1094 self.write("********\n%s********\n\n" % text) 1095 self.warnings.append(text) 1096 1097 self.write("Done.\n") 1098 1099 def findResidueConflicts(self, residue, writeConflictInfo=False): 1100 conflictnames = [] 1101 for atom in residue.getAtoms(): 1102 atomname = atom.name 1103 if not atom.added: continue 1104 if atomname == "H": continue 1105 if atom.optimizeable: continue 1106 1107 nearatoms = self.findNearbyAtoms(atom) 1108 1109 # If something is too close, we must debump the residue 1110 1111 if nearatoms != {}: 1112 conflictnames.append(atomname) 1113 if writeConflictInfo: 1114 for repatom in nearatoms: 1115 self.write("%s %s is too close to %s %s\n" % \ 1116 (residue, atomname, repatom.residue, repatom.name), 1) 1117 1118 return conflictnames 1119 1120 def scoreDihedralAngle(self, residue, anglenum): 1121 score = 0 1122 atomnames = residue.reference.dihedrals[anglenum].split() 1123 pivot = atomnames[2] 1124 moveablenames = self.getMoveableNames(residue, pivot) 1125 for name in moveablenames: 1126 nearatoms = self.findNearbyAtoms(residue.getAtom(name)) 1127 for v in nearatoms.values(): 1128 score += v 1129 1130 return score 1131 1132 1133 def debumpResidue(self, residue, conflictnames): 1134 """ 1135 Debump a specific residue. Only should be called 1136 if the residue has been detected to have a conflict. 1137 If called, try to rotate about dihedral angles to 1138 resolve the conflict. 1139 1140 Parameters 1141 residue: The residue in question 1142 conflictnames: A list of atomnames that were 1143 rebuilt too close to other atoms 1144 Returns 1145 True if successful, False otherwise 1146 """ 1147 1148 # Initialize some variables 1149 1150 ANGLE_STEPS = 72 1151 ANGLE_STEP_SIZE = float(360 // ANGLE_STEPS) 1152 1153 ANGLE_TEST_COUNT = 10 1154 1155 anglenum = -1 1156 currentConflictNames = conflictnames 1157 1158 EPSILON = 0.0000001 1159 1160 # Try (up to 10 times) to find a workable solution 1161 1162 for _ in range(ANGLE_TEST_COUNT): 1163 1164 anglenum = self.pickDihedralAngle(residue, currentConflictNames, anglenum) 1165 1166 if anglenum == -1: return False 1167 1168 self.write("Using dihedral angle number %i to debump the residue.\n" % anglenum, 1) 1169 1170 bestscore = self.scoreDihedralAngle(residue, anglenum) 1171 foundImprovement = False 1172 bestangle = originalAngle = residue.dihedrals[anglenum] 1173 1174 #Skip the first angle as it's already known. 1175 for i in xrange(1, ANGLE_STEPS): 1176 newangle = originalAngle + (ANGLE_STEP_SIZE * i) 1177 self.setDihedralAngle(residue, anglenum, newangle) 1178 1179 # Check for conflicts 1180 1181 score = self.scoreDihedralAngle(residue, anglenum) 1182 1183 if score == 0: 1184 if not self.findResidueConflicts(residue): 1185 self.write("No conflicts found at angle "+repr(newangle)+"\n", 1) 1186 return True 1187 else: 1188 bestangle = newangle 1189 foundImprovement = True 1190 break 1191 1192 # Set the best angle 1193 elif score < bestscore: 1194 diff = abs(bestscore - score) 1195 #Don't update if it's effectively a tie 1196 if diff > EPSILON: 1197 bestscore = score 1198 bestangle = newangle 1199 foundImprovement = True 1200 1201 self.setDihedralAngle(residue, anglenum, bestangle) 1202 currentConflictNames = self.findResidueConflicts(residue) 1203 1204 if foundImprovement: 1205 self.write("Best score of "+repr(bestscore)+" at angle "+repr(bestangle)+". New conflict set: ", 1) 1206 self.write(str(currentConflictNames)+"\n", 1) 1207 else: 1208 self.write("No improvement found for this dihedral angle.\n", 1) 1209 1210 # If we're here, debumping was unsuccessful 1211 1212 return False 1213 1214 def calculateDihedralAngles(self): 1215 """ 1216 Calculate the dihedral angle for every residue within the protein 1217 """ 1218 for residue in self.protein.getResidues(): 1219 if not isinstance(residue, Amino): continue 1220 residue.dihedrals = [] 1221 1222 refangles = residue.reference.dihedrals 1223 for di in refangles: 1224 coords = [] 1225 atoms = di.split() 1226 for i in range(4): 1227 atomname = atoms[i] 1228 if residue.hasAtom(atomname): 1229 coords.append(residue.getAtom(atomname).getCoords()) 1230 1231 if len(coords) == 4: angle = getDihedral(coords[0], coords[1], coords[2], coords[3]) 1232 else: angle = None 1233 1234 residue.addDihedralAngle(angle) 1235 1236 def getClosestAtom(self, atom): 1237 """ 1238 Get the closest atom that does not form a donor/acceptor pair. 1239 Used to detect potential conflicts. 1240 1241 NOTE: Cells must be set before using this function. 1242 1243 Parameters 1244 atom: The atom in question (Atom) 1245 Returns 1246 bestatom: The closest atom to the input atom that does not 1247 satisfy a donor/acceptor pair. 1248 """ 1249 # Initialize some variables 1250 1251 bestdist = 999.99 1252 bestwatdist = 999.99 1253 bestatom = None 1254 bestwatatom = None 1255 residue = atom.residue 1256 1257 # Get atoms from nearby cells 1258 1259 closeatoms = self.cells.getNearCells(atom) 1260 1261 # Loop through and see which is the closest 1262 1263 for closeatom in closeatoms: 1264 closeresidue = closeatom.residue 1265 if closeresidue == residue: continue 1266 if not isinstance(closeresidue, (Amino,WAT)): continue 1267 if isinstance(residue, CYS): 1268 if residue.SSbondedpartner == closeatom: continue 1269 1270 # Also ignore if this is a donor/acceptor pair 1271 1272 if atom.isHydrogen() and atom.bonds[0].hdonor \ 1273 and closeatom.hacceptor: continue 1274 if closeatom.isHydrogen() and closeatom.bonds[0].hdonor \ 1275 and atom.hacceptor: 1276 continue 1277 1278 dist = distance(atom.getCoords(), closeatom.getCoords()) 1279 1280 if isinstance(closeresidue, WAT): 1281 if dist < bestwatdist: 1282 bestwatdist = dist 1283 bestwatatom = closeatom 1284 else: 1285 if dist < bestdist: 1286 bestdist = dist 1287 bestatom = closeatom 1288 1289 if bestdist > bestwatdist: 1290 txt = "Warning: %s in %s skipped when optimizing %s in %s\n" % (bestwatatom.name, 1291 bestwatatom.residue, 1292 atom.name, residue) 1293 if txt not in self.warnings: 1294 self.warnings.append(txt) 1295 1296 return bestatom 1297 1298 def findNearbyAtoms(self, atom): 1299 """ 1300 Find nearby atoms for conflict-checking. Uses 1301 neighboring cells to compare atoms rather than an all 1302 versus all O(n^2) algorithm, which saves a great deal 1303 of time. There are several instances where we ignore 1304 potential conflicts; these include donor/acceptor pairs, 1305 atoms in the same residue, and bonded CYS bridges. 1306 1307 Parameters 1308 atom: Find nearby atoms to this atom (Atom) 1309 Returns 1310 nearatoms: A dictionary of <Atom too close> to <amount of overlap for that atom>. 1311 """ 1312 # Initialize some variables 1313 1314 nearatoms = {} 1315 residue = atom.residue 1316 atom_size = BUMP_HYDROGEN_SIZE if atom.isHydrogen() else BUMP_HEAVY_SIZE 1317 1318 # Get atoms from nearby cells 1319 1320 closeatoms = self.cells.getNearCells(atom) 1321 1322 # Loop through and see if any are within the cutoff 1323 1324 for closeatom in closeatoms: 1325 closeresidue = closeatom.residue 1326 if closeresidue == residue and (closeatom in atom.bonds or atom in closeatom.bonds): 1327 continue 1328 1329 if not isinstance(closeresidue, (Amino, WAT)): 1330 continue 1331 if isinstance(residue, CYS) and residue.SSbondedpartner == closeatom: 1332 continue 1333 1334 # Also ignore if this is a donor/acceptor pair 1335 1336 if (atom.isHydrogen() and len(atom.bonds) != 0 and 1337 atom.bonds[0].hdonor and closeatom.hacceptor): 1338 continue 1339 1340 if (closeatom.isHydrogen() and len(closeatom.bonds) != 0 and 1341 closeatom.bonds[0].hdonor and atom.hacceptor): 1342 continue 1343 1344 dist = distance(atom.getCoords(), closeatom.getCoords()) 1345 other_size = BUMP_HYDROGEN_SIZE if closeatom.isHydrogen() else BUMP_HEAVY_SIZE 1346 cutoff = atom_size + other_size 1347 if dist < cutoff: 1348 nearatoms[closeatom] = cutoff - dist 1349 1350 return nearatoms 1351 1352 1353 def pickDihedralAngle(self, residue, conflictnames, oldnum=None): 1354 """ 1355 Choose an angle number to use in debumping 1356 1357 Algorithm 1358 Instead of simply picking a random chiangle, this function 1359 uses a more intelligent method to improve efficiency. 1360 The algorithm uses the names of the conflicting atoms 1361 within the residue to determine which angle number 1362 has the best chance of fixing the problem(s). The method 1363 also insures that the same chiangle will not be run twice 1364 in a row. 1365 Parameters 1366 residue: The residue that is being debumped (Residue) 1367 conflictnames: A list of atom names that are currently 1368 conflicts (list) 1369 oldnum : The old dihedral angle number (int) 1370 Returns 1371 bestnum : The new dihedral angle number (int) 1372 """ 1373 bestnum = -1 1374 best = 0 1375 1376 iList = range(len(residue.dihedrals)) 1377 #Make sure our testing is done round robin. 1378 if oldnum is not None and oldnum >= 0 and len(iList) > 0: 1379 del iList[oldnum] 1380 testDihedralIndecies = iList[oldnum:] + iList[:oldnum] 1381 else: 1382 testDihedralIndecies = iList 1383 1384 for i in testDihedralIndecies: 1385 if i == oldnum: continue 1386 if residue.dihedrals[i] is None: continue 1387 1388 score = 0 1389 atomnames = residue.reference.dihedrals[i].split() 1390 pivot = atomnames[2] 1391 1392 moveablenames = self.getMoveableNames(residue, pivot) 1393 1394 # If this pivot only moves the conflict atoms, pick it 1395 1396 if conflictnames == moveablenames: return i 1397 1398 # Otherwise find the pivot with the most matches 1399 1400 for name in conflictnames: 1401 if name in moveablenames: 1402 score += 1 1403 if score > best: 1404 best = score 1405 bestnum = i 1406 1407 # Return the best angle. If none were found, return -1. 1408 1409 return bestnum 1410 1411 def setDihedralAngle(self, residue, anglenum, angle): 1412 """ 1413 Rotate a residue about a given angle. Uses the quatfit 1414 methods to perform the matrix mathematics. 1415 1416 Parameters 1417 residue: The residue to rotate 1418 anglenum: The number of the angle to rotate as 1419 listed in residue.dihedrals 1420 angle: The desired angle. 1421 1422 """ 1423 coordlist = [] 1424 initcoords = [] 1425 movecoords = [] 1426 pivot = "" 1427 1428 oldangle = residue.dihedrals[anglenum] 1429 diff = angle - oldangle 1430 1431 atomnames = residue.reference.dihedrals[anglenum].split() 1432 1433 pivot = atomnames[2] 1434 for atomname in atomnames: 1435 if residue.hasAtom(atomname): 1436 coordlist.append(residue.getAtom(atomname).getCoords()) 1437 else: 1438 raise PDBInputError("Error occurred while trying to debump!") 1439 1440 initcoords = subtract(coordlist[2], coordlist[1]) 1441 1442 moveablenames = self.getMoveableNames(residue, pivot) 1443 1444 for name in moveablenames: 1445 atom = residue.getAtom(name) 1446 movecoords.append(subtract(atom.getCoords(), coordlist[1])) 1447 1448 newcoords = qchichange(initcoords, movecoords, diff) 1449 1450 for i in range(len(moveablenames)): 1451 atom = residue.getAtom(moveablenames[i]) 1452 self.cells.removeCell(atom) 1453 x = (newcoords[i][0] + coordlist[1][0]) 1454 y = (newcoords[i][1] + coordlist[1][1]) 1455 z = (newcoords[i][2] + coordlist[1][2]) 1456 atom.set("x", x) 1457 atom.set("y", y) 1458 atom.set("z", z) 1459 self.cells.addCell(atom) 1460 1461 1462 # Set the new angle 1463 1464 coordlist = [] 1465 for atomname in atomnames: 1466 if residue.hasAtom(atomname): 1467 coordlist.append(residue.getAtom(atomname).getCoords()) 1468 else: 1469 raise PDBInputError("Error occurred while trying to debump!") 1470 1471 di = getDihedral(coordlist[0], coordlist[1], coordlist[2], coordlist[3]) 1472 residue.dihedrals[anglenum] = di 1473 1474 def getMoveableNames(self, residue, pivot): 1475 """ 1476 Return all atomnames that are further away than the 1477 pivot atom. 1478 1479 Parameters 1480 residue: The residue to use 1481 pivot: The pivot atomname 1482 """ 1483 movenames = [] 1484 refdist = residue.getAtom(pivot).refdistance 1485 for atom in residue.getAtoms(): 1486 if atom.refdistance > refdist: 1487 movenames.append(atom.name) 1488 return movenames 1489 1490 def setDonorsAndAcceptors(self): 1491 """ 1492 Set the donors and acceptors within the protein 1493 """ 1494 for residue in self.protein.getResidues(): 1495 residue.setDonorsAndAcceptors() 1496 1497 def runPDB2PKA(self, ph, ff, pdblist, ligand, verbose, pdb2pka_params): 1498 if ff.lower() != 'parse': 1499 PDB2PKAError('PDB2PKA can only be run with the PARSE force field.') 1500 1501 self.write("Running PDB2PKA and applying at pH %.2f... \n" % ph) 1502 1503 import pka 1504 from pdb2pka import pka_routines 1505 init_params = pdb2pka_params.copy() 1506 init_params.pop('pairene') 1507 init_params.pop('clean_output') 1508 1509 results = pka.pre_init(original_pdb_list=pdblist, 1510 ff=ff, 1511 verbose=verbose, 1512 ligand=ligand, 1513 **init_params) 1514 output_dir, protein, routines, forcefield,apbs_setup, ligand_titratable_groups, maps, sd = results 1515 1516 mypkaRoutines = pka_routines.pKaRoutines(protein, routines, forcefield, apbs_setup, output_dir, maps, sd, 1517 restart=pdb2pka_params.get('clean_output'), 1518 pairene=pdb2pka_params.get('pairene')) 1519 1520 print 'Doing full pKa calculation' 1521 mypkaRoutines.runpKa() 1522 1523 pdb2pka_warnings = mypkaRoutines.warnings[:] 1524 1525 self.warnings.extend(pdb2pka_warnings) 1526 1527 residue_ph = {} 1528 for pka_residue_tuple, calc_ph in mypkaRoutines.ph_at_0_5.iteritems(): 1529 tit_type, chain_id, number_str = pka_residue_tuple 1530 if tit_type == 'NTR': 1531 tit_type = 'N+' 1532 elif tit_type == 'CTR': 1533 tit_type = 'C-' 1534 1535 key = ' '.join([tit_type, number_str, chain_id]) 1536 residue_ph[key] = calc_ph 1537 1538 pformat(residue_ph) 1539 1540 self.apply_pka_values(ff, ph, residue_ph) 1541 1542 self.write('Finished running PDB2PKA.\n') 1543 1544 1545 def runPROPKA(self, ph, ff, rootname, outname, options): 1546 """ 1547 Run PROPKA on the current protein, setting protonation states to 1548 the correct values 1549 1550 Parameters 1551 ph: The desired pH of the system 1552 ff: The forcefield name to be used 1553 outname: The name of the PQR outfile 1554 """ 1555 self.write("Running PROPKA and applying at pH %.2f... \n" % ph) 1556 1557 from propka30.Source.protein import Protein as pkaProtein 1558 from propka30.Source.pdb import readPDB as pkaReadPDB 1559 from propka30.Source.lib import residueList, setVerbose 1560 1561 setVerbose(options.verbose) 1562 1563 # Initialize some variables 1564 1565 linelen = 70 1566 pkadic = {} 1567 1568 # Reorder the atoms in each residue to start with N 1569 1570 for residue in self.protein.getResidues(): 1571 residue.reorder() 1572 1573 # Make a string with all non-hydrogen atoms 1574 1575 HFreeProteinFile = StringIO() 1576 1577 for atom in self.protein.getAtoms(): 1578 if not atom.isHydrogen(): 1579 atomtxt = atom.getPDBString() 1580 atomtxt = atomtxt[:linelen] 1581 HFreeProteinFile.write(atomtxt) 1582 HFreeProteinFile.write('\n') 1583 1584 1585 HFreeProteinFile.seek(0) 1586 1587 # Run PropKa 1588 1589 atoms = pkaReadPDB('', file=HFreeProteinFile) 1590 1591 # creating protein object 1592 myPkaProtein = pkaProtein(atoms=atoms, name=rootname, options=options) 1593 # calculating pKa values for ionizable residues 1594 myPkaProtein.calculatePKA(options=options) 1595 # printing pka file 1596 myPkaProtein.writePKA(options=options, filename=outname) 1597 1598 # Parse the results 1599 # This is the method used to generate the summary in the first place. 1600 residue_list = residueList("propka1") 1601 for chain in myPkaProtein.chains: 1602 for residue_type in residue_list: 1603 for residue in chain.residues: 1604 if residue.resName == residue_type: 1605 #Strip out the extra space after C- or N+ 1606 key = string.strip('%s %s %s' % (string.strip(residue.resName), 1607 residue.resNumb, residue.chainID)) 1608 pkadic[key] = residue.pKa_pro 1609 1610 if len(pkadic) == 0: 1611 return 1612 1613 # Now apply each pka to the appropriate residue 1614 self.apply_pka_values(ff, ph, pkadic) 1615 1616 self.write("Done.\n") 1617 1618 def apply_pka_values(self, ff, ph, pkadic): 1619 self.write('Applying pKa values at a pH of %.2f:\n' % ph) 1620 formatted_pkadict = pformat(pkadic) 1621 self.write(formatted_pkadict+'\n\n') 1622 1623 warnings = [] 1624 for residue in self.protein.getResidues(): 1625 if not isinstance(residue, Amino): 1626 continue 1627 resname = residue.name 1628 resnum = residue.resSeq 1629 chainID = residue.chainID 1630 1631 if residue.isNterm: 1632 key = "N+ %i %s" % (resnum, chainID) 1633 key = string.strip(key) 1634 if key in pkadic: 1635 value = pkadic[key] 1636 del pkadic[key] 1637 if ph >= value: 1638 if ff in ["amber", "charmm", "tyl06", "peoepb", "swanson"]: 1639 warn = ("N-terminal %s" % key, "neutral") 1640 warnings.append(warn) 1641 else: 1642 self.applyPatch("NEUTRAL-NTERM", residue) 1643 1644 if residue.isCterm: 1645 key = "C- %i %s" % (resnum, chainID) 1646 key = string.strip(key) 1647 if key in pkadic: 1648 value = pkadic[key] 1649 del pkadic[key] 1650 if ph < value: 1651 if ff in ["amber", "charmm", "tyl06", "peoepb", "swanson"]: 1652 warn = ("C-terminal %s" % key, "neutral") 1653 warnings.append(warn) 1654 else: 1655 self.applyPatch("NEUTRAL-CTERM", residue) 1656 1657 key = "%s %i %s" % (resname, resnum, chainID) 1658 key = string.strip(key) 1659 if key in pkadic: 1660 value = pkadic[key] 1661 del pkadic[key] 1662 if resname == "ARG" and ph >= value: 1663 if ff == "parse": 1664 self.applyPatch("AR0", residue) 1665 txt = "WARNING: Neutral arginines are very rare. Please double\n" 1666 self.warnings.append(txt) 1667 self.write(txt) 1668 txt = " check your system and caculation setup.\n" 1669 self.warnings.append(txt) 1670 self.write(txt) 1671 else: 1672 warn = (key, "neutral") 1673 warnings.append(warn) 1674 elif resname == "ASP" and ph < value: 1675 if residue.isCterm and ff in ["amber", "tyl06", "swanson"]: 1676 warn = (key, "Protonated at C-Terminal") 1677 warnings.append(warn) 1678 elif residue.isNterm and ff in ["amber", "tyl06", "swanson"]: 1679 warn = (key, "Protonated at N-Terminal") 1680 warnings.append(warn) 1681 else: 1682 self.applyPatch("ASH", residue) 1683 elif resname == "CYS" and ph >= value: 1684 if ff == "charmm": 1685 warn = (key, "negative") 1686 warnings.append(warn) 1687 else: 1688 self.applyPatch("CYM", residue) 1689 elif resname == "GLU" and ph < value: 1690 if residue.isCterm and ff in ["amber", "tyl06", "swanson"]: 1691 warn = (key, "Protonated at C-Terminal") 1692 warnings.append(warn) 1693 elif residue.isNterm and ff in ["amber", "tyl06", "swanson"]: 1694 warn = (key, "Protonated at N-Terminal") 1695 warnings.append(warn) 1696 else: 1697 self.applyPatch("GLH", residue) 1698 elif resname == "HIS" and ph < value: 1699 self.applyPatch("HIP", residue) 1700 elif resname == "LYS" and ph >= value: 1701 if ff == "charmm": 1702 warn = (key, "neutral") 1703 warnings.append(warn) 1704 elif ff in ["amber", "tyl06", "swanson"] and residue.get("isCterm"): 1705 warn = (key, "neutral at C-Terminal") 1706 warnings.append(warn) 1707 elif ff == "tyl06" and residue.get("isNterm"): 1708 warn = (key, "neutral at N-Terminal") 1709 warnings.append(warn) 1710 else: 1711 self.applyPatch("LYN", residue) 1712 elif resname == "TYR" and ph >= value: 1713 if ff in ["charmm", "amber", "tyl06", "peoepb", "swanson"]: 1714 warn = (key, "negative") 1715 warnings.append(warn) 1716 else: 1717 self.applyPatch("TYM", residue) 1718 1719 if len(warnings) > 0: 1720 init = "WARNING: PDB2PKA determined the following residues to be\n" 1721 self.warnings.append(init) 1722 self.write(init) 1723 init = " in a protonation state not supported by the\n" 1724 self.warnings.append(init) 1725 self.write(init) 1726 init = " %s forcefield!\n" % ff 1727 self.warnings.append(init) 1728 self.write(init) 1729 init = " All were reset to their standard pH 7.0 state.\n" 1730 self.warnings.append(init) 1731 self.write(init) 1732 self.warnings.append("\n") 1733 self.write('\n') 1734 for warn in warnings: 1735 text = " %s (%s)\n" % (warn[0], warn[1]) 1736 self.warnings.append(text) 1737 self.write(text) 1738 self.warnings.append("\n") 1739 self.write('\n') 1740 1741 if len(pkadic) > 0: 1742 warn = " PDB2PQR could not identify the following residues\n" 1743 self.warnings.append(warn) 1744 self.write(warn) 1745 warn = " and residue numbers as returned by PROPKA or PDB2PKA:\n" 1746 self.warnings.append(warn) 1747 self.warnings.append("\n") 1748 self.write(warn) 1749 self.write('\n') 1750 for item in pkadic: 1751 text = " %s\n" % item 1752 self.warnings.append(text) 1753 self.write(text) 1754 self.warnings.append("\n") 1755 self.write('\n') 1756 1757 1758class Cells: 1759 """ 1760 The cells object provides a better way to search for nearby atoms. A 1761 pure all versus all search is O(n^2) - for every atom, every other atom 1762 must be searched. This is rather inefficient, especially for large 1763 proteins where cells may be tens of angstroms apart. The cell class 1764 breaks down the xyz protein space into several 3-D cells of desired 1765 size - then by simply examining atoms that fall into the adjacent 1766 cells one can quickly find nearby cells. 1767 1768 NOTE: Ideally this should be somehow separated from the routines 1769 object... 1770 """ 1771 def __init__(self, cellsize): 1772 """ 1773 Initialize the cells. 1774 1775 Parameters 1776 cellsize: The size of each cell (int) 1777 """ 1778 self.cellmap = {} 1779 self.cellsize = cellsize 1780 1781 def assignCells(self, protein): 1782 """ 1783 Place each atom in a virtual cell for easy neighbor comparison 1784 """ 1785 for atom in protein.getAtoms(): 1786 atom.cell = None 1787 self.addCell(atom) 1788 1789 def addCell(self, atom): 1790 """ 1791 Add an atom to the cell 1792 1793 Parameters 1794 atom: The atom to add (atom) 1795 """ 1796 size = self.cellsize 1797 1798 x = atom.get("x") 1799 if x < 0: 1800 x = (int(x) - 1) / size * size 1801 else: 1802 x = int(x) / size * size 1803 1804 y = atom.get("y") 1805 if y < 0: 1806 y = (int(y) - 1) / size * size 1807 else: 1808 y = int(y) / size * size 1809 1810 z = atom.get("z") 1811 if z < 0: 1812 z = (int(z) - 1) / size * size 1813 else: 1814 z = int(z) / size * size 1815 1816 key = (x, y, z) 1817 try: 1818 self.cellmap[key].append(atom) 1819 except KeyError: 1820 self.cellmap[key] = [atom] 1821 atom.set("cell", key) 1822 1823 def removeCell(self, atom): 1824 """ 1825 Remove the atom from a cell 1826 1827 Parameters 1828 atom: The atom to add (atom) 1829 """ 1830 oldcell = atom.get("cell") 1831 if oldcell == None: return 1832 atom.set("cell", None) 1833 self.cellmap[oldcell].remove(atom) 1834 1835 def getNearCells(self, atom): 1836 """ 1837 Find all atoms in bordering cells to an atom 1838 1839 Parameters 1840 atom: The atom to use (atom) 1841 Returns 1842 closeatoms: A list of nearby atoms (list) 1843 """ 1844 size = self.cellsize 1845 closeatoms = [] 1846 cell = atom.get("cell") 1847 if cell == None: 1848 return closeatoms 1849 else: 1850 x = cell[0] 1851 y = cell[1] 1852 z = cell[2] 1853 for i in range(-1 * size, 2 * size, size): 1854 for j in range(-1 * size, 2 * size, size): 1855 for k in range(-1 * size, 2 * size, size): 1856 newkey = (x + i, y + j, z + k) 1857 try: 1858 newatoms = self.cellmap[newkey] 1859 for atom2 in newatoms: 1860 if atom == atom2: continue 1861 closeatoms.append(atom2) 1862 except KeyError: pass 1863 1864 return closeatoms 1865