1"""
2    Routines for PDB2PQR
3
4    This module contains the protein object used in PDB2PQR and methods
5    used to correct, analyze, and optimize that protein.
6
7    ----------------------------
8
9    PDB2PQR -- An automated pipeline for the setup, execution, and analysis of
10    Poisson-Boltzmann electrostatics calculations
11
12    Copyright (c) 2002-2011, Jens Erik Nielsen, University College Dublin;
13    Nathan A. Baker, Battelle Memorial Institute, Developed at the Pacific
14    Northwest National Laboratory, operated by Battelle Memorial Institute,
15    Pacific Northwest Division for the U.S. Department Energy.;
16    Paul Czodrowski & Gerhard Klebe, University of Marburg.
17
18	All rights reserved.
19
20	Redistribution and use in source and binary forms, with or without modification,
21	are permitted provided that the following conditions are met:
22
23		* Redistributions of source code must retain the above copyright notice,
24		  this list of conditions and the following disclaimer.
25		* Redistributions in binary form must reproduce the above copyright notice,
26		  this list of conditions and the following disclaimer in the documentation
27		  and/or other materials provided with the distribution.
28        * Neither the names of University College Dublin, Battelle Memorial Institute,
29          Pacific Northwest National Laboratory, US Department of Energy, or University
30          of Marburg nor the names of its contributors may be used to endorse or promote
31          products derived from this software without specific prior written permission.
32
33	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36	IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37	INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
38	BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
41	OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
42	OF THE POSSIBILITY OF SUCH DAMAGE.
43
44    ----------------------------
45
46"""
47
48__date__ = "1 August 2008"
49__author__ = "Jens Erik Nielsen, Todd Dolinsky, Yong Huang"
50
51CELL_SIZE = 2
52BUMP_DIST = 2.0
53BUMP_HDIST = 1.5
54BUMP_HYDROGEN_SIZE = 0.5
55BUMP_HEAVY_SIZE = 1.0
56BONDED_SS_LIMIT = 2.5
57PEPTIDE_DIST = 1.7
58REPAIR_LIMIT = 10
59AAS = ["ALA", "ARG", "ASH", "ASN", "ASP", "CYS", "CYM", "GLN", "GLU", "GLH", "GLY", \
60       "HIS", "HID", "HIE", "HIP", "HSD", "HSE", "HSP", "ILE", "LEU", "LYS", "LYN", \
61       "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "TYM", "VAL"]
62NAS = ["A", "A5", "A3", "C", "C5", "C3", "G", "G5", "G3", "T", "T5", "T3", "U", \
63       "U5", "U3", "RA", "RG", "RC", "RU", "DA", "DG", "DC", "DT"]
64
65import math
66import copy
67from pdb import *
68from utilities import *
69from quatfit import *
70from forcefield import *
71from structures import *
72from protein import *
73from definitions import *
74from StringIO import StringIO
75from errors import PDBInputError, PDBInternalError, PDB2PKAError
76from pprint import pformat
77
78
79class Routines:
80    def __init__(self, protein, verbose, definition=None):
81        """
82            Initialize the Routines class.  The class contains most
83            of the main routines that run PDB2PQR
84
85            Parameters
86                protein:  The protein to run PDB2PQR on (Protein)
87                verbose:  A flag to determine whether to write to
88                          stdout
89        """
90        self.protein = protein
91        self.definition = definition
92        self.aadef = None
93        self.verbose = verbose
94        self.warnings = []
95        self.cells = {}
96        if definition != None:
97            self.aadef = definition.getAA()
98            self.nadef = definition.getNA()
99
100
101    def write(self, message, indent=0):
102        """
103            Write a message to stdout for debugging if verbose
104
105            Parameters
106                message: The message to write (string)
107                indent : The indent level (int, default=0)
108        """
109        out = ""
110        if self.verbose:
111            for i in range(indent):
112                out += "\t"
113            out += message
114            sys.stdout.write(out)
115
116    def getWarnings(self):
117        """
118            Get all warnings generated from routines
119        """
120        return self.warnings
121
122    def applyNameScheme(self, forcefield):
123        """
124            Apply the naming scheme of the given forcefield to the atoms
125            within the protein
126
127            Parameters
128                forcefield: The forcefield object (forcefield)
129
130        """
131        self.write("Applying the naming scheme to the protein...")
132        for residue in self.protein.getResidues():
133            if isinstance(residue, (Amino, WAT, Nucleic)):
134                resname = residue.ffname
135            else:
136                resname = residue.name
137
138            for atom in residue.getAtoms():
139                rname, aname = forcefield.getNames(resname, atom.name)
140                if resname not in ['LIG', 'WAT', 'ACE', 'NME'] and rname != None:
141                    try:
142                        if (residue.isNterm or residue.isCterm) and rname != residue.name:
143                            rname = residue.name
144                    except AttributeError:
145                        pass
146                if aname != None and rname != None:
147                    atom.resName = rname
148                    atom.name = aname
149
150        self.write("Done.\n")
151
152    def applyForcefield(self, forcefield):
153        """
154            Apply the forcefield to the atoms within the protein
155
156            Parameters
157                forcefield: The forcefield object (forcefield)
158            Returns
159                hitlist:    A list of atoms that were found in
160                            the forcefield (list)
161                misslist:   A list of atoms that were not found in
162                            the forcefield (list)
163        """
164        self.write("Applying the forcefield to the protein...")
165        misslist = []
166        hitlist = []
167        for residue in self.protein.getResidues():
168            if isinstance(residue, (Amino, WAT, Nucleic)):
169                resname = residue.ffname
170            else:
171                resname = residue.name
172
173            # Apply the parameters
174
175            for atom in residue.getAtoms():
176                atomname = atom.get("name")
177                charge, radius = forcefield.getParams(resname, atomname)
178                if charge != None and radius != None:
179                    atom.set("ffcharge", charge)
180                    atom.set("radius", radius)
181                    hitlist.append(atom)
182                else:
183                    misslist.append(atom)
184
185        self.write("Done.\n")
186        return hitlist, misslist
187
188    def updateResidueTypes(self):
189        """
190            Find the type of residue as notated in the Amino Acid definition
191        """
192        self.write("Updating Residue Types... ")
193        for chain in self.protein.getChains():
194            for residue in chain.get("residues"):
195                name = residue.get("name")
196                if name in AAS:
197                    residue.set("type", 1)
198                elif name == "WAT":
199                    residue.set("type", 3)
200                elif name in NAS:
201                    residue.set("type", 4)
202                else: # Residue is a ligand or unknown
203                    residue.set("type", 2)
204
205        self.write("Done\n")
206
207    def updateSSbridges(self):
208        """
209            Check for SS-bridge partners, and if present, set appropriate
210            partners
211        """
212        self.write("Updating SS bridges...\n")
213        SGpartners = {}
214        for residue in self.protein.getResidues():
215            if isinstance(residue, CYS):
216                atom = residue.getAtom("SG")
217                if atom != None:
218                    SGpartners[atom] = []
219
220        for atom in SGpartners:
221            for partner in SGpartners:
222                if atom == partner or SGpartners[atom] != []: continue
223                dist = distance(atom.getCoords(), partner.getCoords())
224                if dist < BONDED_SS_LIMIT:
225                    SGpartners[atom].append(partner)
226                    SGpartners[partner].append(atom)
227
228        for atom in SGpartners:
229            res1 = atom.get("residue")
230            numpartners = len(SGpartners[atom])
231            if numpartners == 1:
232                partner = SGpartners[atom][0]
233                res2 = partner.get("residue")
234                res1.set("SSbonded", 1)
235                res1.set("SSbondedpartner", partner)
236                self.applyPatch("CYX", res1)
237                self.write("%s - %s\n" % (res1, res2), 1)
238            elif numpartners > 1:
239                error = "WARNING: %s has multiple potential " % res1
240                error += "SS-bridge partners\n"
241                self.write(error, 1)
242                self.warnings.append(error)
243            elif numpartners == 0:
244                self.write("%s is a free cysteine\n" % res1, 1)
245        self.write("Done.\n")
246
247    def updateInternalBonds(self):
248        """
249            Update the internal bonding network using the reference
250            objects in each atom.
251        """
252        for residue in self.protein.getResidues():
253            if isinstance(residue, (Amino, WAT, Nucleic)):
254                for atom in residue.getAtoms():
255                    if not atom.hasReference(): continue
256                    for bond in atom.reference.bonds:
257                        if not residue.hasAtom(bond): continue
258                        bondatom = residue.getAtom(bond)
259                        if bondatom not in atom.bonds:
260                            atom.addBond(bondatom)
261
262    def updateBonds(self):
263        """
264            Update the bonding network of the protein.  This happens
265            in 3 steps:
266              1.  Applying the PEPTIDE patch to all Amino residues
267                  so as to add reference for the N(i+1) and C(i-1)
268                  atoms
269              2.  UpdateInternalBonds for inter-residue linking
270              3.  Set the links to the N(i+1) and C(i-1) atoms
271        """
272
273        # Apply the peptide patch
274
275        for residue in self.protein.getResidues():
276            if isinstance(residue, Amino):
277                if residue.isNterm or residue.isCterm:
278                    continue
279                else:
280                    self.applyPatch("PEPTIDE", residue)
281
282        # Update all internal bonds
283
284        self.updateInternalBonds()
285
286        # Set the peptide bond pointers
287
288        for chain in self.protein.getChains():
289            for i in range(chain.numResidues() - 1):
290                res1 = chain.residues[i]
291                res2 = chain.residues[i + 1]
292                if not isinstance(res1, Amino) or not isinstance(res2, Amino):
293                    continue
294                atom1 = res1.getAtom("C")
295                atom2 = res2.getAtom("N")
296
297                if atom1 != None:
298                    res2.peptideC = atom1
299                if atom2 != None:
300                    res1.peptideN = atom2
301                if atom1 == None or atom2 == None:
302                    continue
303
304                if distance(atom1.getCoords(), atom2.getCoords()) > PEPTIDE_DIST:
305                    text = "Gap in backbone detected between %s and %s!\n" % \
306                           (res1, res2)
307                    self.write(text, 1)
308                    self.warnings.append(text)
309                    res2.peptideC = None
310                    res1.peptideN = None
311
312    def applyPatch(self, patchname, residue):
313        """
314            Apply a patch to the given residue.  This is one of the key
315            functions in PDB2PQR.  A similar function appears in
316            definitions.py - that version is needed for residue level
317            subtitutions so certain protonation states (i.e. CYM, HSE)
318            are detectatble on input.
319
320            This version looks up the particular patch name in the
321            patchmap stored in the protein, and then applies the
322            various commands to the reference and actual residue
323            structures.
324
325            See the inline comments for a more detailed explanation.
326
327            Parameters
328                patchname:  The name of the patch (string)
329                residue:    The residue to apply the patch to (residue)
330        """
331        if patchname not in self.protein.patchmap:
332            raise PDBInternalError("Unable to find patch %s!" % patchname)
333
334        self.write('PATCH INFO: %s patched with %s\n' % (residue,patchname),1)
335
336        # Make a copy of the reference, i.e. a new reference for
337        # this patch.  Two examples:
338        #     PEPTIDE is a special case, as it applies to
339        #             every residue.
340        #     CTERM only applies to one specific residue, so a
341        #             deep copy is used.
342
343        if patchname == "PEPTIDE":
344            newreference = residue.reference
345        else:
346            newreference = copy.deepcopy(residue.reference)
347
348        patch = self.protein.patchmap[patchname]
349
350        # Add atoms from patch
351
352        for atomname in patch.map:
353            newreference.map[atomname] = patch.map[atomname]
354            for bond in patch.map[atomname].bonds:
355                if bond not in newreference.map: continue
356                if atomname not in newreference.map[bond].bonds:
357                    newreference.map[bond].bonds.append(atomname)
358
359        # Remove atoms as directed by patch
360
361        for remove in patch.remove:
362            if remove in residue.map: residue.removeAtom(remove)
363            if remove not in newreference.map: continue
364            removebonds = newreference.map[remove].bonds
365            del newreference.map[remove]
366            for bond in removebonds:
367                index = newreference.map[bond].bonds.index(remove)
368                del newreference.map[bond].bonds[index]
369
370        # Add the new dihedrals
371
372        for dihedral in patch.dihedrals:
373            newreference.dihedrals.append(dihedral)
374
375        # Point at the new reference
376
377        residue.reference = newreference
378        residue.patches.append(patchname)
379
380        # Rename atoms as directed by patch
381
382        for atom in residue.getAtoms():
383            if atom.name in patch.altnames:
384                residue.renameAtom(atom.name, patch.altnames[atom.name])
385
386        # Replace each atom's reference with the new one
387
388        for atomname in residue.map:
389            if newreference.hasAtom(atomname):
390                atom = residue.getAtom(atomname)
391                atom.reference = newreference.map[atomname]
392
393    def setStates(self):
394        """
395            Set the state of each residue.  This is the last step
396            before assigning the forcefield, but is necessary so
397            as to distinguish between various protonation states.
398
399            See aa.py for residue-specific functions.
400        """
401        for residue in self.protein.getResidues():
402            if isinstance(residue, (Amino, Nucleic)):
403                residue.setState()
404
405    def assignTermini(self, chain, neutraln=False, neutralc=False):
406        """
407            Assign the termini for the given chain by looking at
408            the start and end residues.
409        """
410
411        if len(chain.residues) == 0:
412            text = "Error: chain \"%s\" has 0 residues!" % chain.chainID
413            raise PDBInputError(text)
414
415        # Set the N-Terminus/ 5' Terminus
416
417        res0 = chain.residues[0]
418        if isinstance(res0, Amino):
419            res0.set("isNterm", 1)
420            if isinstance(res0, PRO):
421                self.applyPatch("NEUTRAL-NTERM", res0)
422            elif neutraln:
423                self.applyPatch("NEUTRAL-NTERM", res0)
424            else:
425                self.applyPatch("NTERM", res0)
426        elif isinstance(res0, Nucleic):
427            res0.set("is5term", 1)
428            self.applyPatch("5TERM", res0)
429
430        # Set the C-Terminus/ 3' Terminus
431
432        reslast = chain.residues[-1]
433        if isinstance(reslast, Amino):
434            reslast.set("isCterm", 1)
435            if neutralc:
436                self.applyPatch("NEUTRAL-CTERM", reslast)
437            else:
438                self.applyPatch("CTERM", reslast)
439        elif isinstance(reslast, Nucleic):
440            reslast.set("is3term", 1)
441            self.applyPatch("3TERM", reslast)
442        else:
443            for i in range(len(chain.residues)):
444                resthis = chain.residues[-1 - i]
445                if isinstance(resthis, Amino):
446                    resthis.set("isCterm", 1)
447                    if neutralc:
448                        self.applyPatch("NEUTRAL-CTERM", resthis)
449                    else:
450                        self.applyPatch("CTERM", resthis)
451                    break
452                elif resthis.name in ["NH2", "NME"]: break
453                elif isinstance(resthis, Nucleic):
454                    resthis.set("is3term", 1)
455                    self.applyPatch("3TERM", resthis)
456                    break
457
458    def setTermini(self, neutraln=False, neutralc=False):
459        """
460            Set the termini for the protein. First set all known
461            termini by looking at the ends of the chain. Then
462            examine each residue, looking for internal chain breaks.
463        """
464
465        self.write("Setting the termini... \n")
466
467        # First assign the known termini
468
469        for chain in self.protein.getChains():
470            self.assignTermini(chain, neutraln, neutralc)
471
472        # Now determine if there are any hidden chains
473
474        letters = string.ascii_uppercase + string.ascii_lowercase
475        c = 0
476
477        while c < len(self.protein.getChains()):
478            chain = self.protein.chains[c]
479            reslist = []
480
481            origlist = []
482
483            # origlist holds the original residue list for the chain
484
485            for residue in chain.getResidues():
486                origlist.append(residue)
487
488            for residue in origlist:
489                reslist.append(residue)
490                oldid = residue.chainID
491
492                # Look for ending termini
493
494                fixflag = 0
495                if isinstance(residue, Amino):
496                    if (residue.hasAtom("OXT") and not residue.isCterm):
497                        fixflag = 1
498
499                elif isinstance(residue, Nucleic):
500                    if ((residue.hasAtom("H3T") or residue.name.endswith("3"))\
501                      and not residue.is3term):
502                        fixflag = 1
503
504                if fixflag:
505
506                    # Get an available chain ID
507                    chainid = letters[0]
508                    id = 0
509                    idLength = 1
510                    while chainid in self.protein.chainmap:
511                        id += 1
512                        if id >= len(letters):
513                            idLength += 1
514                            id = 0
515                        chainid = letters[id] * idLength
516
517                    if(idLength > 1):
518                        message = 'Warning: Reusing chain id: ' + chainid[0] + '\n'
519                        self.write(message)
520
521                    # Make a new chain with these residues
522                    newchain = Chain(chainid[0])
523
524                    self.protein.chainmap[chainid] = newchain
525                    self.protein.chains.insert(c, newchain)
526
527                    for res in reslist:
528                        newchain.addResidue(res)
529                        chain.residues.remove(res)
530                        res.setChainID(chainid[0])
531
532                    self.assignTermini(chain, neutraln, neutralc)
533                    self.assignTermini(newchain, neutraln, neutralc)
534
535                    reslist = []
536                    c += 1
537
538            c += 1
539
540        # Update the final chain's chainID if it is "" unless it's all water
541
542        if "" in self.protein.chainmap:
543
544            notwat = 0
545            for res in chain.residues:
546                if not isinstance(res, WAT):
547                    notwat = 1
548                    break
549
550            if notwat == 0:
551                self.write("Done.\n")
552                return
553
554            chain = self.protein.chainmap[""]
555            chainid = letters[0]
556            id = 0
557            idLength = 1
558            while chainid in self.protein.chainmap:
559                id += 1
560                if id >= len(letters):
561                    idLength += 1
562                    id = 0
563                chainid = letters[id] * idLength
564
565            if(idLength > 1):
566                message = 'Warning: Reusing chain id: ' + chainid[0] + '\n'
567                self.write(message)
568
569            # Use the new chainID
570
571            self.protein.chainmap[chainid] = chain
572            del self.protein.chainmap[""]
573
574            for res in chain.residues:
575                res.setChainID(chainid[0])
576
577        self.write("Done.\n")
578
579
580    def findMissingHeavy(self):
581        """
582            Repair residues that contain missing heavy (non-Hydrogen) atoms
583        """
584        self.write("Checking for missing heavy atoms... \n")
585        misscount = 0
586        heavycount = 0
587        for residue in self.protein.getResidues():
588            if not isinstance(residue, (Amino, Nucleic)): continue
589
590            # Check for Missing Heavy Atoms
591
592            for refatomname in residue.reference.map:
593                if refatomname.startswith("H"): continue
594                if refatomname in ["N+1", "C-1"]: continue
595                if refatomname in ["O1P", "O2P"]:
596                    if residue.hasAtom("OP1") and residue.hasAtom("OP2"): continue
597                heavycount += 1
598                if not residue.hasAtom(refatomname):
599                    self.write("Missing %s in %s\n" % \
600                               (refatomname, residue), 1)
601                    misscount += 1
602                    residue.addMissing(refatomname)
603
604            # Check for Extra Atoms
605
606            atomlist = []
607            for atom in residue.get("atoms"):
608                atomlist.append(atom)
609
610            for atom in atomlist:
611                atomname = atom.get("name")
612                if atomname in ["OP1", "OP2"] and residue.reference.hasAtom("O1P") \
613                    and residue.reference.hasAtom("O2P"): continue
614                if not residue.reference.hasAtom(atomname):
615                    self.write("Extra atom %s in %s! - " % \
616                               (atomname, residue), 1)
617                    residue.removeAtom(atomname)
618                    self.write("Deleted this atom.\n")
619
620        if heavycount == 0:
621            raise PDBInputError("No heavy atoms found. " +
622                                "You may also see this message if PDB2PQR does not have parameters for any residue in your protein.")
623
624        misspct = 100.0 * float(misscount) / heavycount
625        if misspct > REPAIR_LIMIT:
626            error = "This PDB file is missing too many (%i out of " % misscount
627            error += "%i, %.2f%%) heavy atoms to accurately repair the file.  " % \
628                     (heavycount, misspct)
629            error += "The current repair limit is set at %i%%. " % REPAIR_LIMIT
630            error += "You may also see this message if PDB2PQR does not have parameters for enough residues in your protein."
631            raise PDBInputError(error)
632        elif misscount > 0:
633            self.write("Missing %i out of %i heavy atoms (%.2f percent) - " % \
634                       (misscount, heavycount, misspct))
635            self.write("Will attempt to repair.\n")
636            self.repairHeavy()
637        else:
638            self.write("No heavy atoms found missing - Done.\n")
639
640    @staticmethod
641    def rebuildTetrahedral(residue, atomname):
642        """
643            Rebuild a tetrahedral hydrogen group.  This is necessary
644            due to the shortcomings of the quatfit routine - given a
645            tetrahedral geometry and two existing hydrogens, the
646            quatfit routines have two potential solutions.  This function
647            uses basic tetrahedral geometry to fix this issue.
648
649            Parameters
650                residue:  The residue in question (residue)
651                atomname: The atomname to add (string)
652            Returns
653                1 if successful, 0 otherwise
654        """
655
656        hcount = 0
657        nextatomname = None
658
659        atomref = residue.reference.map.get(atomname)
660        if atomref is None:
661            return False
662        bondname = atomref.bonds[0]
663
664        # Return if the bonded atom does not exist
665
666        if not residue.hasAtom(bondname):
667            return False
668
669        # This group is tetrahedral if bondatom has 4 bonds,
670        #  3 of which are hydrogens
671
672        for bond in residue.reference.map[bondname].bonds:
673            if bond.startswith("H"):
674                hcount += 1
675            elif bond != 'C-1' and bond != 'N+1':
676                nextatomname = bond
677
678        # Check if this is a tetrahedral group
679
680        if hcount != 3 or nextatomname == None:
681            return False
682
683        # Now rebuild according to the tetrahedral geometry
684
685        bondatom = residue.getAtom(bondname)
686        nextatom = residue.getAtom(nextatomname)
687        numbonds = len(bondatom.bonds)
688
689        if numbonds == 1:
690
691            # Place according to two atoms
692
693            coords = [bondatom.getCoords(), nextatom.getCoords()]
694            refcoords = [residue.reference.map[bondname].getCoords(), \
695                         residue.reference.map[nextatomname].getCoords()]
696            refatomcoords = atomref.getCoords()
697            newcoords = findCoordinates(2, coords, refcoords, refatomcoords)
698            residue.createAtom(atomname, newcoords)
699
700            # For LEU and ILE residues only: make sure the Hydrogens are in staggered conformation instead of eclipsed.
701
702            if isinstance(residue, LEU):
703                hcoords = newcoords
704                cbatom = residue.getAtom('CB')
705                ang = getDihedral(cbatom.getCoords(), nextatom.getCoords(), bondatom.getCoords(), hcoords)
706                diffangle = 60 - ang
707                residue.rotateTetrahedral(nextatom, bondatom, diffangle)
708
709            elif isinstance(residue, ILE):
710                hcoords = newcoords
711                cg1atom = residue.getAtom('CG1')
712                cbatom = residue.getAtom('CB')
713                if bondatom.name == 'CD1':
714                    ang = getDihedral(cbatom.getCoords(), nextatom.getCoords(), bondatom.getCoords(), hcoords)
715                elif bondatom.name == 'CG2':
716                    ang = getDihedral(cg1atom.getCoords(), nextatom.getCoords(), bondatom.getCoords(), hcoords)
717                else:
718                    ang = getDihedral(cbatom.getCoords(), nextatom.getCoords(), bondatom.getCoords(), hcoords)
719
720                diffangle = 60 - ang
721                residue.rotateTetrahedral(nextatom, bondatom, diffangle)
722
723            return 1
724
725        elif numbonds == 2:
726
727            # Get the single hydrogen coordinates
728
729            hatom = None
730            for bond in bondatom.reference.bonds:
731                if residue.hasAtom(bond) and bond.startswith("H"):
732                    hatom = residue.getAtom(bond)
733                    break
734
735            # Use the existing hydrogen and rotate about the bond
736
737            residue.rotateTetrahedral(nextatom, bondatom, 120)
738            newcoords = hatom.getCoords()
739            residue.rotateTetrahedral(nextatom, bondatom, -120)
740            residue.createAtom(atomname, newcoords)
741
742            return 1
743
744        elif numbonds == 3:
745
746            # Find the one spot the atom can be
747
748            hatoms = []
749            for bond in bondatom.reference.bonds:
750                if residue.hasAtom(bond) and bond.startswith("H"):
751                    hatoms.append(residue.getAtom(bond))
752
753            # If this is more than two something is wrong
754
755            if len(hatoms) != 2: return 0
756
757            # Use the existing hydrogen and rotate about the bond
758
759            residue.rotateTetrahedral(nextatom, bondatom, 120)
760            newcoords1 = hatoms[0].getCoords()
761            residue.rotateTetrahedral(nextatom, bondatom, 120)
762            newcoords2 = hatoms[0].getCoords()
763            residue.rotateTetrahedral(nextatom, bondatom, 120)
764
765            # Determine which one hatoms[1] is not in
766
767            if distance(hatoms[1].getCoords(), newcoords1) > 0.1:
768                residue.createAtom(atomname, newcoords1)
769            else:
770                residue.createAtom(atomname, newcoords2)
771
772            return 1
773
774    def addHydrogens(self):
775        """
776            Add the hydrogens to the protein.  This requires either
777            the rebuildTetrahedral function for tetrahedral geometries
778            or the standard quatfit methods.  These methods use three
779            nearby bonds to rebuild the atom; the closer the bonds, the
780            more accurate the results.  As such the peptide bonds are
781            used when available.
782        """
783        count = 0
784        self.write("Adding hydrogens to the protein...\n")
785        for residue in self.protein.getResidues():
786            if not isinstance(residue, (Amino, Nucleic)):
787                continue
788            for atomname in residue.reference.map:
789                if not atomname.startswith("H"):
790                    continue
791                if residue.hasAtom(atomname):
792                    continue
793                if isinstance(residue, CYS) and residue.SSbonded and atomname == "HG":
794                    continue
795
796                # If this hydrogen is part of a tetrahedral group,
797                #  follow a different codepath
798
799                if Routines.rebuildTetrahedral(residue, atomname):
800                    count += 1
801                    continue
802
803                # Otherwise use the standard quatfit methods
804
805                coords = []
806                refcoords = []
807
808                refatomcoords = residue.reference.map[atomname].getCoords()
809                bondlist = residue.reference.getNearestBonds(atomname)
810
811                for bond in bondlist:
812                    if bond == "N+1":
813                        atom = residue.peptideN
814                    elif bond == "C-1":
815                        atom = residue.peptideC
816                    else:
817                        atom = residue.getAtom(bond)
818
819                    if atom == None:
820                        continue
821
822                    # Get coordinates, reference coordinates
823
824                    coords.append(atom.getCoords())
825                    refcoords.append(residue.reference.map[bond].getCoords())
826
827                    # Exit if we have enough atoms
828
829                    if len(coords) == 3:
830                        break
831
832                if len(coords) == 3:
833                    newcoords = findCoordinates(3, coords, refcoords, refatomcoords)
834                    residue.createAtom(atomname, newcoords)
835                    count += 1
836                else:
837                    self.write("Couldn't rebuild %s in %s!\n" % (atomname, residue), 1)
838
839        self.write(" Added %i hydrogen atoms.\n" % count)
840
841    def removeHydrogens(self):
842        self.write("Stripping hydrogens from the protein...\n")
843
844        for residue in self.protein.getResidues():
845            if not isinstance(residue, (Amino, Nucleic)):
846                continue
847            for atom in residue.atoms[:]:
848                if atom.isHydrogen():
849                    residue.removeAtom(atom.name)
850
851    def repairHeavy(self):
852        """
853            Repair all heavy atoms.  Unfortunately the first time we
854            get to an atom we might not be able to rebuild it - it
855            might depend on other atoms to be rebuild first (think side
856            chains).  As such a 'seenmap' is used to keep track of what
857            we've already seen and subsequent attempts to rebuild the
858            atom.
859        """
860        self.write("Rebuilding missing heavy atoms... \n")
861        for residue in self.protein.getResidues():
862            if not isinstance(residue, (Amino, Nucleic)):
863                continue
864            missing = residue.get("missing")
865            if missing == []: continue
866
867            # Initialize some variables
868
869            seenmap = {}
870            nummissing = len(missing)
871
872            while len(missing) > 0:
873                coords = []
874                refcoords = []
875
876                atomname = missing.pop(0)
877                refatomcoords = residue.reference.map[atomname].getCoords()
878                bondlist = residue.reference.getNearestBonds(atomname)
879
880                for bond in bondlist:
881                    if bond == "N+1": atom = residue.peptideN
882                    elif bond == "C-1": atom = residue.peptideC
883                    else: atom = residue.getAtom(bond)
884
885                    if atom == None: continue
886
887                    # Get coordinates, reference coordinates
888
889                    coords.append(atom.getCoords())
890                    refcoords.append(residue.reference.map[bond].getCoords())
891
892                    # Exit if we have enough atoms
893
894                    if len(coords) == 3: break
895
896                # We might need other atoms to be rebuilt first
897
898                if len(coords) < 3:
899                    try:
900                        seenmap[atomname] += 1
901                    except KeyError:
902                        seenmap[atomname] = 1
903
904                    missing.append(atomname)
905                    if seenmap[atomname] > nummissing:
906                        text = "Too few atoms present to reconstruct or cap residue %s in structure!\n" % (residue)
907                        text += "This error is generally caused by missing backbone atoms in this protein;\n"
908                        text += "you must use an external program to complete gaps in the protein backbone.\n"
909                        text += "Heavy atoms missing from %s: " % (residue)
910                        text += ' '.join(missing)
911                        raise PDBInputError(text)
912
913                else: # Rebuild the atom
914                    newcoords = findCoordinates(3, coords, refcoords, refatomcoords)
915                    residue.createAtom(atomname, newcoords)
916                    self.write("Added %s to %s at coordinates" % (atomname, residue), 1)
917                    self.write(" %.3f %.3f %.3f\n" % \
918                           (newcoords[0], newcoords[1], newcoords[2]))
919
920        self.write("Done.\n")
921
922    def setReferenceDistance(self):
923        """
924            Set the distance to the CA atom in the residue.
925            This is necessary for determining which atoms are
926            allowed to move during rotations.  Uses the
927            shortestPath algorithm found in utilities.py.
928        """
929        for residue in self.protein.getResidues():
930            if not isinstance(residue, Amino): continue
931
932            # Initialize some variables
933
934            map = {}
935            caatom = residue.getAtom("CA")
936
937            if caatom == None:
938                text = "Cannot set references to %s without CA atom!\n"
939                raise PDBInputError(text)
940
941            # Set up the linked map
942
943            for atom in residue.getAtoms():
944                map[atom] = atom.bonds
945
946            # Run the algorithm
947
948            for atom in residue.getAtoms():
949                if atom.isBackbone():
950                    atom.refdistance = -1
951                elif residue.isCterm and atom.name == "HO":   # special case for HO in Cterm
952                    atom.refdistance = 3
953                elif residue.isNterm and (atom.name == "H3" or atom.name == "H2"):  # special case for H2 or H3 in Nterm
954                    atom.refdistance = 2
955                else:
956                    atom.refdistance = len(shortestPath(map, atom, caatom)) - 1
957
958    def getbumpscore(self, residue):
959        """Get an bump score for the current structure"""
960
961        # Do some setup
962
963        self.cells = Cells(CELL_SIZE)
964        self.cells.assignCells(self.protein)
965
966        self.calculateDihedralAngles()
967        self.setDonorsAndAcceptors()
968        self.updateInternalBonds()
969        self.setReferenceDistance()
970        bumpscore = 0.0
971        #for residue in self.protein.getResidues():
972        if not isinstance(residue, Amino): return 0.0
973        # Initialize variables
974
975        conflictnames = []
976
977        for atom in residue.getAtoms():
978            atomname = atom.name
979            #if not atom.added: continue
980            if atomname[0] != "H": continue
981            #if atom.optimizeable: continue
982            #print atomname,atom.optimizeable,atom.added
983            bumpscore = bumpscore + self.getbumpscore_atom(atom)
984        return bumpscore
985
986
987    def getbumpscore_atom(self, atom):
988        """
989            Find nearby atoms for conflict-checking.  Uses
990            neighboring cells to compare atoms rather than an all
991            versus all O(n^2) algorithm, which saves a great deal
992            of time.  There are several instances where we ignore
993            potential conflicts; these include donor/acceptor pairs,
994            atoms in the same residue, and bonded CYS bridges.
995
996            Parameters
997                atom:  Find nearby atoms to this atom (Atom)
998            Returns
999                bumpscore: a bump score sum((dist-cutoff)**20 for all near atoms
1000
1001            Jens rewrote this function from findNearbyAtoms to
1002            be usable for detecting bumps for optimzable hydrogens
1003        """
1004        # Initialize some variables
1005        nearatoms = {}
1006        residue = atom.residue
1007        #print 'Cutoff distance',cutoff
1008        atom_size = BUMP_HYDROGEN_SIZE if atom.isHydrogen() else BUMP_HEAVY_SIZE
1009
1010        # Get atoms from nearby cells
1011
1012        closeatoms = self.cells.getNearCells(atom)
1013
1014        # Loop through and see if any are within the cutoff
1015
1016        bumpscore = 0.0
1017        for closeatom in closeatoms:
1018            closeresidue = closeatom.residue
1019            if closeresidue == residue and (closeatom in atom.bonds or atom in closeatom.bonds):
1020                continue
1021
1022            if not isinstance(closeresidue, Amino):
1023                continue
1024            if isinstance(residue, CYS):
1025                if residue.SSbondedpartner == closeatom: continue
1026
1027            # Also ignore if this is a donor/acceptor pair
1028            pair_ignored = False
1029            if atom.isHydrogen() and len(atom.bonds) != 0 and atom.bonds[0].hdonor \
1030               and closeatom.hacceptor:
1031                #pair_ignored = True
1032                continue
1033            if closeatom.isHydrogen() and len(closeatom.bonds) != 0 and closeatom.bonds[0].hdonor \
1034                   and atom.hacceptor:
1035                #pair_ignored = True
1036                continue
1037
1038            dist = distance(atom.getCoords(), closeatom.getCoords())
1039            other_size = BUMP_HYDROGEN_SIZE if closeatom.isHydrogen() else BUMP_HEAVY_SIZE
1040            cutoff = atom_size + other_size
1041            if dist < cutoff:
1042                bumpscore = bumpscore + 1000.0
1043                if pair_ignored:
1044                    self.write('This bump is a donor/acceptor pair.\n')
1045#                if heavy_not_ignored:
1046#                    print 'Bumped {0} against {1} within residue'.format(atom.name, closeatom.name)
1047                #nearatoms[closeatom] = (dist-cutoff)**2
1048        self.write('BUMPSCORE ' + str(bumpscore) + '\n')
1049        return bumpscore
1050
1051
1052    def debumpProtein(self):
1053        """
1054            Make sure that none of the added atoms were rebuilt
1055            on top of existing atoms.  See each called function
1056            for more information.
1057        """
1058
1059        self.write("Checking if we must debump any residues... \n")
1060
1061        # Do some setup
1062
1063        self.cells = Cells(CELL_SIZE)
1064        self.cells.assignCells(self.protein)
1065
1066        self.calculateDihedralAngles()
1067        self.setDonorsAndAcceptors()
1068        self.updateInternalBonds()
1069        self.setReferenceDistance()
1070
1071        # Determine which residues to debump
1072
1073        for residue in self.protein.getResidues():
1074            if not isinstance(residue, Amino): continue
1075
1076            # Initialize variables
1077
1078            conflictnames = self.findResidueConflicts(residue, True)
1079
1080            if not conflictnames:
1081                continue
1082
1083            # Otherwise debump the residue
1084
1085            self.write("Starting to debump %s...\n" % residue, 1)
1086            self.write("Debumping cutoffs: %2.1f for heavy-heavy, %2.1f for hydrogen-heavy, and %2.1f for hydrogen-hydrogen.\n" %
1087                       (BUMP_HEAVY_SIZE*2,
1088                        BUMP_HYDROGEN_SIZE+BUMP_HEAVY_SIZE,
1089                        BUMP_HYDROGEN_SIZE*2), 1)
1090            if self.debumpResidue(residue, conflictnames):
1091                self.write("Debumping Successful!\n\n", 1)
1092            else:
1093                text = "WARNING: Unable to debump %s\n" % residue
1094                self.write("********\n%s********\n\n" % text)
1095                self.warnings.append(text)
1096
1097        self.write("Done.\n")
1098
1099    def findResidueConflicts(self, residue, writeConflictInfo=False):
1100        conflictnames = []
1101        for atom in residue.getAtoms():
1102            atomname = atom.name
1103            if not atom.added: continue
1104            if atomname == "H": continue
1105            if atom.optimizeable: continue
1106
1107            nearatoms = self.findNearbyAtoms(atom)
1108
1109            # If something is too close, we must debump the residue
1110
1111            if nearatoms != {}:
1112                conflictnames.append(atomname)
1113                if writeConflictInfo:
1114                    for repatom in nearatoms:
1115                        self.write("%s %s is too close to %s %s\n" % \
1116                                   (residue, atomname, repatom.residue, repatom.name), 1)
1117
1118        return conflictnames
1119
1120    def scoreDihedralAngle(self, residue, anglenum):
1121        score = 0
1122        atomnames = residue.reference.dihedrals[anglenum].split()
1123        pivot = atomnames[2]
1124        moveablenames = self.getMoveableNames(residue, pivot)
1125        for name in moveablenames:
1126            nearatoms = self.findNearbyAtoms(residue.getAtom(name))
1127            for v in nearatoms.values():
1128                score += v
1129
1130        return score
1131
1132
1133    def debumpResidue(self, residue, conflictnames):
1134        """
1135            Debump a specific residue.  Only should be called
1136            if the residue has been detected to have a conflict.
1137            If called, try to rotate about dihedral angles to
1138            resolve the conflict.
1139
1140            Parameters
1141                residue:  The residue in question
1142                conflictnames:  A list of atomnames that were
1143                                rebuilt too close to other atoms
1144            Returns
1145                True if successful, False otherwise
1146        """
1147
1148        # Initialize some variables
1149
1150        ANGLE_STEPS = 72
1151        ANGLE_STEP_SIZE = float(360 // ANGLE_STEPS)
1152
1153        ANGLE_TEST_COUNT = 10
1154
1155        anglenum = -1
1156        currentConflictNames = conflictnames
1157
1158        EPSILON = 0.0000001
1159
1160        # Try (up to 10 times) to find a workable solution
1161
1162        for _ in range(ANGLE_TEST_COUNT):
1163
1164            anglenum = self.pickDihedralAngle(residue, currentConflictNames, anglenum)
1165
1166            if anglenum == -1: return False
1167
1168            self.write("Using dihedral angle number %i to debump the residue.\n" % anglenum, 1)
1169
1170            bestscore = self.scoreDihedralAngle(residue, anglenum)
1171            foundImprovement = False
1172            bestangle = originalAngle = residue.dihedrals[anglenum]
1173
1174            #Skip the first angle as it's already known.
1175            for i in xrange(1, ANGLE_STEPS):
1176                newangle = originalAngle + (ANGLE_STEP_SIZE * i)
1177                self.setDihedralAngle(residue, anglenum, newangle)
1178
1179                # Check for conflicts
1180
1181                score = self.scoreDihedralAngle(residue, anglenum)
1182
1183                if score == 0:
1184                    if not self.findResidueConflicts(residue):
1185                        self.write("No conflicts found at angle "+repr(newangle)+"\n", 1)
1186                        return True
1187                    else:
1188                        bestangle = newangle
1189                        foundImprovement = True
1190                        break
1191
1192                # Set the best angle
1193                elif score < bestscore:
1194                    diff = abs(bestscore - score)
1195                    #Don't update if it's effectively a tie
1196                    if diff > EPSILON:
1197                        bestscore = score
1198                        bestangle = newangle
1199                        foundImprovement = True
1200
1201            self.setDihedralAngle(residue, anglenum, bestangle)
1202            currentConflictNames = self.findResidueConflicts(residue)
1203
1204            if foundImprovement:
1205                self.write("Best score of "+repr(bestscore)+" at angle "+repr(bestangle)+". New conflict set: ", 1)
1206                self.write(str(currentConflictNames)+"\n", 1)
1207            else:
1208                self.write("No improvement found for this dihedral angle.\n", 1)
1209
1210        # If we're here, debumping was unsuccessful
1211
1212        return False
1213
1214    def calculateDihedralAngles(self):
1215        """
1216            Calculate the dihedral angle for every residue within the protein
1217        """
1218        for residue in self.protein.getResidues():
1219            if not isinstance(residue, Amino): continue
1220            residue.dihedrals = []
1221
1222            refangles = residue.reference.dihedrals
1223            for di in refangles:
1224                coords = []
1225                atoms = di.split()
1226                for i in range(4):
1227                    atomname = atoms[i]
1228                    if residue.hasAtom(atomname):
1229                        coords.append(residue.getAtom(atomname).getCoords())
1230
1231                if len(coords) == 4: angle = getDihedral(coords[0], coords[1], coords[2], coords[3])
1232                else: angle = None
1233
1234                residue.addDihedralAngle(angle)
1235
1236    def getClosestAtom(self, atom):
1237        """
1238            Get the closest atom that does not form a donor/acceptor pair.
1239            Used to detect potential conflicts.
1240
1241            NOTE:  Cells must be set before using this function.
1242
1243            Parameters
1244                atom:  The atom in question (Atom)
1245            Returns
1246                bestatom:  The closest atom to the input atom that does not
1247                           satisfy a donor/acceptor pair.
1248        """
1249        # Initialize some variables
1250
1251        bestdist = 999.99
1252        bestwatdist = 999.99
1253        bestatom = None
1254        bestwatatom = None
1255        residue = atom.residue
1256
1257        # Get atoms from nearby cells
1258
1259        closeatoms = self.cells.getNearCells(atom)
1260
1261        # Loop through and see which is the closest
1262
1263        for closeatom in closeatoms:
1264            closeresidue = closeatom.residue
1265            if closeresidue == residue: continue
1266            if not isinstance(closeresidue, (Amino,WAT)): continue
1267            if isinstance(residue, CYS):
1268                if residue.SSbondedpartner == closeatom: continue
1269
1270            # Also ignore if this is a donor/acceptor pair
1271
1272            if atom.isHydrogen() and atom.bonds[0].hdonor \
1273               and closeatom.hacceptor: continue
1274            if closeatom.isHydrogen() and closeatom.bonds[0].hdonor \
1275                   and atom.hacceptor:
1276                continue
1277
1278            dist = distance(atom.getCoords(), closeatom.getCoords())
1279
1280            if isinstance(closeresidue, WAT):
1281                if dist < bestwatdist:
1282                    bestwatdist = dist
1283                    bestwatatom = closeatom
1284            else:
1285                if dist < bestdist:
1286                    bestdist = dist
1287                    bestatom = closeatom
1288
1289        if bestdist > bestwatdist:
1290            txt = "Warning: %s in %s skipped when optimizing %s in %s\n" % (bestwatatom.name,
1291                                                                           bestwatatom.residue,
1292                                                                           atom.name, residue)
1293            if txt not in self.warnings:
1294                self.warnings.append(txt)
1295
1296        return bestatom
1297
1298    def findNearbyAtoms(self, atom):
1299        """
1300            Find nearby atoms for conflict-checking.  Uses
1301            neighboring cells to compare atoms rather than an all
1302            versus all O(n^2) algorithm, which saves a great deal
1303            of time.  There are several instances where we ignore
1304            potential conflicts; these include donor/acceptor pairs,
1305            atoms in the same residue, and bonded CYS bridges.
1306
1307            Parameters
1308                atom:  Find nearby atoms to this atom (Atom)
1309            Returns
1310                nearatoms:  A dictionary of <Atom too close> to <amount of overlap for that atom>.
1311        """
1312        # Initialize some variables
1313
1314        nearatoms = {}
1315        residue = atom.residue
1316        atom_size = BUMP_HYDROGEN_SIZE if atom.isHydrogen() else BUMP_HEAVY_SIZE
1317
1318        # Get atoms from nearby cells
1319
1320        closeatoms = self.cells.getNearCells(atom)
1321
1322        # Loop through and see if any are within the cutoff
1323
1324        for closeatom in closeatoms:
1325            closeresidue = closeatom.residue
1326            if closeresidue == residue and (closeatom in atom.bonds or atom in closeatom.bonds):
1327                continue
1328
1329            if not isinstance(closeresidue, (Amino, WAT)):
1330                continue
1331            if isinstance(residue, CYS) and residue.SSbondedpartner == closeatom:
1332                continue
1333
1334            # Also ignore if this is a donor/acceptor pair
1335
1336            if (atom.isHydrogen() and len(atom.bonds) != 0 and
1337                atom.bonds[0].hdonor and closeatom.hacceptor):
1338                continue
1339
1340            if (closeatom.isHydrogen() and len(closeatom.bonds) != 0 and
1341                closeatom.bonds[0].hdonor and atom.hacceptor):
1342                continue
1343
1344            dist = distance(atom.getCoords(), closeatom.getCoords())
1345            other_size = BUMP_HYDROGEN_SIZE if closeatom.isHydrogen() else BUMP_HEAVY_SIZE
1346            cutoff = atom_size + other_size
1347            if dist < cutoff:
1348                nearatoms[closeatom] = cutoff - dist
1349
1350        return nearatoms
1351
1352
1353    def pickDihedralAngle(self, residue, conflictnames, oldnum=None):
1354        """
1355            Choose an angle number to use in debumping
1356
1357            Algorithm
1358                Instead of simply picking a random chiangle, this function
1359                uses a more intelligent method to improve efficiency.
1360                The algorithm uses the names of the conflicting atoms
1361                within the residue to determine which angle number
1362                has the best chance of fixing the problem(s). The method
1363                also insures that the same chiangle will not be run twice
1364                in a row.
1365            Parameters
1366                residue:    The residue that is being debumped (Residue)
1367                conflictnames: A list of atom names that are currently
1368                               conflicts (list)
1369                oldnum    : The old dihedral angle number (int)
1370            Returns
1371                bestnum    : The new dihedral angle number (int)
1372        """
1373        bestnum = -1
1374        best = 0
1375
1376        iList = range(len(residue.dihedrals))
1377        #Make sure our testing is done round robin.
1378        if oldnum is not None and oldnum >= 0 and len(iList) > 0:
1379            del iList[oldnum]
1380            testDihedralIndecies = iList[oldnum:] + iList[:oldnum]
1381        else:
1382            testDihedralIndecies = iList
1383
1384        for i in testDihedralIndecies:
1385            if i == oldnum: continue
1386            if residue.dihedrals[i] is None: continue
1387
1388            score = 0
1389            atomnames = residue.reference.dihedrals[i].split()
1390            pivot = atomnames[2]
1391
1392            moveablenames = self.getMoveableNames(residue, pivot)
1393
1394            # If this pivot only moves the conflict atoms, pick it
1395
1396            if conflictnames == moveablenames: return i
1397
1398            # Otherwise find the pivot with the most matches
1399
1400            for name in conflictnames:
1401                if name in moveablenames:
1402                    score += 1
1403                    if score > best:
1404                        best = score
1405                        bestnum = i
1406
1407        # Return the best angle.  If none were found, return -1.
1408
1409        return bestnum
1410
1411    def setDihedralAngle(self, residue, anglenum, angle):
1412        """
1413            Rotate a residue about a given angle. Uses the quatfit
1414            methods to perform the matrix mathematics.
1415
1416            Parameters
1417                residue:   The residue to rotate
1418                anglenum:  The number of the angle to rotate as
1419                           listed in residue.dihedrals
1420                angle:     The desired angle.
1421
1422        """
1423        coordlist = []
1424        initcoords = []
1425        movecoords = []
1426        pivot = ""
1427
1428        oldangle = residue.dihedrals[anglenum]
1429        diff = angle - oldangle
1430
1431        atomnames = residue.reference.dihedrals[anglenum].split()
1432
1433        pivot = atomnames[2]
1434        for atomname in atomnames:
1435            if residue.hasAtom(atomname):
1436                coordlist.append(residue.getAtom(atomname).getCoords())
1437            else:
1438                raise PDBInputError("Error occurred while trying to debump!")
1439
1440        initcoords = subtract(coordlist[2], coordlist[1])
1441
1442        moveablenames = self.getMoveableNames(residue, pivot)
1443
1444        for name in moveablenames:
1445            atom = residue.getAtom(name)
1446            movecoords.append(subtract(atom.getCoords(), coordlist[1]))
1447
1448        newcoords = qchichange(initcoords, movecoords, diff)
1449
1450        for i in range(len(moveablenames)):
1451            atom = residue.getAtom(moveablenames[i])
1452            self.cells.removeCell(atom)
1453            x = (newcoords[i][0] + coordlist[1][0])
1454            y = (newcoords[i][1] + coordlist[1][1])
1455            z = (newcoords[i][2] + coordlist[1][2])
1456            atom.set("x", x)
1457            atom.set("y", y)
1458            atom.set("z", z)
1459            self.cells.addCell(atom)
1460
1461
1462        # Set the new angle
1463
1464        coordlist = []
1465        for atomname in atomnames:
1466            if residue.hasAtom(atomname):
1467                coordlist.append(residue.getAtom(atomname).getCoords())
1468            else:
1469                raise PDBInputError("Error occurred while trying to debump!")
1470
1471        di = getDihedral(coordlist[0], coordlist[1], coordlist[2], coordlist[3])
1472        residue.dihedrals[anglenum] = di
1473
1474    def getMoveableNames(self, residue, pivot):
1475        """
1476            Return all atomnames that are further away than the
1477            pivot atom.
1478
1479            Parameters
1480                residue:  The residue to use
1481                pivot:    The pivot atomname
1482        """
1483        movenames = []
1484        refdist = residue.getAtom(pivot).refdistance
1485        for atom in residue.getAtoms():
1486            if atom.refdistance > refdist:
1487                movenames.append(atom.name)
1488        return movenames
1489
1490    def setDonorsAndAcceptors(self):
1491        """
1492            Set the donors and acceptors within the protein
1493        """
1494        for residue in self.protein.getResidues():
1495            residue.setDonorsAndAcceptors()
1496
1497    def runPDB2PKA(self, ph, ff, pdblist, ligand, verbose, pdb2pka_params):
1498        if ff.lower() != 'parse':
1499            PDB2PKAError('PDB2PKA can only be run with the PARSE force field.')
1500
1501        self.write("Running PDB2PKA and applying at pH %.2f... \n" % ph)
1502
1503        import pka
1504        from pdb2pka import pka_routines
1505        init_params = pdb2pka_params.copy()
1506        init_params.pop('pairene')
1507        init_params.pop('clean_output')
1508
1509        results = pka.pre_init(original_pdb_list=pdblist,
1510                               ff=ff,
1511                               verbose=verbose,
1512                               ligand=ligand,
1513                               **init_params)
1514        output_dir, protein, routines, forcefield,apbs_setup, ligand_titratable_groups, maps, sd = results
1515
1516        mypkaRoutines = pka_routines.pKaRoutines(protein, routines, forcefield, apbs_setup, output_dir, maps, sd,
1517                                                 restart=pdb2pka_params.get('clean_output'),
1518                                                 pairene=pdb2pka_params.get('pairene'))
1519
1520        print 'Doing full pKa calculation'
1521        mypkaRoutines.runpKa()
1522
1523        pdb2pka_warnings = mypkaRoutines.warnings[:]
1524
1525        self.warnings.extend(pdb2pka_warnings)
1526
1527        residue_ph = {}
1528        for pka_residue_tuple, calc_ph in mypkaRoutines.ph_at_0_5.iteritems():
1529            tit_type, chain_id, number_str = pka_residue_tuple
1530            if tit_type == 'NTR':
1531                tit_type = 'N+'
1532            elif tit_type == 'CTR':
1533                tit_type = 'C-'
1534
1535            key = ' '.join([tit_type, number_str, chain_id])
1536            residue_ph[key] = calc_ph
1537
1538        pformat(residue_ph)
1539
1540        self.apply_pka_values(ff, ph, residue_ph)
1541
1542        self.write('Finished running PDB2PKA.\n')
1543
1544
1545    def runPROPKA(self, ph, ff, rootname, outname, options):
1546        """
1547            Run PROPKA on the current protein, setting protonation states to
1548            the correct values
1549
1550            Parameters
1551               ph:  The desired pH of the system
1552               ff:  The forcefield name to be used
1553               outname: The name of the PQR outfile
1554        """
1555        self.write("Running PROPKA and applying at pH %.2f... \n" % ph)
1556
1557        from propka30.Source.protein import Protein as pkaProtein
1558        from propka30.Source.pdb import readPDB as pkaReadPDB
1559        from propka30.Source.lib import residueList, setVerbose
1560
1561        setVerbose(options.verbose)
1562
1563        # Initialize some variables
1564
1565        linelen = 70
1566        pkadic = {}
1567
1568        # Reorder the atoms in each residue to start with N
1569
1570        for residue in self.protein.getResidues():
1571            residue.reorder()
1572
1573        # Make a string with all non-hydrogen atoms
1574
1575        HFreeProteinFile = StringIO()
1576
1577        for atom in self.protein.getAtoms():
1578            if not atom.isHydrogen():
1579                atomtxt = atom.getPDBString()
1580                atomtxt = atomtxt[:linelen]
1581                HFreeProteinFile.write(atomtxt)
1582                HFreeProteinFile.write('\n')
1583
1584
1585        HFreeProteinFile.seek(0)
1586
1587        # Run PropKa
1588
1589        atoms = pkaReadPDB('', file=HFreeProteinFile)
1590
1591        # creating protein object
1592        myPkaProtein = pkaProtein(atoms=atoms, name=rootname, options=options)
1593        # calculating pKa values for ionizable residues
1594        myPkaProtein.calculatePKA(options=options)
1595        # printing pka file
1596        myPkaProtein.writePKA(options=options, filename=outname)
1597
1598        # Parse the results
1599        # This is the method used to generate the summary in the first place.
1600        residue_list = residueList("propka1")
1601        for chain in myPkaProtein.chains:
1602            for residue_type in residue_list:
1603                for residue in chain.residues:
1604                    if residue.resName == residue_type:
1605                        #Strip out the extra space after C- or N+
1606                        key = string.strip('%s %s %s' % (string.strip(residue.resName),
1607                                                        residue.resNumb, residue.chainID))
1608                        pkadic[key] = residue.pKa_pro
1609
1610        if len(pkadic) == 0:
1611            return
1612
1613        # Now apply each pka to the appropriate residue
1614        self.apply_pka_values(ff, ph, pkadic)
1615
1616        self.write("Done.\n")
1617
1618    def apply_pka_values(self, ff, ph, pkadic):
1619        self.write('Applying pKa values at a pH of %.2f:\n' % ph)
1620        formatted_pkadict = pformat(pkadic)
1621        self.write(formatted_pkadict+'\n\n')
1622
1623        warnings = []
1624        for residue in self.protein.getResidues():
1625            if not isinstance(residue, Amino):
1626                continue
1627            resname = residue.name
1628            resnum = residue.resSeq
1629            chainID = residue.chainID
1630
1631            if residue.isNterm:
1632                key = "N+ %i %s" % (resnum, chainID)
1633                key = string.strip(key)
1634                if key in pkadic:
1635                    value = pkadic[key]
1636                    del pkadic[key]
1637                    if ph >= value:
1638                        if ff in ["amber", "charmm", "tyl06", "peoepb", "swanson"]:
1639                            warn = ("N-terminal %s" % key, "neutral")
1640                            warnings.append(warn)
1641                        else:
1642                            self.applyPatch("NEUTRAL-NTERM", residue)
1643
1644            if residue.isCterm:
1645                key = "C- %i %s" % (resnum, chainID)
1646                key = string.strip(key)
1647                if key in pkadic:
1648                    value = pkadic[key]
1649                    del pkadic[key]
1650                    if ph < value:
1651                        if ff in ["amber", "charmm", "tyl06", "peoepb", "swanson"]:
1652                            warn = ("C-terminal %s" % key, "neutral")
1653                            warnings.append(warn)
1654                        else:
1655                            self.applyPatch("NEUTRAL-CTERM", residue)
1656
1657            key = "%s %i %s" % (resname, resnum, chainID)
1658            key = string.strip(key)
1659            if key in pkadic:
1660                value = pkadic[key]
1661                del pkadic[key]
1662                if resname == "ARG" and ph >= value:
1663                    if ff == "parse":
1664                        self.applyPatch("AR0", residue)
1665                        txt = "WARNING: Neutral arginines are very rare. Please double\n"
1666                        self.warnings.append(txt)
1667                        self.write(txt)
1668                        txt = "         check your system and caculation setup.\n"
1669                        self.warnings.append(txt)
1670                        self.write(txt)
1671                    else:
1672                        warn = (key, "neutral")
1673                        warnings.append(warn)
1674                elif resname == "ASP" and ph < value:
1675                    if residue.isCterm and ff in ["amber", "tyl06", "swanson"]:
1676                        warn = (key, "Protonated at C-Terminal")
1677                        warnings.append(warn)
1678                    elif residue.isNterm and ff in ["amber", "tyl06", "swanson"]:
1679                        warn = (key, "Protonated at N-Terminal")
1680                        warnings.append(warn)
1681                    else:
1682                        self.applyPatch("ASH", residue)
1683                elif resname == "CYS" and ph >= value:
1684                    if ff == "charmm":
1685                        warn = (key, "negative")
1686                        warnings.append(warn)
1687                    else:
1688                        self.applyPatch("CYM", residue)
1689                elif resname == "GLU" and ph < value:
1690                    if residue.isCterm and ff in ["amber", "tyl06", "swanson"]:
1691                        warn = (key, "Protonated at C-Terminal")
1692                        warnings.append(warn)
1693                    elif residue.isNterm and ff in ["amber", "tyl06", "swanson"]:
1694                        warn = (key, "Protonated at N-Terminal")
1695                        warnings.append(warn)
1696                    else:
1697                        self.applyPatch("GLH", residue)
1698                elif resname == "HIS" and ph < value:
1699                    self.applyPatch("HIP", residue)
1700                elif resname == "LYS" and ph >= value:
1701                    if ff == "charmm":
1702                        warn = (key, "neutral")
1703                        warnings.append(warn)
1704                    elif ff in ["amber", "tyl06", "swanson"] and residue.get("isCterm"):
1705                        warn = (key, "neutral at C-Terminal")
1706                        warnings.append(warn)
1707                    elif ff == "tyl06" and residue.get("isNterm"):
1708                        warn = (key, "neutral at N-Terminal")
1709                        warnings.append(warn)
1710                    else:
1711                        self.applyPatch("LYN", residue)
1712                elif resname == "TYR" and ph >= value:
1713                    if ff in ["charmm", "amber", "tyl06", "peoepb", "swanson"]:
1714                        warn = (key, "negative")
1715                        warnings.append(warn)
1716                    else:
1717                        self.applyPatch("TYM", residue)
1718
1719        if len(warnings) > 0:
1720            init = "WARNING: PDB2PKA determined the following residues to be\n"
1721            self.warnings.append(init)
1722            self.write(init)
1723            init = "         in a protonation state not supported by the\n"
1724            self.warnings.append(init)
1725            self.write(init)
1726            init = "         %s forcefield!\n" % ff
1727            self.warnings.append(init)
1728            self.write(init)
1729            init = "         All were reset to their standard pH 7.0 state.\n"
1730            self.warnings.append(init)
1731            self.write(init)
1732            self.warnings.append("\n")
1733            self.write('\n')
1734            for warn in warnings:
1735                text = "             %s (%s)\n" % (warn[0], warn[1])
1736                self.warnings.append(text)
1737                self.write(text)
1738            self.warnings.append("\n")
1739            self.write('\n')
1740
1741        if len(pkadic) > 0:
1742            warn = "         PDB2PQR could not identify the following residues\n"
1743            self.warnings.append(warn)
1744            self.write(warn)
1745            warn = "         and residue numbers as returned by PROPKA or PDB2PKA:\n"
1746            self.warnings.append(warn)
1747            self.warnings.append("\n")
1748            self.write(warn)
1749            self.write('\n')
1750            for item in pkadic:
1751                text = "             %s\n" % item
1752                self.warnings.append(text)
1753                self.write(text)
1754            self.warnings.append("\n")
1755            self.write('\n')
1756
1757
1758class Cells:
1759    """
1760        The cells object provides a better way to search for nearby atoms. A
1761        pure all versus all search is O(n^2) - for every atom, every other atom
1762        must be searched.  This is rather inefficient, especially for large
1763        proteins where cells may be tens of angstroms apart.  The cell class
1764        breaks down the xyz protein space into several 3-D cells of desired
1765        size - then by simply examining atoms that fall into the adjacent
1766        cells one can quickly find nearby cells.
1767
1768        NOTE:  Ideally this should be somehow separated from the routines
1769               object...
1770        """
1771    def __init__(self, cellsize):
1772        """
1773            Initialize the cells.
1774
1775            Parameters
1776                cellsize:  The size of each cell (int)
1777        """
1778        self.cellmap = {}
1779        self.cellsize = cellsize
1780
1781    def assignCells(self, protein):
1782        """
1783            Place each atom in a virtual cell for easy neighbor comparison
1784        """
1785        for atom in protein.getAtoms():
1786            atom.cell = None
1787            self.addCell(atom)
1788
1789    def addCell(self, atom):
1790        """
1791            Add an atom to the cell
1792
1793            Parameters
1794                atom:  The atom to add (atom)
1795        """
1796        size = self.cellsize
1797
1798        x = atom.get("x")
1799        if x < 0:
1800            x = (int(x) - 1) / size * size
1801        else:
1802            x = int(x) / size * size
1803
1804        y = atom.get("y")
1805        if y < 0:
1806            y = (int(y) - 1) / size * size
1807        else:
1808            y = int(y) / size * size
1809
1810        z = atom.get("z")
1811        if z < 0:
1812            z = (int(z) - 1) / size * size
1813        else:
1814            z = int(z) / size * size
1815
1816        key = (x, y, z)
1817        try:
1818            self.cellmap[key].append(atom)
1819        except KeyError:
1820            self.cellmap[key] = [atom]
1821        atom.set("cell", key)
1822
1823    def removeCell(self, atom):
1824        """
1825             Remove the atom from a cell
1826
1827             Parameters
1828                 atom:   The atom to add (atom)
1829        """
1830        oldcell = atom.get("cell")
1831        if oldcell == None: return
1832        atom.set("cell", None)
1833        self.cellmap[oldcell].remove(atom)
1834
1835    def getNearCells(self, atom):
1836        """
1837            Find all atoms in bordering cells to an atom
1838
1839            Parameters
1840                atom:  The atom to use (atom)
1841            Returns
1842                closeatoms:  A list of nearby atoms (list)
1843        """
1844        size = self.cellsize
1845        closeatoms = []
1846        cell = atom.get("cell")
1847        if cell == None:
1848            return closeatoms
1849        else:
1850            x = cell[0]
1851            y = cell[1]
1852            z = cell[2]
1853            for i in range(-1 * size, 2 * size, size):
1854                for j in range(-1 * size, 2 * size, size):
1855                    for k in range(-1 * size, 2 * size, size):
1856                        newkey = (x + i, y + j, z + k)
1857                        try:
1858                            newatoms = self.cellmap[newkey]
1859                            for atom2 in newatoms:
1860                                if atom == atom2: continue
1861                                closeatoms.append(atom2)
1862                        except KeyError: pass
1863
1864            return closeatoms
1865