1# Copyright 2019 by Robert T. Miller. All rights reserved. 2# This file is part of the Biopython distribution and governed by your 3# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". 4# Please see the LICENSE file that should have been included as part of this 5# package. 6 7"""Classes to support internal coordinates for protein structures. 8 9Internal coordinates comprise Psi, Phi and Omega dihedral angles along the 10protein backbone, Chi angles along the sidechains, and all 3-atom angles and 11bond lengths comprising a protein chain. These routines can compute internal 12coordinates from atom XYZ coordinates, and compute atom XYZ coordinates from 13internal coordinates. 14 15Internal coordinates are defined on sequences of atoms which span 16residues or follow accepted nomenclature along sidechains. To manage these 17sequences and support Biopython's disorder mechanisms, AtomKey specifiers are 18implemented to capture residue, atom and variant identification in a single 19object. A Hedron object is specified as three sequential AtomKeys, comprising 20two bond lengths and the bond angle between them. A Dihedron consists of four 21sequential AtomKeys, linking two Hedra with a dihedral angle between them. 22 23A Protein Internal Coordinate (.pic) file format is defined to capture 24sufficient detail to reproduce a PDB file from chain starting coordinates 25(first residue N, Ca, C XYZ coordinates) and remaining internal coordinates. 26These files are used internally to verify that a given structure can be 27regenerated from its internal coordinates. 28 29Internal coordinates may also be exported as OpenSCAD data arrays for 30generating 3D printed protein models. OpenSCAD software is provided as 31proof-of-concept for generating such models. 32 33The following classes comprise the core functionality for processing internal 34coordinates and are sufficiently related and coupled to place them together in 35this module: 36 37IC_Chain: Extends Biopython Chain on .internal_coord attribute. 38 Manages connected sequence of residues and chain breaks; methods generally 39 apply IC_Residue methods along chain. 40 41IC_Residue: Extends for Biopython Residue on .internal_coord attribute. 42 Most control and methods of interest are in this class, see API. 43 44Dihedron: four joined atoms forming a dihedral angle. 45 Dihedral angle, homogeneous atom coordinates in local coordinate space, 46 references to relevant Hedra and IC_Residue. Methods to compute 47 residue dihedral angles, bond angles and bond lengths. 48 49Hedron: three joined atoms forming a plane. 50 Contains homogeneous atom coordinates in local coordinate space as well as 51 bond lengths and angle between them. 52 53Edron: base class for Hedron and Dihedron classes. 54 Tuple of AtomKeys comprising child, string ID, mainchain membership boolean 55 and other routines common for both Hedra and Dihedra. Implements rich 56 comparison. 57 58AtomKey: keys (dictionary and string) for referencing atom sequences. 59 Capture residue and disorder/occupancy information, provides a 60 no-whitespace key for .pic files, and implements rich comparison. 61 62Custom exception classes: HedronMatchError and MissingAtomError 63""" 64 65import re 66from collections import deque, namedtuple 67 68try: 69 import numpy # type: ignore 70except ImportError: 71 from Bio import MissingPythonDependencyError 72 73 raise MissingPythonDependencyError( 74 "Install NumPy to build proteins from internal coordinates." 75 ) 76 77from Bio.PDB.Atom import Atom, DisorderedAtom 78from Bio.PDB.Polypeptide import three_to_one 79 80from Bio.PDB.vectors import coord_space, multi_rot_Z, multi_rot_Y 81 82# , calc_dihedral, Vector 83from Bio.PDB.ic_data import ic_data_backbone, ic_data_sidechains 84from Bio.PDB.ic_data import ic_data_sidechain_extras, residue_atom_bond_state 85 86# for type checking only 87from typing import ( 88 List, 89 Dict, 90 Set, 91 TextIO, 92 Union, 93 Tuple, 94 cast, 95 TYPE_CHECKING, 96 Optional, 97) 98 99if TYPE_CHECKING: 100 from Bio.PDB.Residue import Residue 101 from Bio.PDB.Chain import Chain 102 103HKT = Tuple["AtomKey", "AtomKey", "AtomKey"] # Hedron key tuple 104DKT = Tuple["AtomKey", "AtomKey", "AtomKey", "AtomKey"] # Dihedron Key Tuple 105EKT = Union[HKT, DKT] # Edron Key Tuple 106BKT = Tuple["AtomKey", "AtomKey"] # Bond Key Tuple 107 108# HACS = Tuple[numpy.array, numpy.array, numpy.array] # Hedron Atom Coord Set 109HACS = numpy.array # Hedron Atom Coord Set 110DACS = Tuple[ 111 numpy.array, numpy.array, numpy.array, numpy.array 112] # Dihedron Atom Coord Set 113 114 115class IC_Chain: 116 """Class to extend Biopython Chain with internal coordinate data. 117 118 Attributes 119 ---------- 120 chain: biopython Chain object reference 121 The Chain object this extends 122 123 initNCaC: AtomKey indexed dictionary of N, Ca, C atom coordinates. 124 NCaCKeys start chain segments (first residue or after chain break). 125 These 3 atoms define the coordinate space for a contiguous chain segment, 126 as initially specified by PDB or mmCIF file. 127 128 MaxPeptideBond: **Class** attribute to detect chain breaks. 129 Override for fully contiguous chains with some very long bonds - e.g. 130 for 3D printing (OpenSCAD output) a structure with fully disordered 131 (missing) residues. 132 133 ordered_aa_ic_list: list of IC_Residue objects 134 IC_Residue objects ic algorithms can process (e.g. no waters) 135 136 hedra: dict indexed by 3-tuples of AtomKeys 137 Hedra forming residues in this chain 138 139 hedraLen: int length of hedra dict 140 141 hedraNdx: dict mapping hedra AtomKeys to numpy array data 142 143 dihedra: dict indexed by 4-tuples of AtomKeys 144 Dihedra forming (overlapping) this residue 145 146 dihedraLen: int length of dihedra dict 147 148 dihedraNdx: dict mapping dihedra AtomKeys to numpy array data 149 150 atomArray: numpy array of homogeneous atom coords for chain 151 152 atomArrayIndex: dict mapping AtomKeys to atomArray indexes 153 154 numpy arrays for vector processing of chain di/hedra: 155 156 hedraIC: length-angle-length entries for each hedron 157 158 hAtoms: homogeneous atom coordinates (3x4) of hedra, central atom at origin 159 160 hAtomsR: hAtoms in reverse order 161 162 hAtoms_needs_update: booleans indicating whether hAtoms represent hedraIC 163 164 dihedraIC: dihedral angles for each dihedron 165 166 dAtoms: homogeneous atom coordinates (4x4) of dihedra, second atom at origin 167 168 dAtoms_needs_update: booleans indicating whether dAtoms represent dihedraIC 169 170 Methods 171 ------- 172 internal_to_atom_coordinates(verbose, start, fin) 173 Process ic data to Residue/Atom coordinates; calls assemble_residues() 174 followed by coords_to_structure() 175 assemble_residues(verbose, start, fin) 176 Generate IC_Residue atom coords from internal coordinates 177 coords_to_structure() 178 update Biopython Residue.Atom coords from IC_Residue coords for all 179 Residues with IC_Residue attributes 180 atom_to_internal_coordinates(verbose) 181 Calculate dihedrals, angles, bond lengths (internal coordinates) for 182 Atom data 183 link_residues() 184 Call link_dihedra() on each IC_Residue (needs rprev, rnext set) 185 set_residues() 186 Add .internal_coord attribute for all Residues in parent Chain, populate 187 ordered_aa_ic_list, set IC_Residue rprev, rnext or initNCaC coordinates 188 write_SCAD() 189 Write OpenSCAD matrices for internal coordinate data comprising chain 190 191 """ 192 193 MaxPeptideBond = 1.4 # larger C-N distance than this is chain break 194 195 def __init__(self, parent: "Chain", verbose: bool = False) -> None: 196 """Initialize IC_Chain object, with or without residue/Atom data. 197 198 :param parent: Biopython Chain object 199 Chain object this extends 200 """ 201 # type hinting parent as Chain leads to import cycle 202 self.chain = parent 203 self.ordered_aa_ic_list: List[IC_Residue] = [] 204 self.initNCaC: Dict[Tuple[str], Dict["AtomKey", numpy.array]] = {} 205 self.sqMaxPeptideBond = IC_Chain.MaxPeptideBond * IC_Chain.MaxPeptideBond 206 # need init here for _gen_edra(): 207 self.hedra = {} 208 # self.hedraNdx = {} 209 self.dihedra = {} 210 # self.dihedraNdx = {} 211 self.set_residues(verbose) # no effect if no residues loaded 212 213 # return True if a0, a1 within supplied cutoff 214 def _atm_dist_chk(self, a0: Atom, a1: Atom, cutoff: float, sqCutoff: float) -> bool: 215 diff = a0.coord - a1.coord 216 sum = 0 217 for axis in diff: 218 if axis > cutoff: 219 # print("axis: ", axis) 220 return False 221 sum += axis * axis 222 if sum > sqCutoff: 223 # print("sq axis: ", sqrt(sum)) # need import math.sqrt 224 return False 225 return True 226 227 # return a string describing issue, or None if OK 228 def _peptide_check(self, prev: "Residue", curr: "Residue") -> Optional[str]: 229 if 0 == len(curr.child_dict): 230 # curr residue with no atoms => reading pic file, no break 231 return None 232 if (0 != len(curr.child_dict)) and (0 == len(prev.child_dict)): 233 # prev residue with no atoms, curr has atoms => reading pic file, 234 # have break 235 return "PIC data missing atoms" 236 237 # handle non-standard AA not marked as HETATM (1KQF, 1NTH) 238 if not prev.internal_coord.is20AA: 239 return "previous residue not standard amino acid" 240 241 # both biopython Residues have Atoms, so check distance 242 Natom = curr.child_dict.get("N", None) 243 pCatom = prev.child_dict.get("C", None) 244 if Natom is None or pCatom is None: 245 return f"missing {'previous C' if pCatom is None else 'N'} atom" 246 247 # confirm previous residue has all backbone atoms 248 pCAatom = prev.child_dict.get("CA", None) 249 pNatom = prev.child_dict.get("N", None) 250 if pNatom is None or pCAatom is None: 251 return "previous residue missing N or Ca" 252 253 tooFar = f"MaxPeptideBond ({IC_Chain.MaxPeptideBond} angstroms) exceeded" 254 if not Natom.is_disordered() and not pCatom.is_disordered(): 255 dc = self._atm_dist_chk( 256 Natom, pCatom, IC_Chain.MaxPeptideBond, self.sqMaxPeptideBond 257 ) 258 if dc: 259 return None 260 else: 261 return tooFar 262 263 Nlist: List[Atom] = [] 264 pClist: List[Atom] = [] 265 if Natom.is_disordered(): 266 Nlist.extend(Natom.child_dict.values()) 267 else: 268 Nlist = [Natom] 269 if pCatom.is_disordered(): 270 pClist.extend(pCatom.child_dict.values()) 271 else: 272 pClist = [pCatom] 273 274 for n in Nlist: 275 for c in pClist: 276 if self._atm_dist_chk( 277 Natom, pCatom, IC_Chain.MaxPeptideBond, self.sqMaxPeptideBond 278 ): 279 return None 280 return tooFar 281 282 def clear_ic(self): 283 """Clear residue internal_coord settings for this chain.""" 284 for res in self.chain.get_residues(): 285 res.internal_coord = None 286 287 def _add_residue( 288 self, res: "Residue", last_res: List, last_ord_res: List, verbose: bool = False 289 ) -> bool: 290 """Set rprev, rnext, determine chain break.""" 291 if not res.internal_coord: 292 res.internal_coord = IC_Residue(res) 293 res.internal_coord.cic = self 294 if ( 295 0 < len(last_res) 296 and last_ord_res == last_res 297 and self._peptide_check(last_ord_res[0].residue, res) is None 298 ): 299 # no chain break 300 for prev in last_ord_res: 301 prev.rnext.append(res.internal_coord) 302 res.internal_coord.rprev.append(prev) 303 return True 304 elif all(atm in res.child_dict for atm in ("N", "CA", "C")): 305 # chain break, save coords for restart 306 if verbose and len(last_res) != 0: # not first residue 307 if last_ord_res != last_res: 308 reason = "disordered residues after {last_ord_res.pretty_str()}" 309 else: 310 reason = cast( 311 str, self._peptide_check(last_ord_res[0].residue, res) 312 ) 313 print( 314 f"chain break at {res.internal_coord.pretty_str()} due to {reason}" 315 ) 316 initNCaC: Dict["AtomKey", numpy.array] = {} 317 ric = res.internal_coord 318 for atm in ("N", "CA", "C"): 319 bpAtm = res.child_dict[atm] 320 if bpAtm.is_disordered(): 321 for altAtom in bpAtm.child_dict.values(): 322 ak = AtomKey(ric, altAtom) 323 initNCaC[ak] = IC_Residue.atm241(altAtom.coord) 324 else: 325 ak = AtomKey(ric, bpAtm) 326 initNCaC[ak] = IC_Residue.atm241(bpAtm.coord) 327 self.initNCaC[ric.rbase] = initNCaC 328 return True 329 elif ( 330 0 == len(res.child_list) 331 and self.chain.child_list[0].id == res.id 332 and res.internal_coord.is20AA 333 ): 334 # this is first residue, no atoms at all, is std amino acid 335 # conclude reading pic file with no N-Ca-C coords 336 return True 337 # chain break but do not have N, Ca, C coords to restart from 338 return False 339 340 def set_residues(self, verbose: bool = False) -> None: 341 """Initialize internal_coord data for loaded Residues. 342 343 Add IC_Residue as .internal_coord attribute for each Residue in parent 344 Chain; populate ordered_aa_ic_list with IC_Residue references for residues 345 which can be built (amino acids and some hetatms); set rprev and rnext 346 on each sequential IC_Residue, populate initNCaC at start and after 347 chain breaks. 348 """ 349 # ndx = 0 350 last_res: List["IC_Residue"] = [] 351 last_ord_res: List["IC_Residue"] = [] 352 353 for res in self.chain.get_residues(): 354 # select only not hetero or accepted hetero 355 if res.id[0] == " " or res.id[0] in IC_Residue.accept_resnames: 356 this_res: List["IC_Residue"] = [] 357 if 2 == res.is_disordered(): 358 # print('disordered res:', res.is_disordered(), res) 359 for r in res.child_dict.values(): 360 if self._add_residue(r, last_res, last_ord_res, verbose): 361 this_res.append(r.internal_coord) 362 else: 363 if self._add_residue(res, last_res, last_ord_res, verbose): 364 this_res.append(res.internal_coord) 365 366 if 0 < len(this_res): 367 self.ordered_aa_ic_list.extend(this_res) 368 last_ord_res = this_res 369 last_res = this_res 370 371 def link_residues(self) -> None: 372 """link_dihedra() for each IC_Residue; needs rprev, rnext set. 373 374 Called by PICIO:read_PIC() after finished reading chain 375 """ 376 for ric in self.ordered_aa_ic_list: 377 ric.cic = self 378 ric.link_dihedra() 379 380 def assemble_residues( 381 self, 382 verbose: bool = False, 383 start: Optional[int] = None, 384 fin: Optional[int] = None, 385 ) -> None: 386 """Generate IC_Residue atom coords from internal coordinates. 387 388 Filter positions between start and fin if set, find appropriate start 389 coordinates for each residue and pass to IC_Residue.assemble() 390 391 :param verbose bool: default False 392 describe runtime problems 393 :param: start, fin lists 394 sequence position, insert code for begin, end of subregion to 395 process 396 397 """ 398 for ric in self.ordered_aa_ic_list: 399 ric.clear_transforms() 400 401 for ric in self.ordered_aa_ic_list: 402 if not hasattr(ric, "NCaCKey"): 403 if verbose: 404 print( 405 f"no assembly for {str(ric)} due to missing N, Ca and/or C atoms" 406 ) 407 continue 408 respos = ric.residue.id[1] 409 if start and start > respos: 410 continue 411 if fin and fin < respos: 412 continue 413 414 ric.atom_coords = cast( 415 Dict[AtomKey, numpy.array], ric.assemble(verbose=verbose) 416 ) 417 if ric.atom_coords: 418 ric.ak_set = set(ric.atom_coords.keys()) 419 420 def coords_to_structure(self) -> None: 421 """Promote all ic atom_coords to Biopython Residue/Atom coords. 422 423 IC atom_coords are homogeneous [4], Biopython atom coords are XYZ [3]. 424 """ 425 self.ndx = 0 426 for res in self.chain.get_residues(): 427 if 2 == res.is_disordered(): 428 for r in res.child_dict.values(): 429 if r.internal_coord: 430 if r.internal_coord.atom_coords: 431 r.internal_coord.coords_to_residue() 432 elif ( 433 r.internal_coord.rprev 434 and r.internal_coord.rprev[0].atom_coords 435 ): 436 r.internal_coord.rprev[0].coords_to_residue(rnext=True) 437 elif res.internal_coord: 438 if res.internal_coord.atom_coords: 439 res.internal_coord.coords_to_residue() 440 elif ( 441 res.internal_coord.rprev and res.internal_coord.rprev[0].atom_coords 442 ): 443 res.internal_coord.rprev[0].coords_to_residue(rnext=True) 444 445 def init_edra(self) -> None: 446 """Create chain level di/hedra arrays. 447 448 If called by read_PIC, self.di/hedra = {} and object tree has IC data. 449 -> build chain arrays from IC data 450 451 If called at start of atom_to_internal_coords, self.di/hedra fully 452 populated. -> create empty chain numpy arrays 453 454 In both cases, fix di/hedra object attributes to be views on 455 chain-level array data 456 """ 457 # hedra: 458 459 if self.hedra == {}: 460 # loaded objects from PIC file, so no chain-level hedra 461 hLAL = {} 462 for ric in self.ordered_aa_ic_list: 463 for k, h in ric.hedra.items(): 464 self.hedra[k] = h 465 hLAL[k] = h.lal 466 self.hedraLen = len(self.hedra) 467 self.hedraIC = numpy.array(tuple(hLAL.values())) 468 else: 469 # atom_to_internal_coords() populates self.hedra via _gen_edra() 470 # a_to_ic will set ic so create empty 471 self.hedraLen = len(self.hedra) 472 self.hedraIC = numpy.empty((self.hedraLen, 3), dtype=numpy.float64) 473 474 self.hedraNdx = dict(zip(self.hedra.keys(), range(len(self.hedra)))) 475 476 self.hAtoms: numpy.ndarray = numpy.zeros( 477 (self.hedraLen, 3, 4), dtype=numpy.float64 478 ) 479 self.hAtoms[:, :, 3] = 1.0 # homogeneous 480 self.hAtomsR: numpy.ndarray = numpy.copy(self.hAtoms) 481 self.hAtoms_needs_update = numpy.full(self.hedraLen, True) 482 483 for ric in self.ordered_aa_ic_list: 484 for k, h in ric.hedra.items(): 485 # all h.lal become views on hedraIC 486 h.lal = self.hedraIC[self.hedraNdx[k]] 487 488 # dihedra: 489 490 if self.dihedra == {}: 491 # loaded objects from PIC file, so no chain-level hedra 492 dic = {} 493 for ric in self.ordered_aa_ic_list: 494 for k, d in ric.dihedra.items(): 495 self.dihedra[k] = d 496 dic[k] = d.angle 497 self.dihedraIC = numpy.array(tuple(dic.values())) 498 self.dihedraICr = numpy.deg2rad(self.dihedraIC) 499 self.dihedraLen = len(self.dihedra) 500 else: 501 # atom_to_internal_coords() populates self.hedra via _gen_edra() 502 # a_to_ic will set ic so create empty 503 self.dihedraLen = len(self.dihedra) 504 self.dihedraIC = numpy.empty(self.dihedraLen) 505 self.dihedraICr = numpy.empty(self.dihedraLen) 506 507 self.dihedraNdx = dict(zip(self.dihedra.keys(), range(len(self.dihedra)))) 508 509 self.dAtoms: numpy.ndarray = numpy.empty( 510 (self.dihedraLen, 4, 4), dtype=numpy.float64 511 ) 512 self.dAtoms[:, :, 3] = 1.0 # homogeneous 513 self.a4_pre_rotation = numpy.empty((self.dihedraLen, 4)) 514 515 for k, d in self.dihedra.items(): 516 d.initial_coords = self.dAtoms[self.dihedraNdx[k]] 517 d.a4_pre_rotation = self.a4_pre_rotation[self.dihedraNdx[k]] 518 519 self.dAtoms_needs_update = numpy.full(self.dihedraLen, True) 520 521 self.dRev = numpy.array(tuple(d.reverse for d in self.dihedra.values())) 522 self.dFwd = self.dRev != True # noqa: E712 523 self.dH1ndx = numpy.array( 524 tuple(self.hedraNdx[d.h1key] for d in self.dihedra.values()) 525 ) 526 self.dH2ndx = numpy.array( 527 tuple(self.hedraNdx[d.h2key] for d in self.dihedra.values()) 528 ) 529 530 # @profile 531 def init_atom_coords(self) -> None: 532 """Set chain level di/hedra initial coord arrays from IC_Residue data.""" 533 if not numpy.all(self.dAtoms_needs_update): 534 self.dAtoms_needs_update |= (self.hAtoms_needs_update[self.dH1ndx]) | ( 535 self.hAtoms_needs_update[self.dH2ndx] 536 ) 537 538 if numpy.any(self.hAtoms_needs_update): 539 # hedra initial coords 540 541 # supplementary angle radian: angles which add to 180 are supplementary 542 sar = numpy.deg2rad( 543 180.0 - self.hedraIC[:, 1][self.hAtoms_needs_update] 544 ) # angle 545 sinSar = numpy.sin(sar) 546 cosSarN = numpy.cos(sar) * -1 547 548 # a2 is len3 up from a2 on Z axis, X=Y=0 549 self.hAtoms[:, 2, 2][self.hAtoms_needs_update] = self.hedraIC[:, 2][ 550 self.hAtoms_needs_update 551 ] 552 553 # a0 X is sin( sar ) * len12 554 self.hAtoms[:, 0, 0][self.hAtoms_needs_update] = ( 555 sinSar * self.hedraIC[:, 0][self.hAtoms_needs_update] 556 ) 557 558 # a0 Z is -(cos( sar ) * len12) 559 # (assume angle always obtuse, so a0 is in -Z) 560 self.hAtoms[:, 0, 2][self.hAtoms_needs_update] = ( 561 cosSarN * self.hedraIC[:, 0][self.hAtoms_needs_update] 562 ) 563 564 # same again but 'reversed' : a0 on Z axis, a1 at origin, a2 in -Z 565 566 # a0r is len12 up from a1 on Z axis, X=Y=0 567 self.hAtomsR[:, 0, 2][self.hAtoms_needs_update] = self.hedraIC[:, 0][ 568 self.hAtoms_needs_update 569 ] 570 # a2r X is sin( sar ) * len23 571 self.hAtomsR[:, 2, 0][self.hAtoms_needs_update] = ( 572 sinSar * self.hedraIC[:, 2][self.hAtoms_needs_update] 573 ) 574 # a2r Z is -(cos( sar ) * len23) 575 self.hAtomsR[:, 2, 2][self.hAtoms_needs_update] = ( 576 cosSarN * self.hedraIC[:, 2][self.hAtoms_needs_update] 577 ) 578 579 self.hAtoms_needs_update[...] = False 580 581 # dihedra initial coords 582 583 dhlen = numpy.sum(self.dAtoms_needs_update) # self.dihedraLen 584 585 # full size masks: 586 mdFwd = self.dFwd & self.dAtoms_needs_update 587 mdRev = self.dRev & self.dAtoms_needs_update 588 589 # update size masks 590 udFwd = self.dFwd[self.dAtoms_needs_update] 591 udRev = self.dRev[self.dAtoms_needs_update] 592 593 # only 4th atom takes work: 594 # pick 4th atom based on rev flag 595 self.a4_pre_rotation[mdRev] = self.hAtoms[self.dH2ndx, 0][mdRev] 596 self.a4_pre_rotation[mdFwd] = self.hAtomsR[self.dH2ndx, 2][mdFwd] 597 598 # numpy multiply, add operations below intermediate array but out= not 599 # working with masking: 600 self.a4_pre_rotation[:, 2][self.dAtoms_needs_update] = numpy.multiply( 601 self.a4_pre_rotation[:, 2][self.dAtoms_needs_update], -1 602 ) # a4 to +Z 603 604 a4shift = numpy.empty(dhlen) 605 a4shift[udRev] = self.hedraIC[self.dH2ndx, 2][mdRev] # len23 606 a4shift[udFwd] = self.hedraIC[self.dH2ndx, 0][mdFwd] # len12 607 608 self.a4_pre_rotation[:, 2][self.dAtoms_needs_update] = numpy.add( 609 self.a4_pre_rotation[:, 2][self.dAtoms_needs_update], a4shift, 610 ) # so a2 at origin 611 612 # build rz rotation matrix for dihedral angle 613 rz = multi_rot_Z(self.dihedraICr[self.dAtoms_needs_update]) 614 615 # p = numpy.matmul(mt, dha[:, 0].reshape(-1, 4, 1)).reshape(-1, 4) 616 a4rot = numpy.matmul( 617 rz, self.a4_pre_rotation[self.dAtoms_needs_update][:].reshape(-1, 4, 1) 618 ).reshape(-1, 4) 619 # a4rot = rz.dot(self.a4_pre_rotation) # numpy.matmul(self.a4_pre_rotation, rz) 620 621 # now build dihedra initial coords 622 623 dH1atoms = self.hAtoms[self.dH1ndx] # fancy indexing so 624 dH1atomsR = self.hAtomsR[self.dH1ndx] # these copy not view 625 626 self.dAtoms[:, :3][mdFwd] = dH1atoms[mdFwd] 627 self.dAtoms[:, 3][mdFwd] = a4rot[udFwd] # [self.dFwd] 628 629 self.dAtoms[:, :3][mdRev] = dH1atomsR[:, 2::-1][mdRev] 630 self.dAtoms[:, 3][mdRev] = a4rot[udRev] # [self.dRev] 631 632 self.dAtoms_needs_update[...] = False 633 634 def internal_to_atom_coordinates( 635 self, 636 verbose: bool = False, 637 start: Optional[int] = None, 638 fin: Optional[int] = None, 639 promote: Optional[bool] = True, 640 ) -> None: 641 """Process, IC data to Residue/Atom coords. 642 643 Not yet vectorized. 644 645 :param verbose bool: default False 646 describe runtime problems 647 :param: start, fin lists 648 sequence position, insert code for begin, end of subregion to 649 process 650 :param promote bool: default True 651 If True (the default) copy result atom XYZ coordinates to 652 Biopython Atom objects for access by other Biopython methods; 653 otherwise, updated atom coordinates must be accessed through 654 IC_Residue and hedron objects. 655 """ 656 if self.dihedra == {}: 657 return # escape if nothing to process 658 659 self.init_atom_coords() 660 self.assemble_residues( 661 verbose=verbose, start=start, fin=fin 662 ) # internal to XYZ coordinates 663 if promote: 664 self.coords_to_structure() # promote to BioPython Residue/Atom 665 666 # @profile 667 def atom_to_internal_coordinates(self, verbose: bool = False) -> None: 668 """Calculate dihedrals, angles, bond lengths for Atom data. 669 670 :param verbose bool: default False 671 describe runtime problems 672 """ 673 hedraAtomDict = {} 674 dihedraAtomDict = {} 675 hInDset = set() 676 hedraDict2 = {} 677 gCBdihedra = set() 678 679 for ric in self.ordered_aa_ic_list: 680 ric.atom_to_internal_coordinates(verbose=verbose) # builds di/hedra objects 681 682 self.init_edra() 683 684 for ric in self.ordered_aa_ic_list: 685 for k, d in ric.dihedra.items(): 686 hInDset.update((d.h1key, d.h2key)) 687 try: 688 # get tuple of atom_coords from ric dict 689 dihedraAtomDict[k] = d.gen_acs(ric.atom_coords) 690 except KeyError: 691 gCBdihedra.add(d) # no atom_coords yet for gly CB 692 # init to rough approximation, overwrite later 693 # dihedron = O-C-Ca-Cb 694 # h1 = Ca-C-O (reversed) 695 # h2 = Cb-Ca-C (reversed) 696 # need dihedron atom coords all forward 697 h1 = d.hedron1.gen_acs(ric.atom_coords) 698 h1 = numpy.flipud(h1) # reverse h1 coords for building dihedron 699 xgcb = numpy.append(h1, [h1[2]], axis=0) 700 xgcb[3, 0] = xgcb[3, 0] + 1.0 701 dihedraAtomDict[k] = xgcb 702 703 for k, h in ric.hedra.items(): 704 if k not in hInDset: 705 # print("inaccessible hedron outside dihedron: ", h) 706 try: 707 hedraAtomDict[k] = h.gen_acs(ric.atom_coords) 708 except KeyError: # gly CB 709 hedraAtomDict[k] = numpy.array( 710 [[1, 2, 3, 1], [2, 2, 3, 1], [3, 2, 3, 1]] 711 ) 712 713 if self.dihedra == {}: 714 return # escape if no hedra loaded for this chain 715 716 if hedraAtomDict != {}: 717 # some hedra not in dihedra to process 718 # issue from alternate CB path, triggered by residue sidechain path not 719 # including n-ca-cb-xg 720 # not needed to build chain but include for consistency / statistics 721 hedraDict2 = {k: h for k, h in self.hedra.items() if k not in hInDset} 722 lh2a = len(hedraDict2) 723 if lh2a > 0: 724 h2a = numpy.array(tuple(hedraAtomDict.values())) 725 h2ai = dict(zip(hedraDict2.keys(), range(lh2a))) 726 727 # get dad for hedra 728 h_a0a1 = numpy.linalg.norm(h2a[:, 0] - h2a[:, 1], axis=1) 729 h_a1a2 = numpy.linalg.norm(h2a[:, 1] - h2a[:, 2], axis=1) 730 h_a0a2 = numpy.linalg.norm(h2a[:, 0] - h2a[:, 2], axis=1) 731 h_a0a1a2 = numpy.rad2deg( 732 numpy.arccos( 733 ((h_a0a1 * h_a0a1) + (h_a1a2 * h_a1a2) - (h_a0a2 * h_a0a2)) 734 / (2 * h_a0a1 * h_a1a2) 735 ) 736 ) 737 738 for k, h in hedraDict2.items(): 739 hndx = h2ai[k] 740 h.lal[:] = (h_a0a1[hndx], h_a0a1a2[hndx], h_a1a2[hndx]) 741 742 # now process dihedra 743 dLen = self.dihedraLen 744 dha = numpy.array(tuple(dihedraAtomDict.values())) 745 dhai = dict(zip(self.dihedra.keys(), range(dLen))) 746 747 # get dadad dist-angle-dist-angle-dist for dihedra 748 a0a1 = numpy.linalg.norm(dha[:, 0] - dha[:, 1], axis=1) 749 a1a2 = numpy.linalg.norm(dha[:, 1] - dha[:, 2], axis=1) 750 a2a3 = numpy.linalg.norm(dha[:, 2] - dha[:, 3], axis=1) 751 752 a0a2 = numpy.linalg.norm(dha[:, 0] - dha[:, 2], axis=1) 753 a1a3 = numpy.linalg.norm(dha[:, 1] - dha[:, 3], axis=1) 754 sqr_a1a2 = numpy.multiply(a1a2, a1a2) 755 756 a0a1a2 = numpy.rad2deg( 757 numpy.arccos(((a0a1 * a0a1) + sqr_a1a2 - (a0a2 * a0a2)) / (2 * a0a1 * a1a2)) 758 ) 759 760 a1a2a3 = numpy.rad2deg( 761 numpy.arccos((sqr_a1a2 + (a2a3 * a2a3) - (a1a3 * a1a3)) / (2 * a1a2 * a2a3)) 762 ) 763 764 # develop coord_space matrix for 1st 3 atoms of dihedra: 765 766 # build tm translation matrix: atom1 to origin 767 tm = numpy.empty((dLen, 4, 4)) 768 tm[...] = numpy.identity(4) 769 tm[:, 0:3, 3] = -dha[:, 1, 0:3] 770 771 # directly translate a2 into new space using a1 772 p = dha[:, 2] - dha[:, 1] 773 774 # get spherical coords of translated a2 (p) 775 r = numpy.linalg.norm(p, axis=1) 776 azimuth = numpy.arctan2(p[:, 1], p[:, 0]) 777 polar_angle = numpy.arccos(numpy.divide(p[:, 2], r, where=r != 0)) 778 779 # build rz rotation matrix: translated a2 -azimuth around Z 780 # (enables next step rotating around Y to align with Z) 781 rz = multi_rot_Z(-azimuth) 782 783 # build ry rotation matrix: translated a2 -polar_angle around Y 784 ry = multi_rot_Y(-polar_angle) 785 786 # mt completes a1-a2 on Z-axis, still need to align a0 with XZ plane 787 mt = numpy.matmul(ry, numpy.matmul(rz, tm)) 788 789 # transform a0 to mt space 790 p = numpy.matmul(mt, dha[:, 0].reshape(-1, 4, 1)).reshape(-1, 4) 791 # print("mt[0]:\n", mt[0], "\ndha[0][0] (a0):\n", dha[0][0], "\np[0]:\n", p[0]) 792 793 # get azimuth of translated a0 794 azimuth2 = numpy.arctan2(p[:, 1], p[:, 0]) 795 796 # build rotation matrix rz2 to rotate a0 -azimuth about Z to align with X 797 rz2 = multi_rot_Z(-azimuth2) 798 799 # update mt to be complete transform into hedron coordinate space 800 mt = numpy.matmul(rz2, mt[:]) 801 802 # now put atom 4 into that coordinate space and read dihedral as azimuth 803 do4 = numpy.matmul(mt, dha[:, 3].reshape(-1, 4, 1)).reshape(-1, 4) 804 805 numpy.arctan2(do4[:, 1], do4[:, 0], out=self.dihedraICr) 806 numpy.rad2deg(self.dihedraICr, out=self.dihedraIC) 807 808 # build hedra arrays 809 810 hIC = self.hedraIC 811 hNdx = self.hedraNdx 812 813 for k, d in self.dihedra.items(): 814 dndx = dhai[k] 815 # d.angle = dh1d[dndx] 816 rev, hed1, hed2 = (d.reverse, d.hedron1, d.hedron2) 817 h1ndx, h2ndx = (hNdx[d.h1key], hNdx[d.h2key]) 818 if not rev: 819 hIC[h1ndx, :] = (a0a1[dndx], a0a1a2[dndx], a1a2[dndx]) 820 hIC[h2ndx, :] = (a1a2[dndx], a1a2a3[dndx], a2a3[dndx]) 821 # hed1.len12 = a0a1[dndx] 822 # hed1.len23 = hed2.len12 = a1a2[dndx] 823 # hed2.len23 = a2a3[dndx] 824 else: 825 hIC[h1ndx, :] = (a1a2[dndx], a0a1a2[dndx], a0a1[dndx]) 826 hIC[h2ndx, :] = (a2a3[dndx], a1a2a3[dndx], a1a2[dndx]) 827 # hed1.len23 = a0a1[dndx] 828 # hed1.len12 = hed2.len23 = a1a2[dndx] 829 # hed2.len12 = a2a3[dndx] 830 831 hed1.lal = hIC[h1ndx] 832 hed2.lal = hIC[h2ndx] 833 834 # hed1.angle = a0a1a2[dndx] 835 # hed2.angle = a1a2a3[dndx] 836 837 for gCBd in gCBdihedra: 838 gCBd.ic_residue.build_glyCB(gCBd) 839 840 @staticmethod 841 def _write_mtx(fp: TextIO, mtx: numpy.array) -> None: 842 fp.write("[ ") 843 rowsStarted = False 844 for row in mtx: 845 if rowsStarted: 846 fp.write(", [ ") 847 else: 848 fp.write("[ ") 849 rowsStarted = True 850 colsStarted = False 851 for col in row: 852 if colsStarted: 853 fp.write(", " + str(col)) 854 else: 855 fp.write(str(col)) 856 colsStarted = True 857 fp.write(" ]") # close row 858 fp.write(" ]") 859 860 @staticmethod 861 def _writeSCAD_dihed( 862 fp: TextIO, d: "Dihedron", hedraNdx: Dict, hedraSet: Set[EKT] 863 ) -> None: 864 fp.write( 865 "[ {:9.5f}, {}, {}, {}, ".format( 866 d.angle, hedraNdx[d.h1key], hedraNdx[d.h2key], (1 if d.reverse else 0) 867 ) 868 ) 869 fp.write( 870 "{}, {}, ".format( 871 (0 if d.h1key in hedraSet else 1), (0 if d.h2key in hedraSet else 1) 872 ) 873 ) 874 fp.write( 875 " // {} [ {} -- {} ] {}\n".format( 876 d.id, d.hedron1.id, d.hedron2.id, ("reversed" if d.reverse else "") 877 ) 878 ) 879 fp.write(" ") 880 IC_Chain._write_mtx(fp, d.rcst) 881 fp.write(" ]") # close residue array of dihedra entry 882 883 def write_SCAD(self, fp: TextIO, backboneOnly: bool) -> None: 884 """Write self to file fp as OpenSCAD data matrices. 885 886 Works with write_SCAD() and embedded OpenSCAD routines in SCADIO.py. 887 The OpenSCAD code explicitly creates spheres and cylinders to 888 represent atoms and bonds in a 3D model. Options are available 889 to support rotatable bonds and magnetic hydrogen bonds. 890 891 Matrices are written to link, enumerate and describe residues, 892 dihedra, hedra, and chains, mirroring contents of the relevant IC_* 893 data structures. 894 895 The OpenSCAD matrix of hedra has additional information as follows: 896 897 * the atom and bond state (single, double, resonance) are logged 898 so that covalent radii may be used for atom spheres in the 3D models 899 900 * bonds and atoms are tracked so that each is only created once 901 902 * bond options for rotation and magnet holders for hydrogen bonds 903 may be specified 904 905 Note the application of IC_Chain attribute MaxPeptideBond: missing 906 residues may be linked (joining chain segments with arbitrarily long 907 bonds) by setting this to a large value. 908 909 All ALTLOC (disordered) residues and atoms are written to the output model. 910 """ 911 fp.write(f' "{self.chain.id}", // chain id\n') 912 913 # generate dict for all hedra to eliminate redundant references 914 hedra = {} 915 for ric in self.ordered_aa_ic_list: 916 respos, resicode = ric.residue.id[1:] 917 for k, h in ric.hedra.items(): 918 hedra[k] = h 919 atomSet: Set[AtomKey] = set() 920 bondDict: Dict = {} # set() 921 hedraSet: Set[EKT] = set() 922 ndx = 0 923 hedraNdx = {} 924 925 for hk in sorted(hedra): 926 hedraNdx[hk] = ndx 927 ndx += 1 928 929 # write residue dihedra table 930 931 fp.write(" [ // residue array of dihedra") 932 resNdx = {} 933 dihedraNdx = {} 934 ndx = 0 935 chnStarted = False 936 for ric in self.ordered_aa_ic_list: 937 if "O" not in ric.akc: 938 if ric.lc != "G" and ric.lc != "A": 939 print( 940 f"Unable to generate complete sidechain for {ric} {ric.lc} missing O atom" 941 ) 942 resNdx[ric] = ndx 943 if chnStarted: 944 fp.write("\n ],") 945 else: 946 chnStarted = True 947 fp.write( 948 "\n [ // " 949 + str(ndx) 950 + " : " 951 + str(ric.residue.id) 952 + " " 953 + ric.lc 954 + " backbone\n" 955 ) 956 ndx += 1 957 # assemble with no start position, return transform matrices 958 ric.clear_transforms() 959 ric.assemble(resetLocation=True) 960 ndx2 = 0 961 started = False 962 for i in range(1 if backboneOnly else 2): 963 if i == 1: 964 cma = "," if started else "" 965 fp.write( 966 f"{cma}\n // {str(ric.residue.id)} {ric.lc} sidechain\n" 967 ) 968 started = False 969 for dk, d in sorted(ric.dihedra.items()): 970 if d.h2key in hedraNdx and ( 971 (i == 0 and d.is_backbone()) or (i == 1 and not d.is_backbone()) 972 ): 973 if d.rcst is not None: 974 if started: 975 fp.write(",\n") 976 else: 977 started = True 978 fp.write(" ") 979 IC_Chain._writeSCAD_dihed(fp, d, hedraNdx, hedraSet) 980 dihedraNdx[dk] = ndx2 981 hedraSet.add(d.h1key) 982 hedraSet.add(d.h2key) 983 ndx2 += 1 984 else: 985 print( 986 f"Atom missing for {d.id3}-{d.id32}, OpenSCAD chain may be discontiguous" 987 ) 988 fp.write(" ],") # end of residue entry dihedra table 989 fp.write("\n ],\n") # end of all dihedra table 990 991 # write hedra table 992 993 fp.write(" [ //hedra\n") 994 for hk in sorted(hedra): 995 hed = hedra[hk] 996 fp.write(" [ ") 997 fp.write( 998 "{:9.5f}, {:9.5f}, {:9.5f}".format( 999 set_accuracy_95(hed.lal[0]), # len12 1000 set_accuracy_95(hed.lal[1]), # angle 1001 set_accuracy_95(hed.lal[2]), # len23 1002 ) 1003 ) 1004 atom_str = "" # atom and bond state 1005 atom_done_str = "" # create each only once 1006 akndx = 0 1007 for ak in hed.aks: 1008 atm = ak.akl[AtomKey.fields.atm] 1009 res = ak.akl[AtomKey.fields.resname] 1010 # try first for generic backbone/Cbeta atoms 1011 ab_state_res = residue_atom_bond_state["X"] 1012 ab_state = ab_state_res.get(atm, None) 1013 if "H" == atm[0]: 1014 ab_state = "Hsb" 1015 if ab_state is None: 1016 # not found above, must be sidechain atom 1017 ab_state_res = residue_atom_bond_state.get(res, None) 1018 if ab_state_res is not None: 1019 ab_state = ab_state_res.get(atm, "") 1020 else: 1021 ab_state = "" 1022 atom_str += ', "' + ab_state + '"' 1023 1024 if ak in atomSet: 1025 atom_done_str += ", 0" 1026 elif hk in hedraSet: 1027 if ( 1028 hasattr(hed, "flex_female_1") or hasattr(hed, "flex_male_1") 1029 ) and akndx != 2: 1030 if akndx == 0: 1031 atom_done_str += ", 0" 1032 elif akndx == 1: 1033 atom_done_str += ", 1" 1034 atomSet.add(ak) 1035 elif ( 1036 hasattr(hed, "flex_female_2") or hasattr(hed, "flex_male_2") 1037 ) and akndx != 0: 1038 if akndx == 2: 1039 atom_done_str += ", 0" 1040 elif akndx == 1: 1041 atom_done_str += ", 1" 1042 atomSet.add(ak) 1043 else: 1044 atom_done_str += ", 1" 1045 atomSet.add(ak) 1046 else: 1047 atom_done_str += ", 0" 1048 akndx += 1 1049 fp.write(atom_str) 1050 fp.write(atom_done_str) 1051 1052 # specify bond options 1053 1054 bond = [] 1055 bond.append(hed.aks[0].id + "-" + hed.aks[1].id) 1056 bond.append(hed.aks[1].id + "-" + hed.aks[2].id) 1057 b0 = True 1058 for b in bond: 1059 wstr = "" 1060 if b in bondDict and bondDict[b] == "StdBond": 1061 wstr = ", 0" 1062 elif hk in hedraSet: 1063 bondType = "StdBond" 1064 if b0: 1065 if hasattr(hed, "flex_female_1"): 1066 bondType = "FemaleJoinBond" 1067 elif hasattr(hed, "flex_male_1"): 1068 bondType = "MaleJoinBond" 1069 elif hasattr(hed, "skinny_1"): 1070 bondType = "SkinnyBond" 1071 elif hasattr(hed, "hbond_1"): 1072 bondType = "HBond" 1073 else: 1074 if hasattr(hed, "flex_female_2"): 1075 bondType = "FemaleJoinBond" 1076 elif hasattr(hed, "flex_male_2"): 1077 bondType = "MaleJoinBond" 1078 # elif hasattr(hed, 'skinny_2'): # unused 1079 # bondType = 'SkinnyBond' 1080 elif hasattr(hed, "hbond_2"): 1081 bondType = "HBond" 1082 if b in bondDict: 1083 bondDict[b] = "StdBond" 1084 else: 1085 bondDict[b] = bondType 1086 wstr = ", " + str(bondType) 1087 else: 1088 wstr = ", 0" 1089 fp.write(wstr) 1090 b0 = False 1091 akl = hed.aks[0].akl 1092 fp.write( 1093 ', "' 1094 + akl[AtomKey.fields.resname] 1095 + '", ' 1096 + akl[AtomKey.fields.respos] 1097 + ', "' 1098 + hed.dh_class 1099 + '"' 1100 ) 1101 fp.write(" ], // " + str(hk) + "\n") 1102 fp.write(" ],\n") # end of hedra table 1103 1104 # write chain table 1105 1106 fp.write("\n[ // chain - world transform for each residue\n") 1107 chnStarted = False 1108 for ric in self.ordered_aa_ic_list: 1109 # handle start / end 1110 for NCaCKey in sorted(ric.NCaCKey): # type: ignore 1111 if 0 < len(ric.rprev): 1112 for rpr in ric.rprev: 1113 acl = [rpr.atom_coords[ak] for ak in NCaCKey] 1114 mt, mtr = coord_space(acl[0], acl[1], acl[2], True) 1115 else: 1116 mtr = numpy.identity(4, dtype=numpy.float64) 1117 if chnStarted: 1118 fp.write(",\n") 1119 else: 1120 chnStarted = True 1121 fp.write(" [ " + str(resNdx[ric]) + ', "' + str(ric.residue.id[1])) 1122 fp.write(ric.lc + '", //' + str(NCaCKey) + "\n") 1123 IC_Chain._write_mtx(fp, mtr) 1124 fp.write(" ]") 1125 fp.write("\n ]\n") 1126 1127 1128class IC_Residue: 1129 """Class to extend Biopython Residue with internal coordinate data. 1130 1131 Parameters 1132 ---------- 1133 parent: biopython Residue object this class extends 1134 NO_ALTLOC: bool default False 1135 Disable processing of ALTLOC atoms if True, use only selected atoms. 1136 1137 Attributes 1138 ---------- 1139 residue: Biopython Residue object reference 1140 The Residue object this extends 1141 hedra: dict indexed by 3-tuples of AtomKeys 1142 Hedra forming this residue 1143 dihedra: dict indexed by 4-tuples of AtomKeys 1144 Dihedra forming (overlapping) this residue 1145 rprev, rnext: lists of IC_Residue objects 1146 References to adjacent (bonded, not missing, possibly disordered) 1147 residues in chain 1148 atom_coords: AtomKey indexed dict of numpy [4] arrays 1149 Local copy of atom homogeneous coordinates [4] for work 1150 distinct from Bopython Residue/Atom values 1151 alt_ids: list of char 1152 AltLoc IDs from PDB file 1153 bfactors: dict 1154 AtomKey indexed B-factors as read from PDB file 1155 NCaCKey: List of tuples of AtomKeys 1156 List of tuples of N, Ca, C backbone atom AtomKeys; usually only 1 1157 but more if backbone altlocs. Set by link_dihedra() 1158 is20AA: bool 1159 True if residue is one of 20 standard amino acids, based on 1160 Residue resname 1161 accept_atoms: tuple 1162 list of PDB atom names to use when generatiing internal coordinates. 1163 Default is: 1164 1165 `accept_atoms = accept_mainchain + accept_hydrogens` 1166 1167 to exclude hydrogens in internal coordinates and generated PDB files, 1168 override as: 1169 1170 `IC_Residue.accept_atoms = IC_Residue.accept_mainchain` 1171 1172 to get only backbone atoms plus amide proton, use: 1173 1174 `IC_Residue.accept_atoms = IC_Residue.accept_mainchain + ('H',)` 1175 1176 to convert D atoms to H, set `AtomKey.d2h = True` and use: 1177 1178 `IC_Residue.accept_atoms = accept_mainchain + accept_hydrogens + accept_deuteriums` 1179 1180 There is currently no option to output internal coordinates with D 1181 instead of H 1182 1183 accept_resnames: tuple 1184 list of 3-letter residue names for HETATMs to accept when generating 1185 internal coordinates from atoms. HETATM sidechain will be ignored, but normal 1186 backbone atoms (N, CA, C, O, CB) will be included. Currently only 1187 CYG, YCM and UNK; override at your own risk. To generate 1188 sidechain, add appropriate entries to ic_data_sidechains in 1189 ic_data.py and support in atom_to_internal_coordinates() 1190 gly_Cbeta: bool default False 1191 override class variable to True to generate internal coordinates for 1192 glycine CB atoms in atom_to_internal_coordinates(). 1193 1194 `IC_Residue.gly_Cbeta = True` 1195 allBonds: bool default False 1196 whereas a PDB file just specifies atoms, OpenSCAD output for 3D printing 1197 needs all bonds specified explicitly - otherwise, e.g. PHE rings will not 1198 be closed. This variable is managed by the Write_SCAD() code and enables 1199 this. 1200 cic: IC_Chain default None 1201 parent chain IC_Chain object 1202 1203 scale: optional float 1204 used for OpenSCAD output to generate gly_Cbeta bond length 1205 1206 Methods 1207 ------- 1208 applyMtx() 1209 multiply all IC_Residue atom_cords by passed matrix 1210 assemble(atomCoordsIn, resetLocation, verbose) 1211 Compute atom coordinates for this residue from internal coordinates 1212 atm241(coord) 1213 Convert 1x3 cartesian coords to 4x1 homogeneous coords 1214 coords_to_residue() 1215 Convert homogeneous atom_coords to Biopython cartesian Atom coords 1216 atom_to_internal_coordinates(verbose) 1217 Create hedra and dihedra for atom coordinates 1218 get_angle() 1219 Return angle for passed key 1220 get_length() 1221 Return bond length for specified pair 1222 link_dihedra() 1223 Link dihedra to this residue, form id3_dh_index 1224 load_PIC(edron) 1225 Process parsed (di-/h-)edron data from PIC file 1226 pick_angle() 1227 Find Hedron or Dihedron for passed key 1228 pick_length() 1229 Find hedra for passed AtomKey pair 1230 rak(atom info) 1231 Residue AtomKey - per residue AtomKey result cache 1232 set_angle() 1233 Set angle for passed key (no position updates) 1234 set_length() 1235 Set bond length in all relevant hedra for specified pair 1236 write_PIC(pdbid, chainId, s) 1237 Generate PIC format strings for this residue 1238 1239 """ 1240 1241 # add 3-letter residue name here for non-standard residues with 1242 # normal backbone. CYG for test case 4LGY (1305 residue contiguous 1243 # chain) 1244 accept_resnames = ("CYG", "YCM", "UNK") 1245 1246 AllBonds: bool = False # For OpenSCAD, generate explicit hedra covering all bonds if True. 1247 1248 def __init__(self, parent: "Residue", NO_ALTLOC: bool = False) -> None: 1249 """Initialize IC_Residue with parent Biopython Residue. 1250 1251 :param parent: Biopython Residue object 1252 The Biopython Residue this object extends 1253 :param NO_ALTLOC: bool default False 1254 Option to disable processing altloc disordered atoms, use selected. 1255 """ 1256 # NO_ALTLOC=True will turn off alotloc positions and just use selected 1257 self.residue = parent 1258 self.cic: IC_Chain 1259 # dict of hedron objects indexed by hedron keys 1260 self.hedra: Dict[HKT, Hedron] = {} 1261 # dict of dihedron objects indexed by dihedron keys 1262 self.dihedra: Dict[DKT, Dihedron] = {} 1263 # map of dihedron key (first 3 atom keys) to dihedron 1264 # for all dihedra in Residue 1265 # built by link_dihedra() 1266 self.id3_dh_index: Dict[HKT, List[Dihedron]] = {} 1267 # cache of AtomKey results for rak() 1268 self.akc: Dict[Union[str, Atom], AtomKey] = {} 1269 # set of AtomKeys involved in dihedra, used by split_akl, build_rak_cache 1270 # built by __init__ for XYZ (PDB coord) input, link_dihedra for PIC input 1271 self.ak_set: Set[AtomKey] = set() 1272 # reference to adjacent residues in chain 1273 self.rprev: List[IC_Residue] = [] 1274 self.rnext: List[IC_Residue] = [] 1275 # local copy, homogeneous coordinates for atoms, numpy [4] 1276 # generated from dihedra include some i+1 atoms 1277 # or initialised here from parent residue if loaded from coordinates 1278 self.atom_coords: Dict["AtomKey", numpy.array] = {} 1279 # bfactors copied from PDB file 1280 self.bfactors: Dict[str, float] = {} 1281 self.alt_ids: Union[List[str], None] = None if NO_ALTLOC else [] 1282 self.is20AA = True 1283 # rbase = position, insert code or none, resname (1 letter if in 20) 1284 rid = parent.id 1285 rbase = [rid[1], rid[2] if " " != rid[2] else None, parent.resname] 1286 try: 1287 rbase[2] = three_to_one(rbase[2]).upper() 1288 except KeyError: 1289 self.is20AA = False 1290 1291 self.rbase = tuple(rbase) 1292 self.lc = rbase[2] 1293 1294 if self.is20AA or rbase[2] in self.accept_resnames: 1295 for atom in parent.get_atoms(): 1296 if hasattr(atom, "child_dict"): 1297 if NO_ALTLOC: 1298 self._add_atom(atom.selected_child) 1299 else: 1300 for atm in atom.child_dict.values(): 1301 self._add_atom(atm) 1302 else: 1303 self._add_atom(atom) 1304 if self.ak_set: # only for coordinate (pdb) input 1305 self.build_rak_cache() # init cache ready for atom_to_internal_coords 1306 # self.NCaCKey = [(self.rak("N"), self.rak("CA"), self.rak("C"))] 1307 1308 # print(self.atom_coords) 1309 1310 def rak(self, atm: Union[str, Atom]) -> "AtomKey": 1311 """Cache calls to AtomKey for this residue.""" 1312 try: 1313 ak = self.akc[atm] 1314 except (KeyError): 1315 ak = self.akc[atm] = AtomKey(self, atm) 1316 if isinstance(atm, str): 1317 # print(atm) # debug code 1318 ak.missing = True 1319 return ak 1320 1321 def build_rak_cache(self) -> None: 1322 """Create explicit entries for for atoms so don't miss altlocs.""" 1323 for ak in sorted(self.ak_set): 1324 atmName = ak.akl[3] 1325 if self.akc.get(atmName) is None: 1326 self.akc[atmName] = ak 1327 1328 accept_mainchain = ( 1329 "N", 1330 "CA", 1331 "C", 1332 "O", 1333 "CB", 1334 "CG", 1335 "CG1", 1336 "OG1", 1337 "OG", 1338 "SG", 1339 "CG2", 1340 "CD", 1341 "CD1", 1342 "SD", 1343 "OD1", 1344 "ND1", 1345 "CD2", 1346 "ND2", 1347 "CE", 1348 "CE1", 1349 "NE", 1350 "OE1", 1351 "NE1", 1352 "CE2", 1353 "OE2", 1354 "NE2", 1355 "CE3", 1356 "CZ", 1357 "NZ", 1358 "CZ2", 1359 "CZ3", 1360 "OD2", 1361 "OH", 1362 "CH2", 1363 "OXT", 1364 ) 1365 accept_hydrogens = ( 1366 "H", 1367 "H1", 1368 "H2", 1369 "H3", 1370 "HA", 1371 "HA2", 1372 "HA3", 1373 "HB", 1374 "HB1", 1375 "HB2", 1376 "HB3", 1377 "HG2", 1378 "HG3", 1379 "HD2", 1380 "HD3", 1381 "HE2", 1382 "HE3", 1383 "HZ1", 1384 "HZ2", 1385 "HZ3", 1386 "HG11", 1387 "HG12", 1388 "HG13", 1389 "HG21", 1390 "HG22", 1391 "HG23", 1392 "HZ", 1393 "HD1", 1394 "HE1", 1395 "HD11", 1396 "HD12", 1397 "HD13", 1398 "HG", 1399 "HG1", 1400 "HD21", 1401 "HD22", 1402 "HD23", 1403 "NH1", 1404 "NH2", 1405 "HE", 1406 "HH11", 1407 "HH12", 1408 "HH21", 1409 "HH22", 1410 "HE21", 1411 "HE22", 1412 "HE2", 1413 "HH", 1414 "HH2", 1415 ) 1416 accept_deuteriums = ( 1417 "D", 1418 "D1", 1419 "D2", 1420 "D3", 1421 "DA", 1422 "DA2", 1423 "DA3", 1424 "DB", 1425 "DB1", 1426 "DB2", 1427 "DB3", 1428 "DG2", 1429 "DG3", 1430 "DD2", 1431 "DD3", 1432 "DE2", 1433 "DE3", 1434 "DZ1", 1435 "DZ2", 1436 "DZ3", 1437 "DG11", 1438 "DG12", 1439 "DG13", 1440 "DG21", 1441 "DG22", 1442 "DG23", 1443 "DZ", 1444 "DD1", 1445 "DE1", 1446 "DD11", 1447 "DD12", 1448 "DD13", 1449 "DG", 1450 "DG1", 1451 "DD21", 1452 "DD22", 1453 "DD23", 1454 "ND1", 1455 "ND2", 1456 "DE", 1457 "DH11", 1458 "DH12", 1459 "DH21", 1460 "DH22", 1461 "DE21", 1462 "DE22", 1463 "DE2", 1464 "DH", 1465 "DH2", 1466 ) 1467 accept_atoms = accept_mainchain + accept_hydrogens 1468 1469 gly_Cbeta = False 1470 1471 @staticmethod 1472 def atm241(coord: numpy.array) -> numpy.array: 1473 """Convert 1x3 cartesian coordinates to 4x1 homogeneous coordinates.""" 1474 arr41 = numpy.empty(4) 1475 arr41[0:3] = coord 1476 arr41[3] = 1.0 1477 return arr41 1478 1479 def _add_atom(self, atm: Atom) -> None: 1480 """Filter Biopython Atom with accept_atoms; set atom_coords, ak_set. 1481 1482 Arbitrarily renames O' and O'' to O and OXT 1483 """ 1484 if "O'" == atm.name: 1485 atm.name = "O" 1486 if "O''" == atm.name: 1487 atm.name = "OXT" 1488 1489 if atm.name not in self.accept_atoms: 1490 # print('skip:', atm.name) 1491 return 1492 ak = self.rak(atm) # passing Atom here not string 1493 self.atom_coords[ak] = IC_Residue.atm241(atm.coord) 1494 self.ak_set.add(ak) 1495 1496 def __repr__(self) -> str: 1497 """Print string is parent Residue ID.""" 1498 return str(self.residue.full_id) 1499 1500 def pretty_str(self) -> str: 1501 """Nice string for residue ID.""" 1502 id = self.residue.id 1503 return f"{self.residue.resname} {id[0]}{str(id[1])}{id[2]}" 1504 1505 def load_PIC(self, edron: Dict[str, str]) -> None: 1506 """Process parsed (di-/h-)edron data from PIC file. 1507 1508 :param edron: parse dictionary from Edron.edron_re 1509 """ 1510 if edron["a4"] is None: # PIC line is Hedron 1511 ek = (AtomKey(edron["a1"]), AtomKey(edron["a2"]), AtomKey(edron["a3"])) 1512 self.hedra[ek] = Hedron(ek, **edron) 1513 else: # PIC line is Dihedron 1514 ek = ( 1515 AtomKey(edron["a1"]), 1516 AtomKey(edron["a2"]), 1517 AtomKey(edron["a3"]), 1518 AtomKey(edron["a4"]), 1519 ) 1520 self.dihedra[ek] = Dihedron(ek, **edron) 1521 1522 def link_dihedra(self, verbose: bool = False) -> None: 1523 """Housekeeping after loading all residues and dihedra. 1524 1525 - Link dihedra to this residue 1526 - form id3_dh_index 1527 - form ak_set 1528 - set NCaCKey to be available AtomKeys 1529 """ 1530 id3i: Dict[HKT, List[Dihedron]] = {} 1531 for dh in self.dihedra.values(): 1532 dh.ic_residue = self # each dihedron can find its IC_Residue 1533 dh.cic = self.cic # each dihedron can update chain dihedral angles 1534 id3 = dh.id3 1535 if id3 not in id3i: 1536 id3i[id3] = [] 1537 id3i[id3].append(dh) 1538 self.ak_set.update(dh.aks) 1539 dh.set_hedra() 1540 for h in self.hedra.values(): # collect any atoms in orphan hedra 1541 self.ak_set.update(h.aks) # e.g. alternate CB path with no O 1542 h.cic = self.cic # each hedron can update chain hedra 1543 1544 # map to find each dihedron from atom tokens 1-3 1545 self.id3_dh_index = id3i 1546 1547 # if loaded PIC data, akc not initialised yet 1548 if not self.akc: 1549 self.build_rak_cache() 1550 1551 # initialise NCaCKey here: 1552 1553 # not rak here to avoid polluting akc cache with no-altloc keys 1554 # so starting with 'generic' key: 1555 self.NCaCKey = [(AtomKey(self, "N"), AtomKey(self, "CA"), AtomKey(self, "C"))] 1556 1557 newNCaCKey: List[Tuple["AtomKey", ...]] = [] 1558 1559 try: 1560 for tpl in sorted(self.NCaCKey): 1561 newNCaCKey.extend(self._split_akl(tpl)) 1562 self.NCaCKey = cast(List[HKT], newNCaCKey) 1563 # if len(newNCaCKey) != 1 and len(self.rprev) == 0: 1564 # debug code to find examples of chains starting with disordered residues 1565 # print(f"chain start multiple NCaCKey {newNCaCKey} : {self}") 1566 except AttributeError: 1567 if verbose: 1568 print( 1569 f"Missing N, Ca and/or C atoms for residue {str(self.residue)} chain {self.residue.parent.id}" 1570 ) 1571 1572 def set_flexible(self) -> None: 1573 """For OpenSCAD, mark N-CA and CA-C bonds to be flexible joints.""" 1574 for h in self.hedra.values(): 1575 if h.dh_class == "NCAC": 1576 h.flex_female_1 = True 1577 h.flex_female_2 = True 1578 elif h.dh_class.endswith("NCA"): 1579 h.flex_male_2 = True 1580 elif h.dh_class.startswith("CAC") and h.aks[1].akl[3] == "C": 1581 h.flex_male_1 = True 1582 elif h.dh_class == "CBCAC": 1583 h.skinny_1 = True # CA-CB bond interferes with flex join 1584 1585 def set_hbond(self) -> None: 1586 """For OpenSCAD, mark H-N and C-O bonds to be hbonds (magnets).""" 1587 for h in self.hedra.values(): 1588 if h.dh_class == "HNCA": 1589 h.hbond_1 = True 1590 elif h.dh_class == "CACO": 1591 h.hbond_2 = True 1592 1593 def default_startpos(self) -> Dict["AtomKey", numpy.array]: 1594 """Generate default N-Ca-C coordinates to build this residue from.""" 1595 atomCoords = {} 1596 dlist0 = [self.id3_dh_index.get(akl, None) for akl in sorted(self.NCaCKey)] 1597 dlist1 = [d for d in dlist0 if d is not None] 1598 # https://stackoverflow.com/questions/11264684/flatten-list-of-lists 1599 dlist = [val for sublist in dlist1 for val in sublist] 1600 # dlist = self.id3_dh_index[NCaCKey] 1601 for d in dlist: 1602 for i, a in enumerate(d.aks): 1603 atomCoords[a] = d.initial_coords[i] 1604 # if "O" not in self.akc and "CB" in self.akc: 1605 # # need CB coord if no O coord - handle alternate CB path 1606 # # but not clear how to do this for default position 1607 # pass 1608 return atomCoords 1609 1610 def get_startpos(self) -> Dict["AtomKey", numpy.array]: 1611 """Find N-Ca-C coordinates to build this residue from.""" 1612 if 0 < len(self.rprev): 1613 # if there is a previous residue, build on from it 1614 startPos = {} 1615 # nb akl for this res n-ca-c in rp (prev res) dihedra 1616 akl: List[AtomKey] = [] 1617 for tpl in self.NCaCKey: 1618 akl.extend(tpl) 1619 if self.rak("O").missing: 1620 # alternate CB path - only use if O is missing 1621 # else have problems modifying phi angle 1622 akl.append(AtomKey(self, "CB")) 1623 for ak in akl: 1624 for rp in self.rprev: 1625 rpak = rp.atom_coords.get(ak, None) 1626 if rpak is not None: 1627 startPos[ak] = rpak 1628 if 3 > len(startPos): # if don't have all 3, reset to have none 1629 startPos = {} 1630 else: 1631 # get atom posns already added by load_structure 1632 sp = self.residue.parent.internal_coord.initNCaC.get(self.rbase, None) 1633 if sp is None: 1634 startPos = {} 1635 else: 1636 # need copy Here (shallow ok) else assemble() adds to this dict 1637 startPos = cast(Dict["AtomKey", numpy.array], sp.copy()) 1638 1639 if startPos == {}: 1640 startPos = self.default_startpos() 1641 1642 return startPos 1643 1644 def clear_transforms(self): 1645 """Set cst and rcst attributes to none before assemble().""" 1646 for d in self.dihedra.values(): 1647 d.cst = None 1648 d.rcst = None 1649 1650 def assemble( 1651 self, resetLocation: bool = False, verbose: bool = False, 1652 ) -> Union[Dict["AtomKey", numpy.array], Dict[HKT, numpy.array], None]: 1653 """Compute atom coordinates for this residue from internal coordinates. 1654 1655 Join dihedrons starting from N-CA-C and N-CA-CB hedrons, computing protein 1656 space coordinates for backbone and sidechain atoms 1657 1658 Sets forward and reverse transforms on each Dihedron to convert from 1659 protein coordinates to dihedron space coordinates for first three 1660 atoms (see coord_space()) 1661 1662 Not vectorized (yet). 1663 1664 **Algorithm** 1665 1666 form double-ended queue, start with c-ca-n, o-c-ca, n-ca-cb, n-ca-c. 1667 1668 if resetLocation=True, use initial coords from generating dihedron 1669 for n-ca-c initial positions (result in dihedron coordinate space) 1670 1671 while queue not empty 1672 get 3-atom hedron key 1673 1674 for each dihedron starting with hedron key (1st hedron of dihedron) 1675 1676 if have coordinates for all 4 atoms already 1677 add 2nd hedron key to back of queue 1678 else if have coordinates for 1st 3 atoms 1679 compute forward and reverse transforms to take 1st 3 atoms 1680 to/from dihedron initial coordinate space 1681 1682 use reverse transform to get position of 4th atom in 1683 current coordinates from dihedron initial coordinates 1684 1685 add 2nd hedron key to back of queue 1686 else 1687 ordering failed, put hedron key at back of queue and hope 1688 next time we have 1st 3 atom positions (should not happen) 1689 1690 loop terminates (queue drains) as hedron keys which do not start any 1691 dihedra are removed without action 1692 1693 :param resetLocation: bool default False 1694 - Option to ignore start location and orient so N-Ca-C hedron 1695 at origin. 1696 1697 :returns: 1698 Dict of AtomKey -> homogeneous atom coords for residue in protein space 1699 relative to previous residue 1700 1701 """ 1702 # debug statements below still useful, commented for performance 1703 # dbg = False 1704 1705 NCaCKey = sorted(self.NCaCKey) 1706 1707 if not self.ak_set: 1708 return None # give up now if no atoms to work with 1709 1710 # order of these startLst entries matters 1711 startLst = self._split_akl((self.rak("C"), self.rak("CA"), self.rak("N"))) 1712 if "CB" in self.akc: 1713 startLst.extend( 1714 self._split_akl((self.rak("N"), self.rak("CA"), self.rak("CB"))) 1715 ) 1716 if "O" in self.akc: 1717 startLst.extend( 1718 self._split_akl((self.rak("O"), self.rak("C"), self.rak("CA"))) 1719 ) 1720 1721 startLst.extend(NCaCKey) 1722 1723 q = deque(startLst) 1724 # resnum = self.rbase[0] 1725 1726 # get initial coords from previous residue or IC_Chain info 1727 # or default coords 1728 if resetLocation: 1729 # use N-CA-C initial coords from creating dihedral 1730 atomCoords = self.default_startpos() 1731 else: 1732 atomCoords = self.get_startpos() 1733 1734 while q: # deque is not empty 1735 # if dbg: 1736 # print("assemble loop start q=", q) 1737 h1k = cast(HKT, q.pop()) 1738 dihedra = self.id3_dh_index.get(h1k, None) 1739 # if dbg: 1740 # print( 1741 # " h1k:", 1742 # h1k, 1743 # "len dihedra: ", 1744 # len(dihedra) if dihedra is not None else "None", 1745 # ) 1746 if dihedra is not None: 1747 for d in dihedra: 1748 if 4 == len(d.initial_coords) and d.initial_coords[3] is not None: 1749 # skip incomplete dihedron if don't have 4th atom due 1750 # to missing input data 1751 d_h2key = d.hedron2.aks 1752 akl = d.aks 1753 # if dbg: 1754 # print(" process", d, d_h2key, akl) 1755 1756 acount = len([a for a in d.aks if a in atomCoords]) 1757 1758 if 4 == acount: # and not need_transform: 1759 # dihedron already done, queue 2nd hedron key 1760 q.appendleft(d_h2key) 1761 # if dbg: 1762 # print(" 4- already done, append left") 1763 if d.rcst is None: # missing transform 1764 # can happen for altloc atoms 1765 # only needed for write_SCAD output 1766 acs = [atomCoords[a] for a in h1k] 1767 d.cst, d.rcst = coord_space( 1768 acs[0], acs[1], acs[2], True 1769 ) 1770 elif 3 == acount: 1771 # if dbg: 1772 # print(" 3- call coord_space") 1773 1774 acs = [atomCoords[a] for a in h1k] 1775 d.cst, d.rcst = coord_space(acs[0], acs[1], acs[2], True) 1776 # print(d.cst) 1777 # print(d.rcst) 1778 # if dbg: 1779 # print( 1780 # " initial_coords[3]=", 1781 # d.initial_coords[3].transpose(), 1782 # ) 1783 acak3 = d.rcst.dot(d.initial_coords[3]) 1784 # if dbg: 1785 # print(" acak3=", acak3.transpose()) 1786 1787 atomCoords[akl[3]] = numpy.round( 1788 acak3, 3 1789 ) # round to PDB format 8.3 1790 # if dbg: 1791 # print( 1792 # " 3- finished, ak:", 1793 # akl[3], 1794 # "coords:", 1795 # atomCoords[akl[3]].transpose(), 1796 # ) 1797 q.appendleft(d_h2key) 1798 else: 1799 if verbose: 1800 print("no coords to start", d) 1801 print( 1802 [ 1803 a 1804 for a in d.aks 1805 if atomCoords.get(a, None) is not None 1806 ] 1807 ) 1808 else: 1809 if verbose: 1810 print("no initial coords for", d) 1811 1812 return atomCoords 1813 1814 def _split_akl( 1815 self, 1816 lst: Union[Tuple["AtomKey", ...], List["AtomKey"]], 1817 missingOK: bool = False, 1818 ) -> List[Tuple["AtomKey", ...]]: 1819 """Get AtomKeys for this residue (ak_set) given generic list of AtomKeys. 1820 1821 Given a list of AtomKeys (aks) for a Hedron or Dihedron, 1822 return: 1823 list of matching aks that have id3_dh in this residue 1824 (ak may change if occupancy != 1.00) 1825 1826 or 1827 multiple lists of matching aks expanded for all atom altlocs 1828 1829 or 1830 empty list if any of atom_coord(ak) missing and not missingOK 1831 1832 :param lst: list[3] or [4] of AtomKeys 1833 non-altloc AtomKeys to match to specific AtomKeys for this residue 1834 """ 1835 altloc_ndx = AtomKey.fields.altloc 1836 occ_ndx = AtomKey.fields.occ 1837 1838 # step 1 1839 # given a list of AtomKeys (aks) 1840 # form a new list of same aks with coords or diheds in this residue 1841 # plus lists of matching altloc aks in coords or diheds 1842 edraLst: List[Tuple[AtomKey, ...]] = [] 1843 altlocs = set() 1844 posnAltlocs: Dict["AtomKey", Set[str]] = {} 1845 akMap = {} 1846 for ak in lst: 1847 posnAltlocs[ak] = set() 1848 if ( 1849 ak in self.ak_set 1850 and ak.akl[altloc_ndx] is None 1851 and ak.akl[occ_ndx] is None 1852 ): 1853 # simple case no altloc and exact match in set 1854 edraLst.append((ak,)) # tuple of ak 1855 else: 1856 ak2_lst = [] 1857 for ak2 in self.ak_set: 1858 if ak.altloc_match(ak2): 1859 # print(key) 1860 ak2_lst.append(ak2) 1861 akMap[ak2] = ak 1862 altloc = ak2.akl[altloc_ndx] 1863 if altloc is not None: 1864 altlocs.add(altloc) 1865 posnAltlocs[ak].add(altloc) 1866 edraLst.append(tuple(ak2_lst)) 1867 1868 # step 2 1869 # check and finish for 1870 # missing atoms 1871 # simple case no altlocs 1872 # else form new AtomKey lists covering all altloc permutations 1873 maxc = 0 1874 for akl in edraLst: 1875 lenAKL = len(akl) 1876 if 0 == lenAKL and not missingOK: 1877 return [] # atom missing in atom_coords, cannot form object 1878 elif maxc < lenAKL: 1879 maxc = lenAKL 1880 if 1 == maxc: # simple case no altlocs for any ak in list 1881 newAKL = [] 1882 for akl in edraLst: 1883 if akl: # may have empty lists if missingOK, do not append 1884 newAKL.append(akl[0]) 1885 return [tuple(newAKL)] 1886 else: 1887 new_edraLst = [] 1888 for al in altlocs: 1889 # form complete new list for each altloc 1890 alhl = [] 1891 for akl in edraLst: 1892 lenAKL = len(akl) 1893 if 0 == lenAKL: 1894 continue # ignore empty list from missingOK 1895 if 1 == lenAKL: 1896 alhl.append(akl[0]) # not all atoms will have altloc 1897 # elif (lenAKL < maxc 1898 # and al not in posnAltlocs[akMap[akl[0]]]): 1899 elif al not in posnAltlocs[akMap[akl[0]]]: 1900 # this position has fewer altlocs than other positions 1901 # and this position does not have this al, 1902 # so just grab first to form angle as could be any 1903 alhl.append(sorted(akl)[0]) 1904 else: 1905 for ak in akl: 1906 if ak.akl[altloc_ndx] == al: 1907 alhl.append(ak) 1908 new_edraLst.append(tuple(alhl)) 1909 1910 # print(new_edraLst) 1911 return new_edraLst 1912 1913 def _gen_edra(self, lst: Union[Tuple["AtomKey", ...], List["AtomKey"]]) -> None: 1914 """Populate hedra/dihedra given edron ID tuple. 1915 1916 Given list of AtomKeys defining hedron or dihedron 1917 convert to AtomKeys with coordinates in this residue 1918 add appropriately to self.di/hedra, expand as needed atom altlocs 1919 1920 :param lst: tuple of AtomKeys 1921 Specifies Hedron or Dihedron 1922 """ 1923 for ak in lst: 1924 if ak.missing: 1925 return # give up if atoms actually missing 1926 1927 lenLst = len(lst) 1928 if 4 > lenLst: 1929 cdct, dct, obj = self.cic.hedra, self.hedra, Hedron 1930 else: 1931 cdct, dct, obj = self.cic.dihedra, self.dihedra, Dihedron # type: ignore 1932 1933 if isinstance(lst, List): 1934 tlst = tuple(lst) 1935 else: 1936 tlst = lst 1937 1938 hl = self._split_akl(tlst) # expand tlst with any altlocs 1939 # returns list of tuples 1940 1941 for tnlst in hl: 1942 # do not add edron if split_akl() made something shorter 1943 if len(tnlst) == lenLst: 1944 # if edron already exists, then update not replace with new 1945 if tnlst not in cdct: 1946 cdct[tnlst] = obj(tnlst) # type: ignore 1947 if tnlst not in dct: 1948 dct[tnlst] = cdct[tnlst] # type: ignore 1949 1950 dct[tnlst].needs_update = True # type: ignore 1951 1952 def atom_to_internal_coordinates(self, verbose: bool = False) -> None: 1953 """Create hedra and dihedra for atom coordinates. 1954 1955 :param verbose: bool default False 1956 warn about missing N, Ca, C backbone atoms. 1957 """ 1958 # on entry we have all Biopython Atoms loaded 1959 if not self.ak_set: 1960 return # so give up if no atoms loaded for this residue 1961 1962 sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C") 1963 if self.lc != "G": 1964 sCB = self.rak("CB") 1965 1966 # first __init__ di/hedra, AtomKey objects and atom_coords for di/hedra 1967 # which extend into next residue. 1968 1969 if 0 < len(self.rnext) and self.rnext[0].ak_set: 1970 # atom_coords, hedra and dihedra for backbone dihedra 1971 # which reach into next residue 1972 for rn in self.rnext: 1973 nN, nCA, nC = rn.rak("N"), rn.rak("CA"), rn.rak("C") 1974 1975 nextNCaC = rn._split_akl((nN, nCA, nC), missingOK=True) 1976 1977 for tpl in nextNCaC: 1978 for ak in tpl: 1979 if ak in rn.atom_coords: 1980 self.atom_coords[ak] = rn.atom_coords[ak] 1981 self.ak_set.add(ak) 1982 else: 1983 for rn_ak in rn.atom_coords.keys(): 1984 if rn_ak.altloc_match(ak): 1985 self.atom_coords[rn_ak] = rn.atom_coords[rn_ak] 1986 self.ak_set.add(rn_ak) 1987 1988 self._gen_edra((sN, sCA, sC, nN)) # psi 1989 self._gen_edra((sCA, sC, nN, nCA)) # omega i+1 1990 self._gen_edra((sC, nN, nCA, nC)) # phi i+1 1991 self._gen_edra((sCA, sC, nN)) 1992 self._gen_edra((sC, nN, nCA)) 1993 self._gen_edra((nN, nCA, nC)) # tau i+1 1994 1995 # redundant next residue C-beta locator (alternate CB path) 1996 # otherwise missing O will cause no sidechain 1997 # not rn.rak so don't trigger missing CB for Gly 1998 nCB = rn.akc.get("CB", None) 1999 if nCB is not None and nCB in rn.atom_coords: 2000 self.atom_coords[nCB] = rn.atom_coords[nCB] 2001 self.ak_set.add(nCB) 2002 self._gen_edra((nN, nCA, nCB)) 2003 self._gen_edra((sC, nN, nCA, nCB)) 2004 2005 # if start of chain then need to __init__ NCaC hedron as not in previous residue 2006 if 0 == len(self.rprev): 2007 self._gen_edra((sN, sCA, sC)) 2008 2009 # now __init__ di/hedra for standard backbone atoms independent of neighbours 2010 backbone = ic_data_backbone 2011 for edra in backbone: 2012 # only need to build if this residue has all the atoms in the edra 2013 if all(atm in self.akc for atm in edra): 2014 r_edra = [self.rak(atom) for atom in edra] 2015 self._gen_edra(r_edra) # [4] is label on some table entries 2016 2017 # next __init__ sidechain di/hedra 2018 if self.lc is not None: 2019 sidechain = ic_data_sidechains.get(self.lc, []) 2020 for edraLong in sidechain: 2021 edra = edraLong[0:4] # [4] is label on some sidechain table entries 2022 # lots of H di/hedra can be avoided if don't have those atoms 2023 if all(atm in self.akc for atm in edra): 2024 r_edra = [self.rak(atom) for atom in edra] 2025 self._gen_edra(r_edra) 2026 if IC_Residue.AllBonds: # openscad output needs all bond cylinders explicit 2027 sidechain = ic_data_sidechain_extras.get(self.lc, []) 2028 for edra in sidechain: 2029 # test less useful here but avoids populating rak cache if possible 2030 if all(atm in self.akc for atm in edra): 2031 r_edra = [self.rak(atom) for atom in edra] 2032 self._gen_edra(r_edra) 2033 2034 # final processing of all dihedra just generated 2035 self.link_dihedra(verbose) 2036 2037 # now do the actual work computing di/hedra values from atom coordinates 2038 # -> updated to process at chain level (vectorized) 2039 # 2040 # for d in self.dihedra.values(): 2041 # # populate values and hedra for dihedron objects 2042 # d.dihedron_from_atoms() 2043 # for h in self.hedra.values(): 2044 # # miss redundant hedra above, needed for some chi1 angles 2045 # # also miss if missing atoms means hedron not in dihedra 2046 # if h.atoms_needs_update: 2047 # h.hedron_from_atoms(self.atom_coords) 2048 2049 # create di/hedra for gly Cbeta if needed, populate values later 2050 if self.gly_Cbeta and "G" == self.lc: # and self.atom_coords[sCB] is None: 2051 # add C-beta for Gly 2052 2053 self.ak_set.add(AtomKey(self, "CB")) 2054 sCB = self.rak("CB") 2055 sCB.missing = False # was True because akc cache did not have entry 2056 2057 # self.atom_coords[sCB] = None 2058 2059 # main orientation comes from O-C-Ca-Cb so make Cb-Ca-C hedron 2060 sO = self.rak("O") 2061 htpl = (sCB, sCA, sC) 2062 self._gen_edra(htpl) 2063 # values generated in build_glyCB 2064 # h = self.hedra[htpl] 2065 # h.lal[2] = self.hedra[(sCA, sC, sO)].lal[0] # CaCO len12 -> len23 2066 # h.lal[1] = 110.17513 # angle 2067 # h.lal[0] = Ca_Cb_Len # len12 2068 2069 # generate dihedral based on N-Ca-C-O offset from db query above 2070 dtpl = (sO, sC, sCA, sCB) 2071 self._gen_edra(dtpl) 2072 d = self.dihedra[dtpl] 2073 d.ic_residue = self 2074 d.set_hedra() 2075 2076 self.link_dihedra(verbose) # re-run for new dihedra 2077 2078 if verbose: 2079 # oAtom = 2080 self.rak("O") # trigger missing flag if needed 2081 missing = [] 2082 for akk, akv in self.akc.items(): 2083 if isinstance(akk, str) and akv.missing: 2084 missing.append(akv) 2085 if missing: 2086 chn = self.residue.parent 2087 chn_id = chn.id 2088 chn_len = len(chn.internal_coord.ordered_aa_ic_list) 2089 print(f"chain {chn_id} len {chn_len} missing atom(s): {missing}") 2090 2091 def build_glyCB(self, gCBd: "Dihedron"): 2092 """Populate values for Gly C-beta, rest of chain complete. 2093 2094 Data averaged from Sep 2019 Dunbrack cullpdb_pc20_res2.2_R1.0 2095 restricted to structures with amide protons. 2096 2097 Ala avg rotation of OCCACB from NCACO query: 2098 select avg(g.rslt) as avg_rslt, stddev(g.rslt) as sd_rslt, count(*) 2099 from 2100 (select f.d1d, f.d2d, 2101 (case when f.rslt > 0 then f.rslt-360.0 else f.rslt end) as rslt 2102 from (select d1.angle as d1d, d2.angle as d2d, 2103 (d2.angle - d1.angle) as rslt from dihedron d1, 2104 dihedron d2 where d1.rdh_class='AOACACAACB' and 2105 d2.rdh_class='ANACAACAO' and d1.pdb=d2.pdb and d1.chn=d2.chn 2106 and d1.res=d2.res) as f) as g 2107 2108 | avg_rslt | sd_rslt | count | 2109 | -122.682194862932 | 5.04403040513919 | 14098 | 2110 2111 """ 2112 Ca_Cb_Len = 1.53363 2113 if hasattr(self, "scale"): # used for openscad output 2114 Ca_Cb_Len *= self.scale # type: ignore 2115 2116 sN, sCA, sC, sO = self.rak("N"), self.rak("CA"), self.rak("C"), self.rak("O") 2117 2118 # generated dihedron is O-Ca-C-Cb 2119 # hedron2 is reversed: Cb-Ca-C (also h1 reversed: C-Ca-O) 2120 h2 = gCBd.hedron2 2121 h2.lal[:] = (Ca_Cb_Len, 110.17513, self.hedra[(sCA, sC, sO)].lal[0]) 2122 2123 refval = self.dihedra.get((sN, sCA, sC, sO), None) 2124 if refval: 2125 gCBd.angle = 122.68219 + refval.angle 2126 if gCBd.angle > 180.0: 2127 gCBd.angle -= 360.0 2128 else: 2129 gCBd.angle = 120 2130 2131 @staticmethod 2132 def _pdb_atom_string(atm: Atom) -> str: 2133 """Generate PDB ATOM record. 2134 2135 :param atm: Biopython Atom object reference 2136 """ 2137 if 2 == atm.is_disordered(): 2138 s = "" 2139 for a in atm.child_dict.values(): 2140 s += IC_Residue._pdb_atom_string(a) 2141 return s 2142 else: 2143 res = atm.parent 2144 chn = res.parent 2145 s = ( 2146 "{:6}{:5d} {:4}{:1}{:3} {:1}{:4}{:1} {:8.3f}{:8.3f}{:8.3f}" 2147 "{:6.2f}{:6.2f} {:>4}\n" 2148 ).format( 2149 "ATOM", 2150 atm.serial_number, 2151 atm.fullname, 2152 atm.altloc, 2153 res.resname, 2154 chn.id, 2155 res.id[1], 2156 res.id[2], 2157 atm.coord[0], 2158 atm.coord[1], 2159 atm.coord[2], 2160 atm.occupancy, 2161 atm.bfactor, 2162 atm.element, 2163 ) 2164 # print(s) 2165 return s 2166 2167 @staticmethod 2168 def _residue_string(res: "Residue") -> str: 2169 """Generate PIC Residue string. 2170 2171 Enough to create Biopython Residue object without actual Atoms. 2172 2173 :param res: Biopython Residue object reference 2174 """ 2175 segid = res.get_segid() 2176 if segid.isspace() or "" == segid: 2177 segid = "" 2178 else: 2179 segid = " [" + segid + "]" 2180 return str(res.get_full_id()) + " " + res.resname + segid + "\n" 2181 2182 def _write_pic_bfac(self, atm: Atom, s: str, col: int) -> Tuple[str, int]: 2183 ak = self.rak(atm) 2184 if 0 == col % 5: 2185 s += "BFAC:" 2186 s += " " + ak.id + " " + f"{atm.get_bfactor():6.2f}" 2187 col += 1 2188 if 0 == col % 5: 2189 s += "\n" 2190 return s, col 2191 2192 def write_PIC(self, pdbid: str = "0PDB", chainid: str = "A", s: str = "") -> str: 2193 """Write PIC format lines for this residue. 2194 2195 :param str pdbid: PDB idcode string; default 0PDB 2196 :param str chainid: PDB Chain ID character; default A 2197 :param str s: result string to add to 2198 """ 2199 if pdbid is None: 2200 pdbid = "0PDB" 2201 if chainid is None: 2202 chainid = "A" 2203 s += IC_Residue._residue_string(self.residue) 2204 2205 if ( 2206 0 == len(self.rprev) 2207 and hasattr(self, "NCaCKey") 2208 and self.NCaCKey is not None 2209 ): 2210 NCaChedron = self.pick_angle(self.NCaCKey[0]) # first tau 2211 if NCaChedron is not None: 2212 try: 2213 ts = IC_Residue._pdb_atom_string(self.residue["N"]) 2214 ts += IC_Residue._pdb_atom_string(self.residue["CA"]) 2215 ts += IC_Residue._pdb_atom_string(self.residue["C"]) 2216 s += ts # only if no exception: have all 3 atoms 2217 except KeyError: 2218 pass 2219 2220 base = pdbid + " " + chainid + " " 2221 for h in sorted(self.hedra.values()): 2222 try: 2223 s += ( 2224 base 2225 + h.id 2226 + " " 2227 + "{:9.5f} {:9.5f} {:9.5f}".format( 2228 set_accuracy_95(h.lal[0]), # len12 2229 set_accuracy_95(h.lal[1]), # angle 2230 set_accuracy_95(h.lal[2]), # len23 2231 ) 2232 ) 2233 except KeyError: 2234 pass 2235 s += "\n" 2236 for d in sorted(self.dihedra.values()): 2237 try: 2238 s += base + d.id + " " + "{:9.5f}".format(set_accuracy_95(d.angle)) 2239 except KeyError: 2240 pass 2241 s += "\n" 2242 2243 col = 0 2244 for a in sorted(self.residue.get_atoms()): 2245 if 2 == a.is_disordered(): # hasattr(a, 'child_dict'): 2246 if self.alt_ids is None: 2247 s, col = self._write_pic_bfac(a.selected_child, s, col) 2248 else: 2249 for atm in a.child_dict.values(): 2250 s, col = self._write_pic_bfac(atm, s, col) 2251 else: 2252 s, col = self._write_pic_bfac(a, s, col) 2253 if 0 != col % 5: 2254 s += "\n" 2255 2256 return s 2257 2258 def coords_to_residue(self, rnext: bool = False) -> None: 2259 """Convert self.atom_coords to biopython Residue Atom coords. 2260 2261 Copy homogeneous IC_Residue atom_coords to self.residue cartesian 2262 Biopython Atom coords. 2263 2264 :param rnext: bool default False 2265 next IC_Residue has no atom coords due to missing atoms, so try to 2266 populate with any available coords calculated for this residue 2267 di/hedra extending into next 2268 """ 2269 if rnext: 2270 respos, icode = self.rnext[0].residue.id[1:3] 2271 else: 2272 respos, icode = self.residue.id[1:3] 2273 respos = str(respos) 2274 spNdx, icNdx, resnNdx, atmNdx, altlocNdx, occNdx = AtomKey.fields 2275 2276 if rnext: 2277 Res = self.rnext[0].residue 2278 else: 2279 Res = self.residue 2280 ndx = Res.parent.internal_coord.ndx 2281 2282 for ak in sorted(self.atom_coords): 2283 # print(ak) 2284 if respos == ak.akl[spNdx] and ( 2285 (icode == " " and ak.akl[icNdx] is None) or icode == ak.akl[icNdx] 2286 ): 2287 2288 ac = self.atom_coords[ak] 2289 atm_coords = ac[:3] 2290 akl = ak.akl 2291 atm, altloc = akl[atmNdx], akl[altlocNdx] 2292 2293 Atm = None 2294 newAtom = None 2295 2296 if Res.has_id(atm): 2297 Atm = Res[atm] 2298 2299 if Atm is None or ( 2300 2 == Atm.is_disordered() and not Atm.disordered_has_id(altloc) 2301 ): 2302 # print('new', ak) 2303 occ = akl[occNdx] 2304 aloc = akl[altlocNdx] 2305 bfac = self.bfactors.get(ak.id, None) 2306 newAtom = Atom( 2307 atm, 2308 atm_coords, 2309 (0.0 if bfac is None else bfac), 2310 (1.00 if occ is None else float(occ)), 2311 (" " if aloc is None else aloc), 2312 atm, 2313 ndx, 2314 atm[0], 2315 ) 2316 ndx += 1 2317 if Atm is None: 2318 if altloc is None: 2319 Res.add(newAtom) 2320 else: 2321 disordered_atom = DisorderedAtom(atm) 2322 Res.add(disordered_atom) 2323 disordered_atom.disordered_add(newAtom) 2324 Res.flag_disordered() 2325 else: 2326 Atm.disordered_add(newAtom) 2327 else: 2328 # Atm is not None, might be disordered with altloc 2329 # print('update', ak) 2330 if 2 == Atm.is_disordered() and Atm.disordered_has_id(altloc): 2331 Atm.disordered_select(altloc) 2332 Atm.set_coord(atm_coords) 2333 sn = Atm.get_serial_number() 2334 if sn is not None: 2335 ndx = sn + 1 2336 2337 Res.parent.internal_coord.ndx = ndx 2338 2339 def _get_ak_tuple(self, ak_str: str) -> Optional[Tuple["AtomKey", ...]]: 2340 """Convert atom pair string to AtomKey tuple. 2341 2342 :param ak_str: str 2343 Two atom names separated by ':', e.g. 'N:CA' 2344 Optional position specifier relative to self, 2345 e.g. '-1C:N' for preceding peptide bond. 2346 """ 2347 AK = AtomKey 2348 S = self 2349 angle_key2 = [] 2350 akstr_list = ak_str.split(":") 2351 lenInput = len(akstr_list) 2352 for a in akstr_list: 2353 m = self.relative_atom_re.match(a) 2354 if m: 2355 if m.group(1) == "-1": 2356 if 0 < len(S.rprev): 2357 angle_key2.append(AK(S.rprev[0], m.group(2))) 2358 elif m.group(1) == "1": 2359 if 0 < len(S.rnext): 2360 angle_key2.append(AK(S.rnext[0], m.group(2))) 2361 elif m.group(1) == "0": 2362 angle_key2.append(self.rak(m.group(2))) 2363 else: 2364 angle_key2.append(self.rak(a)) 2365 if len(angle_key2) != lenInput: 2366 return None 2367 return tuple(angle_key2) 2368 2369 relative_atom_re = re.compile(r"^(-?[10])([A-Z]+)$") 2370 2371 def _get_angle_for_tuple( 2372 self, angle_key: EKT 2373 ) -> Optional[Union["Hedron", "Dihedron"]]: 2374 len_mkey = len(angle_key) 2375 rval: Optional[Union["Hedron", "Dihedron"]] 2376 if 4 == len_mkey: 2377 rval = self.dihedra.get(cast(DKT, angle_key), None) 2378 elif 3 == len_mkey: 2379 rval = self.hedra.get(cast(HKT, angle_key), None) 2380 else: 2381 return None 2382 return rval 2383 2384 def pick_angle( 2385 self, angle_key: Union[EKT, str] 2386 ) -> Optional[Union["Hedron", "Dihedron"]]: 2387 """Get Hedron or Dihedron for angle_key. 2388 2389 :param angle_key: 2390 - tuple of 3 or 4 AtomKeys 2391 - string of atom names ('CA') separated by :'s 2392 - string of [-1, 0, 1]<atom name> separated by ':'s. -1 is 2393 previous residue, 0 is this residue, 1 is next residue 2394 - psi, phi, omg, omega, chi1, chi2, chi3, chi4, chi5 2395 - tau (N-CA-C angle) see Richardson1981 2396 - except for tuples of AtomKeys, no option to access alternate disordered atoms 2397 2398 Observe that a residue's phi and omega dihedrals, as well as the hedra 2399 comprising them (including the N:Ca:C tau hedron), are stored in the 2400 n-1 di/hedra sets; this is handled here, but may be an issue if accessing 2401 directly. 2402 2403 The following are equivalent (except for sidechains with non-carbon 2404 atoms for chi2):: 2405 2406 ric = r.internal_coord 2407 print( 2408 r, 2409 ric.get_angle("psi"), 2410 ric.get_angle("phi"), 2411 ric.get_angle("omg"), 2412 ric.get_angle("tau"), 2413 ric.get_angle("chi2"), 2414 ) 2415 print( 2416 r, 2417 ric.get_angle("N:CA:C:1N"), 2418 ric.get_angle("-1C:N:CA:C"), 2419 ric.get_angle("-1CA:-1C:N:CA"), 2420 ric.get_angle("N:CA:C"), 2421 ric.get_angle("CA:CB:CG:CD"), 2422 ) 2423 2424 See ic_data.py for detail of atoms in the enumerated sidechain angles 2425 and the backbone angles which do not span the peptide bond. Using 's' 2426 for current residue ('self') and 'n' for next residue, the spanning 2427 angles are:: 2428 2429 (sN, sCA, sC, nN) # psi 2430 (sCA, sC, nN, nCA) # omega i+1 2431 (sC, nN, nCA, nC) # phi i+1 2432 (sCA, sC, nN) 2433 (sC, nN, nCA) 2434 (nN, nCA, nC) # tau i+1 2435 2436 :return: Matching Hedron, Dihedron, or None. 2437 """ 2438 rval: Optional[Union["Hedron", "Dihedron"]] = None 2439 if isinstance(angle_key, tuple): 2440 rval = self._get_angle_for_tuple(angle_key) 2441 if rval is None and self.rprev: 2442 rval = self.rprev[0]._get_angle_for_tuple(angle_key) 2443 elif ":" in angle_key: 2444 angle_key = cast(EKT, self._get_ak_tuple(cast(str, angle_key))) 2445 if angle_key is None: 2446 return None 2447 rval = self._get_angle_for_tuple(angle_key) 2448 if rval is None and self.rprev: 2449 rval = self.rprev[0]._get_angle_for_tuple(angle_key) 2450 elif "psi" == angle_key: 2451 if 0 == len(self.rnext): 2452 return None 2453 rn = self.rnext[0] 2454 sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C") 2455 nN = rn.rak("N") 2456 rval = self.dihedra.get((sN, sCA, sC, nN), None) 2457 elif "phi" == angle_key: 2458 if 0 == len(self.rprev): 2459 return None 2460 rp = self.rprev[0] 2461 pC, sN, sCA = rp.rak("C"), self.rak("N"), self.rak("CA") 2462 sC = self.rak("C") 2463 rval = rp.dihedra.get((pC, sN, sCA, sC), None) 2464 elif "omg" == angle_key or "omega" == angle_key: 2465 if 0 == len(self.rprev): 2466 return None 2467 rp = self.rprev[0] 2468 pCA, pC, sN = rp.rak("CA"), rp.rak("C"), self.rak("N") 2469 sCA = self.rak("CA") 2470 rval = rp.dihedra.get((pCA, pC, sN, sCA), None) 2471 elif "tau" == angle_key: 2472 sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C") 2473 rval = self.hedra.get((sN, sCA, sC), None) 2474 if rval is None and 0 != len(self.rprev): 2475 rp = self.rprev[0] # tau in prev residue for all but first 2476 rval = rp.hedra.get((sN, sCA, sC), None) 2477 elif angle_key.startswith("chi"): 2478 sclist = ic_data_sidechains.get(self.lc, None) 2479 if sclist is None: 2480 return None 2481 for akl in sclist: 2482 if 5 == len(akl): 2483 if akl[4] == angle_key: 2484 klst = [self.rak(a) for a in akl[0:4]] 2485 tklst = cast(DKT, tuple(klst)) 2486 rval = self.dihedra.get(tklst, None) 2487 2488 return rval 2489 2490 def get_angle(self, angle_key: Union[EKT, str]) -> Optional[float]: 2491 """Get dihedron or hedron angle for specified key. 2492 2493 See pick_angle() for key specifications. 2494 """ 2495 edron = self.pick_angle(angle_key) 2496 if edron: 2497 return edron.angle 2498 return None 2499 2500 def set_angle(self, angle_key: Union[EKT, str], v: float): 2501 """Set dihedron or hedron angle for specified key. 2502 2503 See pick_angle() for key specifications. 2504 """ 2505 rval = self.pick_angle(angle_key) 2506 if rval is not None: 2507 rval.angle = v 2508 2509 def pick_length( 2510 self, ak_spec: Union[str, BKT] 2511 ) -> Tuple[Optional[List["Hedron"]], Optional[BKT]]: 2512 """Get list of hedra containing specified atom pair. 2513 2514 :param ak_spec: 2515 - tuple of two AtomKeys 2516 - string: two atom names separated by ':', e.g. 'N:CA' with 2517 optional position specifier relative to self, e.g. '-1C:N' for 2518 preceding peptide bond. 2519 2520 The following are equivalent:: 2521 2522 ric = r.internal_coord 2523 print( 2524 r, 2525 ric.get_length("0C:1N"), 2526 ) 2527 print( 2528 r, 2529 None 2530 if not ric.rnext 2531 else ric.get_length((ric.rak("C"), ric.rnext[0].rak("N"))), 2532 ) 2533 2534 :return: list of hedra containing specified atom pair, tuple of atom keys 2535 """ 2536 rlst: List[Hedron] = [] 2537 # if ":" in ak_spec: 2538 if isinstance(ak_spec, str): 2539 ak_spec = cast(BKT, self._get_ak_tuple(ak_spec)) 2540 if ak_spec is None: 2541 return None, None 2542 for hed_key, hed_val in self.hedra.items(): 2543 if all(ak in hed_key for ak in ak_spec): 2544 rlst.append(hed_val) 2545 return rlst, ak_spec 2546 2547 def get_length(self, ak_spec: Union[str, BKT]) -> Optional[float]: 2548 """Get bond length for specified atom pair. 2549 2550 See pick_length() for ak_spec. 2551 """ 2552 hed_lst, ak_spec2 = self.pick_length(ak_spec) 2553 if hed_lst is None or ak_spec2 is None: 2554 return None 2555 2556 for hed in hed_lst: 2557 val = hed.get_length(ak_spec2) 2558 if val is not None: 2559 return val 2560 return None 2561 2562 def set_length(self, ak_spec: Union[str, BKT], val: float) -> None: 2563 """Set bond length for specified atom pair. 2564 2565 See pick_length() for ak_spec. 2566 """ 2567 hed_lst, ak_spec2 = self.pick_length(ak_spec) 2568 if hed_lst is not None and ak_spec2 is not None: 2569 for hed in hed_lst: 2570 hed.set_length(ak_spec2, val) 2571 2572 def applyMtx(self, mtx: numpy.array) -> None: 2573 """Apply matrix to atom_coords for this IC_Residue.""" 2574 for ak, ac in self.atom_coords.items(): 2575 # self.atom_coords[ak] = mtx @ ac 2576 self.atom_coords[ak] = mtx.dot(ac) 2577 2578 2579class Edron: 2580 """Base class for Hedron and Dihedron classes. 2581 2582 Supports rich comparison based on lists of AtomKeys. 2583 2584 Attributes 2585 ---------- 2586 aks: tuple 2587 3 (hedron) or 4 (dihedron) AtomKeys defining this di/hedron 2588 id: str 2589 ':'-joined string of AtomKeys for this di/hedron 2590 needs_update: bool 2591 indicates di/hedron local atom_coords do NOT reflect current di/hedron 2592 angle and length values in hedron local coordinate space 2593 dh_class: str 2594 sequence of atoms (no position or residue) comprising di/hedron 2595 for statistics 2596 rdh_class: str 2597 sequence of residue, atoms comprising di/hedron for statistics 2598 edron_re: compiled regex (Class Attribute) 2599 A compiled regular expression matching string IDs for Hedron 2600 and Dihedron objects 2601 cic: IC_Chain reference 2602 Chain internal coords object containing this hedron 2603 2604 Methods 2605 ------- 2606 gen_key([AtomKey, ...] or AtomKey, ...) (Static Method) 2607 generate a ':'-joined string of AtomKey Ids 2608 gen_acs(atom_coords) 2609 generate tuple of atom coords for keys in self.aks 2610 is_backbone() 2611 Return True if all aks atoms are N, Ca, C or O 2612 2613 """ 2614 2615 # regular expresion to capture hedron and dihedron specifications, as in 2616 # .pic files 2617 edron_re = re.compile( 2618 # pdbid and chain id 2619 r"^(?P<pdbid>\w+)?\s(?P<chn>[\w|\s])?\s" 2620 # 3 atom specifiers for hedron 2621 r"(?P<a1>[\w\-\.]+):(?P<a2>[\w\-\.]+):(?P<a3>[\w\-\.]+)" 2622 # 4th atom specifier for dihedron 2623 r"(:(?P<a4>[\w\-\.]+))?" 2624 r"\s+" 2625 # len-angle-len for hedron 2626 r"(((?P<len12>\S+)\s+(?P<angle>\S+)\s+(?P<len23>\S+)\s*$)|" 2627 # dihedral angle for dihedron 2628 r"((?P<dihedral>\S+)\s*$))" 2629 ) 2630 2631 @staticmethod 2632 def gen_key(lst: Union[List[str], List["AtomKey"]]) -> str: 2633 """Generate string of ':'-joined AtomKey strings from input. 2634 2635 :param lst: list of AtomKey objects or id strings 2636 """ 2637 if isinstance(lst[0], str): 2638 lst = cast(List[str], lst) 2639 return ":".join(lst) 2640 else: 2641 lst = cast(List[AtomKey], lst) 2642 return ":".join(ak.id for ak in lst) 2643 2644 def __init__(self, *args: Union[List["AtomKey"], EKT], **kwargs: str) -> None: 2645 """Initialize Edron with sequence of AtomKeys. 2646 2647 Acceptable input: 2648 2649 [ AtomKey, ... ] : list of AtomKeys 2650 AtomKey, ... : sequence of AtomKeys as args 2651 {'a1': str, 'a2': str, ... } : dict of AtomKeys as 'a1', 'a2' ... 2652 """ 2653 aks: List[AtomKey] = [] 2654 for arg in args: 2655 if isinstance(arg, list): 2656 aks = arg 2657 elif isinstance(arg, tuple): 2658 aks = list(arg) 2659 else: 2660 if arg is not None: 2661 aks.append(arg) 2662 if [] == aks and all(k in kwargs for k in ("a1", "a2", "a3")): 2663 aks = [AtomKey(kwargs["a1"]), AtomKey(kwargs["a2"]), AtomKey(kwargs["a3"])] 2664 if "a4" in kwargs and kwargs["a4"] is not None: 2665 aks.append(AtomKey(kwargs["a4"])) 2666 2667 # if args are atom key strings instead of AtomKeys 2668 # for i in range(len(aks)): 2669 # if not isinstance(aks[i], AtomKey): 2670 # aks[i] = AtomKey(aks[i]) 2671 2672 self.aks = tuple(aks) 2673 self.id = Edron.gen_key(aks) 2674 self._hash = hash(self.aks) 2675 2676 # flag indicating that atom coordinates are up to date 2677 # (do not need to be recalculated from angle and or length values) 2678 self.needs_update = True 2679 2680 # no residue or position, just atoms 2681 self.dh_class = "" 2682 # same but residue specific 2683 self.rdh_class = "" 2684 2685 # IC_Chain which contains this di/hedron 2686 self.cic: IC_Chain 2687 2688 atmNdx = AtomKey.fields.atm 2689 resNdx = AtomKey.fields.resname 2690 for ak in aks: 2691 akl = ak.akl 2692 self.dh_class += akl[atmNdx] 2693 self.rdh_class += akl[resNdx] + akl[atmNdx] 2694 2695 def gen_acs( 2696 self, atom_coords: Dict["AtomKey", numpy.array] 2697 ) -> Tuple[numpy.array, ...]: 2698 """Generate tuple of atom coord arrays for keys in self.aks. 2699 2700 :param atom_coords: AtomKey dict of atom coords for residue 2701 :raises: MissingAtomError any atoms in self.aks missing coordinates 2702 """ 2703 aks = self.aks 2704 acs: List[numpy.array] = [] 2705 estr = "" 2706 for ak in aks: 2707 ac = atom_coords[ak] 2708 if ac is None: 2709 estr += str(ak) + " " 2710 else: 2711 acs.append(ac) 2712 if estr != "": 2713 raise MissingAtomError("%s missing coordinates for %s" % (self, estr)) 2714 return tuple(acs) 2715 2716 def is_backbone(self) -> bool: 2717 """Report True for contains only N, C, CA, O, H atoms.""" 2718 atmNdx = AtomKey.fields.atm 2719 if all( 2720 atm in ("N", "C", "CA", "O", "H") 2721 for atm in (ak.akl[atmNdx] for ak in self.aks) 2722 ): 2723 return True 2724 return False 2725 2726 def __repr__(self) -> str: 2727 """Tuple of AtomKeys is default repr string.""" 2728 return str(self.aks) 2729 2730 def __hash__(self) -> int: 2731 """Hash calculated at init from aks tuple.""" 2732 return self._hash 2733 2734 def _cmp(self, other: "Edron") -> Union[Tuple["AtomKey", "AtomKey"], bool]: 2735 """Comparison function ranking self vs. other; False on equal.""" 2736 for ak_s, ak_o in zip(self.aks, other.aks): 2737 if ak_s != ak_o: 2738 return ak_s, ak_o 2739 return False 2740 2741 def __eq__(self, other: object) -> bool: 2742 """Test for equality.""" 2743 if not isinstance(other, type(self)): 2744 return NotImplemented 2745 return self.id == other.id 2746 2747 def __ne__(self, other: object) -> bool: 2748 """Test for inequality.""" 2749 if not isinstance(other, type(self)): 2750 return NotImplemented 2751 return self.id != other.id 2752 2753 def __gt__(self, other: object) -> bool: 2754 """Test greater than.""" 2755 if not isinstance(other, type(self)): 2756 return NotImplemented 2757 rslt = self._cmp(other) 2758 if rslt: 2759 rslt = cast(Tuple[AtomKey, AtomKey], rslt) 2760 return rslt[0] > rslt[1] 2761 return False 2762 2763 def __ge__(self, other: object) -> bool: 2764 """Test greater or equal.""" 2765 if not isinstance(other, type(self)): 2766 return NotImplemented 2767 rslt = self._cmp(other) 2768 if rslt: 2769 rslt = cast(Tuple[AtomKey, AtomKey], rslt) 2770 return rslt[0] >= rslt[1] 2771 return True 2772 2773 def __lt__(self, other: object) -> bool: 2774 """Test less than.""" 2775 if not isinstance(other, type(self)): 2776 return NotImplemented 2777 rslt = self._cmp(other) 2778 if rslt: 2779 rslt = cast(Tuple[AtomKey, AtomKey], rslt) 2780 return rslt[0] < rslt[1] 2781 return False 2782 2783 def __le__(self, other: object) -> bool: 2784 """Test less or equal.""" 2785 if not isinstance(other, type(self)): 2786 return NotImplemented 2787 rslt = self._cmp(other) 2788 if rslt: 2789 rslt = cast(Tuple[AtomKey, AtomKey], rslt) 2790 return rslt[0] <= rslt[1] 2791 return True 2792 2793 2794class Hedron(Edron): 2795 """Class to represent three joined atoms forming a plane. 2796 2797 Contains atom coordinates in local coordinate space, central atom 2798 at origin. Stored in two orientations, with the 3rd (forward) or 2799 first (reversed) atom on the +Z axis. 2800 2801 Attributes 2802 ---------- 2803 lal: numpy array of len12, angle, len23 2804 len12 = distance between 1st and 2nd atom 2805 angle = angle (degrees) formed by 3 atoms 2806 len23 = distance between 2nd and 3rd atoms 2807 2808 atoms: 3x4 numpy arrray (view on chain array) 2809 3 homogeneous atoms comprising hedron, 1st on XZ, 2nd at origin, 3rd on +Z 2810 atomsR: 3x4 numpy array (view on chain array) 2811 atoms reversed, 1st on +Z, 2nd at origin, 3rd on XZ plane 2812 2813 Methods 2814 ------- 2815 get_length() 2816 get bond length for specified atom pair 2817 set_length() 2818 set bond length for specified atom pair 2819 angle(), len12(), len23() 2820 gettters and setters for relevant attributes (angle in degrees) 2821 """ 2822 2823 def __init__(self, *args: Union[List["AtomKey"], HKT], **kwargs: str) -> None: 2824 """Initialize Hedron with sequence of AtomKeys, kwargs. 2825 2826 Acceptable input: 2827 As for Edron, plus optional 'len12', 'angle', 'len23' 2828 keyworded values. 2829 """ 2830 super().__init__(*args, **kwargs) 2831 2832 # print('initialising', self.id) 2833 2834 # 3 matrices specifying hedron space coordinates of constituent atoms, 2835 # initially atom3 on +Z axis 2836 self.atoms: HACS 2837 # 3 matrices, hedron space coordinates, reversed order 2838 # initially atom1 on +Z axis 2839 self.atomsR: HACS 2840 2841 if "len12" in kwargs: 2842 self.lal = numpy.array( 2843 ( 2844 float(kwargs["len12"]), 2845 float(kwargs["angle"]), 2846 float(kwargs["len23"]), 2847 ) 2848 ) 2849 else: 2850 self.lal = numpy.zeros(3) 2851 2852 # else: 2853 # self.len12 = None 2854 # self.angle = None 2855 # self.len23 = None 2856 2857 # print(self) 2858 2859 def __repr__(self) -> str: 2860 """Print string for Hedron object.""" 2861 return f"3-{self.id} {self.rdh_class} {str(self.lal[0])} {str(self.lal[1])} {str(self.lal[2])}" 2862 2863 @property 2864 def angle(self) -> float: 2865 """Get this hedron angle.""" 2866 try: 2867 return self.lal[1] # _angle 2868 except AttributeError: 2869 return 0.0 2870 2871 @angle.setter 2872 def angle(self, angle_deg) -> None: 2873 """Set this hedron angle; sets needs_update.""" 2874 self.lal[1] = angle_deg # view on chain numpy arrays 2875 self.cic.hAtoms_needs_update[self.cic.hedraNdx[self.aks]] = True 2876 2877 @property 2878 def len12(self): 2879 """Get first length for Hedron.""" 2880 try: 2881 return self.lal[0] # _len12 2882 except AttributeError: 2883 return 0.0 2884 2885 @len12.setter 2886 def len12(self, len): 2887 """Set first length for Hedron; sets needs_update.""" 2888 self.lal[0] # _len12 = len 2889 self.cic.hAtoms_needs_update[self.cic.hedraNdx[self.aks]] = True 2890 2891 @property 2892 def len23(self) -> float: 2893 """Get second length for Hedron.""" 2894 try: 2895 return self.lal[2] # _len23 2896 except AttributeError: 2897 return 0.0 2898 2899 @len23.setter 2900 def len23(self, len): 2901 """Set second length for Hedron; sets needs_update.""" 2902 self.lal[2] = len 2903 self.cic.hAtoms_needs_update[self.cic.hedraNdx[self.aks]] = True 2904 2905 def get_length(self, ak_tpl: BKT) -> Optional[float]: 2906 """Get bond length for specified atom pair. 2907 2908 :param ak_tpl: tuple of AtomKeys 2909 pair of atoms in this Hedron 2910 """ 2911 if 2 > len(ak_tpl): 2912 return None 2913 if all(ak in self.aks[:2] for ak in ak_tpl): 2914 return self.lal[0] # len12 2915 if all(ak in self.aks[1:] for ak in ak_tpl): 2916 return self.lal[2] # len23 2917 return None 2918 2919 def set_length(self, ak_tpl: BKT, newLength: float): 2920 """Set bond length for specified atom pair; sets needs_update. 2921 2922 :param ak_tpl: tuple of AtomKeys 2923 pair of atoms in this Hedron 2924 """ 2925 if 2 > len(ak_tpl): 2926 raise TypeError("Require exactly 2 AtomKeys: %s" % (str(ak_tpl))) 2927 elif all(ak in self.aks[:2] for ak in ak_tpl): 2928 self.lal[0] = newLength # len12 2929 elif all(ak in self.aks[1:] for ak in ak_tpl): 2930 self.lal[2] = newLength # len23 2931 else: 2932 raise TypeError("%s not found in %s" % (str(ak_tpl), self)) 2933 2934 self.cic.hAtoms_needs_update[self.cic.hedraNdx[self.aks]] = True 2935 2936 2937class Dihedron(Edron): 2938 """Class to represent four joined atoms forming a dihedral angle. 2939 2940 Attributes 2941 ---------- 2942 angle: float 2943 Measurement or specification of dihedral angle in degrees 2944 hedron1, hedron2: Hedron object references 2945 The two hedra which form the dihedral angle 2946 h1key, h2key: tuples of AtomKeys 2947 Hash keys for hedron1 and hedron2 2948 id3,id32: tuples of AtomKeys 2949 First 3 and second 3 atoms comprising dihedron; hxkey orders may differ 2950 initial_coords: tuple[4] of numpy arrays [4] 2951 Local atom coords for 4 atoms, [0] on XZ plane, [1] at origin, 2952 [2] on +Z, [3] rotated by dihedral 2953 a4_pre_rotation: numpy array [4] 2954 4th atom of dihedral aligned to XZ plane (angle not applied) 2955 ic_residue: IC_Residue object reference 2956 IC_Residue object containing this dihedral 2957 reverse: bool 2958 Indicates order of atoms in dihedron is reversed from order of atoms 2959 in hedra (configured by set_hedra()) 2960 cst, rcst: numpy array [4][4] 2961 transforms to and from coordinate space defined by first hedron. 2962 set by IC_Residue.assemble(). defined by id3 order NOT h1key order 2963 (atoms may be reversed between these two) 2964 2965 Methods 2966 ------- 2967 set_hedra() 2968 work out hedra keys and orientation for this dihedron 2969 angle() 2970 getter/setter for dihdral angle in degrees 2971 2972 """ 2973 2974 def __init__(self, *args: Union[List["AtomKey"], DKT], **kwargs: str) -> None: 2975 """Initialize Dihedron with sequence of AtomKeys and optional dihedral angle. 2976 2977 Acceptable input: 2978 As for Edron, plus optional 'dihedral' keyworded angle value. 2979 """ 2980 super().__init__(*args, **kwargs) 2981 2982 # hedra making up this dihedron; set by self:set_hedra() 2983 self.hedron1: Hedron # = None 2984 self.hedron2: Hedron # = None 2985 2986 self.h1key: HKT # = None 2987 self.h2key: HKT # = None 2988 2989 self.id3: HKT = cast(HKT, tuple(self.aks[0:3])) 2990 self.id32: HKT = cast(HKT, tuple(self.aks[1:4])) 2991 2992 # 4 matrices specifying hedron space coordinates of constituent atoms, 2993 # in this space atom 3 is on on +Z axis 2994 # see coord_space() 2995 self.initial_coords: DACS 2996 self.a4_pre_rotation: numpy.array 2997 2998 # IC_Residue object which includes this dihedron; 2999 # set by Residue:linkDihedra() 3000 self.ic_residue: IC_Residue 3001 # order of atoms in dihedron is reversed from order of atoms in hedra 3002 self.reverse = False 3003 3004 # coordinate space transform matrices 3005 # defined by id3 order NOT h1key order (may be reversed) 3006 # self.cst = None # protein coords to 1st hedron coord space 3007 # self.rcst = None # reverse = 1st hedron coords back to protein coords 3008 3009 if "dihedral" in kwargs: 3010 self.angle = float(kwargs["dihedral"]) 3011 3012 def __repr__(self) -> str: 3013 """Print string for Dihedron object.""" 3014 return f"4-{str(self.id)} {self.rdh_class} {str(self.angle)} {str(self.ic_residue)}" 3015 3016 @staticmethod 3017 def _get_hedron(ic_res: IC_Residue, id3: HKT) -> Optional[Hedron]: 3018 """Find specified hedron on this residue or its adjacent neighbors.""" 3019 hedron = ic_res.hedra.get(id3, None) 3020 if not hedron and 0 < len(ic_res.rprev): 3021 for rp in ic_res.rprev: 3022 hedron = rp.hedra.get(id3, None) 3023 if hedron is not None: 3024 break 3025 if not hedron and 0 < len(ic_res.rnext): 3026 for rn in ic_res.rnext: 3027 hedron = rn.hedra.get(id3, None) 3028 if hedron is not None: 3029 break 3030 return hedron 3031 3032 def set_hedra(self) -> Tuple[bool, Hedron, Hedron]: 3033 """Work out hedra keys and set rev flag.""" 3034 try: 3035 return self.rev, self.hedron1, self.hedron2 3036 except AttributeError: 3037 pass 3038 3039 rev = False 3040 res = self.ic_residue 3041 h1key = self.id3 3042 hedron1 = Dihedron._get_hedron(res, h1key) 3043 if not hedron1: 3044 rev = True 3045 h1key = cast(HKT, tuple(self.aks[2::-1])) 3046 hedron1 = Dihedron._get_hedron(res, h1key) 3047 h2key = cast(HKT, tuple(self.aks[3:0:-1])) 3048 else: 3049 h2key = self.id32 3050 3051 if not hedron1: 3052 raise HedronMatchError( 3053 "can't find 1st hedron for key %s dihedron %s" % (h1key, self) 3054 ) 3055 3056 hedron2 = Dihedron._get_hedron(res, h2key) 3057 3058 if not hedron2: 3059 raise HedronMatchError( 3060 "can't find 2nd hedron for key %s dihedron %s" % (h2key, self) 3061 ) 3062 3063 self.hedron1 = hedron1 3064 self.h1key = h1key 3065 self.hedron2 = hedron2 3066 self.h2key = h2key 3067 3068 self.reverse = rev 3069 3070 return rev, hedron1, hedron2 3071 3072 @property 3073 def angle(self) -> float: 3074 """Get dihedral angle.""" 3075 try: 3076 return self.cic.dihedraIC[self.cic.dihedraNdx[self.aks]] 3077 except AttributeError: 3078 try: 3079 return self._dihedral 3080 except AttributeError: 3081 return 360.0 # error value without type hint hassles 3082 3083 @angle.setter 3084 def angle(self, dangle_deg: float) -> None: 3085 """Save new dihedral angle; sets needs_update. 3086 3087 faster to modify IC_Chain level arrays directly. 3088 3089 N.B. dihedron (i-1)C-N-CA-CB is ignored if O exists. 3090 C-beta is by default placed using O-C-CA-CB, but O is missing 3091 in some PDB file residues, which means the sidechain cannot be 3092 placed. The alternate CB path (i-1)C-N-CA-CB is provided to 3093 circumvent this, but if this is needed then it must be adjusted in 3094 conjunction with PHI ((i-1)C-N-CA-C) as they overlap. 3095 3096 :param dangle_deg: float new dihedral angle in degrees 3097 """ 3098 self._dihedral = dangle_deg 3099 self.needs_update = True 3100 try: 3101 dndx = self.cic.dihedraNdx[self.aks] 3102 self.cic.dihedraIC[dndx] = dangle_deg 3103 self.cic.dihedraICr[dndx] = numpy.deg2rad(dangle_deg) 3104 self.cic.dAtoms_needs_update[dndx] = True 3105 except AttributeError: 3106 pass 3107 3108 3109class AtomKey: 3110 """Class for dict keys to reference atom coordinates. 3111 3112 AtomKeys capture residue and disorder information together, and 3113 provide a no-whitespace string key for .pic files. 3114 3115 Supports rich comparison and multiple ways to instantiate. 3116 3117 AtomKeys contain: 3118 residue position, insertion code, 1 or 3 char residue name, 3119 atom name, altloc, and occupancy 3120 3121 Attributes 3122 ---------- 3123 akl: tuple 3124 All six fields of AtomKey 3125 fieldNames: tuple (Class Attribute) 3126 Mapping of key index positions to names 3127 fields: namedtuple (Class Attribute) 3128 Mapping of field names to index positions 3129 id: str 3130 '_'-joined AtomKey fields, excluding 'None' fields 3131 atom_re: compiled regex (Class Attribute) 3132 A compiled regular expression matching the string form of the key 3133 endnum_re: compiled regex (Class Attribute) 3134 A compiled regular expresion capturing digits at end of a string 3135 d2h: bool (Class Attribute) 3136 Convert D atoms to H on input; must also modify IC_Residue.accept_atoms 3137 missing: bool default False 3138 AtomKey __init__'d from string is probably missing, set this flag to 3139 note the issue (not set here) 3140 3141 Methods 3142 ------- 3143 altloc_match(other) 3144 Returns True if this AtomKey matches other AtomKey excluding altloc 3145 and occupancy fields 3146 3147 """ 3148 3149 atom_re = re.compile( 3150 r"^(?P<respos>-?\d+)(?P<icode>[A-Za-z])?" 3151 r"_(?P<resname>[a-zA-Z]+)_(?P<atm>[A-Za-z0-9]+)" 3152 r"(?:_(?P<altloc>\w))?(?:_(?P<occ>-?\d\.\d?\d?))?$" 3153 ) 3154 3155 endnum_re = re.compile(r"\D+(\d+)$") 3156 3157 # PDB altLoc = Character = [\w ] (any non-ctrl ASCII incl space) 3158 # PDB iCode = AChar = [A-Za-z] 3159 3160 fieldNames = ("respos", "icode", "resname", "atm", "altloc", "occ") 3161 fieldsDef = namedtuple( 3162 "fieldsDef", ["respos", "icode", "resname", "atm", "altloc", "occ"] 3163 ) 3164 fields = fieldsDef(0, 1, 2, 3, 4, 5) 3165 3166 d2h = False # convert D Deuterium to H Hydrogen on input 3167 # icd = {"icr": 0, "atm": 0, "lst": 0, "dct": 0, "_": 0, "else": 0} 3168 3169 def __init__( 3170 self, *args: Union[IC_Residue, Atom, List, Dict, str], **kwargs: str 3171 ) -> None: 3172 """Initialize AtomKey with residue and atom data. 3173 3174 Examples of acceptable input: 3175 (<IC_Residue>, 'CA', ...) : IC_Residue with atom info 3176 (<IC_Residue>, <Atom>) : IC_Residue with Biopython Atom 3177 ([52, None, 'G', 'CA', ...]) : list of ordered data fields 3178 (52, None, 'G', 'CA', ...) : multiple ordered arguments 3179 ({respos: 52, icode: None, atm: 'CA', ...}) : dict with fieldNames 3180 (respos: 52, icode: None, atm: 'CA', ...) : kwargs with fieldNames 3181 52_G_CA, 52B_G_CA, 52_G_CA_0.33, 52_G_CA_B_0.33 : id strings 3182 """ 3183 akl: List[Optional[str]] = [] 3184 # self.id = None 3185 for arg in args: 3186 if isinstance(arg, str): 3187 if "_" in arg: 3188 # AtomKey.icd["_"] += 1 3189 # got atom key string, recurse with regex parse 3190 m = self.atom_re.match(arg) 3191 if m is not None: 3192 if akl != []: # [] != akl: 3193 raise Exception( 3194 "Atom Key init full key not first argument: " + arg 3195 ) 3196 # for fn in AtomKey.fieldNames: 3197 # akl.append(m.group(fn)) 3198 # akl = [m.group(fn) for fn in AtomKey.fieldNames] 3199 akl = list(map(m.group, AtomKey.fieldNames)) 3200 else: 3201 # AtomKey.icd["else"] += 1 3202 akl.append(arg) 3203 3204 elif isinstance(arg, IC_Residue): 3205 # AtomKey.icd["icr"] += 1 3206 if akl != []: 3207 raise Exception("Atom Key init Residue not first argument") 3208 akl = list(arg.rbase) 3209 elif isinstance(arg, Atom): 3210 # AtomKey.icd["atm"] += 1 3211 if 3 != len(akl): 3212 raise Exception("Atom Key init Atom before Residue info") 3213 akl.append(arg.name) 3214 altloc = arg.altloc 3215 akl.append(altloc if altloc != " " else None) 3216 occ = float(arg.occupancy) 3217 akl.append(str(occ) if occ != 1.00 else None) 3218 elif isinstance(arg, list): 3219 # AtomKey.icd["lst"] += 1 3220 akl += arg 3221 elif isinstance(arg, dict): 3222 # AtomKey.icd["dct"] += 1 3223 for k in AtomKey.fieldNames: 3224 akl.append(arg.get(k, None)) 3225 else: 3226 raise Exception("Atom Key init not recognised") 3227 3228 # process kwargs, initialize occ and altloc to None 3229 # if not specified above 3230 # for i in range(6): 3231 # if len(akl) <= i: 3232 # fld = kwargs.get(AtomKey.fieldNames[i]) 3233 # if fld is not None: 3234 # akl.append(fld) 3235 3236 for i in range(len(akl), 6): 3237 if len(akl) <= i: 3238 fld = kwargs.get(AtomKey.fieldNames[i]) 3239 if fld is not None: 3240 akl.append(fld) 3241 3242 # tweak local akl to generate id string 3243 if isinstance(akl[0], int): 3244 akl[0] = str(akl[0]) # numeric residue position to string 3245 3246 # occNdx = AtomKey.fields.occ 3247 # if akl[occNdx] is not None: 3248 # akl[occNdx] = str(akl[occNdx]) # numeric occupancy to string 3249 3250 if self.d2h: 3251 atmNdx = AtomKey.fields.atm 3252 if akl[atmNdx][0] == "D": 3253 akl[atmNdx] = re.sub("D", "H", akl[atmNdx], count=1) 3254 3255 # unused option: 3256 # (self.respos, self.icode, self.resname, self.atm, self.occ, 3257 # self.altloc) = akl 3258 3259 self.id = "_".join( 3260 [ 3261 "".join(filter(None, akl[:2])), 3262 str(akl[2]), # exclude None 3263 "_".join(filter(None, akl[3:])), 3264 ] 3265 ) 3266 3267 # while len(akl) < 6: 3268 # akl.append(None) # add no altloc, occ if not specified 3269 akl += [None] * (6 - len(akl)) 3270 3271 self.akl = tuple(akl) 3272 self._hash = hash(self.akl) 3273 self.missing = False 3274 3275 def __repr__(self) -> str: 3276 """Repr string from id.""" 3277 return self.id 3278 3279 def __hash__(self) -> int: 3280 """Hash calculated at init from akl tuple.""" 3281 return self._hash 3282 3283 _backbone_sort_keys = {"N": 0, "CA": 1, "C": 2, "O": 3} 3284 3285 _sidechain_sort_keys = { 3286 "CB": 1, 3287 "CG": 2, 3288 "CG1": 2, 3289 "OG": 2, 3290 "OG1": 2, 3291 "SG": 2, 3292 "CG2": 3, 3293 "CD": 4, 3294 "CD1": 4, 3295 "SD": 4, 3296 "OD1": 4, 3297 "ND1": 4, 3298 "CD2": 5, 3299 "ND2": 5, 3300 "OD2": 5, 3301 "CE": 6, 3302 "NE": 6, 3303 "CE1": 6, 3304 "OE1": 6, 3305 "NE1": 6, 3306 "CE2": 7, 3307 "OE2": 7, 3308 "NE2": 7, 3309 "CE3": 8, 3310 "CZ": 9, 3311 "CZ2": 9, 3312 "NZ": 9, 3313 "NH1": 10, 3314 "OH": 10, 3315 "CZ3": 10, 3316 "CH2": 11, 3317 "NH2": 11, 3318 "OXT": 12, 3319 "H": 13, 3320 } 3321 3322 _greek_sort_keys = {"A": 0, "B": 1, "G": 2, "D": 3, "E": 4, "Z": 5, "H": 6} 3323 3324 def altloc_match(self, other: "AtomKey") -> bool: 3325 """Test AtomKey match other discounting occupancy and altloc.""" 3326 if isinstance(other, type(self)): 3327 return self.akl[:4] == other.akl[:4] 3328 else: 3329 return NotImplemented 3330 3331 def _cmp(self, other: "AtomKey") -> Tuple[int, int]: 3332 """Comparison function ranking self vs. other.""" 3333 akl_s = self.akl 3334 akl_o = other.akl 3335 atmNdx = AtomKey.fields.atm 3336 occNdx = AtomKey.fields.occ 3337 rsNdx = AtomKey.fields.respos 3338 # rsnNdx = AtomKey.fields.resname 3339 for i in range(6): 3340 s, o = akl_s[i], akl_o[i] 3341 if s != o: 3342 # insert_code, altloc can be None, deal with first 3343 if s is None and o is not None: 3344 # no insert code before named insert code 3345 return 0, 1 3346 elif o is None and s is not None: 3347 return 1, 0 3348 # now we know s, o not None 3349 s, o = cast(str, s), cast(str, o) 3350 3351 if atmNdx != i: 3352 # only sorting complications at atom level, occ. 3353 # otherwise respos, insertion code will trigger 3354 # before residue name 3355 if occNdx == i: 3356 oi = int(float(s) * 100) 3357 si = int(float(o) * 100) 3358 return si, oi # swap so higher occupancy comes first 3359 elif rsNdx == i: 3360 return int(s), int(o) 3361 else: # resname or altloc 3362 return ord(s), ord(o) 3363 3364 # atom names from here 3365 # backbone atoms before sidechain atoms 3366 3367 sb = self._backbone_sort_keys.get(s, None) 3368 ob = self._backbone_sort_keys.get(o, None) 3369 if sb is not None and ob is not None: 3370 return sb, ob 3371 elif sb is not None and ob is None: 3372 return 0, 1 3373 elif sb is None and ob is not None: 3374 return 1, 0 3375 # finished backbone and backbone vs. sidechain atoms 3376 3377 # sidechain vs sidechain, sidechain vs H 3378 ss = self._sidechain_sort_keys.get(s, None) 3379 os = self._sidechain_sort_keys.get(o, None) 3380 if ss is not None and os is not None: 3381 return ss, os 3382 elif ss is not None and os is None: 3383 return 0, 1 3384 elif ss is None and os is not None: 3385 return 1, 0 3386 3387 # amide single 'H' captured above in sidechain sort 3388 # now 'complex'' hydrogens after sidechain 3389 s0, s1, o0, o1 = s[0], s[1], o[0], o[1] 3390 s1d, o1d = s1.isdigit(), o1.isdigit() 3391 # if "H" == s0 == o0: # breaks cython 3392 if ("H" == s0) and ("H" == o0): 3393 3394 if (s1 == o1) or (s1d and o1d): 3395 enmS = self.endnum_re.findall(s) 3396 enmO = self.endnum_re.findall(o) 3397 if (enmS != []) and (enmO != []): 3398 return int(enmS[0]), int(enmO[0]) 3399 elif enmS == []: 3400 return 0, 1 3401 else: 3402 return 1, 0 3403 elif s1d: 3404 return 0, 1 3405 elif o1d: 3406 return 1, 0 3407 else: 3408 return (self._greek_sort_keys[s1], self._greek_sort_keys[o1]) 3409 return int(s), int(o) # raise exception? 3410 return 1, 1 3411 3412 def __ne__(self, other: object) -> bool: 3413 """Test for inequality.""" 3414 if isinstance(other, type(self)): 3415 return self.akl != other.akl 3416 else: 3417 return NotImplemented 3418 3419 def __eq__(self, other: object) -> bool: # type: ignore 3420 """Test for equality.""" 3421 if isinstance(other, type(self)): 3422 return self.akl == other.akl 3423 else: 3424 return NotImplemented 3425 3426 def __gt__(self, other: object) -> bool: 3427 """Test greater than.""" 3428 if isinstance(other, type(self)): 3429 rslt = self._cmp(other) 3430 return rslt[0] > rslt[1] 3431 else: 3432 return NotImplemented 3433 3434 def __ge__(self, other: object) -> bool: 3435 """Test greater or equal.""" 3436 if isinstance(other, type(self)): 3437 rslt = self._cmp(other) 3438 return rslt[0] >= rslt[1] 3439 else: 3440 return NotImplemented 3441 3442 def __lt__(self, other: object) -> bool: 3443 """Test less than.""" 3444 if isinstance(other, type(self)): 3445 rslt = self._cmp(other) 3446 return rslt[0] < rslt[1] 3447 else: 3448 return NotImplemented 3449 3450 def __le__(self, other: object) -> bool: 3451 """Test less or equal.""" 3452 if isinstance(other, type(self)): 3453 rslt = self._cmp(other) 3454 return rslt[0] <= rslt[1] 3455 else: 3456 return NotImplemented 3457 3458 3459def set_accuracy_95(num: float) -> float: 3460 """Reduce floating point accuracy to 9.5 (xxxx.xxxxx). 3461 3462 Used by Hedron and Dihedron classes writing PIC and SCAD files. 3463 :param float num: input number 3464 :returns: float with specified accuracy 3465 """ 3466 # return round(num, 5) # much slower 3467 return float(f"{num:9.5f}") 3468 3469 3470# only used for writing PDB atoms so inline in 3471# _pdb_atom_string(atm: Atom) 3472# def set_accuracy_83(num: float) -> float: 3473# """Reduce floating point accuracy to 8.3 (xxxxx.xxx). 3474# 3475# Used by IC_Residue class, matches PDB output format. 3476# :param float num: input number 3477# :returns: float with specified accuracy 3478# """ 3479# return float("{:8.3f}".format(num)) 3480 3481 3482# internal coordinates construction Exceptions 3483class HedronMatchError(Exception): 3484 """Cannot find hedron in residue for given key.""" 3485 3486 pass 3487 3488 3489class MissingAtomError(Exception): 3490 """Missing atom coordinates for hedron or dihedron.""" 3491 3492 pass 3493