1# Copyright 2019 by Robert T. Miller.  All rights reserved.
2# This file is part of the Biopython distribution and governed by your
3# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
4# Please see the LICENSE file that should have been included as part of this
5# package.
6
7"""Classes to support internal coordinates for protein structures.
8
9Internal coordinates comprise Psi, Phi and Omega dihedral angles along the
10protein backbone, Chi angles along the sidechains, and all 3-atom angles and
11bond lengths comprising a protein chain.  These routines can compute internal
12coordinates from atom XYZ coordinates, and compute atom XYZ coordinates from
13internal coordinates.
14
15Internal coordinates are defined on sequences of atoms which span
16residues or follow accepted nomenclature along sidechains.  To manage these
17sequences and support Biopython's disorder mechanisms, AtomKey specifiers are
18implemented to capture residue, atom and variant identification in a single
19object.  A Hedron object is specified as three sequential AtomKeys, comprising
20two bond lengths and the bond angle between them.  A Dihedron consists of four
21sequential AtomKeys, linking two Hedra with a dihedral angle between them.
22
23A Protein Internal Coordinate (.pic) file format is defined to capture
24sufficient detail to reproduce a PDB file from chain starting coordinates
25(first residue N, Ca, C XYZ coordinates) and remaining internal coordinates.
26These files are used internally to verify that a given structure can be
27regenerated from its internal coordinates.
28
29Internal coordinates may also be exported as OpenSCAD data arrays for
30generating 3D printed protein models.  OpenSCAD software is provided as
31proof-of-concept for generating such models.
32
33The following classes comprise the core functionality for processing internal
34coordinates and are sufficiently related and coupled to place them together in
35this module:
36
37IC_Chain: Extends Biopython Chain on .internal_coord attribute.
38    Manages connected sequence of residues and chain breaks; methods generally
39    apply IC_Residue methods along chain.
40
41IC_Residue: Extends for Biopython Residue on .internal_coord attribute.
42    Most control and methods of interest are in this class, see API.
43
44Dihedron: four joined atoms forming a dihedral angle.
45    Dihedral angle, homogeneous atom coordinates in local coordinate space,
46    references to relevant Hedra and IC_Residue.  Methods to compute
47    residue dihedral angles, bond angles and bond lengths.
48
49Hedron: three joined atoms forming a plane.
50    Contains homogeneous atom coordinates in local coordinate space as well as
51    bond lengths and angle between them.
52
53Edron: base class for Hedron and Dihedron classes.
54    Tuple of AtomKeys comprising child, string ID, mainchain membership boolean
55    and other routines common for both Hedra and Dihedra.  Implements rich
56    comparison.
57
58AtomKey: keys (dictionary and string) for referencing atom sequences.
59    Capture residue and disorder/occupancy information, provides a
60    no-whitespace key for .pic files, and implements rich comparison.
61
62Custom exception classes: HedronMatchError and MissingAtomError
63"""
64
65import re
66from collections import deque, namedtuple
67
68try:
69    import numpy  # type: ignore
70except ImportError:
71    from Bio import MissingPythonDependencyError
72
73    raise MissingPythonDependencyError(
74        "Install NumPy to build proteins from internal coordinates."
75    )
76
77from Bio.PDB.Atom import Atom, DisorderedAtom
78from Bio.PDB.Polypeptide import three_to_one
79
80from Bio.PDB.vectors import coord_space, multi_rot_Z, multi_rot_Y
81
82# , calc_dihedral, Vector
83from Bio.PDB.ic_data import ic_data_backbone, ic_data_sidechains
84from Bio.PDB.ic_data import ic_data_sidechain_extras, residue_atom_bond_state
85
86# for type checking only
87from typing import (
88    List,
89    Dict,
90    Set,
91    TextIO,
92    Union,
93    Tuple,
94    cast,
95    TYPE_CHECKING,
96    Optional,
97)
98
99if TYPE_CHECKING:
100    from Bio.PDB.Residue import Residue
101    from Bio.PDB.Chain import Chain
102
103HKT = Tuple["AtomKey", "AtomKey", "AtomKey"]  # Hedron key tuple
104DKT = Tuple["AtomKey", "AtomKey", "AtomKey", "AtomKey"]  # Dihedron Key Tuple
105EKT = Union[HKT, DKT]  # Edron Key Tuple
106BKT = Tuple["AtomKey", "AtomKey"]  # Bond Key Tuple
107
108# HACS = Tuple[numpy.array, numpy.array, numpy.array]  # Hedron Atom Coord Set
109HACS = numpy.array  # Hedron Atom Coord Set
110DACS = Tuple[
111    numpy.array, numpy.array, numpy.array, numpy.array
112]  # Dihedron Atom Coord Set
113
114
115class IC_Chain:
116    """Class to extend Biopython Chain with internal coordinate data.
117
118    Attributes
119    ----------
120    chain: biopython Chain object reference
121        The Chain object this extends
122
123    initNCaC: AtomKey indexed dictionary of N, Ca, C atom coordinates.
124        NCaCKeys start chain segments (first residue or after chain break).
125        These 3 atoms define the coordinate space for a contiguous chain segment,
126        as initially specified by PDB or mmCIF file.
127
128    MaxPeptideBond: **Class** attribute to detect chain breaks.
129        Override for fully contiguous chains with some very long bonds - e.g.
130        for 3D printing (OpenSCAD output) a structure with fully disordered
131        (missing) residues.
132
133    ordered_aa_ic_list: list of IC_Residue objects
134        IC_Residue objects ic algorithms can process (e.g. no waters)
135
136    hedra: dict indexed by 3-tuples of AtomKeys
137        Hedra forming residues in this chain
138
139    hedraLen: int length of hedra dict
140
141    hedraNdx: dict mapping hedra AtomKeys to numpy array data
142
143    dihedra: dict indexed by 4-tuples of AtomKeys
144        Dihedra forming (overlapping) this residue
145
146    dihedraLen: int length of dihedra dict
147
148    dihedraNdx: dict mapping dihedra AtomKeys to numpy array data
149
150    atomArray: numpy array of homogeneous atom coords for chain
151
152    atomArrayIndex: dict mapping AtomKeys to atomArray indexes
153
154    numpy arrays for vector processing of chain di/hedra:
155
156    hedraIC: length-angle-length entries for each hedron
157
158    hAtoms: homogeneous atom coordinates (3x4) of hedra, central atom at origin
159
160    hAtomsR: hAtoms in reverse order
161
162    hAtoms_needs_update: booleans indicating whether hAtoms represent hedraIC
163
164    dihedraIC: dihedral angles for each dihedron
165
166    dAtoms: homogeneous atom coordinates (4x4) of dihedra, second atom at origin
167
168    dAtoms_needs_update: booleans indicating whether dAtoms represent dihedraIC
169
170    Methods
171    -------
172    internal_to_atom_coordinates(verbose, start, fin)
173        Process ic data to Residue/Atom coordinates; calls assemble_residues()
174        followed by coords_to_structure()
175    assemble_residues(verbose, start, fin)
176        Generate IC_Residue atom coords from internal coordinates
177    coords_to_structure()
178        update Biopython Residue.Atom coords from IC_Residue coords for all
179        Residues with IC_Residue attributes
180    atom_to_internal_coordinates(verbose)
181        Calculate dihedrals, angles, bond lengths (internal coordinates) for
182        Atom data
183    link_residues()
184        Call link_dihedra() on each IC_Residue (needs rprev, rnext set)
185    set_residues()
186        Add .internal_coord attribute for all Residues in parent Chain, populate
187        ordered_aa_ic_list, set IC_Residue rprev, rnext or initNCaC coordinates
188    write_SCAD()
189        Write OpenSCAD matrices for internal coordinate data comprising chain
190
191    """
192
193    MaxPeptideBond = 1.4  # larger C-N distance than this is chain break
194
195    def __init__(self, parent: "Chain", verbose: bool = False) -> None:
196        """Initialize IC_Chain object, with or without residue/Atom data.
197
198        :param parent: Biopython Chain object
199            Chain object this extends
200        """
201        # type hinting parent as Chain leads to import cycle
202        self.chain = parent
203        self.ordered_aa_ic_list: List[IC_Residue] = []
204        self.initNCaC: Dict[Tuple[str], Dict["AtomKey", numpy.array]] = {}
205        self.sqMaxPeptideBond = IC_Chain.MaxPeptideBond * IC_Chain.MaxPeptideBond
206        # need init here for _gen_edra():
207        self.hedra = {}
208        # self.hedraNdx = {}
209        self.dihedra = {}
210        # self.dihedraNdx = {}
211        self.set_residues(verbose)  # no effect if no residues loaded
212
213    # return True if a0, a1 within supplied cutoff
214    def _atm_dist_chk(self, a0: Atom, a1: Atom, cutoff: float, sqCutoff: float) -> bool:
215        diff = a0.coord - a1.coord
216        sum = 0
217        for axis in diff:
218            if axis > cutoff:
219                # print("axis: ", axis)
220                return False
221            sum += axis * axis
222        if sum > sqCutoff:
223            # print("sq axis: ", sqrt(sum))  # need import math.sqrt
224            return False
225        return True
226
227    # return a string describing issue, or None if OK
228    def _peptide_check(self, prev: "Residue", curr: "Residue") -> Optional[str]:
229        if 0 == len(curr.child_dict):
230            # curr residue with no atoms => reading pic file, no break
231            return None
232        if (0 != len(curr.child_dict)) and (0 == len(prev.child_dict)):
233            # prev residue with no atoms, curr has atoms => reading pic file,
234            # have break
235            return "PIC data missing atoms"
236
237        # handle non-standard AA not marked as HETATM (1KQF, 1NTH)
238        if not prev.internal_coord.is20AA:
239            return "previous residue not standard amino acid"
240
241        # both biopython Residues have Atoms, so check distance
242        Natom = curr.child_dict.get("N", None)
243        pCatom = prev.child_dict.get("C", None)
244        if Natom is None or pCatom is None:
245            return f"missing {'previous C' if pCatom is None else 'N'} atom"
246
247        # confirm previous residue has all backbone atoms
248        pCAatom = prev.child_dict.get("CA", None)
249        pNatom = prev.child_dict.get("N", None)
250        if pNatom is None or pCAatom is None:
251            return "previous residue missing N or Ca"
252
253        tooFar = f"MaxPeptideBond ({IC_Chain.MaxPeptideBond} angstroms) exceeded"
254        if not Natom.is_disordered() and not pCatom.is_disordered():
255            dc = self._atm_dist_chk(
256                Natom, pCatom, IC_Chain.MaxPeptideBond, self.sqMaxPeptideBond
257            )
258            if dc:
259                return None
260            else:
261                return tooFar
262
263        Nlist: List[Atom] = []
264        pClist: List[Atom] = []
265        if Natom.is_disordered():
266            Nlist.extend(Natom.child_dict.values())
267        else:
268            Nlist = [Natom]
269        if pCatom.is_disordered():
270            pClist.extend(pCatom.child_dict.values())
271        else:
272            pClist = [pCatom]
273
274        for n in Nlist:
275            for c in pClist:
276                if self._atm_dist_chk(
277                    Natom, pCatom, IC_Chain.MaxPeptideBond, self.sqMaxPeptideBond
278                ):
279                    return None
280        return tooFar
281
282    def clear_ic(self):
283        """Clear residue internal_coord settings for this chain."""
284        for res in self.chain.get_residues():
285            res.internal_coord = None
286
287    def _add_residue(
288        self, res: "Residue", last_res: List, last_ord_res: List, verbose: bool = False
289    ) -> bool:
290        """Set rprev, rnext, determine chain break."""
291        if not res.internal_coord:
292            res.internal_coord = IC_Residue(res)
293            res.internal_coord.cic = self
294        if (
295            0 < len(last_res)
296            and last_ord_res == last_res
297            and self._peptide_check(last_ord_res[0].residue, res) is None
298        ):
299            # no chain break
300            for prev in last_ord_res:
301                prev.rnext.append(res.internal_coord)
302                res.internal_coord.rprev.append(prev)
303            return True
304        elif all(atm in res.child_dict for atm in ("N", "CA", "C")):
305            # chain break, save coords for restart
306            if verbose and len(last_res) != 0:  # not first residue
307                if last_ord_res != last_res:
308                    reason = "disordered residues after {last_ord_res.pretty_str()}"
309                else:
310                    reason = cast(
311                        str, self._peptide_check(last_ord_res[0].residue, res)
312                    )
313                print(
314                    f"chain break at {res.internal_coord.pretty_str()} due to {reason}"
315                )
316            initNCaC: Dict["AtomKey", numpy.array] = {}
317            ric = res.internal_coord
318            for atm in ("N", "CA", "C"):
319                bpAtm = res.child_dict[atm]
320                if bpAtm.is_disordered():
321                    for altAtom in bpAtm.child_dict.values():
322                        ak = AtomKey(ric, altAtom)
323                        initNCaC[ak] = IC_Residue.atm241(altAtom.coord)
324                else:
325                    ak = AtomKey(ric, bpAtm)
326                    initNCaC[ak] = IC_Residue.atm241(bpAtm.coord)
327            self.initNCaC[ric.rbase] = initNCaC
328            return True
329        elif (
330            0 == len(res.child_list)
331            and self.chain.child_list[0].id == res.id
332            and res.internal_coord.is20AA
333        ):
334            # this is first residue, no atoms at all, is std amino acid
335            # conclude reading pic file with no N-Ca-C coords
336            return True
337        # chain break but do not have N, Ca, C coords to restart from
338        return False
339
340    def set_residues(self, verbose: bool = False) -> None:
341        """Initialize internal_coord data for loaded Residues.
342
343        Add IC_Residue as .internal_coord attribute for each Residue in parent
344        Chain; populate ordered_aa_ic_list with IC_Residue references for residues
345        which can be built (amino acids and some hetatms); set rprev and rnext
346        on each sequential IC_Residue, populate initNCaC at start and after
347        chain breaks.
348        """
349        # ndx = 0
350        last_res: List["IC_Residue"] = []
351        last_ord_res: List["IC_Residue"] = []
352
353        for res in self.chain.get_residues():
354            # select only not hetero or accepted hetero
355            if res.id[0] == " " or res.id[0] in IC_Residue.accept_resnames:
356                this_res: List["IC_Residue"] = []
357                if 2 == res.is_disordered():
358                    # print('disordered res:', res.is_disordered(), res)
359                    for r in res.child_dict.values():
360                        if self._add_residue(r, last_res, last_ord_res, verbose):
361                            this_res.append(r.internal_coord)
362                else:
363                    if self._add_residue(res, last_res, last_ord_res, verbose):
364                        this_res.append(res.internal_coord)
365
366                if 0 < len(this_res):
367                    self.ordered_aa_ic_list.extend(this_res)
368                    last_ord_res = this_res
369                last_res = this_res
370
371    def link_residues(self) -> None:
372        """link_dihedra() for each IC_Residue; needs rprev, rnext set.
373
374        Called by PICIO:read_PIC() after finished reading chain
375        """
376        for ric in self.ordered_aa_ic_list:
377            ric.cic = self
378            ric.link_dihedra()
379
380    def assemble_residues(
381        self,
382        verbose: bool = False,
383        start: Optional[int] = None,
384        fin: Optional[int] = None,
385    ) -> None:
386        """Generate IC_Residue atom coords from internal coordinates.
387
388        Filter positions between start and fin if set, find appropriate start
389        coordinates for each residue and pass to IC_Residue.assemble()
390
391        :param verbose bool: default False
392            describe runtime problems
393        :param: start, fin lists
394            sequence position, insert code for begin, end of subregion to
395            process
396
397        """
398        for ric in self.ordered_aa_ic_list:
399            ric.clear_transforms()
400
401        for ric in self.ordered_aa_ic_list:
402            if not hasattr(ric, "NCaCKey"):
403                if verbose:
404                    print(
405                        f"no assembly for {str(ric)} due to missing N, Ca and/or C atoms"
406                    )
407                continue
408            respos = ric.residue.id[1]
409            if start and start > respos:
410                continue
411            if fin and fin < respos:
412                continue
413
414            ric.atom_coords = cast(
415                Dict[AtomKey, numpy.array], ric.assemble(verbose=verbose)
416            )
417            if ric.atom_coords:
418                ric.ak_set = set(ric.atom_coords.keys())
419
420    def coords_to_structure(self) -> None:
421        """Promote all ic atom_coords to Biopython Residue/Atom coords.
422
423        IC atom_coords are homogeneous [4], Biopython atom coords are XYZ [3].
424        """
425        self.ndx = 0
426        for res in self.chain.get_residues():
427            if 2 == res.is_disordered():
428                for r in res.child_dict.values():
429                    if r.internal_coord:
430                        if r.internal_coord.atom_coords:
431                            r.internal_coord.coords_to_residue()
432                        elif (
433                            r.internal_coord.rprev
434                            and r.internal_coord.rprev[0].atom_coords
435                        ):
436                            r.internal_coord.rprev[0].coords_to_residue(rnext=True)
437            elif res.internal_coord:
438                if res.internal_coord.atom_coords:
439                    res.internal_coord.coords_to_residue()
440                elif (
441                    res.internal_coord.rprev and res.internal_coord.rprev[0].atom_coords
442                ):
443                    res.internal_coord.rprev[0].coords_to_residue(rnext=True)
444
445    def init_edra(self) -> None:
446        """Create chain level di/hedra arrays.
447
448        If called by read_PIC, self.di/hedra = {} and object tree has IC data.
449        -> build chain arrays from IC data
450
451        If called at start of atom_to_internal_coords, self.di/hedra fully
452        populated.  -> create empty chain numpy arrays
453
454        In both cases, fix di/hedra object attributes to be views on
455        chain-level array data
456        """
457        # hedra:
458
459        if self.hedra == {}:
460            # loaded objects from PIC file, so no chain-level hedra
461            hLAL = {}
462            for ric in self.ordered_aa_ic_list:
463                for k, h in ric.hedra.items():
464                    self.hedra[k] = h
465                    hLAL[k] = h.lal
466            self.hedraLen = len(self.hedra)
467            self.hedraIC = numpy.array(tuple(hLAL.values()))
468        else:
469            # atom_to_internal_coords() populates self.hedra via _gen_edra()
470            # a_to_ic will set ic so create empty
471            self.hedraLen = len(self.hedra)
472            self.hedraIC = numpy.empty((self.hedraLen, 3), dtype=numpy.float64)
473
474        self.hedraNdx = dict(zip(self.hedra.keys(), range(len(self.hedra))))
475
476        self.hAtoms: numpy.ndarray = numpy.zeros(
477            (self.hedraLen, 3, 4), dtype=numpy.float64
478        )
479        self.hAtoms[:, :, 3] = 1.0  # homogeneous
480        self.hAtomsR: numpy.ndarray = numpy.copy(self.hAtoms)
481        self.hAtoms_needs_update = numpy.full(self.hedraLen, True)
482
483        for ric in self.ordered_aa_ic_list:
484            for k, h in ric.hedra.items():
485                # all h.lal become views on hedraIC
486                h.lal = self.hedraIC[self.hedraNdx[k]]
487
488        # dihedra:
489
490        if self.dihedra == {}:
491            # loaded objects from PIC file, so no chain-level hedra
492            dic = {}
493            for ric in self.ordered_aa_ic_list:
494                for k, d in ric.dihedra.items():
495                    self.dihedra[k] = d
496                    dic[k] = d.angle
497            self.dihedraIC = numpy.array(tuple(dic.values()))
498            self.dihedraICr = numpy.deg2rad(self.dihedraIC)
499            self.dihedraLen = len(self.dihedra)
500        else:
501            # atom_to_internal_coords() populates self.hedra via _gen_edra()
502            # a_to_ic will set ic so create empty
503            self.dihedraLen = len(self.dihedra)
504            self.dihedraIC = numpy.empty(self.dihedraLen)
505            self.dihedraICr = numpy.empty(self.dihedraLen)
506
507        self.dihedraNdx = dict(zip(self.dihedra.keys(), range(len(self.dihedra))))
508
509        self.dAtoms: numpy.ndarray = numpy.empty(
510            (self.dihedraLen, 4, 4), dtype=numpy.float64
511        )
512        self.dAtoms[:, :, 3] = 1.0  # homogeneous
513        self.a4_pre_rotation = numpy.empty((self.dihedraLen, 4))
514
515        for k, d in self.dihedra.items():
516            d.initial_coords = self.dAtoms[self.dihedraNdx[k]]
517            d.a4_pre_rotation = self.a4_pre_rotation[self.dihedraNdx[k]]
518
519        self.dAtoms_needs_update = numpy.full(self.dihedraLen, True)
520
521        self.dRev = numpy.array(tuple(d.reverse for d in self.dihedra.values()))
522        self.dFwd = self.dRev != True  # noqa: E712
523        self.dH1ndx = numpy.array(
524            tuple(self.hedraNdx[d.h1key] for d in self.dihedra.values())
525        )
526        self.dH2ndx = numpy.array(
527            tuple(self.hedraNdx[d.h2key] for d in self.dihedra.values())
528        )
529
530    # @profile
531    def init_atom_coords(self) -> None:
532        """Set chain level di/hedra initial coord arrays from IC_Residue data."""
533        if not numpy.all(self.dAtoms_needs_update):
534            self.dAtoms_needs_update |= (self.hAtoms_needs_update[self.dH1ndx]) | (
535                self.hAtoms_needs_update[self.dH2ndx]
536            )
537
538        if numpy.any(self.hAtoms_needs_update):
539            # hedra initial coords
540
541            # supplementary angle radian: angles which add to 180 are supplementary
542            sar = numpy.deg2rad(
543                180.0 - self.hedraIC[:, 1][self.hAtoms_needs_update]
544            )  # angle
545            sinSar = numpy.sin(sar)
546            cosSarN = numpy.cos(sar) * -1
547
548            # a2 is len3 up from a2 on Z axis, X=Y=0
549            self.hAtoms[:, 2, 2][self.hAtoms_needs_update] = self.hedraIC[:, 2][
550                self.hAtoms_needs_update
551            ]
552
553            # a0 X is sin( sar ) * len12
554            self.hAtoms[:, 0, 0][self.hAtoms_needs_update] = (
555                sinSar * self.hedraIC[:, 0][self.hAtoms_needs_update]
556            )
557
558            # a0 Z is -(cos( sar ) * len12)
559            # (assume angle always obtuse, so a0 is in -Z)
560            self.hAtoms[:, 0, 2][self.hAtoms_needs_update] = (
561                cosSarN * self.hedraIC[:, 0][self.hAtoms_needs_update]
562            )
563
564            # same again but 'reversed' : a0 on Z axis, a1 at origin, a2 in -Z
565
566            # a0r is len12 up from a1 on Z axis, X=Y=0
567            self.hAtomsR[:, 0, 2][self.hAtoms_needs_update] = self.hedraIC[:, 0][
568                self.hAtoms_needs_update
569            ]
570            # a2r X is sin( sar ) * len23
571            self.hAtomsR[:, 2, 0][self.hAtoms_needs_update] = (
572                sinSar * self.hedraIC[:, 2][self.hAtoms_needs_update]
573            )
574            # a2r Z is -(cos( sar ) * len23)
575            self.hAtomsR[:, 2, 2][self.hAtoms_needs_update] = (
576                cosSarN * self.hedraIC[:, 2][self.hAtoms_needs_update]
577            )
578
579            self.hAtoms_needs_update[...] = False
580
581        # dihedra initial coords
582
583        dhlen = numpy.sum(self.dAtoms_needs_update)  # self.dihedraLen
584
585        # full size masks:
586        mdFwd = self.dFwd & self.dAtoms_needs_update
587        mdRev = self.dRev & self.dAtoms_needs_update
588
589        # update size masks
590        udFwd = self.dFwd[self.dAtoms_needs_update]
591        udRev = self.dRev[self.dAtoms_needs_update]
592
593        # only 4th atom takes work:
594        # pick 4th atom based on rev flag
595        self.a4_pre_rotation[mdRev] = self.hAtoms[self.dH2ndx, 0][mdRev]
596        self.a4_pre_rotation[mdFwd] = self.hAtomsR[self.dH2ndx, 2][mdFwd]
597
598        # numpy multiply, add operations below intermediate array but out= not
599        # working with masking:
600        self.a4_pre_rotation[:, 2][self.dAtoms_needs_update] = numpy.multiply(
601            self.a4_pre_rotation[:, 2][self.dAtoms_needs_update], -1
602        )  # a4 to +Z
603
604        a4shift = numpy.empty(dhlen)
605        a4shift[udRev] = self.hedraIC[self.dH2ndx, 2][mdRev]  # len23
606        a4shift[udFwd] = self.hedraIC[self.dH2ndx, 0][mdFwd]  # len12
607
608        self.a4_pre_rotation[:, 2][self.dAtoms_needs_update] = numpy.add(
609            self.a4_pre_rotation[:, 2][self.dAtoms_needs_update], a4shift,
610        )  # so a2 at origin
611
612        # build rz rotation matrix for dihedral angle
613        rz = multi_rot_Z(self.dihedraICr[self.dAtoms_needs_update])
614
615        # p = numpy.matmul(mt, dha[:, 0].reshape(-1, 4, 1)).reshape(-1, 4)
616        a4rot = numpy.matmul(
617            rz, self.a4_pre_rotation[self.dAtoms_needs_update][:].reshape(-1, 4, 1)
618        ).reshape(-1, 4)
619        # a4rot = rz.dot(self.a4_pre_rotation) # numpy.matmul(self.a4_pre_rotation, rz)
620
621        # now build dihedra initial coords
622
623        dH1atoms = self.hAtoms[self.dH1ndx]  # fancy indexing so
624        dH1atomsR = self.hAtomsR[self.dH1ndx]  # these copy not view
625
626        self.dAtoms[:, :3][mdFwd] = dH1atoms[mdFwd]
627        self.dAtoms[:, 3][mdFwd] = a4rot[udFwd]  # [self.dFwd]
628
629        self.dAtoms[:, :3][mdRev] = dH1atomsR[:, 2::-1][mdRev]
630        self.dAtoms[:, 3][mdRev] = a4rot[udRev]  # [self.dRev]
631
632        self.dAtoms_needs_update[...] = False
633
634    def internal_to_atom_coordinates(
635        self,
636        verbose: bool = False,
637        start: Optional[int] = None,
638        fin: Optional[int] = None,
639        promote: Optional[bool] = True,
640    ) -> None:
641        """Process, IC data to Residue/Atom coords.
642
643        Not yet vectorized.
644
645        :param verbose bool: default False
646            describe runtime problems
647        :param: start, fin lists
648            sequence position, insert code for begin, end of subregion to
649            process
650        :param promote bool: default True
651            If True (the default) copy result atom XYZ coordinates to
652            Biopython Atom objects for access by other Biopython methods;
653            otherwise, updated atom coordinates must be accessed through
654            IC_Residue and hedron objects.
655        """
656        if self.dihedra == {}:
657            return  # escape if nothing to process
658
659        self.init_atom_coords()
660        self.assemble_residues(
661            verbose=verbose, start=start, fin=fin
662        )  # internal to XYZ coordinates
663        if promote:
664            self.coords_to_structure()  # promote to BioPython Residue/Atom
665
666    # @profile
667    def atom_to_internal_coordinates(self, verbose: bool = False) -> None:
668        """Calculate dihedrals, angles, bond lengths for Atom data.
669
670        :param verbose bool: default False
671            describe runtime problems
672        """
673        hedraAtomDict = {}
674        dihedraAtomDict = {}
675        hInDset = set()
676        hedraDict2 = {}
677        gCBdihedra = set()
678
679        for ric in self.ordered_aa_ic_list:
680            ric.atom_to_internal_coordinates(verbose=verbose)  # builds di/hedra objects
681
682        self.init_edra()
683
684        for ric in self.ordered_aa_ic_list:
685            for k, d in ric.dihedra.items():
686                hInDset.update((d.h1key, d.h2key))
687                try:
688                    # get tuple of atom_coords from ric dict
689                    dihedraAtomDict[k] = d.gen_acs(ric.atom_coords)
690                except KeyError:
691                    gCBdihedra.add(d)  # no atom_coords yet for gly CB
692                    # init to rough approximation, overwrite later
693                    # dihedron = O-C-Ca-Cb
694                    # h1 = Ca-C-O (reversed)
695                    # h2 = Cb-Ca-C (reversed)
696                    # need dihedron atom coords all forward
697                    h1 = d.hedron1.gen_acs(ric.atom_coords)
698                    h1 = numpy.flipud(h1)  # reverse h1 coords for building dihedron
699                    xgcb = numpy.append(h1, [h1[2]], axis=0)
700                    xgcb[3, 0] = xgcb[3, 0] + 1.0
701                    dihedraAtomDict[k] = xgcb
702
703            for k, h in ric.hedra.items():
704                if k not in hInDset:
705                    # print("inaccessible hedron outside dihedron: ", h)
706                    try:
707                        hedraAtomDict[k] = h.gen_acs(ric.atom_coords)
708                    except KeyError:  # gly CB
709                        hedraAtomDict[k] = numpy.array(
710                            [[1, 2, 3, 1], [2, 2, 3, 1], [3, 2, 3, 1]]
711                        )
712
713        if self.dihedra == {}:
714            return  # escape if no hedra loaded for this chain
715
716        if hedraAtomDict != {}:
717            # some hedra not in dihedra to process
718            # issue from alternate CB path, triggered by residue sidechain path not
719            # including n-ca-cb-xg
720            # not needed to build chain but include for consistency / statistics
721            hedraDict2 = {k: h for k, h in self.hedra.items() if k not in hInDset}
722            lh2a = len(hedraDict2)
723            if lh2a > 0:
724                h2a = numpy.array(tuple(hedraAtomDict.values()))
725                h2ai = dict(zip(hedraDict2.keys(), range(lh2a)))
726
727                # get dad for hedra
728                h_a0a1 = numpy.linalg.norm(h2a[:, 0] - h2a[:, 1], axis=1)
729                h_a1a2 = numpy.linalg.norm(h2a[:, 1] - h2a[:, 2], axis=1)
730                h_a0a2 = numpy.linalg.norm(h2a[:, 0] - h2a[:, 2], axis=1)
731                h_a0a1a2 = numpy.rad2deg(
732                    numpy.arccos(
733                        ((h_a0a1 * h_a0a1) + (h_a1a2 * h_a1a2) - (h_a0a2 * h_a0a2))
734                        / (2 * h_a0a1 * h_a1a2)
735                    )
736                )
737
738                for k, h in hedraDict2.items():
739                    hndx = h2ai[k]
740                    h.lal[:] = (h_a0a1[hndx], h_a0a1a2[hndx], h_a1a2[hndx])
741
742        # now process dihedra
743        dLen = self.dihedraLen
744        dha = numpy.array(tuple(dihedraAtomDict.values()))
745        dhai = dict(zip(self.dihedra.keys(), range(dLen)))
746
747        # get dadad dist-angle-dist-angle-dist for dihedra
748        a0a1 = numpy.linalg.norm(dha[:, 0] - dha[:, 1], axis=1)
749        a1a2 = numpy.linalg.norm(dha[:, 1] - dha[:, 2], axis=1)
750        a2a3 = numpy.linalg.norm(dha[:, 2] - dha[:, 3], axis=1)
751
752        a0a2 = numpy.linalg.norm(dha[:, 0] - dha[:, 2], axis=1)
753        a1a3 = numpy.linalg.norm(dha[:, 1] - dha[:, 3], axis=1)
754        sqr_a1a2 = numpy.multiply(a1a2, a1a2)
755
756        a0a1a2 = numpy.rad2deg(
757            numpy.arccos(((a0a1 * a0a1) + sqr_a1a2 - (a0a2 * a0a2)) / (2 * a0a1 * a1a2))
758        )
759
760        a1a2a3 = numpy.rad2deg(
761            numpy.arccos((sqr_a1a2 + (a2a3 * a2a3) - (a1a3 * a1a3)) / (2 * a1a2 * a2a3))
762        )
763
764        # develop coord_space matrix for 1st 3 atoms of dihedra:
765
766        # build tm translation matrix: atom1 to origin
767        tm = numpy.empty((dLen, 4, 4))
768        tm[...] = numpy.identity(4)
769        tm[:, 0:3, 3] = -dha[:, 1, 0:3]
770
771        # directly translate a2 into new space using a1
772        p = dha[:, 2] - dha[:, 1]
773
774        # get spherical coords of translated a2 (p)
775        r = numpy.linalg.norm(p, axis=1)
776        azimuth = numpy.arctan2(p[:, 1], p[:, 0])
777        polar_angle = numpy.arccos(numpy.divide(p[:, 2], r, where=r != 0))
778
779        # build rz rotation matrix: translated a2 -azimuth around Z
780        # (enables next step rotating around Y to align with Z)
781        rz = multi_rot_Z(-azimuth)
782
783        # build ry rotation matrix: translated a2 -polar_angle around Y
784        ry = multi_rot_Y(-polar_angle)
785
786        # mt completes a1-a2 on Z-axis, still need to align a0 with XZ plane
787        mt = numpy.matmul(ry, numpy.matmul(rz, tm))
788
789        # transform a0 to mt space
790        p = numpy.matmul(mt, dha[:, 0].reshape(-1, 4, 1)).reshape(-1, 4)
791        # print("mt[0]:\n", mt[0], "\ndha[0][0] (a0):\n", dha[0][0], "\np[0]:\n", p[0])
792
793        # get azimuth of translated a0
794        azimuth2 = numpy.arctan2(p[:, 1], p[:, 0])
795
796        # build rotation matrix rz2 to rotate a0 -azimuth about Z to align with X
797        rz2 = multi_rot_Z(-azimuth2)
798
799        # update mt to be complete transform into hedron coordinate space
800        mt = numpy.matmul(rz2, mt[:])
801
802        # now put atom 4 into that coordinate space and read dihedral as azimuth
803        do4 = numpy.matmul(mt, dha[:, 3].reshape(-1, 4, 1)).reshape(-1, 4)
804
805        numpy.arctan2(do4[:, 1], do4[:, 0], out=self.dihedraICr)
806        numpy.rad2deg(self.dihedraICr, out=self.dihedraIC)
807
808        # build hedra arrays
809
810        hIC = self.hedraIC
811        hNdx = self.hedraNdx
812
813        for k, d in self.dihedra.items():
814            dndx = dhai[k]
815            # d.angle = dh1d[dndx]
816            rev, hed1, hed2 = (d.reverse, d.hedron1, d.hedron2)
817            h1ndx, h2ndx = (hNdx[d.h1key], hNdx[d.h2key])
818            if not rev:
819                hIC[h1ndx, :] = (a0a1[dndx], a0a1a2[dndx], a1a2[dndx])
820                hIC[h2ndx, :] = (a1a2[dndx], a1a2a3[dndx], a2a3[dndx])
821                # hed1.len12 = a0a1[dndx]
822                # hed1.len23 = hed2.len12 = a1a2[dndx]
823                # hed2.len23 = a2a3[dndx]
824            else:
825                hIC[h1ndx, :] = (a1a2[dndx], a0a1a2[dndx], a0a1[dndx])
826                hIC[h2ndx, :] = (a2a3[dndx], a1a2a3[dndx], a1a2[dndx])
827                # hed1.len23 = a0a1[dndx]
828                # hed1.len12 = hed2.len23 = a1a2[dndx]
829                # hed2.len12 = a2a3[dndx]
830
831            hed1.lal = hIC[h1ndx]
832            hed2.lal = hIC[h2ndx]
833
834            # hed1.angle = a0a1a2[dndx]
835            # hed2.angle = a1a2a3[dndx]
836
837        for gCBd in gCBdihedra:
838            gCBd.ic_residue.build_glyCB(gCBd)
839
840    @staticmethod
841    def _write_mtx(fp: TextIO, mtx: numpy.array) -> None:
842        fp.write("[ ")
843        rowsStarted = False
844        for row in mtx:
845            if rowsStarted:
846                fp.write(", [ ")
847            else:
848                fp.write("[ ")
849                rowsStarted = True
850            colsStarted = False
851            for col in row:
852                if colsStarted:
853                    fp.write(", " + str(col))
854                else:
855                    fp.write(str(col))
856                    colsStarted = True
857            fp.write(" ]")  # close row
858        fp.write(" ]")
859
860    @staticmethod
861    def _writeSCAD_dihed(
862        fp: TextIO, d: "Dihedron", hedraNdx: Dict, hedraSet: Set[EKT]
863    ) -> None:
864        fp.write(
865            "[ {:9.5f}, {}, {}, {}, ".format(
866                d.angle, hedraNdx[d.h1key], hedraNdx[d.h2key], (1 if d.reverse else 0)
867            )
868        )
869        fp.write(
870            "{}, {}, ".format(
871                (0 if d.h1key in hedraSet else 1), (0 if d.h2key in hedraSet else 1)
872            )
873        )
874        fp.write(
875            "    // {} [ {} -- {} ] {}\n".format(
876                d.id, d.hedron1.id, d.hedron2.id, ("reversed" if d.reverse else "")
877            )
878        )
879        fp.write("        ")
880        IC_Chain._write_mtx(fp, d.rcst)
881        fp.write(" ]")  # close residue array of dihedra entry
882
883    def write_SCAD(self, fp: TextIO, backboneOnly: bool) -> None:
884        """Write self to file fp as OpenSCAD data matrices.
885
886        Works with write_SCAD() and embedded OpenSCAD routines in SCADIO.py.
887        The OpenSCAD code explicitly creates spheres and cylinders to
888        represent atoms and bonds in a 3D model.  Options are available
889        to support rotatable bonds and magnetic hydrogen bonds.
890
891        Matrices are written to link, enumerate and describe residues,
892        dihedra, hedra, and chains, mirroring contents of the relevant IC_*
893        data structures.
894
895        The OpenSCAD matrix of hedra has additional information as follows:
896
897        * the atom and bond state (single, double, resonance) are logged
898          so that covalent radii may be used for atom spheres in the 3D models
899
900        * bonds and atoms are tracked so that each is only created once
901
902        * bond options for rotation and magnet holders for hydrogen bonds
903          may be specified
904
905        Note the application of IC_Chain attribute MaxPeptideBond: missing
906        residues may be linked (joining chain segments with arbitrarily long
907        bonds) by setting this to a large value.
908
909        All ALTLOC (disordered) residues and atoms are written to the output model.
910        """
911        fp.write(f'   "{self.chain.id}", // chain id\n')
912
913        # generate dict for all hedra to eliminate redundant references
914        hedra = {}
915        for ric in self.ordered_aa_ic_list:
916            respos, resicode = ric.residue.id[1:]
917            for k, h in ric.hedra.items():
918                hedra[k] = h
919        atomSet: Set[AtomKey] = set()
920        bondDict: Dict = {}  # set()
921        hedraSet: Set[EKT] = set()
922        ndx = 0
923        hedraNdx = {}
924
925        for hk in sorted(hedra):
926            hedraNdx[hk] = ndx
927            ndx += 1
928
929        # write residue dihedra table
930
931        fp.write("   [  // residue array of dihedra")
932        resNdx = {}
933        dihedraNdx = {}
934        ndx = 0
935        chnStarted = False
936        for ric in self.ordered_aa_ic_list:
937            if "O" not in ric.akc:
938                if ric.lc != "G" and ric.lc != "A":
939                    print(
940                        f"Unable to generate complete sidechain for {ric} {ric.lc} missing O atom"
941                    )
942            resNdx[ric] = ndx
943            if chnStarted:
944                fp.write("\n     ],")
945            else:
946                chnStarted = True
947            fp.write(
948                "\n     [ // "
949                + str(ndx)
950                + " : "
951                + str(ric.residue.id)
952                + " "
953                + ric.lc
954                + " backbone\n"
955            )
956            ndx += 1
957            # assemble with no start position, return transform matrices
958            ric.clear_transforms()
959            ric.assemble(resetLocation=True)
960            ndx2 = 0
961            started = False
962            for i in range(1 if backboneOnly else 2):
963                if i == 1:
964                    cma = "," if started else ""
965                    fp.write(
966                        f"{cma}\n       // {str(ric.residue.id)} {ric.lc} sidechain\n"
967                    )
968                started = False
969                for dk, d in sorted(ric.dihedra.items()):
970                    if d.h2key in hedraNdx and (
971                        (i == 0 and d.is_backbone()) or (i == 1 and not d.is_backbone())
972                    ):
973                        if d.rcst is not None:
974                            if started:
975                                fp.write(",\n")
976                            else:
977                                started = True
978                            fp.write("      ")
979                            IC_Chain._writeSCAD_dihed(fp, d, hedraNdx, hedraSet)
980                            dihedraNdx[dk] = ndx2
981                            hedraSet.add(d.h1key)
982                            hedraSet.add(d.h2key)
983                            ndx2 += 1
984                        else:
985                            print(
986                                f"Atom missing for {d.id3}-{d.id32}, OpenSCAD chain may be discontiguous"
987                            )
988        fp.write("   ],")  # end of residue entry dihedra table
989        fp.write("\n  ],\n")  # end of all dihedra table
990
991        # write hedra table
992
993        fp.write("   [  //hedra\n")
994        for hk in sorted(hedra):
995            hed = hedra[hk]
996            fp.write("     [ ")
997            fp.write(
998                "{:9.5f}, {:9.5f}, {:9.5f}".format(
999                    set_accuracy_95(hed.lal[0]),  # len12
1000                    set_accuracy_95(hed.lal[1]),  # angle
1001                    set_accuracy_95(hed.lal[2]),  # len23
1002                )
1003            )
1004            atom_str = ""  # atom and bond state
1005            atom_done_str = ""  # create each only once
1006            akndx = 0
1007            for ak in hed.aks:
1008                atm = ak.akl[AtomKey.fields.atm]
1009                res = ak.akl[AtomKey.fields.resname]
1010                # try first for generic backbone/Cbeta atoms
1011                ab_state_res = residue_atom_bond_state["X"]
1012                ab_state = ab_state_res.get(atm, None)
1013                if "H" == atm[0]:
1014                    ab_state = "Hsb"
1015                if ab_state is None:
1016                    # not found above, must be sidechain atom
1017                    ab_state_res = residue_atom_bond_state.get(res, None)
1018                    if ab_state_res is not None:
1019                        ab_state = ab_state_res.get(atm, "")
1020                    else:
1021                        ab_state = ""
1022                atom_str += ', "' + ab_state + '"'
1023
1024                if ak in atomSet:
1025                    atom_done_str += ", 0"
1026                elif hk in hedraSet:
1027                    if (
1028                        hasattr(hed, "flex_female_1") or hasattr(hed, "flex_male_1")
1029                    ) and akndx != 2:
1030                        if akndx == 0:
1031                            atom_done_str += ", 0"
1032                        elif akndx == 1:
1033                            atom_done_str += ", 1"
1034                            atomSet.add(ak)
1035                    elif (
1036                        hasattr(hed, "flex_female_2") or hasattr(hed, "flex_male_2")
1037                    ) and akndx != 0:
1038                        if akndx == 2:
1039                            atom_done_str += ", 0"
1040                        elif akndx == 1:
1041                            atom_done_str += ", 1"
1042                            atomSet.add(ak)
1043                    else:
1044                        atom_done_str += ", 1"
1045                        atomSet.add(ak)
1046                else:
1047                    atom_done_str += ", 0"
1048                akndx += 1
1049            fp.write(atom_str)
1050            fp.write(atom_done_str)
1051
1052            # specify bond options
1053
1054            bond = []
1055            bond.append(hed.aks[0].id + "-" + hed.aks[1].id)
1056            bond.append(hed.aks[1].id + "-" + hed.aks[2].id)
1057            b0 = True
1058            for b in bond:
1059                wstr = ""
1060                if b in bondDict and bondDict[b] == "StdBond":
1061                    wstr = ", 0"
1062                elif hk in hedraSet:
1063                    bondType = "StdBond"
1064                    if b0:
1065                        if hasattr(hed, "flex_female_1"):
1066                            bondType = "FemaleJoinBond"
1067                        elif hasattr(hed, "flex_male_1"):
1068                            bondType = "MaleJoinBond"
1069                        elif hasattr(hed, "skinny_1"):
1070                            bondType = "SkinnyBond"
1071                        elif hasattr(hed, "hbond_1"):
1072                            bondType = "HBond"
1073                    else:
1074                        if hasattr(hed, "flex_female_2"):
1075                            bondType = "FemaleJoinBond"
1076                        elif hasattr(hed, "flex_male_2"):
1077                            bondType = "MaleJoinBond"
1078                        # elif hasattr(hed, 'skinny_2'):  # unused
1079                        #     bondType = 'SkinnyBond'
1080                        elif hasattr(hed, "hbond_2"):
1081                            bondType = "HBond"
1082                    if b in bondDict:
1083                        bondDict[b] = "StdBond"
1084                    else:
1085                        bondDict[b] = bondType
1086                    wstr = ", " + str(bondType)
1087                else:
1088                    wstr = ", 0"
1089                fp.write(wstr)
1090                b0 = False
1091            akl = hed.aks[0].akl
1092            fp.write(
1093                ', "'
1094                + akl[AtomKey.fields.resname]
1095                + '", '
1096                + akl[AtomKey.fields.respos]
1097                + ', "'
1098                + hed.dh_class
1099                + '"'
1100            )
1101            fp.write(" ], // " + str(hk) + "\n")
1102        fp.write("   ],\n")  # end of hedra table
1103
1104        # write chain table
1105
1106        fp.write("\n[  // chain - world transform for each residue\n")
1107        chnStarted = False
1108        for ric in self.ordered_aa_ic_list:
1109            # handle start / end
1110            for NCaCKey in sorted(ric.NCaCKey):  # type: ignore
1111                if 0 < len(ric.rprev):
1112                    for rpr in ric.rprev:
1113                        acl = [rpr.atom_coords[ak] for ak in NCaCKey]
1114                        mt, mtr = coord_space(acl[0], acl[1], acl[2], True)
1115                else:
1116                    mtr = numpy.identity(4, dtype=numpy.float64)
1117                if chnStarted:
1118                    fp.write(",\n")
1119                else:
1120                    chnStarted = True
1121                fp.write("     [ " + str(resNdx[ric]) + ', "' + str(ric.residue.id[1]))
1122                fp.write(ric.lc + '", //' + str(NCaCKey) + "\n")
1123                IC_Chain._write_mtx(fp, mtr)
1124                fp.write(" ]")
1125        fp.write("\n   ]\n")
1126
1127
1128class IC_Residue:
1129    """Class to extend Biopython Residue with internal coordinate data.
1130
1131    Parameters
1132    ----------
1133    parent: biopython Residue object this class extends
1134    NO_ALTLOC: bool default False
1135    Disable processing of ALTLOC atoms if True, use only selected atoms.
1136
1137    Attributes
1138    ----------
1139    residue: Biopython Residue object reference
1140        The Residue object this extends
1141    hedra: dict indexed by 3-tuples of AtomKeys
1142        Hedra forming this residue
1143    dihedra: dict indexed by 4-tuples of AtomKeys
1144        Dihedra forming (overlapping) this residue
1145    rprev, rnext: lists of IC_Residue objects
1146        References to adjacent (bonded, not missing, possibly disordered)
1147        residues in chain
1148    atom_coords: AtomKey indexed dict of numpy [4] arrays
1149        Local copy of atom homogeneous coordinates [4] for work
1150        distinct from Bopython Residue/Atom values
1151    alt_ids: list of char
1152        AltLoc IDs from PDB file
1153    bfactors: dict
1154        AtomKey indexed B-factors as read from PDB file
1155    NCaCKey: List of tuples of AtomKeys
1156        List of tuples of N, Ca, C backbone atom AtomKeys; usually only 1
1157        but more if backbone altlocs. Set by link_dihedra()
1158    is20AA: bool
1159        True if residue is one of 20 standard amino acids, based on
1160        Residue resname
1161    accept_atoms: tuple
1162        list of PDB atom names to use when generatiing internal coordinates.
1163        Default is:
1164
1165        `accept_atoms = accept_mainchain + accept_hydrogens`
1166
1167        to exclude hydrogens in internal coordinates and generated PDB files,
1168        override as:
1169
1170        `IC_Residue.accept_atoms = IC_Residue.accept_mainchain`
1171
1172        to get only backbone atoms plus amide proton, use:
1173
1174        `IC_Residue.accept_atoms = IC_Residue.accept_mainchain + ('H',)`
1175
1176        to convert D atoms to H, set `AtomKey.d2h = True` and use:
1177
1178        `IC_Residue.accept_atoms = accept_mainchain + accept_hydrogens + accept_deuteriums`
1179
1180        There is currently no option to output internal coordinates with D
1181        instead of H
1182
1183    accept_resnames: tuple
1184        list of 3-letter residue names for HETATMs to accept when generating
1185        internal coordinates from atoms.  HETATM sidechain will be ignored, but normal
1186        backbone atoms (N, CA, C, O, CB) will be included.  Currently only
1187        CYG, YCM and UNK; override at your own risk.  To generate
1188        sidechain, add appropriate entries to ic_data_sidechains in
1189        ic_data.py and support in atom_to_internal_coordinates()
1190    gly_Cbeta: bool default False
1191        override class variable to True to generate internal coordinates for
1192        glycine CB atoms in atom_to_internal_coordinates().
1193
1194        `IC_Residue.gly_Cbeta = True`
1195    allBonds: bool default False
1196        whereas a PDB file just specifies atoms, OpenSCAD output for 3D printing
1197        needs all bonds specified explicitly - otherwise, e.g. PHE rings will not
1198        be closed.  This variable is managed by the Write_SCAD() code and enables
1199        this.
1200    cic: IC_Chain default None
1201        parent chain IC_Chain object
1202
1203    scale: optional float
1204        used for OpenSCAD output to generate gly_Cbeta bond length
1205
1206    Methods
1207    -------
1208    applyMtx()
1209        multiply all IC_Residue atom_cords by passed matrix
1210    assemble(atomCoordsIn, resetLocation, verbose)
1211        Compute atom coordinates for this residue from internal coordinates
1212    atm241(coord)
1213        Convert 1x3 cartesian coords to 4x1 homogeneous coords
1214    coords_to_residue()
1215        Convert homogeneous atom_coords to Biopython cartesian Atom coords
1216    atom_to_internal_coordinates(verbose)
1217        Create hedra and dihedra for atom coordinates
1218    get_angle()
1219        Return angle for passed key
1220    get_length()
1221        Return bond length for specified pair
1222    link_dihedra()
1223        Link dihedra to this residue, form id3_dh_index
1224    load_PIC(edron)
1225        Process parsed (di-/h-)edron data from PIC file
1226    pick_angle()
1227        Find Hedron or Dihedron for passed key
1228    pick_length()
1229        Find hedra for passed AtomKey pair
1230    rak(atom info)
1231        Residue AtomKey - per residue AtomKey result cache
1232    set_angle()
1233        Set angle for passed key (no position updates)
1234    set_length()
1235        Set bond length in all relevant hedra for specified pair
1236    write_PIC(pdbid, chainId, s)
1237        Generate PIC format strings for this residue
1238
1239    """
1240
1241    # add 3-letter residue name here for non-standard residues with
1242    # normal backbone.  CYG for test case 4LGY (1305 residue contiguous
1243    # chain)
1244    accept_resnames = ("CYG", "YCM", "UNK")
1245
1246    AllBonds: bool = False  # For OpenSCAD, generate explicit hedra covering all bonds if True.
1247
1248    def __init__(self, parent: "Residue", NO_ALTLOC: bool = False) -> None:
1249        """Initialize IC_Residue with parent Biopython Residue.
1250
1251        :param parent: Biopython Residue object
1252            The Biopython Residue this object extends
1253        :param NO_ALTLOC: bool default False
1254            Option to disable processing altloc disordered atoms, use selected.
1255        """
1256        # NO_ALTLOC=True will turn off alotloc positions and just use selected
1257        self.residue = parent
1258        self.cic: IC_Chain
1259        # dict of hedron objects indexed by hedron keys
1260        self.hedra: Dict[HKT, Hedron] = {}
1261        # dict of dihedron objects indexed by dihedron keys
1262        self.dihedra: Dict[DKT, Dihedron] = {}
1263        # map of dihedron key (first 3 atom keys) to dihedron
1264        # for all dihedra in Residue
1265        # built by link_dihedra()
1266        self.id3_dh_index: Dict[HKT, List[Dihedron]] = {}
1267        # cache of AtomKey results for rak()
1268        self.akc: Dict[Union[str, Atom], AtomKey] = {}
1269        # set of AtomKeys involved in dihedra, used by split_akl, build_rak_cache
1270        # built by __init__ for XYZ (PDB coord) input, link_dihedra for PIC input
1271        self.ak_set: Set[AtomKey] = set()
1272        # reference to adjacent residues in chain
1273        self.rprev: List[IC_Residue] = []
1274        self.rnext: List[IC_Residue] = []
1275        # local copy, homogeneous coordinates for atoms, numpy [4]
1276        # generated from dihedra include some i+1 atoms
1277        # or initialised here from parent residue if loaded from coordinates
1278        self.atom_coords: Dict["AtomKey", numpy.array] = {}
1279        # bfactors copied from PDB file
1280        self.bfactors: Dict[str, float] = {}
1281        self.alt_ids: Union[List[str], None] = None if NO_ALTLOC else []
1282        self.is20AA = True
1283        # rbase = position, insert code or none, resname (1 letter if in 20)
1284        rid = parent.id
1285        rbase = [rid[1], rid[2] if " " != rid[2] else None, parent.resname]
1286        try:
1287            rbase[2] = three_to_one(rbase[2]).upper()
1288        except KeyError:
1289            self.is20AA = False
1290
1291        self.rbase = tuple(rbase)
1292        self.lc = rbase[2]
1293
1294        if self.is20AA or rbase[2] in self.accept_resnames:
1295            for atom in parent.get_atoms():
1296                if hasattr(atom, "child_dict"):
1297                    if NO_ALTLOC:
1298                        self._add_atom(atom.selected_child)
1299                    else:
1300                        for atm in atom.child_dict.values():
1301                            self._add_atom(atm)
1302                else:
1303                    self._add_atom(atom)
1304            if self.ak_set:  # only for coordinate (pdb) input
1305                self.build_rak_cache()  # init cache ready for atom_to_internal_coords
1306                # self.NCaCKey = [(self.rak("N"), self.rak("CA"), self.rak("C"))]
1307
1308            # print(self.atom_coords)
1309
1310    def rak(self, atm: Union[str, Atom]) -> "AtomKey":
1311        """Cache calls to AtomKey for this residue."""
1312        try:
1313            ak = self.akc[atm]
1314        except (KeyError):
1315            ak = self.akc[atm] = AtomKey(self, atm)
1316            if isinstance(atm, str):
1317                # print(atm)  # debug code
1318                ak.missing = True
1319        return ak
1320
1321    def build_rak_cache(self) -> None:
1322        """Create explicit entries for for atoms so don't miss altlocs."""
1323        for ak in sorted(self.ak_set):
1324            atmName = ak.akl[3]
1325            if self.akc.get(atmName) is None:
1326                self.akc[atmName] = ak
1327
1328    accept_mainchain = (
1329        "N",
1330        "CA",
1331        "C",
1332        "O",
1333        "CB",
1334        "CG",
1335        "CG1",
1336        "OG1",
1337        "OG",
1338        "SG",
1339        "CG2",
1340        "CD",
1341        "CD1",
1342        "SD",
1343        "OD1",
1344        "ND1",
1345        "CD2",
1346        "ND2",
1347        "CE",
1348        "CE1",
1349        "NE",
1350        "OE1",
1351        "NE1",
1352        "CE2",
1353        "OE2",
1354        "NE2",
1355        "CE3",
1356        "CZ",
1357        "NZ",
1358        "CZ2",
1359        "CZ3",
1360        "OD2",
1361        "OH",
1362        "CH2",
1363        "OXT",
1364    )
1365    accept_hydrogens = (
1366        "H",
1367        "H1",
1368        "H2",
1369        "H3",
1370        "HA",
1371        "HA2",
1372        "HA3",
1373        "HB",
1374        "HB1",
1375        "HB2",
1376        "HB3",
1377        "HG2",
1378        "HG3",
1379        "HD2",
1380        "HD3",
1381        "HE2",
1382        "HE3",
1383        "HZ1",
1384        "HZ2",
1385        "HZ3",
1386        "HG11",
1387        "HG12",
1388        "HG13",
1389        "HG21",
1390        "HG22",
1391        "HG23",
1392        "HZ",
1393        "HD1",
1394        "HE1",
1395        "HD11",
1396        "HD12",
1397        "HD13",
1398        "HG",
1399        "HG1",
1400        "HD21",
1401        "HD22",
1402        "HD23",
1403        "NH1",
1404        "NH2",
1405        "HE",
1406        "HH11",
1407        "HH12",
1408        "HH21",
1409        "HH22",
1410        "HE21",
1411        "HE22",
1412        "HE2",
1413        "HH",
1414        "HH2",
1415    )
1416    accept_deuteriums = (
1417        "D",
1418        "D1",
1419        "D2",
1420        "D3",
1421        "DA",
1422        "DA2",
1423        "DA3",
1424        "DB",
1425        "DB1",
1426        "DB2",
1427        "DB3",
1428        "DG2",
1429        "DG3",
1430        "DD2",
1431        "DD3",
1432        "DE2",
1433        "DE3",
1434        "DZ1",
1435        "DZ2",
1436        "DZ3",
1437        "DG11",
1438        "DG12",
1439        "DG13",
1440        "DG21",
1441        "DG22",
1442        "DG23",
1443        "DZ",
1444        "DD1",
1445        "DE1",
1446        "DD11",
1447        "DD12",
1448        "DD13",
1449        "DG",
1450        "DG1",
1451        "DD21",
1452        "DD22",
1453        "DD23",
1454        "ND1",
1455        "ND2",
1456        "DE",
1457        "DH11",
1458        "DH12",
1459        "DH21",
1460        "DH22",
1461        "DE21",
1462        "DE22",
1463        "DE2",
1464        "DH",
1465        "DH2",
1466    )
1467    accept_atoms = accept_mainchain + accept_hydrogens
1468
1469    gly_Cbeta = False
1470
1471    @staticmethod
1472    def atm241(coord: numpy.array) -> numpy.array:
1473        """Convert 1x3 cartesian coordinates to 4x1 homogeneous coordinates."""
1474        arr41 = numpy.empty(4)
1475        arr41[0:3] = coord
1476        arr41[3] = 1.0
1477        return arr41
1478
1479    def _add_atom(self, atm: Atom) -> None:
1480        """Filter Biopython Atom with accept_atoms; set atom_coords, ak_set.
1481
1482        Arbitrarily renames O' and O'' to O and OXT
1483        """
1484        if "O'" == atm.name:
1485            atm.name = "O"
1486        if "O''" == atm.name:
1487            atm.name = "OXT"
1488
1489        if atm.name not in self.accept_atoms:
1490            # print('skip:', atm.name)
1491            return
1492        ak = self.rak(atm)  # passing Atom here not string
1493        self.atom_coords[ak] = IC_Residue.atm241(atm.coord)
1494        self.ak_set.add(ak)
1495
1496    def __repr__(self) -> str:
1497        """Print string is parent Residue ID."""
1498        return str(self.residue.full_id)
1499
1500    def pretty_str(self) -> str:
1501        """Nice string for residue ID."""
1502        id = self.residue.id
1503        return f"{self.residue.resname} {id[0]}{str(id[1])}{id[2]}"
1504
1505    def load_PIC(self, edron: Dict[str, str]) -> None:
1506        """Process parsed (di-/h-)edron data from PIC file.
1507
1508        :param edron: parse dictionary from Edron.edron_re
1509        """
1510        if edron["a4"] is None:  # PIC line is Hedron
1511            ek = (AtomKey(edron["a1"]), AtomKey(edron["a2"]), AtomKey(edron["a3"]))
1512            self.hedra[ek] = Hedron(ek, **edron)
1513        else:  # PIC line is Dihedron
1514            ek = (
1515                AtomKey(edron["a1"]),
1516                AtomKey(edron["a2"]),
1517                AtomKey(edron["a3"]),
1518                AtomKey(edron["a4"]),
1519            )
1520            self.dihedra[ek] = Dihedron(ek, **edron)
1521
1522    def link_dihedra(self, verbose: bool = False) -> None:
1523        """Housekeeping after loading all residues and dihedra.
1524
1525        - Link dihedra to this residue
1526        - form id3_dh_index
1527        - form ak_set
1528        - set NCaCKey to be available AtomKeys
1529        """
1530        id3i: Dict[HKT, List[Dihedron]] = {}
1531        for dh in self.dihedra.values():
1532            dh.ic_residue = self  # each dihedron can find its IC_Residue
1533            dh.cic = self.cic  # each dihedron can update chain dihedral angles
1534            id3 = dh.id3
1535            if id3 not in id3i:
1536                id3i[id3] = []
1537            id3i[id3].append(dh)
1538            self.ak_set.update(dh.aks)
1539            dh.set_hedra()
1540        for h in self.hedra.values():  # collect any atoms in orphan hedra
1541            self.ak_set.update(h.aks)  # e.g. alternate CB path with no O
1542            h.cic = self.cic  # each hedron can update chain hedra
1543
1544        # map to find each dihedron from atom tokens 1-3
1545        self.id3_dh_index = id3i
1546
1547        # if loaded PIC data, akc not initialised yet
1548        if not self.akc:
1549            self.build_rak_cache()
1550
1551        # initialise NCaCKey here:
1552
1553        # not rak here to avoid polluting akc cache with no-altloc keys
1554        # so starting with 'generic' key:
1555        self.NCaCKey = [(AtomKey(self, "N"), AtomKey(self, "CA"), AtomKey(self, "C"))]
1556
1557        newNCaCKey: List[Tuple["AtomKey", ...]] = []
1558
1559        try:
1560            for tpl in sorted(self.NCaCKey):
1561                newNCaCKey.extend(self._split_akl(tpl))
1562            self.NCaCKey = cast(List[HKT], newNCaCKey)
1563            # if len(newNCaCKey) != 1 and len(self.rprev) == 0:
1564            #  debug code to find examples of chains starting with disordered residues
1565            #    print(f"chain start multiple NCaCKey  {newNCaCKey} : {self}")
1566        except AttributeError:
1567            if verbose:
1568                print(
1569                    f"Missing N, Ca and/or C atoms for residue {str(self.residue)} chain {self.residue.parent.id}"
1570                )
1571
1572    def set_flexible(self) -> None:
1573        """For OpenSCAD, mark N-CA and CA-C bonds to be flexible joints."""
1574        for h in self.hedra.values():
1575            if h.dh_class == "NCAC":
1576                h.flex_female_1 = True
1577                h.flex_female_2 = True
1578            elif h.dh_class.endswith("NCA"):
1579                h.flex_male_2 = True
1580            elif h.dh_class.startswith("CAC") and h.aks[1].akl[3] == "C":
1581                h.flex_male_1 = True
1582            elif h.dh_class == "CBCAC":
1583                h.skinny_1 = True  # CA-CB bond interferes with flex join
1584
1585    def set_hbond(self) -> None:
1586        """For OpenSCAD, mark H-N and C-O bonds to be hbonds (magnets)."""
1587        for h in self.hedra.values():
1588            if h.dh_class == "HNCA":
1589                h.hbond_1 = True
1590            elif h.dh_class == "CACO":
1591                h.hbond_2 = True
1592
1593    def default_startpos(self) -> Dict["AtomKey", numpy.array]:
1594        """Generate default N-Ca-C coordinates to build this residue from."""
1595        atomCoords = {}
1596        dlist0 = [self.id3_dh_index.get(akl, None) for akl in sorted(self.NCaCKey)]
1597        dlist1 = [d for d in dlist0 if d is not None]
1598        # https://stackoverflow.com/questions/11264684/flatten-list-of-lists
1599        dlist = [val for sublist in dlist1 for val in sublist]
1600        # dlist = self.id3_dh_index[NCaCKey]
1601        for d in dlist:
1602            for i, a in enumerate(d.aks):
1603                atomCoords[a] = d.initial_coords[i]
1604        # if "O" not in self.akc and "CB" in self.akc:
1605        #    # need CB coord if no O coord - handle alternate CB path
1606        #    # but not clear how to do this for default position
1607        #    pass
1608        return atomCoords
1609
1610    def get_startpos(self) -> Dict["AtomKey", numpy.array]:
1611        """Find N-Ca-C coordinates to build this residue from."""
1612        if 0 < len(self.rprev):
1613            # if there is a previous residue, build on from it
1614            startPos = {}
1615            # nb akl for this res n-ca-c in rp (prev res) dihedra
1616            akl: List[AtomKey] = []
1617            for tpl in self.NCaCKey:
1618                akl.extend(tpl)
1619            if self.rak("O").missing:
1620                # alternate CB path - only use if O is missing
1621                # else have problems modifying phi angle
1622                akl.append(AtomKey(self, "CB"))
1623            for ak in akl:
1624                for rp in self.rprev:
1625                    rpak = rp.atom_coords.get(ak, None)
1626                    if rpak is not None:
1627                        startPos[ak] = rpak
1628            if 3 > len(startPos):  # if don't have all 3, reset to have none
1629                startPos = {}
1630        else:
1631            # get atom posns already added by load_structure
1632            sp = self.residue.parent.internal_coord.initNCaC.get(self.rbase, None)
1633            if sp is None:
1634                startPos = {}
1635            else:
1636                # need copy Here (shallow ok) else assemble() adds to this dict
1637                startPos = cast(Dict["AtomKey", numpy.array], sp.copy())
1638
1639        if startPos == {}:
1640            startPos = self.default_startpos()
1641
1642        return startPos
1643
1644    def clear_transforms(self):
1645        """Set cst and rcst attributes to none before assemble()."""
1646        for d in self.dihedra.values():
1647            d.cst = None
1648            d.rcst = None
1649
1650    def assemble(
1651        self, resetLocation: bool = False, verbose: bool = False,
1652    ) -> Union[Dict["AtomKey", numpy.array], Dict[HKT, numpy.array], None]:
1653        """Compute atom coordinates for this residue from internal coordinates.
1654
1655        Join dihedrons starting from N-CA-C and N-CA-CB hedrons, computing protein
1656        space coordinates for backbone and sidechain atoms
1657
1658        Sets forward and reverse transforms on each Dihedron to convert from
1659        protein coordinates to dihedron space coordinates for first three
1660        atoms (see coord_space())
1661
1662        Not vectorized (yet).
1663
1664        **Algorithm**
1665
1666        form double-ended queue, start with c-ca-n, o-c-ca, n-ca-cb, n-ca-c.
1667
1668        if resetLocation=True, use initial coords from generating dihedron
1669        for n-ca-c initial positions (result in dihedron coordinate space)
1670
1671        while queue not empty
1672            get 3-atom hedron key
1673
1674            for each dihedron starting with hedron key (1st hedron of dihedron)
1675
1676                if have coordinates for all 4 atoms already
1677                    add 2nd hedron key to back of queue
1678                else if have coordinates for 1st 3 atoms
1679                    compute forward and reverse transforms to take 1st 3 atoms
1680                    to/from dihedron initial coordinate space
1681
1682                    use reverse transform to get position of 4th atom in
1683                    current coordinates from dihedron initial coordinates
1684
1685                    add 2nd hedron key to back of queue
1686                else
1687                    ordering failed, put hedron key at back of queue and hope
1688                    next time we have 1st 3 atom positions (should not happen)
1689
1690        loop terminates (queue drains) as hedron keys which do not start any
1691        dihedra are removed without action
1692
1693        :param resetLocation: bool default False
1694            - Option to ignore start location and orient so N-Ca-C hedron
1695            at origin.
1696
1697        :returns:
1698            Dict of AtomKey -> homogeneous atom coords for residue in protein space
1699            relative to previous residue
1700
1701        """
1702        # debug statements below still useful, commented for performance
1703        # dbg = False
1704
1705        NCaCKey = sorted(self.NCaCKey)
1706
1707        if not self.ak_set:
1708            return None  # give up now if no atoms to work with
1709
1710        # order of these startLst entries matters
1711        startLst = self._split_akl((self.rak("C"), self.rak("CA"), self.rak("N")))
1712        if "CB" in self.akc:
1713            startLst.extend(
1714                self._split_akl((self.rak("N"), self.rak("CA"), self.rak("CB")))
1715            )
1716        if "O" in self.akc:
1717            startLst.extend(
1718                self._split_akl((self.rak("O"), self.rak("C"), self.rak("CA")))
1719            )
1720
1721        startLst.extend(NCaCKey)
1722
1723        q = deque(startLst)
1724        # resnum = self.rbase[0]
1725
1726        # get initial coords from previous residue or IC_Chain info
1727        # or default coords
1728        if resetLocation:
1729            # use N-CA-C initial coords from creating dihedral
1730            atomCoords = self.default_startpos()
1731        else:
1732            atomCoords = self.get_startpos()
1733
1734        while q:  # deque is not empty
1735            # if dbg:
1736            #    print("assemble loop start q=", q)
1737            h1k = cast(HKT, q.pop())
1738            dihedra = self.id3_dh_index.get(h1k, None)
1739            # if dbg:
1740            #    print(
1741            #        "  h1k:",
1742            #        h1k,
1743            #        "len dihedra: ",
1744            #        len(dihedra) if dihedra is not None else "None",
1745            #    )
1746            if dihedra is not None:
1747                for d in dihedra:
1748                    if 4 == len(d.initial_coords) and d.initial_coords[3] is not None:
1749                        # skip incomplete dihedron if don't have 4th atom due
1750                        # to missing input data
1751                        d_h2key = d.hedron2.aks
1752                        akl = d.aks
1753                        # if dbg:
1754                        #    print("    process", d, d_h2key, akl)
1755
1756                        acount = len([a for a in d.aks if a in atomCoords])
1757
1758                        if 4 == acount:  # and not need_transform:
1759                            # dihedron already done, queue 2nd hedron key
1760                            q.appendleft(d_h2key)
1761                            # if dbg:
1762                            #    print("    4- already done, append left")
1763                            if d.rcst is None:  # missing transform
1764                                # can happen for altloc atoms
1765                                # only needed for write_SCAD output
1766                                acs = [atomCoords[a] for a in h1k]
1767                                d.cst, d.rcst = coord_space(
1768                                    acs[0], acs[1], acs[2], True
1769                                )
1770                        elif 3 == acount:
1771                            # if dbg:
1772                            #    print("    3- call coord_space")
1773
1774                            acs = [atomCoords[a] for a in h1k]
1775                            d.cst, d.rcst = coord_space(acs[0], acs[1], acs[2], True)
1776                            # print(d.cst)
1777                            # print(d.rcst)
1778                            # if dbg:
1779                            #    print(
1780                            #        "        initial_coords[3]=",
1781                            #        d.initial_coords[3].transpose(),
1782                            #    )
1783                            acak3 = d.rcst.dot(d.initial_coords[3])
1784                            # if dbg:
1785                            #    print("        acak3=", acak3.transpose())
1786
1787                            atomCoords[akl[3]] = numpy.round(
1788                                acak3, 3
1789                            )  # round to PDB format 8.3
1790                            # if dbg:
1791                            #    print(
1792                            #        "        3- finished, ak:",
1793                            #        akl[3],
1794                            #        "coords:",
1795                            #        atomCoords[akl[3]].transpose(),
1796                            #    )
1797                            q.appendleft(d_h2key)
1798                        else:
1799                            if verbose:
1800                                print("no coords to start", d)
1801                                print(
1802                                    [
1803                                        a
1804                                        for a in d.aks
1805                                        if atomCoords.get(a, None) is not None
1806                                    ]
1807                                )
1808                    else:
1809                        if verbose:
1810                            print("no initial coords for", d)
1811
1812        return atomCoords
1813
1814    def _split_akl(
1815        self,
1816        lst: Union[Tuple["AtomKey", ...], List["AtomKey"]],
1817        missingOK: bool = False,
1818    ) -> List[Tuple["AtomKey", ...]]:
1819        """Get AtomKeys for this residue (ak_set) given generic list of AtomKeys.
1820
1821        Given a list of AtomKeys (aks) for a Hedron or Dihedron,
1822          return:
1823                list of matching aks that have id3_dh in this residue
1824                (ak may change if occupancy != 1.00)
1825
1826            or
1827                multiple lists of matching aks expanded for all atom altlocs
1828
1829            or
1830                empty list if any of atom_coord(ak) missing and not missingOK
1831
1832        :param lst: list[3] or [4] of AtomKeys
1833            non-altloc AtomKeys to match to specific AtomKeys for this residue
1834        """
1835        altloc_ndx = AtomKey.fields.altloc
1836        occ_ndx = AtomKey.fields.occ
1837
1838        # step 1
1839        # given a list of AtomKeys (aks)
1840        #  form a new list of same aks with coords or diheds in this residue
1841        #      plus lists of matching altloc aks in coords or diheds
1842        edraLst: List[Tuple[AtomKey, ...]] = []
1843        altlocs = set()
1844        posnAltlocs: Dict["AtomKey", Set[str]] = {}
1845        akMap = {}
1846        for ak in lst:
1847            posnAltlocs[ak] = set()
1848            if (
1849                ak in self.ak_set
1850                and ak.akl[altloc_ndx] is None
1851                and ak.akl[occ_ndx] is None
1852            ):
1853                # simple case no altloc and exact match in set
1854                edraLst.append((ak,))  # tuple of ak
1855            else:
1856                ak2_lst = []
1857                for ak2 in self.ak_set:
1858                    if ak.altloc_match(ak2):
1859                        # print(key)
1860                        ak2_lst.append(ak2)
1861                        akMap[ak2] = ak
1862                        altloc = ak2.akl[altloc_ndx]
1863                        if altloc is not None:
1864                            altlocs.add(altloc)
1865                            posnAltlocs[ak].add(altloc)
1866                edraLst.append(tuple(ak2_lst))
1867
1868        # step 2
1869        # check and finish for
1870        #   missing atoms
1871        #   simple case no altlocs
1872        # else form new AtomKey lists covering all altloc permutations
1873        maxc = 0
1874        for akl in edraLst:
1875            lenAKL = len(akl)
1876            if 0 == lenAKL and not missingOK:
1877                return []  # atom missing in atom_coords, cannot form object
1878            elif maxc < lenAKL:
1879                maxc = lenAKL
1880        if 1 == maxc:  # simple case no altlocs for any ak in list
1881            newAKL = []
1882            for akl in edraLst:
1883                if akl:  # may have empty lists if missingOK, do not append
1884                    newAKL.append(akl[0])
1885            return [tuple(newAKL)]
1886        else:
1887            new_edraLst = []
1888            for al in altlocs:
1889                # form complete new list for each altloc
1890                alhl = []
1891                for akl in edraLst:
1892                    lenAKL = len(akl)
1893                    if 0 == lenAKL:
1894                        continue  # ignore empty list from missingOK
1895                    if 1 == lenAKL:
1896                        alhl.append(akl[0])  # not all atoms will have altloc
1897                    # elif (lenAKL < maxc
1898                    #      and al not in posnAltlocs[akMap[akl[0]]]):
1899                    elif al not in posnAltlocs[akMap[akl[0]]]:
1900                        # this position has fewer altlocs than other positions
1901                        # and this position does not have this al,
1902                        # so just grab first to form angle as could be any
1903                        alhl.append(sorted(akl)[0])
1904                    else:
1905                        for ak in akl:
1906                            if ak.akl[altloc_ndx] == al:
1907                                alhl.append(ak)
1908                new_edraLst.append(tuple(alhl))
1909
1910            # print(new_edraLst)
1911            return new_edraLst
1912
1913    def _gen_edra(self, lst: Union[Tuple["AtomKey", ...], List["AtomKey"]]) -> None:
1914        """Populate hedra/dihedra given edron ID tuple.
1915
1916        Given list of AtomKeys defining hedron or dihedron
1917          convert to AtomKeys with coordinates in this residue
1918          add appropriately to self.di/hedra, expand as needed atom altlocs
1919
1920        :param lst: tuple of AtomKeys
1921            Specifies Hedron or Dihedron
1922        """
1923        for ak in lst:
1924            if ak.missing:
1925                return  # give up if atoms actually missing
1926
1927        lenLst = len(lst)
1928        if 4 > lenLst:
1929            cdct, dct, obj = self.cic.hedra, self.hedra, Hedron
1930        else:
1931            cdct, dct, obj = self.cic.dihedra, self.dihedra, Dihedron  # type: ignore
1932
1933        if isinstance(lst, List):
1934            tlst = tuple(lst)
1935        else:
1936            tlst = lst
1937
1938        hl = self._split_akl(tlst)  # expand tlst with any altlocs
1939        # returns list of tuples
1940
1941        for tnlst in hl:
1942            # do not add edron if split_akl() made something shorter
1943            if len(tnlst) == lenLst:
1944                # if edron already exists, then update not replace with new
1945                if tnlst not in cdct:
1946                    cdct[tnlst] = obj(tnlst)  # type: ignore
1947                if tnlst not in dct:
1948                    dct[tnlst] = cdct[tnlst]  # type: ignore
1949
1950                dct[tnlst].needs_update = True  # type: ignore
1951
1952    def atom_to_internal_coordinates(self, verbose: bool = False) -> None:
1953        """Create hedra and dihedra for atom coordinates.
1954
1955        :param verbose: bool default False
1956            warn about missing N, Ca, C backbone atoms.
1957        """
1958        # on entry we have all Biopython Atoms loaded
1959        if not self.ak_set:
1960            return  # so give up if no atoms loaded for this residue
1961
1962        sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C")
1963        if self.lc != "G":
1964            sCB = self.rak("CB")
1965
1966        # first __init__ di/hedra, AtomKey objects and atom_coords for di/hedra
1967        # which extend into next residue.
1968
1969        if 0 < len(self.rnext) and self.rnext[0].ak_set:
1970            # atom_coords, hedra and dihedra for backbone dihedra
1971            # which reach into next residue
1972            for rn in self.rnext:
1973                nN, nCA, nC = rn.rak("N"), rn.rak("CA"), rn.rak("C")
1974
1975                nextNCaC = rn._split_akl((nN, nCA, nC), missingOK=True)
1976
1977                for tpl in nextNCaC:
1978                    for ak in tpl:
1979                        if ak in rn.atom_coords:
1980                            self.atom_coords[ak] = rn.atom_coords[ak]
1981                            self.ak_set.add(ak)
1982                        else:
1983                            for rn_ak in rn.atom_coords.keys():
1984                                if rn_ak.altloc_match(ak):
1985                                    self.atom_coords[rn_ak] = rn.atom_coords[rn_ak]
1986                                    self.ak_set.add(rn_ak)
1987
1988                self._gen_edra((sN, sCA, sC, nN))  # psi
1989                self._gen_edra((sCA, sC, nN, nCA))  # omega i+1
1990                self._gen_edra((sC, nN, nCA, nC))  # phi i+1
1991                self._gen_edra((sCA, sC, nN))
1992                self._gen_edra((sC, nN, nCA))
1993                self._gen_edra((nN, nCA, nC))  # tau i+1
1994
1995                # redundant next residue C-beta locator (alternate CB path)
1996                # otherwise missing O will cause no sidechain
1997                # not rn.rak so don't trigger missing CB for Gly
1998                nCB = rn.akc.get("CB", None)
1999                if nCB is not None and nCB in rn.atom_coords:
2000                    self.atom_coords[nCB] = rn.atom_coords[nCB]
2001                    self.ak_set.add(nCB)
2002                    self._gen_edra((nN, nCA, nCB))
2003                    self._gen_edra((sC, nN, nCA, nCB))
2004
2005        # if start of chain then need to __init__ NCaC hedron as not in previous residue
2006        if 0 == len(self.rprev):
2007            self._gen_edra((sN, sCA, sC))
2008
2009        # now __init__ di/hedra for standard backbone atoms independent of neighbours
2010        backbone = ic_data_backbone
2011        for edra in backbone:
2012            # only need to build if this residue has all the atoms in the edra
2013            if all(atm in self.akc for atm in edra):
2014                r_edra = [self.rak(atom) for atom in edra]
2015                self._gen_edra(r_edra)  # [4] is label on some table entries
2016
2017        # next __init__ sidechain di/hedra
2018        if self.lc is not None:
2019            sidechain = ic_data_sidechains.get(self.lc, [])
2020            for edraLong in sidechain:
2021                edra = edraLong[0:4]  # [4] is label on some sidechain table entries
2022                # lots of H di/hedra can be avoided if don't have those atoms
2023                if all(atm in self.akc for atm in edra):
2024                    r_edra = [self.rak(atom) for atom in edra]
2025                    self._gen_edra(r_edra)
2026            if IC_Residue.AllBonds:  # openscad output needs all bond cylinders explicit
2027                sidechain = ic_data_sidechain_extras.get(self.lc, [])
2028                for edra in sidechain:
2029                    # test less useful here but avoids populating rak cache if possible
2030                    if all(atm in self.akc for atm in edra):
2031                        r_edra = [self.rak(atom) for atom in edra]
2032                        self._gen_edra(r_edra)
2033
2034        # final processing of all dihedra just generated
2035        self.link_dihedra(verbose)
2036
2037        # now do the actual work computing di/hedra values from atom coordinates
2038        # -> updated to process at chain level  (vectorized)
2039        #
2040        # for d in self.dihedra.values():
2041        #    # populate values and hedra for dihedron objects
2042        #    d.dihedron_from_atoms()
2043        # for h in self.hedra.values():
2044        #    # miss redundant hedra above, needed for some chi1 angles
2045        #    # also miss if missing atoms means hedron not in dihedra
2046        #    if h.atoms_needs_update:
2047        #        h.hedron_from_atoms(self.atom_coords)
2048
2049        # create di/hedra for gly Cbeta if needed, populate values later
2050        if self.gly_Cbeta and "G" == self.lc:  # and self.atom_coords[sCB] is None:
2051            # add C-beta for Gly
2052
2053            self.ak_set.add(AtomKey(self, "CB"))
2054            sCB = self.rak("CB")
2055            sCB.missing = False  # was True because akc cache did not have entry
2056
2057            # self.atom_coords[sCB] = None
2058
2059            # main orientation comes from O-C-Ca-Cb so make Cb-Ca-C hedron
2060            sO = self.rak("O")
2061            htpl = (sCB, sCA, sC)
2062            self._gen_edra(htpl)
2063            # values generated in build_glyCB
2064            # h = self.hedra[htpl]
2065            # h.lal[2] = self.hedra[(sCA, sC, sO)].lal[0]  # CaCO len12 -> len23
2066            # h.lal[1] = 110.17513  # angle
2067            # h.lal[0] = Ca_Cb_Len  # len12
2068
2069            # generate dihedral based on N-Ca-C-O offset from db query above
2070            dtpl = (sO, sC, sCA, sCB)
2071            self._gen_edra(dtpl)
2072            d = self.dihedra[dtpl]
2073            d.ic_residue = self
2074            d.set_hedra()
2075
2076            self.link_dihedra(verbose)  # re-run for new dihedra
2077
2078        if verbose:
2079            # oAtom =
2080            self.rak("O")  # trigger missing flag if needed
2081            missing = []
2082            for akk, akv in self.akc.items():
2083                if isinstance(akk, str) and akv.missing:
2084                    missing.append(akv)
2085            if missing:
2086                chn = self.residue.parent
2087                chn_id = chn.id
2088                chn_len = len(chn.internal_coord.ordered_aa_ic_list)
2089                print(f"chain {chn_id} len {chn_len} missing atom(s): {missing}")
2090
2091    def build_glyCB(self, gCBd: "Dihedron"):
2092        """Populate values for Gly C-beta, rest of chain complete.
2093
2094        Data averaged from Sep 2019 Dunbrack cullpdb_pc20_res2.2_R1.0
2095        restricted to structures with amide protons.
2096
2097        Ala avg rotation of OCCACB from NCACO query:
2098        select avg(g.rslt) as avg_rslt, stddev(g.rslt) as sd_rslt, count(*)
2099        from
2100        (select f.d1d, f.d2d,
2101        (case when f.rslt > 0 then f.rslt-360.0 else f.rslt end) as rslt
2102        from (select d1.angle as d1d, d2.angle as d2d,
2103        (d2.angle - d1.angle) as rslt from dihedron d1,
2104        dihedron d2 where d1.rdh_class='AOACACAACB' and
2105        d2.rdh_class='ANACAACAO' and d1.pdb=d2.pdb and d1.chn=d2.chn
2106        and d1.res=d2.res) as f) as g
2107
2108        | avg_rslt          | sd_rslt          | count   |
2109        | -122.682194862932 | 5.04403040513919 | 14098   |
2110
2111        """
2112        Ca_Cb_Len = 1.53363
2113        if hasattr(self, "scale"):  # used for openscad output
2114            Ca_Cb_Len *= self.scale  # type: ignore
2115
2116        sN, sCA, sC, sO = self.rak("N"), self.rak("CA"), self.rak("C"), self.rak("O")
2117
2118        # generated dihedron is O-Ca-C-Cb
2119        # hedron2 is reversed: Cb-Ca-C (also h1 reversed: C-Ca-O)
2120        h2 = gCBd.hedron2
2121        h2.lal[:] = (Ca_Cb_Len, 110.17513, self.hedra[(sCA, sC, sO)].lal[0])
2122
2123        refval = self.dihedra.get((sN, sCA, sC, sO), None)
2124        if refval:
2125            gCBd.angle = 122.68219 + refval.angle
2126            if gCBd.angle > 180.0:
2127                gCBd.angle -= 360.0
2128        else:
2129            gCBd.angle = 120
2130
2131    @staticmethod
2132    def _pdb_atom_string(atm: Atom) -> str:
2133        """Generate PDB ATOM record.
2134
2135        :param atm: Biopython Atom object reference
2136        """
2137        if 2 == atm.is_disordered():
2138            s = ""
2139            for a in atm.child_dict.values():
2140                s += IC_Residue._pdb_atom_string(a)
2141            return s
2142        else:
2143            res = atm.parent
2144            chn = res.parent
2145            s = (
2146                "{:6}{:5d} {:4}{:1}{:3} {:1}{:4}{:1}   {:8.3f}{:8.3f}{:8.3f}"
2147                "{:6.2f}{:6.2f}        {:>4}\n"
2148            ).format(
2149                "ATOM",
2150                atm.serial_number,
2151                atm.fullname,
2152                atm.altloc,
2153                res.resname,
2154                chn.id,
2155                res.id[1],
2156                res.id[2],
2157                atm.coord[0],
2158                atm.coord[1],
2159                atm.coord[2],
2160                atm.occupancy,
2161                atm.bfactor,
2162                atm.element,
2163            )
2164            # print(s)
2165        return s
2166
2167    @staticmethod
2168    def _residue_string(res: "Residue") -> str:
2169        """Generate PIC Residue string.
2170
2171        Enough to create Biopython Residue object without actual Atoms.
2172
2173        :param res: Biopython Residue object reference
2174        """
2175        segid = res.get_segid()
2176        if segid.isspace() or "" == segid:
2177            segid = ""
2178        else:
2179            segid = " [" + segid + "]"
2180        return str(res.get_full_id()) + " " + res.resname + segid + "\n"
2181
2182    def _write_pic_bfac(self, atm: Atom, s: str, col: int) -> Tuple[str, int]:
2183        ak = self.rak(atm)
2184        if 0 == col % 5:
2185            s += "BFAC:"
2186        s += " " + ak.id + " " + f"{atm.get_bfactor():6.2f}"
2187        col += 1
2188        if 0 == col % 5:
2189            s += "\n"
2190        return s, col
2191
2192    def write_PIC(self, pdbid: str = "0PDB", chainid: str = "A", s: str = "") -> str:
2193        """Write PIC format lines for this residue.
2194
2195        :param str pdbid: PDB idcode string; default 0PDB
2196        :param str chainid: PDB Chain ID character; default A
2197        :param str s: result string to add to
2198        """
2199        if pdbid is None:
2200            pdbid = "0PDB"
2201        if chainid is None:
2202            chainid = "A"
2203        s += IC_Residue._residue_string(self.residue)
2204
2205        if (
2206            0 == len(self.rprev)
2207            and hasattr(self, "NCaCKey")
2208            and self.NCaCKey is not None
2209        ):
2210            NCaChedron = self.pick_angle(self.NCaCKey[0])  # first tau
2211            if NCaChedron is not None:
2212                try:
2213                    ts = IC_Residue._pdb_atom_string(self.residue["N"])
2214                    ts += IC_Residue._pdb_atom_string(self.residue["CA"])
2215                    ts += IC_Residue._pdb_atom_string(self.residue["C"])
2216                    s += ts  # only if no exception: have all 3 atoms
2217                except KeyError:
2218                    pass
2219
2220        base = pdbid + " " + chainid + " "
2221        for h in sorted(self.hedra.values()):
2222            try:
2223                s += (
2224                    base
2225                    + h.id
2226                    + " "
2227                    + "{:9.5f} {:9.5f} {:9.5f}".format(
2228                        set_accuracy_95(h.lal[0]),  # len12
2229                        set_accuracy_95(h.lal[1]),  # angle
2230                        set_accuracy_95(h.lal[2]),  # len23
2231                    )
2232                )
2233            except KeyError:
2234                pass
2235            s += "\n"
2236        for d in sorted(self.dihedra.values()):
2237            try:
2238                s += base + d.id + " " + "{:9.5f}".format(set_accuracy_95(d.angle))
2239            except KeyError:
2240                pass
2241            s += "\n"
2242
2243        col = 0
2244        for a in sorted(self.residue.get_atoms()):
2245            if 2 == a.is_disordered():  # hasattr(a, 'child_dict'):
2246                if self.alt_ids is None:
2247                    s, col = self._write_pic_bfac(a.selected_child, s, col)
2248                else:
2249                    for atm in a.child_dict.values():
2250                        s, col = self._write_pic_bfac(atm, s, col)
2251            else:
2252                s, col = self._write_pic_bfac(a, s, col)
2253        if 0 != col % 5:
2254            s += "\n"
2255
2256        return s
2257
2258    def coords_to_residue(self, rnext: bool = False) -> None:
2259        """Convert self.atom_coords to biopython Residue Atom coords.
2260
2261        Copy homogeneous IC_Residue atom_coords to self.residue cartesian
2262        Biopython Atom coords.
2263
2264        :param rnext: bool default False
2265            next IC_Residue has no atom coords due to missing atoms, so try to
2266            populate with any available coords calculated for this residue
2267            di/hedra extending into next
2268        """
2269        if rnext:
2270            respos, icode = self.rnext[0].residue.id[1:3]
2271        else:
2272            respos, icode = self.residue.id[1:3]
2273        respos = str(respos)
2274        spNdx, icNdx, resnNdx, atmNdx, altlocNdx, occNdx = AtomKey.fields
2275
2276        if rnext:
2277            Res = self.rnext[0].residue
2278        else:
2279            Res = self.residue
2280        ndx = Res.parent.internal_coord.ndx
2281
2282        for ak in sorted(self.atom_coords):
2283            # print(ak)
2284            if respos == ak.akl[spNdx] and (
2285                (icode == " " and ak.akl[icNdx] is None) or icode == ak.akl[icNdx]
2286            ):
2287
2288                ac = self.atom_coords[ak]
2289                atm_coords = ac[:3]
2290                akl = ak.akl
2291                atm, altloc = akl[atmNdx], akl[altlocNdx]
2292
2293                Atm = None
2294                newAtom = None
2295
2296                if Res.has_id(atm):
2297                    Atm = Res[atm]
2298
2299                if Atm is None or (
2300                    2 == Atm.is_disordered() and not Atm.disordered_has_id(altloc)
2301                ):
2302                    # print('new', ak)
2303                    occ = akl[occNdx]
2304                    aloc = akl[altlocNdx]
2305                    bfac = self.bfactors.get(ak.id, None)
2306                    newAtom = Atom(
2307                        atm,
2308                        atm_coords,
2309                        (0.0 if bfac is None else bfac),
2310                        (1.00 if occ is None else float(occ)),
2311                        (" " if aloc is None else aloc),
2312                        atm,
2313                        ndx,
2314                        atm[0],
2315                    )
2316                    ndx += 1
2317                    if Atm is None:
2318                        if altloc is None:
2319                            Res.add(newAtom)
2320                        else:
2321                            disordered_atom = DisorderedAtom(atm)
2322                            Res.add(disordered_atom)
2323                            disordered_atom.disordered_add(newAtom)
2324                            Res.flag_disordered()
2325                    else:
2326                        Atm.disordered_add(newAtom)
2327                else:
2328                    # Atm is not None, might be disordered with altloc
2329                    # print('update', ak)
2330                    if 2 == Atm.is_disordered() and Atm.disordered_has_id(altloc):
2331                        Atm.disordered_select(altloc)
2332                    Atm.set_coord(atm_coords)
2333                    sn = Atm.get_serial_number()
2334                    if sn is not None:
2335                        ndx = sn + 1
2336
2337        Res.parent.internal_coord.ndx = ndx
2338
2339    def _get_ak_tuple(self, ak_str: str) -> Optional[Tuple["AtomKey", ...]]:
2340        """Convert atom pair string to AtomKey tuple.
2341
2342        :param ak_str: str
2343            Two atom names separated by ':', e.g. 'N:CA'
2344            Optional position specifier relative to self,
2345            e.g. '-1C:N' for preceding peptide bond.
2346        """
2347        AK = AtomKey
2348        S = self
2349        angle_key2 = []
2350        akstr_list = ak_str.split(":")
2351        lenInput = len(akstr_list)
2352        for a in akstr_list:
2353            m = self.relative_atom_re.match(a)
2354            if m:
2355                if m.group(1) == "-1":
2356                    if 0 < len(S.rprev):
2357                        angle_key2.append(AK(S.rprev[0], m.group(2)))
2358                elif m.group(1) == "1":
2359                    if 0 < len(S.rnext):
2360                        angle_key2.append(AK(S.rnext[0], m.group(2)))
2361                elif m.group(1) == "0":
2362                    angle_key2.append(self.rak(m.group(2)))
2363            else:
2364                angle_key2.append(self.rak(a))
2365        if len(angle_key2) != lenInput:
2366            return None
2367        return tuple(angle_key2)
2368
2369    relative_atom_re = re.compile(r"^(-?[10])([A-Z]+)$")
2370
2371    def _get_angle_for_tuple(
2372        self, angle_key: EKT
2373    ) -> Optional[Union["Hedron", "Dihedron"]]:
2374        len_mkey = len(angle_key)
2375        rval: Optional[Union["Hedron", "Dihedron"]]
2376        if 4 == len_mkey:
2377            rval = self.dihedra.get(cast(DKT, angle_key), None)
2378        elif 3 == len_mkey:
2379            rval = self.hedra.get(cast(HKT, angle_key), None)
2380        else:
2381            return None
2382        return rval
2383
2384    def pick_angle(
2385        self, angle_key: Union[EKT, str]
2386    ) -> Optional[Union["Hedron", "Dihedron"]]:
2387        """Get Hedron or Dihedron for angle_key.
2388
2389        :param angle_key:
2390            - tuple of 3 or 4 AtomKeys
2391            - string of atom names ('CA') separated by :'s
2392            - string of [-1, 0, 1]<atom name> separated by ':'s. -1 is
2393              previous residue, 0 is this residue, 1 is next residue
2394            - psi, phi, omg, omega, chi1, chi2, chi3, chi4, chi5
2395            - tau (N-CA-C angle) see Richardson1981
2396            - except for tuples of AtomKeys, no option to access alternate disordered atoms
2397
2398        Observe that a residue's phi and omega dihedrals, as well as the hedra
2399        comprising them (including the N:Ca:C tau hedron), are stored in the
2400        n-1 di/hedra sets; this is handled here, but may be an issue if accessing
2401        directly.
2402
2403        The following are equivalent (except for sidechains with non-carbon
2404        atoms for chi2)::
2405
2406            ric = r.internal_coord
2407            print(
2408                r,
2409                ric.get_angle("psi"),
2410                ric.get_angle("phi"),
2411                ric.get_angle("omg"),
2412                ric.get_angle("tau"),
2413                ric.get_angle("chi2"),
2414            )
2415            print(
2416                r,
2417                ric.get_angle("N:CA:C:1N"),
2418                ric.get_angle("-1C:N:CA:C"),
2419                ric.get_angle("-1CA:-1C:N:CA"),
2420                ric.get_angle("N:CA:C"),
2421                ric.get_angle("CA:CB:CG:CD"),
2422            )
2423
2424        See ic_data.py for detail of atoms in the enumerated sidechain angles
2425        and the backbone angles which do not span the peptide bond. Using 's'
2426        for current residue ('self') and 'n' for next residue, the spanning
2427        angles are::
2428
2429                (sN, sCA, sC, nN)   # psi
2430                (sCA, sC, nN, nCA)  # omega i+1
2431                (sC, nN, nCA, nC)   # phi i+1
2432                (sCA, sC, nN)
2433                (sC, nN, nCA)
2434                (nN, nCA, nC)       # tau i+1
2435
2436        :return: Matching Hedron, Dihedron, or None.
2437        """
2438        rval: Optional[Union["Hedron", "Dihedron"]] = None
2439        if isinstance(angle_key, tuple):
2440            rval = self._get_angle_for_tuple(angle_key)
2441            if rval is None and self.rprev:
2442                rval = self.rprev[0]._get_angle_for_tuple(angle_key)
2443        elif ":" in angle_key:
2444            angle_key = cast(EKT, self._get_ak_tuple(cast(str, angle_key)))
2445            if angle_key is None:
2446                return None
2447            rval = self._get_angle_for_tuple(angle_key)
2448            if rval is None and self.rprev:
2449                rval = self.rprev[0]._get_angle_for_tuple(angle_key)
2450        elif "psi" == angle_key:
2451            if 0 == len(self.rnext):
2452                return None
2453            rn = self.rnext[0]
2454            sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C")
2455            nN = rn.rak("N")
2456            rval = self.dihedra.get((sN, sCA, sC, nN), None)
2457        elif "phi" == angle_key:
2458            if 0 == len(self.rprev):
2459                return None
2460            rp = self.rprev[0]
2461            pC, sN, sCA = rp.rak("C"), self.rak("N"), self.rak("CA")
2462            sC = self.rak("C")
2463            rval = rp.dihedra.get((pC, sN, sCA, sC), None)
2464        elif "omg" == angle_key or "omega" == angle_key:
2465            if 0 == len(self.rprev):
2466                return None
2467            rp = self.rprev[0]
2468            pCA, pC, sN = rp.rak("CA"), rp.rak("C"), self.rak("N")
2469            sCA = self.rak("CA")
2470            rval = rp.dihedra.get((pCA, pC, sN, sCA), None)
2471        elif "tau" == angle_key:
2472            sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C")
2473            rval = self.hedra.get((sN, sCA, sC), None)
2474            if rval is None and 0 != len(self.rprev):
2475                rp = self.rprev[0]  # tau in prev residue for all but first
2476                rval = rp.hedra.get((sN, sCA, sC), None)
2477        elif angle_key.startswith("chi"):
2478            sclist = ic_data_sidechains.get(self.lc, None)
2479            if sclist is None:
2480                return None
2481            for akl in sclist:
2482                if 5 == len(akl):
2483                    if akl[4] == angle_key:
2484                        klst = [self.rak(a) for a in akl[0:4]]
2485                        tklst = cast(DKT, tuple(klst))
2486                        rval = self.dihedra.get(tklst, None)
2487
2488        return rval
2489
2490    def get_angle(self, angle_key: Union[EKT, str]) -> Optional[float]:
2491        """Get dihedron or hedron angle for specified key.
2492
2493        See pick_angle() for key specifications.
2494        """
2495        edron = self.pick_angle(angle_key)
2496        if edron:
2497            return edron.angle
2498        return None
2499
2500    def set_angle(self, angle_key: Union[EKT, str], v: float):
2501        """Set dihedron or hedron angle for specified key.
2502
2503        See pick_angle() for key specifications.
2504        """
2505        rval = self.pick_angle(angle_key)
2506        if rval is not None:
2507            rval.angle = v
2508
2509    def pick_length(
2510        self, ak_spec: Union[str, BKT]
2511    ) -> Tuple[Optional[List["Hedron"]], Optional[BKT]]:
2512        """Get list of hedra containing specified atom pair.
2513
2514        :param ak_spec:
2515            - tuple of two AtomKeys
2516            - string: two atom names separated by ':', e.g. 'N:CA' with
2517              optional position specifier relative to self, e.g. '-1C:N' for
2518              preceding peptide bond.
2519
2520        The following are equivalent::
2521
2522            ric = r.internal_coord
2523            print(
2524                r,
2525                ric.get_length("0C:1N"),
2526            )
2527            print(
2528                r,
2529                None
2530                if not ric.rnext
2531                else ric.get_length((ric.rak("C"), ric.rnext[0].rak("N"))),
2532            )
2533
2534        :return: list of hedra containing specified atom pair, tuple of atom keys
2535        """
2536        rlst: List[Hedron] = []
2537        # if ":" in ak_spec:
2538        if isinstance(ak_spec, str):
2539            ak_spec = cast(BKT, self._get_ak_tuple(ak_spec))
2540        if ak_spec is None:
2541            return None, None
2542        for hed_key, hed_val in self.hedra.items():
2543            if all(ak in hed_key for ak in ak_spec):
2544                rlst.append(hed_val)
2545        return rlst, ak_spec
2546
2547    def get_length(self, ak_spec: Union[str, BKT]) -> Optional[float]:
2548        """Get bond length for specified atom pair.
2549
2550        See pick_length() for ak_spec.
2551        """
2552        hed_lst, ak_spec2 = self.pick_length(ak_spec)
2553        if hed_lst is None or ak_spec2 is None:
2554            return None
2555
2556        for hed in hed_lst:
2557            val = hed.get_length(ak_spec2)
2558            if val is not None:
2559                return val
2560        return None
2561
2562    def set_length(self, ak_spec: Union[str, BKT], val: float) -> None:
2563        """Set bond length for specified atom pair.
2564
2565        See pick_length() for ak_spec.
2566        """
2567        hed_lst, ak_spec2 = self.pick_length(ak_spec)
2568        if hed_lst is not None and ak_spec2 is not None:
2569            for hed in hed_lst:
2570                hed.set_length(ak_spec2, val)
2571
2572    def applyMtx(self, mtx: numpy.array) -> None:
2573        """Apply matrix to atom_coords for this IC_Residue."""
2574        for ak, ac in self.atom_coords.items():
2575            # self.atom_coords[ak] = mtx @ ac
2576            self.atom_coords[ak] = mtx.dot(ac)
2577
2578
2579class Edron:
2580    """Base class for Hedron and Dihedron classes.
2581
2582    Supports rich comparison based on lists of AtomKeys.
2583
2584    Attributes
2585    ----------
2586    aks: tuple
2587        3 (hedron) or 4 (dihedron) AtomKeys defining this di/hedron
2588    id: str
2589        ':'-joined string of AtomKeys for this di/hedron
2590    needs_update: bool
2591        indicates di/hedron local atom_coords do NOT reflect current di/hedron
2592        angle and length values in hedron local coordinate space
2593    dh_class: str
2594        sequence of atoms (no position or residue) comprising di/hedron
2595        for statistics
2596    rdh_class: str
2597        sequence of residue, atoms comprising di/hedron for statistics
2598    edron_re: compiled regex (Class Attribute)
2599        A compiled regular expression matching string IDs for Hedron
2600        and Dihedron objects
2601    cic: IC_Chain reference
2602        Chain internal coords object containing this hedron
2603
2604    Methods
2605    -------
2606    gen_key([AtomKey, ...] or AtomKey, ...) (Static Method)
2607        generate a ':'-joined string of AtomKey Ids
2608    gen_acs(atom_coords)
2609        generate tuple of atom coords for keys in self.aks
2610    is_backbone()
2611        Return True if all aks atoms are N, Ca, C or O
2612
2613    """
2614
2615    # regular expresion to capture hedron and dihedron specifications, as in
2616    #  .pic files
2617    edron_re = re.compile(
2618        # pdbid and chain id
2619        r"^(?P<pdbid>\w+)?\s(?P<chn>[\w|\s])?\s"
2620        # 3 atom specifiers for hedron
2621        r"(?P<a1>[\w\-\.]+):(?P<a2>[\w\-\.]+):(?P<a3>[\w\-\.]+)"
2622        # 4th atom specifier for dihedron
2623        r"(:(?P<a4>[\w\-\.]+))?"
2624        r"\s+"
2625        # len-angle-len for hedron
2626        r"(((?P<len12>\S+)\s+(?P<angle>\S+)\s+(?P<len23>\S+)\s*$)|"
2627        # dihedral angle for dihedron
2628        r"((?P<dihedral>\S+)\s*$))"
2629    )
2630
2631    @staticmethod
2632    def gen_key(lst: Union[List[str], List["AtomKey"]]) -> str:
2633        """Generate string of ':'-joined AtomKey strings from input.
2634
2635        :param lst: list of AtomKey objects or id strings
2636        """
2637        if isinstance(lst[0], str):
2638            lst = cast(List[str], lst)
2639            return ":".join(lst)
2640        else:
2641            lst = cast(List[AtomKey], lst)
2642            return ":".join(ak.id for ak in lst)
2643
2644    def __init__(self, *args: Union[List["AtomKey"], EKT], **kwargs: str) -> None:
2645        """Initialize Edron with sequence of AtomKeys.
2646
2647        Acceptable input:
2648
2649            [ AtomKey, ... ]  : list of AtomKeys
2650            AtomKey, ...      : sequence of AtomKeys as args
2651            {'a1': str, 'a2': str, ... }  : dict of AtomKeys as 'a1', 'a2' ...
2652        """
2653        aks: List[AtomKey] = []
2654        for arg in args:
2655            if isinstance(arg, list):
2656                aks = arg
2657            elif isinstance(arg, tuple):
2658                aks = list(arg)
2659            else:
2660                if arg is not None:
2661                    aks.append(arg)
2662        if [] == aks and all(k in kwargs for k in ("a1", "a2", "a3")):
2663            aks = [AtomKey(kwargs["a1"]), AtomKey(kwargs["a2"]), AtomKey(kwargs["a3"])]
2664            if "a4" in kwargs and kwargs["a4"] is not None:
2665                aks.append(AtomKey(kwargs["a4"]))
2666
2667        # if args are atom key strings instead of AtomKeys
2668        # for i in range(len(aks)):
2669        #    if not isinstance(aks[i], AtomKey):
2670        #        aks[i] = AtomKey(aks[i])
2671
2672        self.aks = tuple(aks)
2673        self.id = Edron.gen_key(aks)
2674        self._hash = hash(self.aks)
2675
2676        # flag indicating that atom coordinates are up to date
2677        # (do not need to be recalculated from angle and or length values)
2678        self.needs_update = True
2679
2680        # no residue or position, just atoms
2681        self.dh_class = ""
2682        # same but residue specific
2683        self.rdh_class = ""
2684
2685        # IC_Chain which contains this di/hedron
2686        self.cic: IC_Chain
2687
2688        atmNdx = AtomKey.fields.atm
2689        resNdx = AtomKey.fields.resname
2690        for ak in aks:
2691            akl = ak.akl
2692            self.dh_class += akl[atmNdx]
2693            self.rdh_class += akl[resNdx] + akl[atmNdx]
2694
2695    def gen_acs(
2696        self, atom_coords: Dict["AtomKey", numpy.array]
2697    ) -> Tuple[numpy.array, ...]:
2698        """Generate tuple of atom coord arrays for keys in self.aks.
2699
2700        :param atom_coords: AtomKey dict of atom coords for residue
2701        :raises: MissingAtomError any atoms in self.aks missing coordinates
2702        """
2703        aks = self.aks
2704        acs: List[numpy.array] = []
2705        estr = ""
2706        for ak in aks:
2707            ac = atom_coords[ak]
2708            if ac is None:
2709                estr += str(ak) + " "
2710            else:
2711                acs.append(ac)
2712        if estr != "":
2713            raise MissingAtomError("%s missing coordinates for %s" % (self, estr))
2714        return tuple(acs)
2715
2716    def is_backbone(self) -> bool:
2717        """Report True for contains only N, C, CA, O, H atoms."""
2718        atmNdx = AtomKey.fields.atm
2719        if all(
2720            atm in ("N", "C", "CA", "O", "H")
2721            for atm in (ak.akl[atmNdx] for ak in self.aks)
2722        ):
2723            return True
2724        return False
2725
2726    def __repr__(self) -> str:
2727        """Tuple of AtomKeys is default repr string."""
2728        return str(self.aks)
2729
2730    def __hash__(self) -> int:
2731        """Hash calculated at init from aks tuple."""
2732        return self._hash
2733
2734    def _cmp(self, other: "Edron") -> Union[Tuple["AtomKey", "AtomKey"], bool]:
2735        """Comparison function ranking self vs. other; False on equal."""
2736        for ak_s, ak_o in zip(self.aks, other.aks):
2737            if ak_s != ak_o:
2738                return ak_s, ak_o
2739        return False
2740
2741    def __eq__(self, other: object) -> bool:
2742        """Test for equality."""
2743        if not isinstance(other, type(self)):
2744            return NotImplemented
2745        return self.id == other.id
2746
2747    def __ne__(self, other: object) -> bool:
2748        """Test for inequality."""
2749        if not isinstance(other, type(self)):
2750            return NotImplemented
2751        return self.id != other.id
2752
2753    def __gt__(self, other: object) -> bool:
2754        """Test greater than."""
2755        if not isinstance(other, type(self)):
2756            return NotImplemented
2757        rslt = self._cmp(other)
2758        if rslt:
2759            rslt = cast(Tuple[AtomKey, AtomKey], rslt)
2760            return rslt[0] > rslt[1]
2761        return False
2762
2763    def __ge__(self, other: object) -> bool:
2764        """Test greater or equal."""
2765        if not isinstance(other, type(self)):
2766            return NotImplemented
2767        rslt = self._cmp(other)
2768        if rslt:
2769            rslt = cast(Tuple[AtomKey, AtomKey], rslt)
2770            return rslt[0] >= rslt[1]
2771        return True
2772
2773    def __lt__(self, other: object) -> bool:
2774        """Test less than."""
2775        if not isinstance(other, type(self)):
2776            return NotImplemented
2777        rslt = self._cmp(other)
2778        if rslt:
2779            rslt = cast(Tuple[AtomKey, AtomKey], rslt)
2780            return rslt[0] < rslt[1]
2781        return False
2782
2783    def __le__(self, other: object) -> bool:
2784        """Test less or equal."""
2785        if not isinstance(other, type(self)):
2786            return NotImplemented
2787        rslt = self._cmp(other)
2788        if rslt:
2789            rslt = cast(Tuple[AtomKey, AtomKey], rslt)
2790            return rslt[0] <= rslt[1]
2791        return True
2792
2793
2794class Hedron(Edron):
2795    """Class to represent three joined atoms forming a plane.
2796
2797    Contains atom coordinates in local coordinate space, central atom
2798    at origin.  Stored in two orientations, with the 3rd (forward) or
2799    first (reversed) atom on the +Z axis.
2800
2801    Attributes
2802    ----------
2803    lal: numpy array of len12, angle, len23
2804        len12 = distance between 1st and 2nd atom
2805        angle = angle (degrees) formed by 3 atoms
2806        len23 = distance between 2nd and 3rd atoms
2807
2808    atoms: 3x4 numpy arrray (view on chain array)
2809        3 homogeneous atoms comprising hedron, 1st on XZ, 2nd at origin, 3rd on +Z
2810    atomsR: 3x4 numpy array (view on chain array)
2811        atoms reversed, 1st on +Z, 2nd at origin, 3rd on XZ plane
2812
2813    Methods
2814    -------
2815    get_length()
2816        get bond length for specified atom pair
2817    set_length()
2818        set bond length for specified atom pair
2819    angle(), len12(), len23()
2820        gettters and setters for relevant attributes (angle in degrees)
2821    """
2822
2823    def __init__(self, *args: Union[List["AtomKey"], HKT], **kwargs: str) -> None:
2824        """Initialize Hedron with sequence of AtomKeys, kwargs.
2825
2826        Acceptable input:
2827            As for Edron, plus optional 'len12', 'angle', 'len23'
2828            keyworded values.
2829        """
2830        super().__init__(*args, **kwargs)
2831
2832        # print('initialising', self.id)
2833
2834        # 3 matrices specifying hedron space coordinates of constituent atoms,
2835        # initially atom3 on +Z axis
2836        self.atoms: HACS
2837        # 3 matrices, hedron space coordinates, reversed order
2838        # initially atom1 on +Z axis
2839        self.atomsR: HACS
2840
2841        if "len12" in kwargs:
2842            self.lal = numpy.array(
2843                (
2844                    float(kwargs["len12"]),
2845                    float(kwargs["angle"]),
2846                    float(kwargs["len23"]),
2847                )
2848            )
2849        else:
2850            self.lal = numpy.zeros(3)
2851
2852        # else:
2853        #    self.len12 = None
2854        #    self.angle = None
2855        #    self.len23 = None
2856
2857        # print(self)
2858
2859    def __repr__(self) -> str:
2860        """Print string for Hedron object."""
2861        return f"3-{self.id} {self.rdh_class} {str(self.lal[0])} {str(self.lal[1])} {str(self.lal[2])}"
2862
2863    @property
2864    def angle(self) -> float:
2865        """Get this hedron angle."""
2866        try:
2867            return self.lal[1]  # _angle
2868        except AttributeError:
2869            return 0.0
2870
2871    @angle.setter
2872    def angle(self, angle_deg) -> None:
2873        """Set this hedron angle; sets needs_update."""
2874        self.lal[1] = angle_deg  # view on chain numpy arrays
2875        self.cic.hAtoms_needs_update[self.cic.hedraNdx[self.aks]] = True
2876
2877    @property
2878    def len12(self):
2879        """Get first length for Hedron."""
2880        try:
2881            return self.lal[0]  # _len12
2882        except AttributeError:
2883            return 0.0
2884
2885    @len12.setter
2886    def len12(self, len):
2887        """Set first length for Hedron; sets needs_update."""
2888        self.lal[0]  # _len12 = len
2889        self.cic.hAtoms_needs_update[self.cic.hedraNdx[self.aks]] = True
2890
2891    @property
2892    def len23(self) -> float:
2893        """Get second length for Hedron."""
2894        try:
2895            return self.lal[2]  # _len23
2896        except AttributeError:
2897            return 0.0
2898
2899    @len23.setter
2900    def len23(self, len):
2901        """Set second length for Hedron; sets needs_update."""
2902        self.lal[2] = len
2903        self.cic.hAtoms_needs_update[self.cic.hedraNdx[self.aks]] = True
2904
2905    def get_length(self, ak_tpl: BKT) -> Optional[float]:
2906        """Get bond length for specified atom pair.
2907
2908        :param ak_tpl: tuple of AtomKeys
2909            pair of atoms in this Hedron
2910        """
2911        if 2 > len(ak_tpl):
2912            return None
2913        if all(ak in self.aks[:2] for ak in ak_tpl):
2914            return self.lal[0]  # len12
2915        if all(ak in self.aks[1:] for ak in ak_tpl):
2916            return self.lal[2]  # len23
2917        return None
2918
2919    def set_length(self, ak_tpl: BKT, newLength: float):
2920        """Set bond length for specified atom pair; sets needs_update.
2921
2922        :param ak_tpl: tuple of AtomKeys
2923            pair of atoms in this Hedron
2924        """
2925        if 2 > len(ak_tpl):
2926            raise TypeError("Require exactly 2 AtomKeys: %s" % (str(ak_tpl)))
2927        elif all(ak in self.aks[:2] for ak in ak_tpl):
2928            self.lal[0] = newLength  # len12
2929        elif all(ak in self.aks[1:] for ak in ak_tpl):
2930            self.lal[2] = newLength  # len23
2931        else:
2932            raise TypeError("%s not found in %s" % (str(ak_tpl), self))
2933
2934        self.cic.hAtoms_needs_update[self.cic.hedraNdx[self.aks]] = True
2935
2936
2937class Dihedron(Edron):
2938    """Class to represent four joined atoms forming a dihedral angle.
2939
2940    Attributes
2941    ----------
2942    angle: float
2943        Measurement or specification of dihedral angle in degrees
2944    hedron1, hedron2: Hedron object references
2945        The two hedra which form the dihedral angle
2946    h1key, h2key: tuples of AtomKeys
2947        Hash keys for hedron1 and hedron2
2948    id3,id32: tuples of AtomKeys
2949        First 3 and second 3 atoms comprising dihedron; hxkey orders may differ
2950    initial_coords: tuple[4] of numpy arrays [4]
2951        Local atom coords for 4 atoms, [0] on XZ plane, [1] at origin,
2952        [2] on +Z, [3] rotated by dihedral
2953    a4_pre_rotation: numpy array [4]
2954        4th atom of dihedral aligned to XZ plane (angle not applied)
2955    ic_residue: IC_Residue object reference
2956        IC_Residue object containing this dihedral
2957    reverse: bool
2958        Indicates order of atoms in dihedron is reversed from order of atoms
2959        in hedra (configured by set_hedra())
2960    cst, rcst: numpy array [4][4]
2961        transforms to and from coordinate space defined by first hedron.
2962        set by IC_Residue.assemble().  defined by id3 order NOT h1key order
2963        (atoms may be reversed between these two)
2964
2965    Methods
2966    -------
2967    set_hedra()
2968        work out hedra keys and orientation for this dihedron
2969    angle()
2970        getter/setter for dihdral angle in degrees
2971
2972    """
2973
2974    def __init__(self, *args: Union[List["AtomKey"], DKT], **kwargs: str) -> None:
2975        """Initialize Dihedron with sequence of AtomKeys and optional dihedral angle.
2976
2977        Acceptable input:
2978            As for Edron, plus optional 'dihedral' keyworded angle value.
2979        """
2980        super().__init__(*args, **kwargs)
2981
2982        # hedra making up this dihedron; set by self:set_hedra()
2983        self.hedron1: Hedron  # = None
2984        self.hedron2: Hedron  # = None
2985
2986        self.h1key: HKT  # = None
2987        self.h2key: HKT  # = None
2988
2989        self.id3: HKT = cast(HKT, tuple(self.aks[0:3]))
2990        self.id32: HKT = cast(HKT, tuple(self.aks[1:4]))
2991
2992        # 4 matrices specifying hedron space coordinates of constituent atoms,
2993        # in this space atom 3 is on on +Z axis
2994        # see coord_space()
2995        self.initial_coords: DACS
2996        self.a4_pre_rotation: numpy.array
2997
2998        # IC_Residue object which includes this dihedron;
2999        # set by Residue:linkDihedra()
3000        self.ic_residue: IC_Residue
3001        # order of atoms in dihedron is reversed from order of atoms in hedra
3002        self.reverse = False
3003
3004        # coordinate space transform matrices
3005        # defined by id3 order NOT h1key order (may be reversed)
3006        # self.cst = None  # protein coords to 1st hedron coord space
3007        # self.rcst = None  # reverse = 1st hedron coords back to protein coords
3008
3009        if "dihedral" in kwargs:
3010            self.angle = float(kwargs["dihedral"])
3011
3012    def __repr__(self) -> str:
3013        """Print string for Dihedron object."""
3014        return f"4-{str(self.id)} {self.rdh_class} {str(self.angle)} {str(self.ic_residue)}"
3015
3016    @staticmethod
3017    def _get_hedron(ic_res: IC_Residue, id3: HKT) -> Optional[Hedron]:
3018        """Find specified hedron on this residue or its adjacent neighbors."""
3019        hedron = ic_res.hedra.get(id3, None)
3020        if not hedron and 0 < len(ic_res.rprev):
3021            for rp in ic_res.rprev:
3022                hedron = rp.hedra.get(id3, None)
3023                if hedron is not None:
3024                    break
3025        if not hedron and 0 < len(ic_res.rnext):
3026            for rn in ic_res.rnext:
3027                hedron = rn.hedra.get(id3, None)
3028                if hedron is not None:
3029                    break
3030        return hedron
3031
3032    def set_hedra(self) -> Tuple[bool, Hedron, Hedron]:
3033        """Work out hedra keys and set rev flag."""
3034        try:
3035            return self.rev, self.hedron1, self.hedron2
3036        except AttributeError:
3037            pass
3038
3039        rev = False
3040        res = self.ic_residue
3041        h1key = self.id3
3042        hedron1 = Dihedron._get_hedron(res, h1key)
3043        if not hedron1:
3044            rev = True
3045            h1key = cast(HKT, tuple(self.aks[2::-1]))
3046            hedron1 = Dihedron._get_hedron(res, h1key)
3047            h2key = cast(HKT, tuple(self.aks[3:0:-1]))
3048        else:
3049            h2key = self.id32
3050
3051        if not hedron1:
3052            raise HedronMatchError(
3053                "can't find 1st hedron for key %s dihedron %s" % (h1key, self)
3054            )
3055
3056        hedron2 = Dihedron._get_hedron(res, h2key)
3057
3058        if not hedron2:
3059            raise HedronMatchError(
3060                "can't find 2nd hedron for key %s dihedron %s" % (h2key, self)
3061            )
3062
3063        self.hedron1 = hedron1
3064        self.h1key = h1key
3065        self.hedron2 = hedron2
3066        self.h2key = h2key
3067
3068        self.reverse = rev
3069
3070        return rev, hedron1, hedron2
3071
3072    @property
3073    def angle(self) -> float:
3074        """Get dihedral angle."""
3075        try:
3076            return self.cic.dihedraIC[self.cic.dihedraNdx[self.aks]]
3077        except AttributeError:
3078            try:
3079                return self._dihedral
3080            except AttributeError:
3081                return 360.0  # error value without type hint hassles
3082
3083    @angle.setter
3084    def angle(self, dangle_deg: float) -> None:
3085        """Save new dihedral angle; sets needs_update.
3086
3087        faster to modify IC_Chain level arrays directly.
3088
3089        N.B. dihedron (i-1)C-N-CA-CB is ignored if O exists.
3090        C-beta is by default placed using O-C-CA-CB, but O is missing
3091        in some PDB file residues, which means the sidechain cannot be
3092        placed.  The alternate CB path (i-1)C-N-CA-CB is provided to
3093        circumvent this, but if this is needed then it must be adjusted in
3094        conjunction with PHI ((i-1)C-N-CA-C) as they overlap.
3095
3096        :param dangle_deg: float new dihedral angle in degrees
3097        """
3098        self._dihedral = dangle_deg
3099        self.needs_update = True
3100        try:
3101            dndx = self.cic.dihedraNdx[self.aks]
3102            self.cic.dihedraIC[dndx] = dangle_deg
3103            self.cic.dihedraICr[dndx] = numpy.deg2rad(dangle_deg)
3104            self.cic.dAtoms_needs_update[dndx] = True
3105        except AttributeError:
3106            pass
3107
3108
3109class AtomKey:
3110    """Class for dict keys to reference atom coordinates.
3111
3112    AtomKeys capture residue and disorder information together, and
3113    provide a no-whitespace string key for .pic files.
3114
3115    Supports rich comparison and multiple ways to instantiate.
3116
3117    AtomKeys contain:
3118     residue position, insertion code, 1 or 3 char residue name,
3119     atom name, altloc, and occupancy
3120
3121    Attributes
3122    ----------
3123    akl: tuple
3124        All six fields of AtomKey
3125    fieldNames: tuple (Class Attribute)
3126        Mapping of key index positions to names
3127    fields: namedtuple (Class Attribute)
3128        Mapping of field names to index positions
3129    id: str
3130        '_'-joined AtomKey fields, excluding 'None' fields
3131    atom_re: compiled regex (Class Attribute)
3132        A compiled regular expression matching the string form of the key
3133    endnum_re: compiled regex (Class Attribute)
3134        A compiled regular expresion capturing digits at end of a string
3135    d2h: bool (Class Attribute)
3136        Convert D atoms to H on input; must also modify IC_Residue.accept_atoms
3137    missing: bool default False
3138        AtomKey __init__'d from string is probably missing, set this flag to
3139        note the issue (not set here)
3140
3141    Methods
3142    -------
3143    altloc_match(other)
3144        Returns True if this AtomKey matches other AtomKey excluding altloc
3145        and occupancy fields
3146
3147    """
3148
3149    atom_re = re.compile(
3150        r"^(?P<respos>-?\d+)(?P<icode>[A-Za-z])?"
3151        r"_(?P<resname>[a-zA-Z]+)_(?P<atm>[A-Za-z0-9]+)"
3152        r"(?:_(?P<altloc>\w))?(?:_(?P<occ>-?\d\.\d?\d?))?$"
3153    )
3154
3155    endnum_re = re.compile(r"\D+(\d+)$")
3156
3157    # PDB altLoc = Character = [\w ] (any non-ctrl ASCII incl space)
3158    # PDB iCode = AChar = [A-Za-z]
3159
3160    fieldNames = ("respos", "icode", "resname", "atm", "altloc", "occ")
3161    fieldsDef = namedtuple(
3162        "fieldsDef", ["respos", "icode", "resname", "atm", "altloc", "occ"]
3163    )
3164    fields = fieldsDef(0, 1, 2, 3, 4, 5)
3165
3166    d2h = False  # convert D Deuterium to H Hydrogen on input
3167    # icd = {"icr": 0, "atm": 0, "lst": 0, "dct": 0, "_": 0, "else": 0}
3168
3169    def __init__(
3170        self, *args: Union[IC_Residue, Atom, List, Dict, str], **kwargs: str
3171    ) -> None:
3172        """Initialize AtomKey with residue and atom data.
3173
3174        Examples of acceptable input:
3175            (<IC_Residue>, 'CA', ...)    : IC_Residue with atom info
3176            (<IC_Residue>, <Atom>)       : IC_Residue with Biopython Atom
3177            ([52, None, 'G', 'CA', ...])  : list of ordered data fields
3178            (52, None, 'G', 'CA', ...)    : multiple ordered arguments
3179            ({respos: 52, icode: None, atm: 'CA', ...}) : dict with fieldNames
3180            (respos: 52, icode: None, atm: 'CA', ...) : kwargs with fieldNames
3181            52_G_CA, 52B_G_CA, 52_G_CA_0.33, 52_G_CA_B_0.33  : id strings
3182        """
3183        akl: List[Optional[str]] = []
3184        # self.id = None
3185        for arg in args:
3186            if isinstance(arg, str):
3187                if "_" in arg:
3188                    # AtomKey.icd["_"] += 1
3189                    # got atom key string, recurse with regex parse
3190                    m = self.atom_re.match(arg)
3191                    if m is not None:
3192                        if akl != []:  # [] != akl:
3193                            raise Exception(
3194                                "Atom Key init full key not first argument: " + arg
3195                            )
3196                        # for fn in AtomKey.fieldNames:
3197                        #    akl.append(m.group(fn))
3198                        # akl = [m.group(fn) for fn in AtomKey.fieldNames]
3199                        akl = list(map(m.group, AtomKey.fieldNames))
3200                else:
3201                    # AtomKey.icd["else"] += 1
3202                    akl.append(arg)
3203
3204            elif isinstance(arg, IC_Residue):
3205                # AtomKey.icd["icr"] += 1
3206                if akl != []:
3207                    raise Exception("Atom Key init Residue not first argument")
3208                akl = list(arg.rbase)
3209            elif isinstance(arg, Atom):
3210                # AtomKey.icd["atm"] += 1
3211                if 3 != len(akl):
3212                    raise Exception("Atom Key init Atom before Residue info")
3213                akl.append(arg.name)
3214                altloc = arg.altloc
3215                akl.append(altloc if altloc != " " else None)
3216                occ = float(arg.occupancy)
3217                akl.append(str(occ) if occ != 1.00 else None)
3218            elif isinstance(arg, list):
3219                # AtomKey.icd["lst"] += 1
3220                akl += arg
3221            elif isinstance(arg, dict):
3222                # AtomKey.icd["dct"] += 1
3223                for k in AtomKey.fieldNames:
3224                    akl.append(arg.get(k, None))
3225            else:
3226                raise Exception("Atom Key init not recognised")
3227
3228        # process kwargs, initialize occ and altloc to None
3229        # if not specified above
3230        # for i in range(6):
3231        #    if len(akl) <= i:
3232        #        fld = kwargs.get(AtomKey.fieldNames[i])
3233        #        if fld is not None:
3234        #            akl.append(fld)
3235
3236        for i in range(len(akl), 6):
3237            if len(akl) <= i:
3238                fld = kwargs.get(AtomKey.fieldNames[i])
3239                if fld is not None:
3240                    akl.append(fld)
3241
3242        # tweak local akl to generate id string
3243        if isinstance(akl[0], int):
3244            akl[0] = str(akl[0])  # numeric residue position to string
3245
3246        # occNdx = AtomKey.fields.occ
3247        # if akl[occNdx] is not None:
3248        #    akl[occNdx] = str(akl[occNdx])  # numeric occupancy to string
3249
3250        if self.d2h:
3251            atmNdx = AtomKey.fields.atm
3252            if akl[atmNdx][0] == "D":
3253                akl[atmNdx] = re.sub("D", "H", akl[atmNdx], count=1)
3254
3255            # unused option:
3256            # (self.respos, self.icode, self.resname, self.atm, self.occ,
3257            #    self.altloc) = akl
3258
3259        self.id = "_".join(
3260            [
3261                "".join(filter(None, akl[:2])),
3262                str(akl[2]),  # exclude None
3263                "_".join(filter(None, akl[3:])),
3264            ]
3265        )
3266
3267        # while len(akl) < 6:
3268        #    akl.append(None)  # add no altloc, occ if not specified
3269        akl += [None] * (6 - len(akl))
3270
3271        self.akl = tuple(akl)
3272        self._hash = hash(self.akl)
3273        self.missing = False
3274
3275    def __repr__(self) -> str:
3276        """Repr string from id."""
3277        return self.id
3278
3279    def __hash__(self) -> int:
3280        """Hash calculated at init from akl tuple."""
3281        return self._hash
3282
3283    _backbone_sort_keys = {"N": 0, "CA": 1, "C": 2, "O": 3}
3284
3285    _sidechain_sort_keys = {
3286        "CB": 1,
3287        "CG": 2,
3288        "CG1": 2,
3289        "OG": 2,
3290        "OG1": 2,
3291        "SG": 2,
3292        "CG2": 3,
3293        "CD": 4,
3294        "CD1": 4,
3295        "SD": 4,
3296        "OD1": 4,
3297        "ND1": 4,
3298        "CD2": 5,
3299        "ND2": 5,
3300        "OD2": 5,
3301        "CE": 6,
3302        "NE": 6,
3303        "CE1": 6,
3304        "OE1": 6,
3305        "NE1": 6,
3306        "CE2": 7,
3307        "OE2": 7,
3308        "NE2": 7,
3309        "CE3": 8,
3310        "CZ": 9,
3311        "CZ2": 9,
3312        "NZ": 9,
3313        "NH1": 10,
3314        "OH": 10,
3315        "CZ3": 10,
3316        "CH2": 11,
3317        "NH2": 11,
3318        "OXT": 12,
3319        "H": 13,
3320    }
3321
3322    _greek_sort_keys = {"A": 0, "B": 1, "G": 2, "D": 3, "E": 4, "Z": 5, "H": 6}
3323
3324    def altloc_match(self, other: "AtomKey") -> bool:
3325        """Test AtomKey match other discounting occupancy and altloc."""
3326        if isinstance(other, type(self)):
3327            return self.akl[:4] == other.akl[:4]
3328        else:
3329            return NotImplemented
3330
3331    def _cmp(self, other: "AtomKey") -> Tuple[int, int]:
3332        """Comparison function ranking self vs. other."""
3333        akl_s = self.akl
3334        akl_o = other.akl
3335        atmNdx = AtomKey.fields.atm
3336        occNdx = AtomKey.fields.occ
3337        rsNdx = AtomKey.fields.respos
3338        # rsnNdx = AtomKey.fields.resname
3339        for i in range(6):
3340            s, o = akl_s[i], akl_o[i]
3341            if s != o:
3342                # insert_code, altloc can be None, deal with first
3343                if s is None and o is not None:
3344                    # no insert code before named insert code
3345                    return 0, 1
3346                elif o is None and s is not None:
3347                    return 1, 0
3348                # now we know s, o not None
3349                s, o = cast(str, s), cast(str, o)
3350
3351                if atmNdx != i:
3352                    # only sorting complications at atom level, occ.
3353                    # otherwise respos, insertion code will trigger
3354                    # before residue name
3355                    if occNdx == i:
3356                        oi = int(float(s) * 100)
3357                        si = int(float(o) * 100)
3358                        return si, oi  # swap so higher occupancy comes first
3359                    elif rsNdx == i:
3360                        return int(s), int(o)
3361                    else:  # resname or altloc
3362                        return ord(s), ord(o)
3363
3364                # atom names from here
3365                # backbone atoms before sidechain atoms
3366
3367                sb = self._backbone_sort_keys.get(s, None)
3368                ob = self._backbone_sort_keys.get(o, None)
3369                if sb is not None and ob is not None:
3370                    return sb, ob
3371                elif sb is not None and ob is None:
3372                    return 0, 1
3373                elif sb is None and ob is not None:
3374                    return 1, 0
3375                # finished backbone and backbone vs. sidechain atoms
3376
3377                # sidechain vs sidechain, sidechain vs H
3378                ss = self._sidechain_sort_keys.get(s, None)
3379                os = self._sidechain_sort_keys.get(o, None)
3380                if ss is not None and os is not None:
3381                    return ss, os
3382                elif ss is not None and os is None:
3383                    return 0, 1
3384                elif ss is None and os is not None:
3385                    return 1, 0
3386
3387                # amide single 'H' captured above in sidechain sort
3388                # now 'complex'' hydrogens after sidechain
3389                s0, s1, o0, o1 = s[0], s[1], o[0], o[1]
3390                s1d, o1d = s1.isdigit(), o1.isdigit()
3391                # if "H" == s0 == o0: # breaks cython
3392                if ("H" == s0) and ("H" == o0):
3393
3394                    if (s1 == o1) or (s1d and o1d):
3395                        enmS = self.endnum_re.findall(s)
3396                        enmO = self.endnum_re.findall(o)
3397                        if (enmS != []) and (enmO != []):
3398                            return int(enmS[0]), int(enmO[0])
3399                        elif enmS == []:
3400                            return 0, 1
3401                        else:
3402                            return 1, 0
3403                    elif s1d:
3404                        return 0, 1
3405                    elif o1d:
3406                        return 1, 0
3407                    else:
3408                        return (self._greek_sort_keys[s1], self._greek_sort_keys[o1])
3409                return int(s), int(o)  # raise exception?
3410        return 1, 1
3411
3412    def __ne__(self, other: object) -> bool:
3413        """Test for inequality."""
3414        if isinstance(other, type(self)):
3415            return self.akl != other.akl
3416        else:
3417            return NotImplemented
3418
3419    def __eq__(self, other: object) -> bool:  # type: ignore
3420        """Test for equality."""
3421        if isinstance(other, type(self)):
3422            return self.akl == other.akl
3423        else:
3424            return NotImplemented
3425
3426    def __gt__(self, other: object) -> bool:
3427        """Test greater than."""
3428        if isinstance(other, type(self)):
3429            rslt = self._cmp(other)
3430            return rslt[0] > rslt[1]
3431        else:
3432            return NotImplemented
3433
3434    def __ge__(self, other: object) -> bool:
3435        """Test greater or equal."""
3436        if isinstance(other, type(self)):
3437            rslt = self._cmp(other)
3438            return rslt[0] >= rslt[1]
3439        else:
3440            return NotImplemented
3441
3442    def __lt__(self, other: object) -> bool:
3443        """Test less than."""
3444        if isinstance(other, type(self)):
3445            rslt = self._cmp(other)
3446            return rslt[0] < rslt[1]
3447        else:
3448            return NotImplemented
3449
3450    def __le__(self, other: object) -> bool:
3451        """Test less or equal."""
3452        if isinstance(other, type(self)):
3453            rslt = self._cmp(other)
3454            return rslt[0] <= rslt[1]
3455        else:
3456            return NotImplemented
3457
3458
3459def set_accuracy_95(num: float) -> float:
3460    """Reduce floating point accuracy to 9.5 (xxxx.xxxxx).
3461
3462    Used by Hedron and Dihedron classes writing PIC and SCAD files.
3463    :param float num: input number
3464    :returns: float with specified accuracy
3465    """
3466    # return round(num, 5)  # much slower
3467    return float(f"{num:9.5f}")
3468
3469
3470# only used for writing PDB atoms so inline in
3471# _pdb_atom_string(atm: Atom)
3472# def set_accuracy_83(num: float) -> float:
3473#    """Reduce floating point accuracy to 8.3 (xxxxx.xxx).
3474#
3475#    Used by IC_Residue class, matches PDB output format.
3476#    :param float num: input number
3477#    :returns: float with specified accuracy
3478#    """
3479#    return float("{:8.3f}".format(num))
3480
3481
3482# internal coordinates construction Exceptions
3483class HedronMatchError(Exception):
3484    """Cannot find hedron in residue for given key."""
3485
3486    pass
3487
3488
3489class MissingAtomError(Exception):
3490    """Missing atom coordinates for hedron or dihedron."""
3491
3492    pass
3493