1# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
2#
3# This file is part of the Biopython distribution and governed by your
4# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
5# Please see the LICENSE file that should have been included as part of this
6# package.
7
8"""Atom class, used in Structure objects."""
9
10import copy
11import sys
12import warnings
13
14import numpy as np
15
16from Bio.PDB.Entity import DisorderedEntityWrapper
17from Bio.PDB.PDBExceptions import PDBConstructionWarning
18from Bio.PDB.vectors import Vector
19from Bio.Data import IUPACData
20
21
22class Atom:
23    """Define Atom class.
24
25    The Atom object stores atom name (both with and without spaces),
26    coordinates, B factor, occupancy, alternative location specifier
27    and (optionally) anisotropic B factor and standard deviations of
28    B factor and positions.
29
30    In the case of PQR files, B factor and occupancy are replaced by
31    atomic charge and radius.
32    """
33
34    def __init__(
35        self,
36        name,
37        coord,
38        bfactor,
39        occupancy,
40        altloc,
41        fullname,
42        serial_number,
43        element=None,
44        pqr_charge=None,
45        radius=None,
46    ):
47        """Initialize Atom object.
48
49        :param name: atom name (eg. "CA"). Note that spaces are normally stripped.
50        :type name: string
51
52        :param coord: atomic coordinates (x,y,z)
53        :type coord: Numeric array (Float0, size 3)
54
55        :param bfactor: isotropic B factor
56        :type bfactor: number
57
58        :param occupancy: occupancy (0.0-1.0)
59        :type occupancy: number
60
61        :param altloc: alternative location specifier for disordered atoms
62        :type altloc: string
63
64        :param fullname: full atom name, including spaces, e.g. " CA ". Normally
65                         these spaces are stripped from the atom name.
66        :type fullname: string
67
68        :param element: atom element, e.g. "C" for Carbon, "HG" for mercury,
69        :type element: uppercase string (or None if unknown)
70
71        :param pqr_charge: atom charge
72        :type pqr_charge: number
73
74        :param radius: atom radius
75        :type radius: number
76        """
77        self.level = "A"
78        # Reference to the residue
79        self.parent = None
80        # the atomic data
81        self.name = name  # eg. CA, spaces are removed from atom name
82        self.fullname = fullname  # e.g. " CA ", spaces included
83        self.coord = coord
84        self.bfactor = bfactor
85        self.occupancy = occupancy
86        self.altloc = altloc
87        self.full_id = None  # (structure id, model id, chain id, residue id, atom id)
88        self.id = name  # id of atom is the atom name (e.g. "CA")
89        self.disordered_flag = 0
90        self.anisou_array = None
91        self.siguij_array = None
92        self.sigatm_array = None
93        self.serial_number = serial_number
94        # Dictionary that keeps additional properties
95        self.xtra = {}
96        assert not element or element == element.upper(), element
97        self.element = self._assign_element(element)
98        self.mass = self._assign_atom_mass()
99        self.pqr_charge = pqr_charge
100        self.radius = radius
101
102        # For atom sorting (protein backbone atoms first)
103        self._sorting_keys = {"N": 0, "CA": 1, "C": 2, "O": 3}
104
105    # Sorting Methods
106    # standard across different objects and allows direct comparison
107    def __eq__(self, other):
108        """Test equality."""
109        if isinstance(other, Atom):
110            return self.full_id[1:] == other.full_id[1:]
111        else:
112            return NotImplemented
113
114    def __ne__(self, other):
115        """Test inequality."""
116        if isinstance(other, Atom):
117            return self.full_id[1:] != other.full_id[1:]
118        else:
119            return NotImplemented
120
121    def __gt__(self, other):
122        """Test greater than."""
123        if isinstance(other, Atom):
124            if self.parent != other.parent:
125                return self.parent > other.parent
126            order_s = self._sorting_keys.get(self.name, 4)
127            order_o = self._sorting_keys.get(other.name, 4)
128            if order_s != order_o:
129                return order_s > order_o
130            elif self.name != other.name:
131                return self.name > other.name
132            else:
133                return self.altloc > other.altloc
134        else:
135            return NotImplemented
136
137    def __ge__(self, other):
138        """Test greater or equal."""
139        if isinstance(other, Atom):
140            if self.parent != other.parent:
141                return self.parent >= other.parent
142            order_s = self._sorting_keys.get(self.name, 4)
143            order_o = self._sorting_keys.get(other.name, 4)
144            if order_s != order_o:
145                return order_s >= order_o
146            elif self.name != other.name:
147                return self.name >= other.name
148            else:
149                return self.altloc >= other.altloc
150        else:
151            return NotImplemented
152
153    def __lt__(self, other):
154        """Test less than."""
155        if isinstance(other, Atom):
156            if self.parent != other.parent:
157                return self.parent < other.parent
158            order_s = self._sorting_keys.get(self.name, 4)
159            order_o = self._sorting_keys.get(other.name, 4)
160            if order_s != order_o:
161                return order_s < order_o
162            elif self.name != other.name:
163                return self.name < other.name
164            else:
165                return self.altloc < other.altloc
166        else:
167            return NotImplemented
168
169    def __le__(self, other):
170        """Test less or equal."""
171        if isinstance(other, Atom):
172            if self.parent != other.parent:
173                return self.parent <= other.parent
174            order_s = self._sorting_keys.get(self.name, 4)
175            order_o = self._sorting_keys.get(other.name, 4)
176            if order_s != order_o:
177                return order_s <= order_o
178            elif self.name != other.name:
179                return self.name <= other.name
180            else:
181                return self.altloc <= other.altloc
182        else:
183            return NotImplemented
184
185    # Hash method to allow uniqueness (set)
186    def __hash__(self):
187        """Return atom full identifier."""
188        return hash(self.get_full_id())
189
190    def _assign_element(self, element):
191        """Guess element from atom name if not recognised (PRIVATE).
192
193        There is little documentation about extracting/encoding element
194        information in atom names, but some conventions seem to prevail:
195
196            - C, N, O, S, H, P, F atom names start with a blank space (e.g. " CA ")
197              unless the name is 4 characters long (e.g. HE21 in glutamine). In both
198              these cases, the element is the first character.
199
200            - Inorganic elements do not have a blank space (e.g. "CA  " for calcium)
201              but one must check the full name to differentiate between e.g. helium
202              ("HE  ") and long-name hydrogens (e.g. "HE21").
203
204            - Atoms with unknown or ambiguous elements are marked with 'X', e.g.
205              PDB 4cpa. If we fail to identify an element, we should mark it as
206              such.
207
208        """
209        if not element or element.capitalize() not in IUPACData.atom_weights:
210            if self.fullname[0].isalpha() and not self.fullname[2:].isdigit():
211                putative_element = self.name.strip()
212            else:
213                # Hs may have digit in [0]
214                if self.name[0].isdigit():
215                    putative_element = self.name[1]
216                else:
217                    putative_element = self.name[0]
218
219            if putative_element.capitalize() in IUPACData.atom_weights:
220                msg = "Used element %r for Atom (name=%s) with given element %r" % (
221                    putative_element,
222                    self.name,
223                    element,
224                )
225                element = putative_element
226            else:
227                msg = (
228                    "Could not assign element %r for Atom (name=%s) with given element %r"
229                    % (putative_element, self.name, element)
230                )
231                element = "X"  # mark as unknown/ambiguous
232            warnings.warn(msg, PDBConstructionWarning)
233
234        return element
235
236    def _assign_atom_mass(self):
237        """Return atom weight (PRIVATE)."""
238        try:
239            return IUPACData.atom_weights[self.element.capitalize()]
240        except (AttributeError, KeyError):
241            return float("NaN")
242
243    # Special methods
244
245    def __repr__(self):
246        """Print Atom object as <Atom atom_name>."""
247        return "<Atom %s>" % self.get_id()
248
249    def __sub__(self, other):
250        """Calculate distance between two atoms.
251
252        :param other: the other atom
253        :type other: L{Atom}
254
255        Examples
256        --------
257        This is an incomplete but illustrative example::
258
259            distance = atom1 - atom2
260
261        """
262        diff = self.coord - other.coord
263        return np.sqrt(np.dot(diff, diff))
264
265    # set methods
266
267    def set_serial_number(self, n):
268        """Set serial number."""
269        self.serial_number = n
270
271    def set_bfactor(self, bfactor):
272        """Set isotroptic B factor."""
273        self.bfactor = bfactor
274
275    def set_coord(self, coord):
276        """Set coordinates."""
277        self.coord = coord
278
279    def set_altloc(self, altloc):
280        """Set alternative location specifier."""
281        self.altloc = altloc
282
283    def set_occupancy(self, occupancy):
284        """Set occupancy."""
285        self.occupancy = occupancy
286
287    def set_sigatm(self, sigatm_array):
288        """Set standard deviation of atomic parameters.
289
290        The standard deviation of atomic parameters consists
291        of 3 positional, 1 B factor and 1 occupancy standard
292        deviation.
293
294        :param sigatm_array: standard deviations of atomic parameters.
295        :type sigatm_array: Numeric array (length 5)
296        """
297        self.sigatm_array = sigatm_array
298
299    def set_siguij(self, siguij_array):
300        """Set standard deviations of anisotropic temperature factors.
301
302        :param siguij_array: standard deviations of anisotropic temperature factors.
303        :type siguij_array: Numeric array (length 6)
304        """
305        self.siguij_array = siguij_array
306
307    def set_anisou(self, anisou_array):
308        """Set anisotropic B factor.
309
310        :param anisou_array: anisotropic B factor.
311        :type anisou_array: Numeric array (length 6)
312        """
313        self.anisou_array = anisou_array
314
315    def set_charge(self, pqr_charge):
316        """Set charge."""
317        self.pqr_charge = pqr_charge
318
319    def set_radius(self, radius):
320        """Set radius."""
321        self.radius = radius
322
323    # Public methods
324
325    def flag_disorder(self):
326        """Set the disordered flag to 1.
327
328        The disordered flag indicates whether the atom is disordered or not.
329        """
330        self.disordered_flag = 1
331
332    def is_disordered(self):
333        """Return the disordered flag (1 if disordered, 0 otherwise)."""
334        return self.disordered_flag
335
336    def set_parent(self, parent):
337        """Set the parent residue.
338
339        Arguments:
340         - parent - Residue object
341
342        """
343        self.parent = parent
344        self.full_id = self.get_full_id()
345
346    def detach_parent(self):
347        """Remove reference to parent."""
348        self.parent = None
349
350    def get_sigatm(self):
351        """Return standard deviation of atomic parameters."""
352        return self.sigatm_array
353
354    def get_siguij(self):
355        """Return standard deviations of anisotropic temperature factors."""
356        return self.siguij_array
357
358    def get_anisou(self):
359        """Return anisotropic B factor."""
360        return self.anisou_array
361
362    def get_parent(self):
363        """Return parent residue."""
364        return self.parent
365
366    def get_serial_number(self):
367        """Return the serial number."""
368        return self.serial_number
369
370    def get_name(self):
371        """Return atom name."""
372        return self.name
373
374    def get_id(self):
375        """Return the id of the atom (which is its atom name)."""
376        return self.id
377
378    def get_full_id(self):
379        """Return the full id of the atom.
380
381        The full id of an atom is a tuple used to uniquely identify
382        the atom and consists of the following elements:
383        (structure id, model id, chain id, residue id, atom name, altloc)
384        """
385        try:
386            return self.parent.get_full_id() + ((self.name, self.altloc),)
387        except AttributeError:
388            return (None, None, None, None, self.name, self.altloc)
389
390    def get_coord(self):
391        """Return atomic coordinates."""
392        return self.coord
393
394    def get_bfactor(self):
395        """Return B factor."""
396        return self.bfactor
397
398    def get_occupancy(self):
399        """Return occupancy."""
400        return self.occupancy
401
402    def get_fullname(self):
403        """Return the atom name, including leading and trailing spaces."""
404        return self.fullname
405
406    def get_altloc(self):
407        """Return alternative location specifier."""
408        return self.altloc
409
410    def get_level(self):
411        """Return level."""
412        return self.level
413
414    def get_charge(self):
415        """Return charge."""
416        return self.pqr_charge
417
418    def get_radius(self):
419        """Return radius."""
420        return self.radius
421
422    def transform(self, rot, tran):
423        """Apply rotation and translation to the atomic coordinates.
424
425        :param rot: A right multiplying rotation matrix
426        :type rot: 3x3 Numeric array
427
428        :param tran: the translation vector
429        :type tran: size 3 Numeric array
430
431        Examples
432        --------
433        This is an incomplete but illustrative example::
434
435            from numpy import pi, array
436            from Bio.PDB.vectors import Vector, rotmat
437            rotation = rotmat(pi, Vector(1, 0, 0))
438            translation = array((0, 0, 1), 'f')
439            atom.transform(rotation, translation)
440
441        """
442        self.coord = np.dot(self.coord, rot) + tran
443
444    def get_vector(self):
445        """Return coordinates as Vector.
446
447        :return: coordinates as 3D vector
448        :rtype: Bio.PDB.Vector class
449        """
450        x, y, z = self.coord
451        return Vector(x, y, z)
452
453    def copy(self):
454        """Create a copy of the Atom.
455
456        Parent information is lost.
457        """
458        # Do a shallow copy then explicitly copy what needs to be deeper.
459        shallow = copy.copy(self)
460        shallow.detach_parent()
461        shallow.set_coord(copy.copy(self.get_coord()))
462        shallow.xtra = self.xtra.copy()
463        return shallow
464
465
466class DisorderedAtom(DisorderedEntityWrapper):
467    """Contains all Atom objects that represent the same disordered atom.
468
469    One of these atoms is "selected" and all method calls not caught
470    by DisorderedAtom are forwarded to the selected Atom object. In that way, a
471    DisorderedAtom behaves exactly like a normal Atom. By default, the selected
472    Atom object represents the Atom object with the highest occupancy, but a
473    different Atom object can be selected by using the disordered_select(altloc)
474    method.
475    """
476
477    def __init__(self, id):
478        """Create DisorderedAtom.
479
480        Arguments:
481         - id - string, atom name
482
483        """
484        # TODO - make this a private attribute?
485        self.last_occupancy = -sys.maxsize
486        DisorderedEntityWrapper.__init__(self, id)
487
488    # Special methods
489    # Override parent class __iter__ method
490    def __iter__(self):
491        """Iterate through disordered atoms."""
492        yield from self.disordered_get_list()
493
494    def __repr__(self):
495        """Return disordered atom identifier."""
496        if self.child_dict:
497            return "<DisorderedAtom %s>" % self.get_id()
498        else:
499            return "<Empty DisorderedAtom %s>" % self.get_id()
500
501    # This is a separate method from Entity.center_of_mass since DisorderedAtoms
502    # will be unpacked by Residue.get_unpacked_list(). Here we allow for a very
503    # specific use case that is much simpler than the general implementation.
504    def center_of_mass(self):
505        """Return the center of mass of the DisorderedAtom as a numpy array.
506
507        Assumes all child atoms have the same mass (same element).
508        """
509        children = self.disordered_get_list()
510
511        if not children:
512            raise ValueError(f"{self} does not have children")
513
514        coords = np.asarray([a.coord for a in children], dtype=np.float32)
515        return np.average(coords, axis=0, weights=None)
516
517    def disordered_get_list(self):
518        """Return list of atom instances.
519
520        Sorts children by altloc (empty, then alphabetical).
521        """
522        return sorted(self.child_dict.values(), key=lambda a: ord(a.altloc))
523
524    def disordered_add(self, atom):
525        """Add a disordered atom."""
526        # Add atom to dict, use altloc as key
527        atom.flag_disorder()
528        # set the residue parent of the added atom
529        residue = self.get_parent()
530        atom.set_parent(residue)
531        altloc = atom.get_altloc()
532        occupancy = atom.get_occupancy()
533        self[altloc] = atom
534        if occupancy > self.last_occupancy:
535            self.last_occupancy = occupancy
536            self.disordered_select(altloc)
537
538    def disordered_remove(self, altloc):
539        """Remove a child atom altloc from the DisorderedAtom.
540
541        Arguments:
542         - altloc - name of the altloc to remove, as a string.
543
544        """
545        # Get child altloc
546        atom = self.child_dict[altloc]
547        is_selected = self.selected_child is atom
548
549        # Detach
550        del self.child_dict[altloc]
551        atom.detach_parent()
552
553        if is_selected and self.child_dict:  # pick next highest occupancy
554            child = sorted(self.child_dict.values(), key=lambda a: a.occupancy)[-1]
555            self.disordered_select(child.altloc)
556        elif not self.child_dict:
557            self.selected_child = None
558            self.last_occupancy = -sys.maxsize
559
560    def transform(self, rot, tran):
561        """Apply rotation and translation to all children.
562
563        See the documentation of Atom.transform for details.
564        """
565        for child in self:
566            child.coord = np.dot(child.coord, rot) + tran
567