1# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
2# This code is part of the Biopython distribution and governed by its
3# license.  Please see the LICENSE file that should have been included
4# as part of this package.
5
6"""Output of PDB files."""
7
8
9# To allow saving of chains, residues, etc..
10from Bio.PDB.StructureBuilder import StructureBuilder
11
12# Allowed Elements
13from Bio.Data.IUPACData import atom_weights
14
15
16_ATOM_FORMAT_STRING = (
17    "%s%5i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f%s%6.2f      %4s%2s%2s\n"
18)
19_PQR_ATOM_FORMAT_STRING = (
20    "%s%5i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f %7s  %6s      %2s\n"
21)
22
23
24class Select:
25    """Select everything for PDB output (for use as a base class).
26
27    Default selection (everything) during writing - can be used as base class
28    to implement selective output. This selects which entities will be written out.
29    """
30
31    def __repr__(self):
32        """Represent the output as a string for debugging."""
33        return "<Select all>"
34
35    def accept_model(self, model):
36        """Overload this to reject models for output."""
37        return 1
38
39    def accept_chain(self, chain):
40        """Overload this to reject chains for output."""
41        return 1
42
43    def accept_residue(self, residue):
44        """Overload this to reject residues for output."""
45        return 1
46
47    def accept_atom(self, atom):
48        """Overload this to reject atoms for output."""
49        return 1
50
51
52_select = Select()
53
54
55class StructureIO:
56    """Base class to derive structure file format writers from."""
57
58    def __init__(self):
59        """Initialise."""
60        pass
61
62    def set_structure(self, pdb_object):
63        """Check what the user is providing and build a structure."""
64        # The idea here is to build missing upstream components of
65        # the SMCRA object representation. E.g., if the user provides
66        # a Residue, build Structure/Model/Chain.
67
68        if pdb_object.level == "S":
69            structure = pdb_object
70        else:  # Not a Structure
71            sb = StructureBuilder()
72            sb.init_structure("pdb")
73            sb.init_seg(" ")
74
75            if pdb_object.level == "M":
76                sb.structure.add(pdb_object.copy())
77                self.structure = sb.structure
78            else:  # Not a Model
79                sb.init_model(0)
80
81                if pdb_object.level == "C":
82                    sb.structure[0].add(pdb_object.copy())
83                else:  # Not a Chain
84                    chain_id = "A"  # default
85                    sb.init_chain(chain_id)
86
87                    if pdb_object.level == "R":  # Residue
88                        # Residue extracted from a larger structure?
89                        if pdb_object.parent is not None:
90                            og_chain_id = pdb_object.parent.id
91                            sb.structure[0][chain_id].id = og_chain_id
92                            chain_id = og_chain_id
93
94                        sb.structure[0][chain_id].add(pdb_object.copy())
95
96                    else:  # Atom
97                        sb.init_residue("DUM", " ", 1, " ")  # Dummy residue
98                        sb.structure[0][chain_id].child_list[0].add(pdb_object.copy())
99
100                        # Fix chain identifier if Atom has grandparents.
101                        try:
102                            og_chain_id = pdb_object.parent.parent.id
103                        except AttributeError:  # pdb_object.parent == None
104                            pass
105                        else:
106                            sb.structure[0][chain_id].id = og_chain_id
107
108            # Return structure
109            structure = sb.structure
110        self.structure = structure
111
112
113class PDBIO(StructureIO):
114    """Write a Structure object (or a subset of a Structure object) as a PDB or PQR file.
115
116    Examples
117    --------
118    >>> from Bio.PDB import PDBParser
119    >>> from Bio.PDB.PDBIO import PDBIO
120    >>> parser = PDBParser()
121    >>> structure = parser.get_structure("1a8o", "PDB/1A8O.pdb")
122    >>> io=PDBIO()
123    >>> io.set_structure(structure)
124    >>> io.save("bio-pdb-pdbio-out.pdb")
125    >>> import os
126    >>> os.remove("bio-pdb-pdbio-out.pdb")  # tidy up
127
128
129    """
130
131    def __init__(self, use_model_flag=0, is_pqr=False):
132        """Create the PDBIO object.
133
134        :param use_model_flag: if 1, force use of the MODEL record in output.
135        :type use_model_flag: int
136        :param is_pqr: if True, build PQR file. Otherwise build PDB file.
137        :type is_pqr: Boolean
138        """
139        self.use_model_flag = use_model_flag
140        self.is_pqr = is_pqr
141
142    # private methods
143
144    def _get_atom_line(
145        self,
146        atom,
147        hetfield,
148        segid,
149        atom_number,
150        resname,
151        resseq,
152        icode,
153        chain_id,
154        charge="  ",
155    ):
156        """Return an ATOM PDB string (PRIVATE)."""
157        if hetfield != " ":
158            record_type = "HETATM"
159        else:
160            record_type = "ATOM  "
161
162        if atom.element:
163            element = atom.element.strip().upper()
164            if element.capitalize() not in atom_weights and element != "X":
165                raise ValueError("Unrecognised element %r" % atom.element)
166            element = element.rjust(2)
167        else:
168            element = "  "
169
170        name = atom.get_fullname().strip()
171        # Pad atom name if:
172        #     - smaller than 4 characters
173        # AND - is not C, N, O, S, H, F, P, ..., one letter elements
174        # AND - first character is NOT numeric (funky hydrogen naming rules)
175        if len(name) < 4 and name[:1].isalpha() and len(element.strip()) < 2:
176            name = " " + name
177
178        altloc = atom.get_altloc()
179        x, y, z = atom.get_coord()
180
181        # PDB Arguments
182        if not self.is_pqr:
183            bfactor = atom.get_bfactor()
184            occupancy = atom.get_occupancy()
185
186        # PQR Arguments
187        else:
188            radius = atom.get_radius()
189            pqr_charge = atom.get_charge()
190
191        if not self.is_pqr:
192            try:
193                occupancy_str = "%6.2f" % occupancy
194            except TypeError:
195                if occupancy is None:
196                    occupancy_str = " " * 6
197                    import warnings
198                    from Bio import BiopythonWarning
199
200                    warnings.warn(
201                        "Missing occupancy in atom %r written as blank"
202                        % (atom.get_full_id(),),
203                        BiopythonWarning,
204                    )
205                else:
206                    raise TypeError(
207                        "Invalid occupancy %r in atom %r"
208                        % (occupancy, atom.get_full_id())
209                    ) from None
210
211            args = (
212                record_type,
213                atom_number,
214                name,
215                altloc,
216                resname,
217                chain_id,
218                resseq,
219                icode,
220                x,
221                y,
222                z,
223                occupancy_str,
224                bfactor,
225                segid,
226                element,
227                charge,
228            )
229            return _ATOM_FORMAT_STRING % args
230
231        else:
232            # PQR case
233            try:
234                pqr_charge = "%7.4f" % pqr_charge
235            except TypeError:
236                if pqr_charge is None:
237                    pqr_charge = " " * 7
238                    import warnings
239                    from Bio import BiopythonWarning
240
241                    warnings.warn(
242                        "Missing charge in atom %r written as blank"
243                        % (atom.get_full_id(),),
244                        BiopythonWarning,
245                    )
246                else:
247                    raise TypeError(
248                        "Invalid charge %r in atom %r"
249                        % (pqr_charge, atom.get_full_id())
250                    ) from None
251            try:
252                radius = "%6.4f" % radius
253            except TypeError:
254                if radius is None:
255                    radius = " " * 6
256                    import warnings
257                    from Bio import BiopythonWarning
258
259                    warnings.warn(
260                        "Missing radius in atom %r written as blank"
261                        % (atom.get_full_id(),),
262                        BiopythonWarning,
263                    )
264                else:
265                    raise TypeError(
266                        "Invalid radius %r in atom %r" % (radius, atom.get_full_id())
267                    ) from None
268
269            args = (
270                record_type,
271                atom_number,
272                name,
273                altloc,
274                resname,
275                chain_id,
276                resseq,
277                icode,
278                x,
279                y,
280                z,
281                pqr_charge,
282                radius,
283                element,
284            )
285
286            return _PQR_ATOM_FORMAT_STRING % args
287
288    # Public methods
289
290    def save(self, file, select=_select, write_end=True, preserve_atom_numbering=False):
291        """Save structure to a file.
292
293        :param file: output file
294        :type file: string or filehandle
295
296        :param select: selects which entities will be written.
297        :type select: object
298
299        Typically select is a subclass of L{Select}, it should
300        have the following methods:
301
302         - accept_model(model)
303         - accept_chain(chain)
304         - accept_residue(residue)
305         - accept_atom(atom)
306
307        These methods should return 1 if the entity is to be
308        written out, 0 otherwise.
309
310        Typically select is a subclass of L{Select}.
311        """
312        get_atom_line = self._get_atom_line
313        if isinstance(file, str):
314            fp = open(file, "w")
315            close_file = 1
316        else:
317            # filehandle, I hope :-)
318            fp = file
319            close_file = 0
320        # multiple models?
321        if len(self.structure) > 1 or self.use_model_flag:
322            model_flag = 1
323        else:
324            model_flag = 0
325        for model in self.structure.get_list():
326            if not select.accept_model(model):
327                continue
328            # necessary for ENDMDL
329            # do not write ENDMDL if no residues were written
330            # for this model
331            model_residues_written = 0
332            if not preserve_atom_numbering:
333                atom_number = 1
334            if model_flag:
335                fp.write("MODEL      %s\n" % model.serial_num)
336            for chain in model.get_list():
337                if not select.accept_chain(chain):
338                    continue
339                chain_id = chain.get_id()
340                # necessary for TER
341                # do not write TER if no residues were written
342                # for this chain
343                chain_residues_written = 0
344                for residue in chain.get_unpacked_list():
345                    if not select.accept_residue(residue):
346                        continue
347                    hetfield, resseq, icode = residue.get_id()
348                    resname = residue.get_resname()
349                    segid = residue.get_segid()
350                    for atom in residue.get_unpacked_list():
351                        if select.accept_atom(atom):
352                            chain_residues_written = 1
353                            model_residues_written = 1
354                            if preserve_atom_numbering:
355                                atom_number = atom.get_serial_number()
356
357                                # Check if the atom serial number is an integer
358                                # Not always the case for mmCIF files.
359                                try:
360                                    atom_number = int(atom_number)
361                                except ValueError:
362                                    raise ValueError(
363                                        f"{repr(atom_number)} is not a number."
364                                        "Atom serial numbers must be numerical"
365                                        " If you are converting from an mmCIF"
366                                        " structure, try using"
367                                        " preserve_atom_numbering=False"
368                                    )
369
370                            s = get_atom_line(
371                                atom,
372                                hetfield,
373                                segid,
374                                atom_number,
375                                resname,
376                                resseq,
377                                icode,
378                                chain_id,
379                            )
380                            fp.write(s)
381                            if not preserve_atom_numbering:
382                                atom_number += 1
383                if chain_residues_written:
384                    fp.write(
385                        "TER   %5i      %3s %c%4i%c                                                      \n"
386                        % (atom_number, resname, chain_id, resseq, icode)
387                    )
388
389            if model_flag and model_residues_written:
390                fp.write("ENDMDL\n")
391        if write_end:
392            fp.write("END   \n")
393        if close_file:
394            fp.close()
395