1# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 2# This code is part of the Biopython distribution and governed by its 3# license. Please see the LICENSE file that should have been included 4# as part of this package. 5 6"""Output of PDB files.""" 7 8 9# To allow saving of chains, residues, etc.. 10from Bio.PDB.StructureBuilder import StructureBuilder 11 12# Allowed Elements 13from Bio.Data.IUPACData import atom_weights 14 15 16_ATOM_FORMAT_STRING = ( 17 "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%6.2f %4s%2s%2s\n" 18) 19_PQR_ATOM_FORMAT_STRING = ( 20 "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f %7s %6s %2s\n" 21) 22 23 24class Select: 25 """Select everything for PDB output (for use as a base class). 26 27 Default selection (everything) during writing - can be used as base class 28 to implement selective output. This selects which entities will be written out. 29 """ 30 31 def __repr__(self): 32 """Represent the output as a string for debugging.""" 33 return "<Select all>" 34 35 def accept_model(self, model): 36 """Overload this to reject models for output.""" 37 return 1 38 39 def accept_chain(self, chain): 40 """Overload this to reject chains for output.""" 41 return 1 42 43 def accept_residue(self, residue): 44 """Overload this to reject residues for output.""" 45 return 1 46 47 def accept_atom(self, atom): 48 """Overload this to reject atoms for output.""" 49 return 1 50 51 52_select = Select() 53 54 55class StructureIO: 56 """Base class to derive structure file format writers from.""" 57 58 def __init__(self): 59 """Initialise.""" 60 pass 61 62 def set_structure(self, pdb_object): 63 """Check what the user is providing and build a structure.""" 64 # The idea here is to build missing upstream components of 65 # the SMCRA object representation. E.g., if the user provides 66 # a Residue, build Structure/Model/Chain. 67 68 if pdb_object.level == "S": 69 structure = pdb_object 70 else: # Not a Structure 71 sb = StructureBuilder() 72 sb.init_structure("pdb") 73 sb.init_seg(" ") 74 75 if pdb_object.level == "M": 76 sb.structure.add(pdb_object.copy()) 77 self.structure = sb.structure 78 else: # Not a Model 79 sb.init_model(0) 80 81 if pdb_object.level == "C": 82 sb.structure[0].add(pdb_object.copy()) 83 else: # Not a Chain 84 chain_id = "A" # default 85 sb.init_chain(chain_id) 86 87 if pdb_object.level == "R": # Residue 88 # Residue extracted from a larger structure? 89 if pdb_object.parent is not None: 90 og_chain_id = pdb_object.parent.id 91 sb.structure[0][chain_id].id = og_chain_id 92 chain_id = og_chain_id 93 94 sb.structure[0][chain_id].add(pdb_object.copy()) 95 96 else: # Atom 97 sb.init_residue("DUM", " ", 1, " ") # Dummy residue 98 sb.structure[0][chain_id].child_list[0].add(pdb_object.copy()) 99 100 # Fix chain identifier if Atom has grandparents. 101 try: 102 og_chain_id = pdb_object.parent.parent.id 103 except AttributeError: # pdb_object.parent == None 104 pass 105 else: 106 sb.structure[0][chain_id].id = og_chain_id 107 108 # Return structure 109 structure = sb.structure 110 self.structure = structure 111 112 113class PDBIO(StructureIO): 114 """Write a Structure object (or a subset of a Structure object) as a PDB or PQR file. 115 116 Examples 117 -------- 118 >>> from Bio.PDB import PDBParser 119 >>> from Bio.PDB.PDBIO import PDBIO 120 >>> parser = PDBParser() 121 >>> structure = parser.get_structure("1a8o", "PDB/1A8O.pdb") 122 >>> io=PDBIO() 123 >>> io.set_structure(structure) 124 >>> io.save("bio-pdb-pdbio-out.pdb") 125 >>> import os 126 >>> os.remove("bio-pdb-pdbio-out.pdb") # tidy up 127 128 129 """ 130 131 def __init__(self, use_model_flag=0, is_pqr=False): 132 """Create the PDBIO object. 133 134 :param use_model_flag: if 1, force use of the MODEL record in output. 135 :type use_model_flag: int 136 :param is_pqr: if True, build PQR file. Otherwise build PDB file. 137 :type is_pqr: Boolean 138 """ 139 self.use_model_flag = use_model_flag 140 self.is_pqr = is_pqr 141 142 # private methods 143 144 def _get_atom_line( 145 self, 146 atom, 147 hetfield, 148 segid, 149 atom_number, 150 resname, 151 resseq, 152 icode, 153 chain_id, 154 charge=" ", 155 ): 156 """Return an ATOM PDB string (PRIVATE).""" 157 if hetfield != " ": 158 record_type = "HETATM" 159 else: 160 record_type = "ATOM " 161 162 if atom.element: 163 element = atom.element.strip().upper() 164 if element.capitalize() not in atom_weights and element != "X": 165 raise ValueError("Unrecognised element %r" % atom.element) 166 element = element.rjust(2) 167 else: 168 element = " " 169 170 name = atom.get_fullname().strip() 171 # Pad atom name if: 172 # - smaller than 4 characters 173 # AND - is not C, N, O, S, H, F, P, ..., one letter elements 174 # AND - first character is NOT numeric (funky hydrogen naming rules) 175 if len(name) < 4 and name[:1].isalpha() and len(element.strip()) < 2: 176 name = " " + name 177 178 altloc = atom.get_altloc() 179 x, y, z = atom.get_coord() 180 181 # PDB Arguments 182 if not self.is_pqr: 183 bfactor = atom.get_bfactor() 184 occupancy = atom.get_occupancy() 185 186 # PQR Arguments 187 else: 188 radius = atom.get_radius() 189 pqr_charge = atom.get_charge() 190 191 if not self.is_pqr: 192 try: 193 occupancy_str = "%6.2f" % occupancy 194 except TypeError: 195 if occupancy is None: 196 occupancy_str = " " * 6 197 import warnings 198 from Bio import BiopythonWarning 199 200 warnings.warn( 201 "Missing occupancy in atom %r written as blank" 202 % (atom.get_full_id(),), 203 BiopythonWarning, 204 ) 205 else: 206 raise TypeError( 207 "Invalid occupancy %r in atom %r" 208 % (occupancy, atom.get_full_id()) 209 ) from None 210 211 args = ( 212 record_type, 213 atom_number, 214 name, 215 altloc, 216 resname, 217 chain_id, 218 resseq, 219 icode, 220 x, 221 y, 222 z, 223 occupancy_str, 224 bfactor, 225 segid, 226 element, 227 charge, 228 ) 229 return _ATOM_FORMAT_STRING % args 230 231 else: 232 # PQR case 233 try: 234 pqr_charge = "%7.4f" % pqr_charge 235 except TypeError: 236 if pqr_charge is None: 237 pqr_charge = " " * 7 238 import warnings 239 from Bio import BiopythonWarning 240 241 warnings.warn( 242 "Missing charge in atom %r written as blank" 243 % (atom.get_full_id(),), 244 BiopythonWarning, 245 ) 246 else: 247 raise TypeError( 248 "Invalid charge %r in atom %r" 249 % (pqr_charge, atom.get_full_id()) 250 ) from None 251 try: 252 radius = "%6.4f" % radius 253 except TypeError: 254 if radius is None: 255 radius = " " * 6 256 import warnings 257 from Bio import BiopythonWarning 258 259 warnings.warn( 260 "Missing radius in atom %r written as blank" 261 % (atom.get_full_id(),), 262 BiopythonWarning, 263 ) 264 else: 265 raise TypeError( 266 "Invalid radius %r in atom %r" % (radius, atom.get_full_id()) 267 ) from None 268 269 args = ( 270 record_type, 271 atom_number, 272 name, 273 altloc, 274 resname, 275 chain_id, 276 resseq, 277 icode, 278 x, 279 y, 280 z, 281 pqr_charge, 282 radius, 283 element, 284 ) 285 286 return _PQR_ATOM_FORMAT_STRING % args 287 288 # Public methods 289 290 def save(self, file, select=_select, write_end=True, preserve_atom_numbering=False): 291 """Save structure to a file. 292 293 :param file: output file 294 :type file: string or filehandle 295 296 :param select: selects which entities will be written. 297 :type select: object 298 299 Typically select is a subclass of L{Select}, it should 300 have the following methods: 301 302 - accept_model(model) 303 - accept_chain(chain) 304 - accept_residue(residue) 305 - accept_atom(atom) 306 307 These methods should return 1 if the entity is to be 308 written out, 0 otherwise. 309 310 Typically select is a subclass of L{Select}. 311 """ 312 get_atom_line = self._get_atom_line 313 if isinstance(file, str): 314 fp = open(file, "w") 315 close_file = 1 316 else: 317 # filehandle, I hope :-) 318 fp = file 319 close_file = 0 320 # multiple models? 321 if len(self.structure) > 1 or self.use_model_flag: 322 model_flag = 1 323 else: 324 model_flag = 0 325 for model in self.structure.get_list(): 326 if not select.accept_model(model): 327 continue 328 # necessary for ENDMDL 329 # do not write ENDMDL if no residues were written 330 # for this model 331 model_residues_written = 0 332 if not preserve_atom_numbering: 333 atom_number = 1 334 if model_flag: 335 fp.write("MODEL %s\n" % model.serial_num) 336 for chain in model.get_list(): 337 if not select.accept_chain(chain): 338 continue 339 chain_id = chain.get_id() 340 # necessary for TER 341 # do not write TER if no residues were written 342 # for this chain 343 chain_residues_written = 0 344 for residue in chain.get_unpacked_list(): 345 if not select.accept_residue(residue): 346 continue 347 hetfield, resseq, icode = residue.get_id() 348 resname = residue.get_resname() 349 segid = residue.get_segid() 350 for atom in residue.get_unpacked_list(): 351 if select.accept_atom(atom): 352 chain_residues_written = 1 353 model_residues_written = 1 354 if preserve_atom_numbering: 355 atom_number = atom.get_serial_number() 356 357 # Check if the atom serial number is an integer 358 # Not always the case for mmCIF files. 359 try: 360 atom_number = int(atom_number) 361 except ValueError: 362 raise ValueError( 363 f"{repr(atom_number)} is not a number." 364 "Atom serial numbers must be numerical" 365 " If you are converting from an mmCIF" 366 " structure, try using" 367 " preserve_atom_numbering=False" 368 ) 369 370 s = get_atom_line( 371 atom, 372 hetfield, 373 segid, 374 atom_number, 375 resname, 376 resseq, 377 icode, 378 chain_id, 379 ) 380 fp.write(s) 381 if not preserve_atom_numbering: 382 atom_number += 1 383 if chain_residues_written: 384 fp.write( 385 "TER %5i %3s %c%4i%c \n" 386 % (atom_number, resname, chain_id, resseq, icode) 387 ) 388 389 if model_flag and model_residues_written: 390 fp.write("ENDMDL\n") 391 if write_end: 392 fp.write("END \n") 393 if close_file: 394 fp.close() 395