1 // $Id: mmdb_chain.h $ 2 // ================================================================= 3 // 4 // CCP4 Coordinate Library: support of coordinate-related 5 // functionality in protein crystallography applications. 6 // 7 // Copyright (C) Eugene Krissinel 2000-2013. 8 // 9 // This library is free software: you can redistribute it and/or 10 // modify it under the terms of the GNU Lesser General Public 11 // License version 3, modified in accordance with the provisions 12 // of the license to address the requirements of UK law. 13 // 14 // You should have received a copy of the modified GNU Lesser 15 // General Public License along with this library. If not, copies 16 // may be downloaded from http://www.ccp4.ac.uk/ccp4license.php 17 // 18 // This program is distributed in the hope that it will be useful, 19 // but WITHOUT ANY WARRANTY; without even the implied warranty of 20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 // GNU Lesser General Public License for more details. 22 // 23 // ================================================================= 24 // 25 // 23.12.15 <-- Date of Last Modification. 26 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 // ----------------------------------------------------------------- 28 // 29 // **** Module : MMDB_Chain <interface> 30 // ~~~~~~~~~ 31 // **** Project : MacroMolecular Data Base (MMDB) 32 // ~~~~~~~~~ 33 // **** Classes : mmdb::ProModel ( abstract Model class ) 34 // ~~~~~~~~~ mmdb::DBReference ( DBREF records ) 35 // mmdb::ChainContainer ( container of in-chain classes ) 36 // mmdb::ContainerChain ( chain containered class template) 37 // mmdb::SeqAdv ( SEQADV records ) 38 // mmdb::SeqRes ( SEQRES records ) 39 // mmdb::ModRes ( MODRES records ) 40 // mmdb::HetRec ( HET records ) 41 // mmdb::Chain ( chain class ) 42 // 43 // Copyright (C) E. Krissinel 2000-2015 44 // 45 // ================================================================= 46 // 47 48 #ifndef __MMDB_Chain__ 49 #define __MMDB_Chain__ 50 51 #include "mmdb_io_stream.h" 52 #include "mmdb_utils.h" 53 #include "mmdb_atom.h" 54 #include "mmdb_defs.h" 55 56 namespace mmdb { 57 58 // ==================== ProModel ====================== 59 60 // This class is a virtue needed only for defining certain 61 // functions of Model, which are used by Chain and 62 // Residue 63 64 DefineClass(ProModel); 65 DefineStreamFunctions(ProModel); 66 67 DefineClass(Manager); 68 69 class ProModel : public UDData { 70 71 friend class Chain; 72 73 public : 74 ProModel()75 ProModel () : UDData () {} ProModel(io::RPStream Object)76 ProModel ( io::RPStream Object ) : UDData ( Object ) {} ~ProModel()77 ~ProModel () {} 78 GetEntryID()79 virtual cpstr GetEntryID () { return ""; } SetEntryID(const IDCode)80 virtual void SetEntryID ( const IDCode ) {} 81 AddChain(PChain)82 virtual int AddChain ( PChain ) { return 0; } 83 84 // returns pointer to Root GetCoordHierarchy()85 virtual PManager GetCoordHierarchy() { return NULL; } 86 87 // GetNumberOfModels() returns TOTAL number of models GetNumberOfModels()88 virtual int GetNumberOfModels() { return 0; } 89 90 // GetNumberOfAllAtoms() returns TOTAL number of atoms in 91 // all models GetNumberOfAllAtoms()92 virtual int GetNumberOfAllAtoms() { return 0; } 93 94 // returns pointer to the general Atom array GetAllAtoms()95 virtual PPAtom GetAllAtoms() { return NULL; } 96 GetSerNum()97 virtual int GetSerNum () { return 0; } 98 ExpandAtomArray(int)99 virtual void ExpandAtomArray ( int ) {} AddAtomArray(int)100 virtual void AddAtomArray ( int ) {} 101 102 protected : 103 _ExcludeChain(const ChainID)104 virtual int _ExcludeChain ( const ChainID ) { return 0; } 105 106 }; 107 108 109 110 // ==================== ChainContainer ====================== 111 112 DefineClass(ChainContainer); 113 DefineStreamFunctions(ChainContainer); 114 115 class ChainContainer : public ClassContainer { 116 117 public : 118 ChainContainer()119 ChainContainer () : ClassContainer () {} ChainContainer(io::RPStream Object)120 ChainContainer ( io::RPStream Object ) 121 : ClassContainer ( Object ) {} ~ChainContainer()122 ~ChainContainer () {} 123 124 PContainerClass MakeContainerClass ( int ClassID ); 125 126 void SetChain ( PChain Chain_Owner ); // must be set before using 127 // the Container 128 129 // special functions used in Model::GetCIF(..) 130 cpstr Get1stChainID (); 131 void MoveByChainID ( const ChainID chainID, 132 PChainContainer chainContainer ); 133 134 protected : 135 PChain chain; 136 137 }; 138 139 140 // ================== ContainerChain ===================== 141 142 DefineClass(ContainerChain); 143 DefineStreamFunctions(ContainerChain); 144 145 class ContainerChain : public ContainerClass { 146 147 friend class ChainContainer; 148 149 public : 150 151 ContainerChain (); 152 ContainerChain ( PChain Chain_Owner ); ContainerChain(io::RPStream Object)153 ContainerChain ( io::RPStream Object ) : ContainerClass(Object) {} 154 155 void SetChain ( PChain Chain_Owner ); 156 157 protected : 158 PChain chain; 159 ChainID chainID; // just a copy of Chain->chainID 160 161 }; 162 163 164 // ================== DBReference ======================== 165 166 DefineClass(DBReference); 167 DefineStreamFunctions(DBReference); 168 169 class DBReference : public ContainerChain { 170 171 public : 172 173 int seqBeg; // initial seq num of the PDB seq-ce segment 174 InsCode insBeg; // initial ins code of the PDB seq-ce segm-t 175 int seqEnd; // ending seq number of the PDB seq-ce segm-t 176 InsCode insEnd; // ending ins code of the PDB seq-ce segment 177 DBName database; // sequence database name 178 DBAcCode dbAccession; // sequence database accession code 179 DBIdCode dbIdCode; // sequence database identification code 180 int dbseqBeg; // initial seq number of the database segment 181 InsCode dbinsBeg; // ins code of initial residue of the segment 182 int dbseqEnd; // ending seq number of the database segment 183 InsCode dbinsEnd; // ins code of the ending residue of the seg-t 184 185 DBReference (); 186 DBReference ( PChain Chain_Owner ); 187 DBReference ( PChain Chain_Owner, cpstr S ); 188 DBReference ( io::RPStream Object ); 189 ~DBReference(); 190 191 void PDBASCIIDump ( pstr S, int N ); 192 void MakeCIF ( mmcif::PData CIF, int N ); 193 ERROR_CODE ConvertPDBASCII ( cpstr S ); 194 ERROR_CODE GetCIF ( mmcif::PData CIF, int & n ); GetClassID()195 CLASS_ID GetClassID () { return ClassID_DBReference; } 196 197 void Copy ( PContainerClass DBRef ); 198 199 void write ( io::RFile f ); 200 void read ( io::RFile f ); 201 202 protected : 203 204 void InitDBReference(); 205 206 }; 207 208 209 // ==================== SeqAdv =========================== 210 211 DefineClass(SeqAdv); 212 DefineStreamFunctions(SeqAdv); 213 214 class SeqAdv : public ContainerChain { 215 216 public : 217 218 ResName resName; // residue name in conflict 219 int seqNum; // residue sequence number 220 InsCode insCode; // residue insertion code 221 DBName database; // sequence database name 222 DBAcCode dbAccession; // sequence database accession code 223 ResName dbRes; // sequence database residue name 224 int dbSeq; // sequence database sequence number 225 pstr conflict; // conflict comment 226 227 SeqAdv (); 228 SeqAdv ( PChain Chain_Owner ); 229 SeqAdv ( PChain Chain_Owner, cpstr S ); 230 SeqAdv ( io::RPStream Object ); 231 ~SeqAdv(); 232 233 void PDBASCIIDump ( pstr S, int N ); 234 ERROR_CODE ConvertPDBASCII ( cpstr S ); 235 void MakeCIF ( mmcif::PData CIF, int N ); 236 ERROR_CODE GetCIF ( mmcif::PData CIF, int & n ); GetClassID()237 CLASS_ID GetClassID () { return ClassID_SeqAdv; } 238 239 void Copy ( PContainerClass seqAdv ); 240 241 void write ( io::RFile f ); 242 void read ( io::RFile f ); 243 244 protected : 245 246 void InitSeqAdv(); 247 248 }; 249 250 251 // ================== SeqRes ======================== 252 253 DefineClass(SeqRes); 254 DefineStreamFunctions(SeqRes); 255 256 class SeqRes : public io::Stream { 257 258 friend class Model; 259 friend class Chain; 260 261 public : 262 263 int numRes; // number of residues in the chain 264 PResName resName; // residue names 265 266 SeqRes (); 267 SeqRes ( io::RPStream Object ); 268 ~SeqRes(); 269 270 void SetChain ( PChain Chain_Owner ); 271 void PDBASCIIDump ( io::RFile f ); 272 ERROR_CODE ConvertPDBASCII ( cpstr S ); 273 274 void MakeCIF ( mmcif::PData CIF ); 275 ERROR_CODE GetCIF ( mmcif::PData CIF ); 276 277 void Copy ( PSeqRes seqRes ); 278 279 void write ( io::RFile f ); 280 void read ( io::RFile f ); 281 282 protected : 283 PChain chain; 284 ChainID chainID; 285 int serNum; 286 287 void InitSeqRes(); 288 void FreeMemory(); 289 290 }; 291 292 293 // ================== ModRes ======================== 294 295 DefineClass(ModRes); 296 DefineStreamFunctions(ModRes); 297 298 class ModRes : public ContainerChain { 299 300 public : 301 302 ResName resName; // residue name used 303 int seqNum; // residue sequence number 304 InsCode insCode; // residue insertion code 305 ResName stdRes; // standard residue name 306 pstr comment; // description of the residue modification 307 308 ModRes (); 309 ModRes ( PChain Chain_Owner ); 310 ModRes ( PChain Chain_Owner, cpstr S ); 311 ModRes ( io::RPStream Object ); 312 ~ModRes(); 313 314 void PDBASCIIDump ( pstr S, int N ); 315 void MakeCIF ( mmcif::PData CIF, int N ); 316 ERROR_CODE ConvertPDBASCII ( cpstr S ); 317 ERROR_CODE GetCIF ( mmcif::PData CIF, int & n ); GetClassID()318 CLASS_ID GetClassID () { return ClassID_ModRes; } 319 320 void Copy ( PContainerClass modRes ); 321 322 void write ( io::RFile f ); 323 void read ( io::RFile f ); 324 325 protected : 326 327 void InitModRes(); 328 329 }; 330 331 332 // ================== HetRec =========================== 333 334 DefineClass(HetRec); 335 DefineStreamFunctions(HetRec); 336 337 class HetRec : public ContainerChain { 338 339 public : 340 341 ResName hetID; // Het identifier (right-justified) 342 int seqNum; // sequence number 343 InsCode insCode; // insertion code 344 int numHetAtoms; // number of HETATM records for the 345 // group present in the entry 346 pstr comment; // text describing Het group 347 348 HetRec (); 349 HetRec ( PChain Chain_Owner ); 350 HetRec ( PChain Chain_Owner, cpstr S ); 351 HetRec ( io::RPStream Object ); 352 ~HetRec(); 353 354 void PDBASCIIDump ( pstr S, int N ); 355 void MakeCIF ( mmcif::PData CIF, int N ); 356 ERROR_CODE ConvertPDBASCII ( cpstr S ); 357 ERROR_CODE GetCIF ( mmcif::PData CIF, int & n ); GetClassID()358 CLASS_ID GetClassID () { return ClassID_Het; } 359 360 void Copy ( PContainerClass Het ); 361 362 void write ( io::RFile f ); 363 void read ( io::RFile f ); 364 365 protected : 366 367 void InitHetRec(); 368 369 }; 370 371 372 // ================= Chain ======================= 373 374 DefineFactoryFunctions(Chain); 375 376 class Chain : public UDData { 377 378 friend class DBReference; 379 friend class SeqAdv; 380 friend class SeqRes; 381 friend class ModRes; 382 friend class HetRec; 383 friend class Residue; 384 friend class Atom; 385 friend class Model; 386 friend class Root; 387 friend class SelManager; 388 friend class BondManager; 389 friend class CoorManager; 390 friend class Manager; 391 392 public : 393 394 ChainContainer DBRef; // database reference 395 ChainContainer seqAdv; // SEQADV records 396 SeqRes seqRes; // Sequence residues, SEQRES records 397 ChainContainer modRes; // modification descriptions 398 ChainContainer Het; // non-standard residues descriptions 399 400 Chain (); // SetModel() MUST be used after this constructor! 401 Chain ( PProModel model, const ChainID chID ); 402 Chain ( io::RPStream Object ); 403 ~Chain(); 404 405 void FreeAnnotations(); 406 407 void SetModel ( PProModel model ); 408 void SetChain ( const ChainID chID ); 409 410 PManager GetCoordHierarchy(); // PRoot 411 412 // ConvertXXXXX(..) functions do not check for record name 413 // and assume that PDBString is at least 81 symbols long 414 // (including the terminating null). 415 ERROR_CODE ConvertDBREF ( cpstr PDBString ); 416 ERROR_CODE ConvertSEQADV ( cpstr PDBString ); 417 ERROR_CODE ConvertSEQRES ( cpstr PDBString ); 418 ERROR_CODE ConvertMODRES ( cpstr PDBString ); 419 ERROR_CODE ConvertHET ( cpstr PDBString ); 420 421 // This function should be used for testing purposes only. 422 // A full PDB ASCII dump for all models and chains involved 423 // is done by Root class. 424 void PDBASCIIDump ( io::RFile f ); 425 426 void PDBASCIIAtomDump ( io::RFile f ); 427 void MakeAtomCIF ( mmcif::PData CIF ); 428 429 430 // ----------------- Extracting residues ------------------------- 431 432 int GetNumberOfResidues(); // returns number of res-s in the chain 433 PResidue GetResidue ( int resNo ); // returns resNo-th residue 434 // in the chain; 435 // 0<=resNo<nResidues 436 437 // GetResidue(..) returns pointer on residue, whose sequence 438 // number and insert code are given in seqNum and insCode, 439 // respectively. If such a residue is absent in the chain, 440 // returns NULL. 441 PResidue GetResidue ( int seqNum, const InsCode insCode ); 442 443 // GetResidueNo(..) returns the residue number in the chain's 444 // residues table. Residues are numbered as 0..nres-1 as they 445 // appear in the coordinate file. 446 // If residue is not found, the function returns -1. 447 int GetResidueNo ( int seqNum, const InsCode insCode ); 448 449 void GetResidueTable ( PPResidue & resTable, 450 int & NumberOfResidues ); 451 452 // GetResidueCreate(..) returns pointer on residue, whose name, 453 // sequence number and insertion code are given by resName, seqNum 454 // and insCode, respectively. If such a residue is absent in the 455 // chain, one is created at the end of chain. 456 // If a residue with given sequence number and insertion code 457 // is present in the chain but has a different name, the function 458 // returns NULL unless Enforce is set True. In the latter case, 459 // a new residue is still created at the end of chain, but there 460 // is no guarantee that any function operating on the sequence 461 // number and insertion code will work properly. 462 PResidue GetResidueCreate ( const ResName resName, int seqNum, 463 const InsCode insCode, bool Enforce ); 464 465 // GetCoorSequence(...) returns sequence inferred from list 466 // of residues (which may differ from one in the file header). 467 // The sequence is returned as a null-terminated string 'seq'. 468 // On input, 'seq' should be either NULL or allocated (in which 469 // case the original allocation will be released). 470 void GetCoordSequence ( pstr & seq ); 471 472 // ------------------ Deleting residues ---------------------- 473 474 int DeleteResidue ( int resNo ); // returns num of deleted res-s 475 int DeleteResidue ( int seqNum, const InsCode insCode ); 476 int DeleteAllResidues(); 477 int DeleteSolvent (); 478 void TrimResidueTable (); // do not forget to call after all dels 479 480 // ------------------- Adding residues ----------------------- 481 482 // AddResidue(..) adds residue to the chain, InsResidue inserts 483 // the residue on the specified position of the chain (other 484 // residues are shifted up to the end of chain). Position in the 485 // chain may be specified by a serial number (that is position in 486 // the residue table) or by seqNum and insCode of one of the 487 // chain's residues (the new residue is then inserted before that 488 // one). If the chain is associated with a coordinate hierarchy, 489 // and residue 'res' is not, the latter is checked in 490 // automatically. If residue 'res' belongs to any coordinate 491 // hierarchy (even though that of the residue), it is *copied* 492 // rather than simply taken over, and is checked in. 493 // If the chain is not associated with a coordinate hierarchy, 494 // all added residues will be checked in automatically once the 495 // chain is checked in. 496 int AddResidue ( PResidue res ); 497 int InsResidue ( PResidue res, int pos ); 498 int InsResidue ( PResidue res, int seqNum, const InsCode insCode ); 499 500 // -------------------- Extracting atoms --------------------- 501 502 int GetNumberOfAtoms ( bool countTers ); 503 int GetNumberOfAtoms ( int seqNo, const InsCode insCode ); 504 int GetNumberOfAtoms ( int resNo ); 505 506 PAtom GetAtom ( int seqNo, 507 const InsCode insCode, 508 const AtomName aname, 509 const Element elmnt, 510 const AltLoc aloc ); 511 PAtom GetAtom ( int seqNo, const InsCode insCode, int atomNo ); 512 PAtom GetAtom ( int resNo, 513 const AtomName aname, 514 const Element elmnt, 515 const AltLoc aloc ); 516 PAtom GetAtom ( int resNo, int atomNo ); 517 518 void GetAtomTable ( int seqNo, const InsCode insCode, 519 PPAtom & atomTable, int & NumberOfAtoms ); 520 void GetAtomTable ( int resNo, 521 PPAtom & atomTable, int & NumberOfAtoms ); 522 523 // GetAtomTable1(..) returns atom table without TER atoms and 524 // without NULL atom pointers. NumberOfAtoms returns the actual 525 // number of atom pointers in atomTable. 526 // atomTable is allocated withing the function. If it was 527 // not set to NULL before calling the function, the latter will 528 // attempt to deallocate it first. 529 // The application is responsible for deleting atomTable, 530 // however it must not touch atom pointers, i.e. use simply 531 // "delete[] atomTable;". Never pass atomTable from 532 // GetAtomTable(..) into this function, unless you set it to NULL 533 // before doing that. 534 void GetAtomTable1 ( int seqNo, const InsCode insCode, 535 PPAtom & atomTable, int & NumberOfAtoms ); 536 void GetAtomTable1 ( int resNo, 537 PPAtom & atomTable, int & NumberOfAtoms ); 538 539 // --------------------- Deleting atoms ---------------------- 540 541 int DeleteAtom ( int seqNo, 542 const InsCode insCode, 543 const AtomName aname, 544 const Element elmnt, 545 const AltLoc aloc ); 546 int DeleteAtom ( int seqNo, 547 const InsCode insCode, 548 int atomNo ); 549 int DeleteAtom ( int resNo, 550 const AtomName aname, 551 const Element elmnt, 552 const AltLoc aloc ); 553 int DeleteAtom ( int resNo, int atomNo ); 554 555 int DeleteAllAtoms ( int seqNo, const InsCode insCode ); 556 int DeleteAllAtoms ( int resNo ); 557 int DeleteAllAtoms (); 558 559 // DeleteAltLocs() leaves only alternative location with maximal 560 // occupancy, if those are equal or unspecified, the one with 561 // "least" alternative location indicator. 562 // The function returns the number of deleted. All tables remain 563 // untrimmed, so that explicit trimming or calling 564 // FinishStructEdit() is required. 565 int DeleteAltLocs(); 566 567 // ---------------------- Adding atoms ----------------------- 568 569 int AddAtom ( int seqNo, const InsCode insCode, PAtom atom ); 570 int AddAtom ( int resNo, PAtom atom ); 571 572 // ------------------------------------------------------------- 573 574 void ApplyTransform ( mat44 & TMatrix ); // transforms all 575 // coordinates by multiplying 576 // with matrix TMatrix 577 578 int GetModelNum(); GetModel()579 PModel GetModel () { return (PModel)model; } GetChainID()580 cpstr GetChainID () { return chainID; } 581 void SetChainID ( const ChainID chID ); 582 cpstr GetChainID ( pstr ChID ); // returns /m/c 583 584 void GetAtomStatistics ( RAtomStat AS ); 585 void CalAtomStatistics ( RAtomStat AS ); 586 587 int CheckID ( const ChainID chID ); 588 int CheckIDS ( cpstr CID ); 589 590 cpstr GetEntryID (); 591 void SetEntryID ( const IDCode idCode ); 592 593 int GetNumberOfDBRefs (); 594 PDBReference GetDBRef ( int dbRefNo ); // 0..nDBRefs-1 595 596 void MaskAtoms ( PMask Mask ); 597 void MaskResidues ( PMask Mask ); 598 void UnmaskAtoms ( PMask Mask ); 599 void UnmaskResidues ( PMask Mask ); 600 601 void SortResidues (); 602 603 int GetNofModResidues(); 604 PModRes GetModResidue ( int modResNo ); // 0.. on 605 606 bool isSolventChain (); 607 bool isInSelection ( int selHnd ); 608 bool isAminoacidChain (); 609 bool isNucleotideChain(); 610 611 612 // ------- user-defined data handlers 613 int PutUDData ( int UDDhandle, int iudd ); 614 int PutUDData ( int UDDhandle, realtype rudd ); 615 int PutUDData ( int UDDhandle, cpstr sudd ); 616 617 int GetUDData ( int UDDhandle, int & iudd ); 618 int GetUDData ( int UDDhandle, realtype & rudd ); 619 int GetUDData ( int UDDhandle, pstr sudd, int maxLen ); 620 int GetUDData ( int UDDhandle, pstr & sudd ); 621 622 void Copy ( PChain chain ); 623 void CopyAnnotations ( PChain chain ); 624 625 void write ( io::RFile f ); // writes header to PDB binary file 626 void read ( io::RFile f ); // reads header from PDB binary file 627 628 protected : 629 630 ChainID chainID; // chain ID 631 ChainID prevChainID; // if chain is renamed, its original 632 // name may be saved here. 633 PProModel model; // pointer to model class 634 635 int nWeights; // used externally for sorting 636 realtype Weight; // chains 637 638 int nResidues; // number of residues 639 PPResidue residue; // array of residues 640 641 bool Exclude; // used internally 642 643 void InitChain (); 644 void FreeMemory(); 645 646 void ExpandResidueArray ( int inc ); 647 // _ExcludeResidue(..) excludes (but does not dispose!) a residue 648 // from the chain. Returns 1 if the chain gets empty and 0 649 // otherwise. 650 int _ExcludeResidue ( const ResName resName, int seqNum, 651 const InsCode insCode ); 652 void _copy ( PChain chain ); 653 void _copy ( PChain chain, PPAtom atom, int & atom_index ); 654 void CheckInAtoms(); 655 656 private : 657 int resLen; // length of Residue array 658 659 }; 660 661 662 extern void TestChain(); // reads from 'in.chain', writes into 663 // 'out.chain' and 'abin.chain' 664 665 } // namespace mmdb 666 667 #endif 668 669