1 //  $Id: mmdb_chain.h $
2 //  =================================================================
3 //
4 //   CCP4 Coordinate Library: support of coordinate-related
5 //   functionality in protein crystallography applications.
6 //
7 //   Copyright (C) Eugene Krissinel 2000-2013.
8 //
9 //    This library is free software: you can redistribute it and/or
10 //    modify it under the terms of the GNU Lesser General Public
11 //    License version 3, modified in accordance with the provisions
12 //    of the license to address the requirements of UK law.
13 //
14 //    You should have received a copy of the modified GNU Lesser
15 //    General Public License along with this library. If not, copies
16 //    may be downloaded from http://www.ccp4.ac.uk/ccp4license.php
17 //
18 //    This program is distributed in the hope that it will be useful,
19 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
20 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 //    GNU Lesser General Public License for more details.
22 //
23 //  =================================================================
24 //
25 //    23.12.15   <--  Date of Last Modification.
26 //                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27 //  -----------------------------------------------------------------
28 //
29 //  **** Module  :  MMDB_Chain <interface>
30 //       ~~~~~~~~~
31 //  **** Project :  MacroMolecular Data Base (MMDB)
32 //       ~~~~~~~~~
33 //  **** Classes :  mmdb::ProModel     ( abstract Model class         )
34 //       ~~~~~~~~~  mmdb::DBReference  ( DBREF  records               )
35 //             mmdb::ChainContainer ( container of in-chain classes   )
36 //             mmdb::ContainerChain ( chain containered class template)
37 //             mmdb::SeqAdv         ( SEQADV records                  )
38 //             mmdb::SeqRes         ( SEQRES records                  )
39 //             mmdb::ModRes         ( MODRES records                  )
40 //             mmdb::HetRec         ( HET    records                  )
41 //             mmdb::Chain          ( chain class                     )
42 //
43 //  Copyright (C) E. Krissinel 2000-2015
44 //
45 //  =================================================================
46 //
47 
48 #ifndef __MMDB_Chain__
49 #define __MMDB_Chain__
50 
51 #include "mmdb_io_stream.h"
52 #include "mmdb_utils.h"
53 #include "mmdb_atom.h"
54 #include "mmdb_defs.h"
55 
56 namespace mmdb  {
57 
58   //  ====================  ProModel  ======================
59 
  //    This class is a virtual stand-in needed only for declaring
  // certain functions of Model, which are used by Chain and
  // Residue.
63 
64   DefineClass(ProModel);
65   DefineStreamFunctions(ProModel);
66 
67   DefineClass(Manager);
68 
69   class ProModel : public UDData  {
70 
71     friend class Chain;
72 
73     public :
74 
ProModel()75       ProModel  () : UDData () {}
ProModel(io::RPStream Object)76       ProModel  ( io::RPStream Object ) : UDData ( Object ) {}
~ProModel()77       ~ProModel () {}
78 
GetEntryID()79       virtual cpstr GetEntryID () { return ""; }
SetEntryID(const IDCode)80       virtual void  SetEntryID ( const IDCode ) {}
81 
AddChain(PChain)82       virtual int   AddChain ( PChain ) { return 0; }
83 
84       // returns pointer to Root
GetCoordHierarchy()85       virtual PManager GetCoordHierarchy() { return NULL; }
86 
87       //  GetNumberOfModels() returns TOTAL number of models
GetNumberOfModels()88       virtual int GetNumberOfModels() { return 0;    }
89 
90       //  GetNumberOfAllAtoms() returns TOTAL number of atoms in
91       // all models
GetNumberOfAllAtoms()92       virtual int GetNumberOfAllAtoms() { return 0;    }
93 
94       //  returns pointer to the general Atom array
GetAllAtoms()95       virtual PPAtom     GetAllAtoms() { return NULL; }
96 
GetSerNum()97       virtual int  GetSerNum       () { return 0; }
98 
ExpandAtomArray(int)99       virtual void ExpandAtomArray ( int )  {}
AddAtomArray(int)100       virtual void AddAtomArray    ( int )  {}
101 
102     protected :
103 
_ExcludeChain(const ChainID)104       virtual int  _ExcludeChain ( const ChainID ) { return 0; }
105 
106   };
107 
108 
109 
110   //  ====================  ChainContainer  ======================
111 
112   DefineClass(ChainContainer);
113   DefineStreamFunctions(ChainContainer);
114 
115   class ChainContainer : public ClassContainer  {
116 
117     public :
118 
ChainContainer()119       ChainContainer  () : ClassContainer () {}
ChainContainer(io::RPStream Object)120       ChainContainer  ( io::RPStream Object )
121                           : ClassContainer ( Object ) {}
~ChainContainer()122       ~ChainContainer () {}
123 
124       PContainerClass MakeContainerClass ( int ClassID );
125 
126       void  SetChain ( PChain Chain_Owner ); // must be set before using
127                                               // the Container
128 
129       // special functions used in Model::GetCIF(..)
130       cpstr Get1stChainID ();
131       void  MoveByChainID ( const ChainID chainID,
132                             PChainContainer chainContainer );
133 
134     protected :
135       PChain chain;
136 
137   };
138 
139 
140   //  ==================  ContainerChain  =====================
141 
142   DefineClass(ContainerChain);
143   DefineStreamFunctions(ContainerChain);
144 
145   class ContainerChain : public ContainerClass {
146 
147     friend class ChainContainer;
148 
149     public :
150 
151       ContainerChain ();
152       ContainerChain ( PChain Chain_Owner  );
ContainerChain(io::RPStream Object)153       ContainerChain ( io::RPStream Object ) : ContainerClass(Object) {}
154 
155       void SetChain   ( PChain Chain_Owner );
156 
157     protected :
158       PChain  chain;
159       ChainID chainID;  // just a copy of Chain->chainID
160 
161   };
162 
163 
164   //  ==================  DBReference  ========================
165 
166   DefineClass(DBReference);
167   DefineStreamFunctions(DBReference);
168 
169   class DBReference : public ContainerChain  {
170 
171     public :
172 
173       int      seqBeg;      // initial seq num of the PDB seq-ce segment
174       InsCode  insBeg;      // initial ins code of the PDB seq-ce segm-t
175       int      seqEnd;      // ending seq number of the PDB seq-ce segm-t
176       InsCode  insEnd;      // ending ins code of the PDB seq-ce segment
177       DBName   database;    // sequence database name
178       DBAcCode dbAccession; // sequence database accession code
179       DBIdCode dbIdCode;    // sequence database identification code
180       int      dbseqBeg;    // initial seq number of the database segment
181       InsCode  dbinsBeg;    // ins code of initial residue of the segment
182       int      dbseqEnd;    // ending seq number of the database segment
183       InsCode  dbinsEnd;   // ins code of the ending residue of the seg-t
184 
185       DBReference ();
186       DBReference ( PChain Chain_Owner );
187       DBReference ( PChain Chain_Owner, cpstr S );
188       DBReference ( io::RPStream Object );
189       ~DBReference();
190 
191       void       PDBASCIIDump    ( pstr S, int N );
192       void       MakeCIF         ( mmcif::PData CIF, int N );
193       ERROR_CODE ConvertPDBASCII ( cpstr S );
194       ERROR_CODE GetCIF          ( mmcif::PData CIF, int & n );
GetClassID()195       CLASS_ID   GetClassID      () { return ClassID_DBReference; }
196 
197       void  Copy  ( PContainerClass DBRef );
198 
199       void  write ( io::RFile f );
200       void  read  ( io::RFile f );
201 
202     protected :
203 
204       void InitDBReference();
205 
206   };
207 
208 
209   //  ====================  SeqAdv  ===========================
210 
211   DefineClass(SeqAdv);
212   DefineStreamFunctions(SeqAdv);
213 
214   class SeqAdv : public ContainerChain  {
215 
216     public :
217 
218       ResName  resName;     // residue name in conflict
219       int      seqNum;      // residue sequence number
220       InsCode  insCode;     // residue insertion code
221       DBName   database;    // sequence database name
222       DBAcCode dbAccession; // sequence database accession code
223       ResName  dbRes;       // sequence database residue name
224       int      dbSeq;       // sequence database sequence number
225       pstr     conflict;    // conflict comment
226 
227       SeqAdv ();
228       SeqAdv ( PChain Chain_Owner );
229       SeqAdv ( PChain Chain_Owner, cpstr S );
230       SeqAdv ( io::RPStream Object );
231       ~SeqAdv();
232 
233       void       PDBASCIIDump    ( pstr S, int N );
234       ERROR_CODE ConvertPDBASCII ( cpstr S );
235       void       MakeCIF         ( mmcif::PData CIF, int N );
236       ERROR_CODE GetCIF          ( mmcif::PData CIF, int & n );
GetClassID()237       CLASS_ID   GetClassID      () { return ClassID_SeqAdv; }
238 
239       void  Copy  ( PContainerClass seqAdv );
240 
241       void  write ( io::RFile f );
242       void  read  ( io::RFile f );
243 
244     protected :
245 
246       void InitSeqAdv();
247 
248   };
249 
250 
251   //  ==================  SeqRes  ========================
252 
253   DefineClass(SeqRes);
254   DefineStreamFunctions(SeqRes);
255 
256   class SeqRes : public io::Stream  {
257 
258     friend class Model;
259     friend class Chain;
260 
261     public :
262 
263       int       numRes;   // number of residues in the chain
264       PResName  resName;  // residue names
265 
266       SeqRes ();
267       SeqRes ( io::RPStream Object );
268       ~SeqRes();
269 
270       void       SetChain        ( PChain Chain_Owner );
271       void       PDBASCIIDump    ( io::RFile f );
272       ERROR_CODE ConvertPDBASCII ( cpstr  S );
273 
274       void  MakeCIF     ( mmcif::PData CIF );
275       ERROR_CODE GetCIF ( mmcif::PData CIF );
276 
277       void  Copy  ( PSeqRes seqRes );
278 
279       void  write ( io::RFile f );
280       void  read  ( io::RFile f );
281 
282     protected :
283       PChain  chain;
284       ChainID chainID;
285       int     serNum;
286 
287       void InitSeqRes();
288       void FreeMemory();
289 
290   };
291 
292 
293   //  ==================  ModRes  ========================
294 
295   DefineClass(ModRes);
296   DefineStreamFunctions(ModRes);
297 
298   class ModRes : public ContainerChain  {
299 
300     public :
301 
302       ResName  resName;     // residue name used
303       int      seqNum;      // residue sequence number
304       InsCode  insCode;     // residue insertion code
305       ResName  stdRes;      // standard residue name
306       pstr     comment;     // description of the residue modification
307 
308       ModRes ();
309       ModRes ( PChain Chain_Owner );
310       ModRes ( PChain Chain_Owner, cpstr S );
311       ModRes ( io::RPStream Object );
312       ~ModRes();
313 
314       void       PDBASCIIDump    ( pstr S, int N );
315       void       MakeCIF         ( mmcif::PData CIF, int N );
316       ERROR_CODE ConvertPDBASCII ( cpstr S );
317       ERROR_CODE GetCIF          ( mmcif::PData CIF, int & n );
GetClassID()318       CLASS_ID   GetClassID      () { return ClassID_ModRes; }
319 
320       void  Copy  ( PContainerClass modRes );
321 
322       void  write ( io::RFile f );
323       void  read  ( io::RFile f );
324 
325     protected :
326 
327       void InitModRes();
328 
329   };
330 
331 
332   //  ==================  HetRec  ===========================
333 
334   DefineClass(HetRec);
335   DefineStreamFunctions(HetRec);
336 
337   class HetRec : public ContainerChain  {
338 
339     public :
340 
341       ResName  hetID;       // Het identifier (right-justified)
342       int      seqNum;      // sequence number
343       InsCode  insCode;     // insertion code
344       int      numHetAtoms; // number of HETATM records for the
345                             // group present in the entry
346       pstr     comment;     // text describing Het group
347 
348       HetRec ();
349       HetRec ( PChain Chain_Owner );
350       HetRec ( PChain Chain_Owner, cpstr S );
351       HetRec ( io::RPStream Object );
352       ~HetRec();
353 
354       void       PDBASCIIDump    ( pstr S, int N );
355       void       MakeCIF         ( mmcif::PData CIF, int N );
356       ERROR_CODE ConvertPDBASCII ( cpstr S );
357       ERROR_CODE GetCIF          ( mmcif::PData CIF, int & n );
GetClassID()358       CLASS_ID   GetClassID      () { return ClassID_Het; }
359 
360       void  Copy  ( PContainerClass Het );
361 
362       void  write ( io::RFile f );
363       void  read  ( io::RFile f );
364 
365     protected :
366 
367       void InitHetRec();
368 
369   };
370 
371 
372   //  =================  Chain  =======================
373 
374   DefineFactoryFunctions(Chain);
375 
376   class Chain : public UDData  {
377 
378     friend class DBReference;
379     friend class SeqAdv;
380     friend class SeqRes;
381     friend class ModRes;
382     friend class HetRec;
383     friend class Residue;
384     friend class Atom;
385     friend class Model;
386     friend class Root;
387     friend class SelManager;
388     friend class BondManager;
389     friend class CoorManager;
390     friend class Manager;
391 
392     public :
393 
394       ChainContainer DBRef;    // database reference
395       ChainContainer seqAdv;   // SEQADV records
396       SeqRes         seqRes;   // Sequence residues, SEQRES records
397       ChainContainer modRes;   // modification descriptions
398       ChainContainer Het;      // non-standard residues descriptions
399 
400       Chain ();  // SetModel() MUST be used after this constructor!
401       Chain ( PProModel model, const ChainID chID );
402       Chain ( io::RPStream Object );
403       ~Chain();
404 
405       void FreeAnnotations();
406 
407       void SetModel ( PProModel    model );
408       void SetChain ( const ChainID chID );
409 
410       PManager GetCoordHierarchy();   // PRoot
411 
412       //   ConvertXXXXX(..) functions do not check for record name
413       // and assume that PDBString is at least 81 symbols long
414       // (including the terminating null).
415       ERROR_CODE ConvertDBREF  ( cpstr PDBString );
416       ERROR_CODE ConvertSEQADV ( cpstr PDBString );
417       ERROR_CODE ConvertSEQRES ( cpstr PDBString );
418       ERROR_CODE ConvertMODRES ( cpstr PDBString );
419       ERROR_CODE ConvertHET    ( cpstr PDBString );
420 
421       // This function should be used for testing purposes only.
422       // A full PDB ASCII dump for all models and chains involved
423       // is done by Root class.
424       void  PDBASCIIDump     ( io::RFile f );
425 
426       void  PDBASCIIAtomDump ( io::RFile f );
427       void  MakeAtomCIF      ( mmcif::PData CIF );
428 
429 
430       //  -----------------  Extracting residues  -------------------------
431 
432       int GetNumberOfResidues(); // returns number of res-s in the chain
433       PResidue GetResidue ( int resNo );  // returns resNo-th residue
434                                           // in the chain;
435                                           // 0<=resNo<nResidues
436 
437       //   GetResidue(..) returns pointer on residue, whose sequence
438       // number and insert code are given in seqNum and insCode,
439       // respectively. If such a residue is absent in the chain,
440       // returns NULL.
441       PResidue GetResidue ( int seqNum, const InsCode insCode );
442 
443       //   GetResidueNo(..) returns the residue number in the chain's
444       // residues table. Residues are numbered as 0..nres-1 as they
445       // appear in the coordinate file.
446       //   If residue is not found, the function returns -1.
447       int  GetResidueNo ( int seqNum, const InsCode insCode );
448 
449       void GetResidueTable ( PPResidue & resTable,
450                              int & NumberOfResidues );
451 
452       //   GetResidueCreate(..) returns pointer on residue, whose name,
453       // sequence number and insertion code are given by resName, seqNum
454       // and insCode, respectively. If such a residue is absent in the
455       // chain, one is created at the end of chain.
456       //   If a residue with given sequence number and insertion code
457       // is present in the chain but has a different name, the function
458       // returns NULL unless Enforce is set True. In the latter case,
459       // a new residue is still created at the end of chain, but there
460       // is no guarantee that any function operating on the sequence
461       // number and insertion code will work properly.
462       PResidue GetResidueCreate ( const ResName resName, int seqNum,
463                                   const InsCode insCode, bool Enforce );
464 
465       //   GetCoorSequence(...) returns sequence inferred from list
466       // of residues (which may differ from one in the file header).
467       // The sequence is returned as a null-terminated string 'seq'.
468       // On input, 'seq' should be either NULL or allocated (in which
469       // case the original allocation will be released).
470       void GetCoordSequence ( pstr & seq );
471 
472       //  ------------------  Deleting residues  ----------------------
473 
474       int  DeleteResidue ( int resNo ); // returns num of deleted res-s
475       int  DeleteResidue ( int seqNum, const InsCode insCode );
476       int  DeleteAllResidues();
477       int  DeleteSolvent    ();
478       void TrimResidueTable ();  // do not forget to call after all dels
479 
480       //  -------------------  Adding residues  -----------------------
481 
482       //   AddResidue(..) adds residue to the chain, InsResidue inserts
483       // the residue on the specified position of the chain (other
484       // residues are shifted up to the end of chain). Position in the
485       // chain may be specified by a serial number (that is position in
486       // the residue table) or by seqNum and insCode of one of the
487       // chain's residues (the new residue is then inserted before that
488       // one). If the chain is associated with a coordinate hierarchy,
489       // and residue 'res' is not, the latter is checked in
490       // automatically. If residue 'res' belongs to any coordinate
491       // hierarchy (even though that of the residue), it is *copied*
492       // rather than simply taken over, and is checked in.
493       //   If the chain is not associated with a coordinate hierarchy,
494       // all added residues will be checked in automatically once the
495       // chain is checked in.
496       int  AddResidue ( PResidue res );
497       int  InsResidue ( PResidue res, int pos );
498       int  InsResidue ( PResidue res, int seqNum, const InsCode insCode );
499 
500       //  --------------------  Extracting atoms  ---------------------
501 
502       int  GetNumberOfAtoms ( bool countTers );
503       int  GetNumberOfAtoms ( int seqNo, const InsCode insCode );
504       int  GetNumberOfAtoms ( int resNo );
505 
506       PAtom GetAtom ( int            seqNo,
507                       const InsCode  insCode,
508                       const AtomName aname,
509                       const Element  elmnt,
510                       const AltLoc   aloc );
511       PAtom GetAtom ( int seqNo, const InsCode insCode, int atomNo );
512       PAtom GetAtom ( int            resNo,
513                       const AtomName aname,
514                       const Element  elmnt,
515                       const AltLoc   aloc );
516       PAtom GetAtom ( int resNo, int atomNo );
517 
518       void GetAtomTable ( int seqNo, const InsCode insCode,
519                           PPAtom & atomTable, int & NumberOfAtoms );
520       void GetAtomTable ( int resNo,
521                           PPAtom & atomTable, int & NumberOfAtoms );
522 
523       //   GetAtomTable1(..) returns atom table without TER atoms and
524       // without NULL atom pointers. NumberOfAtoms returns the actual
525       // number of atom pointers in atomTable.
526       //   atomTable is allocated withing the function. If it was
527       // not set to NULL before calling the function, the latter will
528       // attempt to deallocate it first.
529       //   The application is responsible for deleting atomTable,
530       // however it must not touch atom pointers, i.e. use simply
531       // "delete[] atomTable;". Never pass atomTable from
532       // GetAtomTable(..) into this function, unless you set it to NULL
533       // before doing that.
534       void GetAtomTable1 ( int seqNo, const InsCode insCode,
535                            PPAtom & atomTable, int & NumberOfAtoms );
536       void GetAtomTable1 ( int resNo,
537                            PPAtom & atomTable, int & NumberOfAtoms );
538 
539       //  ---------------------  Deleting atoms  ----------------------
540 
541       int DeleteAtom ( int            seqNo,
542                        const InsCode  insCode,
543                        const AtomName aname,
544                        const Element  elmnt,
545                        const AltLoc   aloc );
546       int DeleteAtom ( int            seqNo,
547                        const InsCode  insCode,
548                        int            atomNo );
549       int DeleteAtom ( int            resNo,
550                        const AtomName aname,
551                        const Element  elmnt,
552                        const AltLoc   aloc );
553       int DeleteAtom ( int resNo, int atomNo );
554 
555       int DeleteAllAtoms ( int seqNo, const InsCode insCode );
556       int DeleteAllAtoms ( int resNo );
557       int DeleteAllAtoms ();
558 
559       //  DeleteAltLocs() leaves only alternative location with maximal
560       // occupancy, if those are equal or unspecified, the one with
561       // "least" alternative location indicator.
562       //  The function returns the number of deleted. All tables remain
563       // untrimmed, so that explicit trimming or calling
564       // FinishStructEdit() is required.
565       int DeleteAltLocs();
566 
567       //  ----------------------  Adding atoms  -----------------------
568 
569       int AddAtom ( int seqNo, const InsCode insCode, PAtom atom );
570       int AddAtom ( int resNo, PAtom atom );
571 
572       //  -------------------------------------------------------------
573 
574       void  ApplyTransform ( mat44 & TMatrix );  // transforms all
575                                            // coordinates by multiplying
576                                            // with matrix TMatrix
577 
578       int    GetModelNum();
GetModel()579       PModel GetModel   ()  { return (PModel)model; }
GetChainID()580       cpstr  GetChainID ()  { return chainID;       }
581       void   SetChainID ( const ChainID chID );
582       cpstr  GetChainID ( pstr  ChID );  // returns /m/c
583 
584       void  GetAtomStatistics ( RAtomStat AS );
585       void  CalAtomStatistics ( RAtomStat AS );
586 
587       int   CheckID    ( const ChainID chID );
588       int   CheckIDS   ( cpstr CID  );
589 
590       cpstr GetEntryID ();
591       void  SetEntryID ( const IDCode idCode );
592 
593       int   GetNumberOfDBRefs ();
594       PDBReference  GetDBRef ( int dbRefNo );  // 0..nDBRefs-1
595 
596       void  MaskAtoms      ( PMask Mask );
597       void  MaskResidues   ( PMask Mask );
598       void  UnmaskAtoms    ( PMask Mask );
599       void  UnmaskResidues ( PMask Mask );
600 
601       void  SortResidues   ();
602 
603       int     GetNofModResidues();
604       PModRes GetModResidue    ( int modResNo );  // 0.. on
605 
606       bool   isSolventChain   ();
607       bool   isInSelection    ( int selHnd );
608       bool   isAminoacidChain ();
609       bool   isNucleotideChain();
610 
611 
612       // -------  user-defined data handlers
613       int   PutUDData ( int UDDhandle, int      iudd );
614       int   PutUDData ( int UDDhandle, realtype rudd );
615       int   PutUDData ( int UDDhandle, cpstr    sudd );
616 
617       int   GetUDData ( int UDDhandle, int      & iudd );
618       int   GetUDData ( int UDDhandle, realtype & rudd );
619       int   GetUDData ( int UDDhandle, pstr sudd, int maxLen );
620       int   GetUDData ( int UDDhandle, pstr     & sudd );
621 
622       void  Copy            ( PChain chain );
623       void  CopyAnnotations ( PChain chain );
624 
625       void  write ( io::RFile f );    // writes header to PDB binary file
626       void  read  ( io::RFile f );    // reads header from PDB binary file
627 
628     protected :
629 
630       ChainID    chainID;     // chain ID
631       ChainID    prevChainID; // if chain is renamed, its original
632                               // name may be saved here.
633       PProModel  model;       // pointer to model class
634 
635       int        nWeights;    // used externally for sorting
636       realtype   Weight;      //   chains
637 
638       int        nResidues;   // number of residues
639       PPResidue  residue;     // array of residues
640 
641       bool       Exclude;     // used internally
642 
643       void  InitChain ();
644       void  FreeMemory();
645 
646       void  ExpandResidueArray ( int inc );
647       //   _ExcludeResidue(..) excludes (but does not dispose!) a residue
648       // from the chain. Returns 1 if the chain gets empty and 0
649       // otherwise.
650       int   _ExcludeResidue ( const ResName resName, int seqNum,
651                               const InsCode insCode );
652       void  _copy ( PChain chain );
653       void  _copy ( PChain chain, PPAtom atom, int & atom_index );
654       void  CheckInAtoms();
655 
656     private :
657       int  resLen;      // length of Residue array
658 
659   };
660 
661 
662   extern void  TestChain();  //  reads from 'in.chain', writes into
663                              //  'out.chain' and 'abin.chain'
664 
665 }  // namespace mmdb
666 
667 #endif
668 
669