1 // $Id: mmdb_utils.h $ 2 // ================================================================= 3 // 4 // CCP4 Coordinate Library: support of coordinate-related 5 // functionality in protein crystallography applications. 6 // 7 // Copyright (C) Eugene Krissinel 2000-2008. 8 // 9 // This library is free software: you can redistribute it and/or 10 // modify it under the terms of the GNU Lesser General Public 11 // License version 3, modified in accordance with the provisions 12 // of the license to address the requirements of UK law. 13 // 14 // You should have received a copy of the modified GNU Lesser 15 // General Public License along with this library. If not, copies 16 // may be downloaded from http://www.ccp4.ac.uk/ccp4license.php 17 // 18 // This program is distributed in the hope that it will be useful, 19 // but WITHOUT ANY WARRANTY; without even the implied warranty of 20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 // GNU Lesser General Public License for more details. 22 // 23 // ================================================================= 24 // 25 // 23.10.15 <-- Date of Last Modification. 26 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 // ----------------------------------------------------------------- 28 // 29 // **** Module : MMDBF_Utils <interface> 30 // ~~~~~~~~~ 31 // **** Project : MacroMolecular Data Base (MMDB) 32 // ~~~~~~~~~ 33 // 34 // **** Classes : mmdb::ContainerClass ( containered class template ) 35 // ~~~~~~~~~ mmdb::ContString ( containered string ) 36 // mmdb::ClassContainer ( container of classes ) 37 // mmdb::AtomPath ( atom path ID ) 38 // mmdb::QuickSort ( quick sort of integers ) 39 // 40 // **** Functions : Date9to11 ( DD-MMM-YY -> DD-MMM-YYYY ) 41 // ~~~~~~~~~~~ Date11to9 ( DD-MMM-YYYY -> DD-MMM-YY ) 42 // Date9toCIF ( DD-MMM-YY -> YYYY-MM-DD ) 43 // Date11toCIF( DD-MMM-YYYY -> YYYY-MM-DD ) 44 // DateCIFto9 ( YYYY-MM-DD -> DD-MMM-YY ) 45 // DateCIFto11( YYYY-MM-DD -> DD-MMM-YYYY ) 46 // GetInteger ( reads integer from a string ) 47 // GetReal ( reads real from a string ) 48 // GetIntIns ( reads integer and insert code ) 49 // PutInteger ( writes integer into a string ) 50 // PutRealF ( writes real in F-form into a string ) 51 // PutIntIns ( writes integer and insert code ) 52 // CIFGetInteger ( reads and deletes int from CIF ) 53 // CIFGetReal ( reads and deletes real from CIF ) 54 // CIFGetString ( reads and deletes string from CIF) 55 // CIFGetInteger1 (reads and del-s int from CIF loop) 56 // CIFGetReal1 (reads and del-s int from CIF loop) 57 // Mat4Inverse ( inversion of 4x4 matrices ) 58 // GetErrorDescription (ascii line to an Error_XXXXX) 59 // ParseAtomID ( parses atom ID line ) 60 // ParseResID ( parses residue ID line ) 61 // ParseAtomPath ( parses full atom path ) 62 // 63 // (C) E. Krissinel 2000-2015 64 // 65 // ================================================================= 66 // 67 68 #ifndef __MMDB_Utils__ 69 #define __MMDB_Utils__ 70 71 #include "mmdb_io_stream.h" 72 #include "mmdb_mmcif_.h" 73 #include "mmdb_defs.h" 74 75 namespace mmdb { 76 77 // ================== Date functions =================== 78 79 // converts DD-MMM-YY to DD-MMM-YYYY; appends terminating zero 80 extern void Date9to11 ( cpstr Date9, pstr Date11 ); 81 82 // converts DD-MMM-YYYY to DD-MMM-YY; does not append terminating zero 83 extern void Date11to9 ( cpstr Date11, pstr Date9 ); 84 85 // converts DD-MMM-YY to YYYY-MM-DD; appends terminating zero 86 extern void Date9toCIF ( cpstr Date9, pstr DateCIF ); 87 88 // converts DD-MMM-YYYY to YYYY-MM-DD; appends terminating zero 89 extern void Date11toCIF ( cpstr Date11, pstr DateCIF ); 90 91 // converts YYYY-MM-DD to DD-MMM-YY; appends terminating zero 92 extern void DateCIFto9 ( cpstr DateCIF, pstr Date9 ); 93 94 // converts YYYY-MM-DD to DD-MMM-YYYY; appends terminating zero 95 extern void DateCIFto11 ( cpstr DateCIF, pstr Date11 ); 96 97 98 // ================= Format functions ================== 99 100 // Returns true if S contains an integer number in its 101 // first M characters. This number is returned in N. 102 // The return is false if no integer number may be 103 // recognized. In this case, N is assigned MinInt4 value. 104 extern bool GetInteger ( int & N, cpstr S, int M ); 105 106 // Returns true if S contains a real number in its 107 // first M characters. This number is returned in R. 108 // The return is false if no real number may be 109 // recognized. In this case, R is assigned -MaxReal value. 110 extern bool GetReal ( realtype & R, cpstr S, int M ); 111 112 // Returns true if S contains an integer number in its 113 // first M characters. This number is returned in N. In addition 114 // to that, GetIntIns() retrieves the insertion code which may 115 // follow the integer and returns it in "ins" (1 character + 116 // terminating 0). 117 // The return is false if no integer number may be 118 // recognized. In this case, N is assigned MinInt4 value, 119 // "ins" just returns (M+1)th symbol of S (+terminating 0). 120 extern bool GetIntIns ( int & N, pstr ins, cpstr S, int M ); 121 122 // Integer N is converted into ASCII string of length M 123 // and pasted onto first M characters of string S. No 124 // terminating zero is added. 125 // If N is set to MinInt4, then first M characters of 126 // string S are set to space. 127 extern void PutInteger ( pstr S, int N, int M ); 128 129 // Real R is converted into ASCII string of length M 130 // and pasted onto first M characters of string S. No 131 // terminating zero is added. The conversion is done 132 // according to fixed format FM.L 133 // If R is set to -MaxReal, then first M characters of 134 // string S are set to the space character. 135 extern void PutRealF ( pstr S, realtype R, int M, int L ); 136 137 // Integer N is converted into ASCII string of length M 138 // and pasted onto first M characters of string S. No 139 // terminating zero is added. The insert code ins is put 140 // immediately after the integer. 141 // If N is set to MinInt4, then first M+1 characters of 142 // string S are set to space, and no insert code are 143 // appended. 144 extern void PutIntIns ( pstr S, int N, int M, cpstr ins ); 145 146 147 // CIFInteger(..), CIFReal(..) and CIFGetString(..) automate 148 // extraction and analysis of data from CIF file. If the data 149 // is erroneous or absent, they store an error message in 150 // CIFErrorLocation string (below) and return non-zero. 151 extern ERROR_CODE CIFGetInteger ( int & I, mmcif::PStruct Struct, 152 cpstr Tag, 153 bool Remove=true ); 154 extern ERROR_CODE CIFGetReal ( realtype & R, mmcif::PStruct Struct, 155 cpstr Tag, 156 bool Remove=true ); 157 extern ERROR_CODE CIFGetString ( pstr S, mmcif::PStruct Struct, 158 cpstr Tag, int SLen, 159 cpstr DefS, 160 bool Remove=true ); 161 162 extern ERROR_CODE CIFGetInteger ( int & I, mmcif::PLoop Loop, cpstr Tag, 163 int & Signal ); 164 extern ERROR_CODE CIFGetIntegerD ( int & I, mmcif::PLoop Loop, cpstr Tag, 165 int defValue=MinInt4 ); 166 extern ERROR_CODE CIFGetInteger1 ( int & I, mmcif::PLoop Loop, cpstr Tag, 167 int nrow ); 168 169 extern ERROR_CODE CIFGetReal ( realtype & R, mmcif::PLoop Loop, 170 cpstr Tag, int & Signal ); 171 extern ERROR_CODE CIFGetReal1 ( realtype & R, mmcif::PLoop Loop, 172 cpstr Tag, int nrow ); 173 174 extern ERROR_CODE CIFGetString ( pstr S, mmcif::PLoop Loop, cpstr Tag, 175 int row, int SLen, cpstr DefS ); 176 177 // Calculates AI=A^{-1} 178 extern void Mat4Inverse ( const mat44 & A, mat44 & AI ); 179 // Calculates A=B*C 180 extern void Mat4Mult ( mat44 & A, const mat44 & B, const mat44 & C ); 181 // Calculates A=B^{-1}*C 182 extern void Mat4Div1 ( mat44 & A, const mat44 & B, const mat44 & C ); 183 // Calculates A=B*C^{-1} 184 extern void Mat4Div2 ( mat44 & A, const mat44 & B, const mat44 & C ); 185 // Calculates determinant of the rotation part 186 extern realtype Mat4RotDet ( mat44 & T ); 187 188 // Sets up a unit matrix 189 extern void Mat4Init ( mat44 & A ); 190 extern void Mat3Init ( mat33 & A ); 191 192 // Calculates AI=A^{-1}, returns determinant 193 extern realtype Mat3Inverse ( const mat33 & A, mat33 & AI ); 194 195 extern bool isMat4Unit ( const mat44 & A, realtype eps, bool rotOnly ); 196 197 // Copies A into AC 198 extern void Mat4Copy ( const mat44 & A, mat44 & ACopy ); 199 extern void Mat3Copy ( const mat33 & A, mat33 & ACopy ); 200 extern bool isMat4Eq ( const mat44 & A, const mat44 & B, realtype eps, 201 bool rotOnly ); 202 203 extern void TransformXYZ ( const mat44 & T, 204 realtype & X, realtype & Y, realtype & Z ); 205 extern realtype TransformX ( const mat44 & T, 206 realtype X, realtype Y, realtype Z ); 207 extern realtype TransformY ( const mat44 & T, 208 realtype X, realtype Y, realtype Z ); 209 extern realtype TransformZ ( const mat44 & T, 210 realtype X, realtype Y, realtype Z ); 211 212 213 extern char CIFErrorLocation[200]; 214 215 // Returns ASCII string explaining the nature of 216 // Error_xxxx error code. 217 extern cpstr GetErrorDescription ( ERROR_CODE ErrorCode ); 218 219 220 221 // ================ ContainerClass ==================== 222 223 DefineClass(ContainerClass); 224 DefineStreamFunctions(ContainerClass); 225 226 class ContainerClass : public io::Stream { 227 228 friend class ClassContainer; 229 230 public : 231 232 ContainerClass (); 233 ContainerClass ( io::RPStream Object ); ~ContainerClass()234 ~ContainerClass() {} 235 236 // ConvertPDBASCII(..) will return one of the Error_XXXXX 237 // constants, see <mmdb_defs.h> ConvertPDBASCII(cpstr)238 virtual ERROR_CODE ConvertPDBASCII ( cpstr ) 239 { return Error_NoError; } PDBASCIIDump(pstr,int)240 virtual void PDBASCIIDump ( pstr, int ) {} PDBASCIIDump1(io::RFile)241 virtual bool PDBASCIIDump1 ( io::RFile ) { return false; } MakeCIF(mmcif::PData,int)242 virtual void MakeCIF ( mmcif::PData, int ) {} 243 244 // Append(..) should return true if CC is appended to this class. 245 // If this is not the case, CC is merely put on the top of 246 // container. 247 // Note: Append(..) detects the necessity to append CC and 248 // performs all the necessary actions for that. The rest of CC 249 // will be disposed by Class Container. 250 // Note: Class Container checks every new class, which is 251 // being added to it (see CClassContainer::AddData(..)), only 252 // against the top of container. 253 virtual bool Append ( PContainerClass CC ); 254 255 // GetCIF(..) extracts any necessary information from CIF and 256 // returns in Signal: 257 // Error_noError : the information was successfully extracted, 258 // this instance of container class should be 259 // stored, and unchanged value of Signal should 260 // be passed to the next (newly created) instance 261 // of this container class. 262 // Error_EmptyCIF : there is no information for this type of 263 // containers to extract. This instance of 264 // container class should be deleted and input 265 // for this type of container class terminated. 266 // Other : the corresponding error. This instance of 267 // container class should be deleted and the 268 // whole input stopped. GetCIF(mmcif::PData,int & n)269 virtual ERROR_CODE GetCIF ( mmcif::PData, int & n ) 270 { n = -1; return Error_EmptyCIF; } GetClassID()271 virtual CLASS_ID GetClassID () { return ClassID_Template; } 272 Copy(PContainerClass)273 virtual void Copy ( PContainerClass ) {} 274 write(io::RFile)275 void write ( io::RFile ) {} read(io::RFile)276 void read ( io::RFile ) {} 277 278 protected : 279 int ContinuationNo; 280 281 }; 282 283 284 // ======================== ContString ========================= 285 286 DefineClass(ContString); 287 DefineStreamFunctions(ContString); 288 289 class ContString : public ContainerClass { 290 291 public : 292 293 pstr Line; // a string 294 295 ContString (); 296 ContString ( cpstr S ); 297 ContString ( io::RPStream Object ); 298 ~ContString(); 299 300 ERROR_CODE ConvertPDBASCII ( cpstr S ); 301 void PDBASCIIDump ( pstr S, int N ); 302 bool PDBASCIIDump1 ( io::RFile f ); 303 void MakeCIF ( mmcif::PData CIF, int N ); 304 // void GetCIF1 ( mmcif::PData CIF, ERROR_CODE & Signal, 305 // int & pos ); 306 bool Append ( PContainerClass ContString ); GetClassID()307 CLASS_ID GetClassID () { return ClassID_String; } 308 309 void Copy ( PContainerClass CString ); 310 311 void write ( io::RFile f ); 312 void read ( io::RFile f ); 313 314 protected : 315 pstr CIFCategory,CIFTag; 316 317 void InitString(); 318 319 }; 320 321 322 // ============== ClassContainer ==================== 323 324 DefineClass(ClassContainer); 325 DefineStreamFunctions(ClassContainer); 326 327 class ClassContainer : public io::Stream { 328 329 public : 330 331 ClassContainer (); 332 ClassContainer ( io::RPStream Object ); 333 ~ClassContainer (); 334 335 void FreeContainer (); 336 void AddData ( PContainerClass Data ); 337 virtual void PDBASCIIDump ( io::RFile f ); 338 virtual void MakeCIF ( mmcif::PData CIF ); 339 // GetCIF(..) will return one of the Error_XXXXX constants, 340 // see <mmdb_defs.h> 341 virtual ERROR_CODE GetCIF ( mmcif::PData CIF, int ClassID ); 342 virtual PContainerClass MakeContainerClass ( int ClassID ); 343 344 // Copy will empty the class if parameter is set to NULL 345 virtual void Copy ( PClassContainer CContainer ); 346 Length()347 inline int Length() { return length; } 348 PContainerClass GetContainerClass ( int ContClassNo ); 349 350 void write ( io::RFile f ); 351 void read ( io::RFile f ); 352 353 protected : 354 int length; 355 PPContainerClass Container; 356 357 void Init(); 358 359 }; 360 361 362 // ====================== ID parsers ========================== 363 364 DefineClass(AtomPath); 365 DefineStreamFunctions(AtomPath); 366 367 enum APATH_FLAG { 368 APATH_ModelNo = 0x00000001, 369 APATH_ChainID = 0x00000002, 370 APATH_SeqNum = 0x00000004, 371 APATH_InsCode = 0x00000008, 372 APATH_ResName = 0x00000010, 373 APATH_AtomName = 0x00000020, 374 APATH_Element = 0x00000040, 375 APATH_AltLoc = 0x00000080, 376 APATH_Incomplete = 0x00000100, 377 APATH_WC_ModelNo = 0x00001000, 378 APATH_WC_ChainID = 0x00002000, 379 APATH_WC_SeqNum = 0x00004000, 380 APATH_WC_InsCode = 0x00008000, 381 APATH_WC_ResName = 0x00010000, 382 APATH_WC_AtomName = 0x00020000, 383 APATH_WC_Element = 0x00040000, 384 APATH_WC_AltLoc = 0x00080000 385 }; 386 387 class AtomPath : public io::Stream { 388 389 public : 390 391 int modelNo; 392 ChainID chainID; 393 int seqNum; 394 InsCode insCode; 395 ResName resName; 396 AtomName atomName; 397 Element element; 398 AltLoc altLoc; 399 int isSet; 400 401 AtomPath (); 402 AtomPath ( cpstr ID ); 403 AtomPath ( io::RPStream Object ); 404 ~AtomPath (); 405 406 // SetPath(..) parses the Atom Path ID string, which 407 // may be incomplete. Below {..} means blocks that 408 // may be omitted; any elements within such blocks 409 // may be omitted as well. 410 // 411 // 1. If ID starts with '/' then the ID must be of 412 // the following form: 413 // /mdl{/chn{/seq(res).i{/atm[elm]:a}}} 414 // 415 // 2. If ID starts with a letter: 416 // chn{/seq(res).i{/atm[elm]:a}} 417 // 418 // 3. If ID starts with a number or '(': 419 // seq(res).i{/atm[elm]:a} 420 // 421 // 4. If ID contains colon ':' or '[' then 422 // it may be just 423 // atm[elm]:a 424 // 425 // The following are valid samples of IDs: 426 // 427 // /1 model number 1 428 // /1/A/23(GLU).A/CA[C]:A model number 1, chain A, 429 // residue 23 GLU insertion code A, C-alpha 430 // atom in alternative location A 431 // A/23 residue 23 of chain A 432 // CA[C]: atom C-alpha 433 // [C] a carbon 434 // *[C]:* same as above 435 // :A an atom with insertion code A 436 // 5 residue number 5 437 // (GLU) residue GLU 438 // 439 // All spaces are ignored. SetPath(..) sets bit of isSet 440 // for each element present. Any element may be a wildcard 441 // '*'. Wildcard for model will set modelNo=0, for sequence 442 // number will set seqNum=MinInt4. 443 // 444 // Returns: 445 // 0 <-> Ok 446 // -1 <-> wrong numerical format for model 447 // -2 <-> wrong numerical format for sequence number 448 int SetPath ( cpstr ID ); 449 450 void write ( io::RFile f ); 451 void read ( io::RFile f ); 452 453 protected : 454 void InitAtomPath(); 455 456 }; 457 458 459 // -------------------------------------------------------------- 460 461 DefineClass(QuickSort); 462 463 class QuickSort : public io::Stream { 464 465 public : 466 QuickSort (); 467 QuickSort ( io::RPStream Object ); ~QuickSort()468 ~QuickSort() {} 469 virtual int Compare ( int i, int j ); 470 virtual void Swap ( int i, int j ); 471 void Sort ( void * sortdata, int data_len ); 472 473 protected : 474 int selSortLimit,dlen; 475 void * data; 476 477 void SelectionSort ( int left, int right ); 478 int Partition ( int left, int right ); 479 void Quicksort ( int left, int right ); 480 481 }; 482 483 484 // -------------------------------------------------------------- 485 486 extern void takeWord ( pstr & p, pstr wrd, cpstr ter, int l ); 487 488 // ParseAtomID(..) reads the atom ID of the following form: 489 // {name} {[element]} {:altcode} 490 // (here {} means that the item may be omitted; any field may have 491 // value of wildcard '*'), and returns the atom name in aname, 492 // element name - in elname, and alternate location code - in aloc. 493 // Except for the alternate location code, missing items are 494 // replaced by wildcards. Missing alternate location code is 495 // returned as empty string "". 496 // Leading spaces are allowed; any other space will terminate 497 // the parsing. 498 // The followings are perfectly valid atom IDs: 499 // CA[C]:A (carbon C_alpha in location A) 500 // CA[*]:A (either C_alpha or Ca in location A) 501 // CA:A (same as above) 502 // CA (either C_alpha or Ca with no location indicator) 503 // CA[] (same as above) 504 // CA[C]: (C_alpha with no location indicator) 505 // [C] (any carbon with no location indicator) 506 // [C]:* (any carbon with any location indicator) 507 // *[C]:* (same as above) 508 // :A (any atom in location A) 509 // *[*]:A (same as above) 510 // *[*]:* (any atom) 511 // * (any atom with no alternate location indicator) 512 extern void ParseAtomID ( cpstr ID, AtomName aname, 513 Element elname, AltLoc aloc ); 514 515 // ParseResID(..) reads the residue ID of the following form: 516 // {seqnum} {(name)} {.inscode} 517 // (here {} means that the item may be omitted; any field may have 518 // value of wildcard '*'), and returns the sequence number in sn, 519 // insertion code - in inscode, and residue name - in resname. 520 // If a wildcard was specified for the sequence number, then 521 // ParseResID(..) returns 1. Missing residue name is replaced by 522 // the wildcard '*', and misisng insertion code is returned as empty 523 // string "". 524 // Leading spaces are allowed; any other space will terminate 525 // the parsing. 526 // Return 0 means Ok, 1 - wildcard for the sequence number, 527 // 2 - an error in numerical format of the sequence number 528 // (other items are parsed). 529 // The followings are perfectly valid residue IDs: 530 // 27(ALA).A (residue 27A ALA) 531 // 27().A (residue 27A) 532 // 27(*).A (same as above) 533 // 27.A (same as above) 534 // 27 (residue 27) 535 // 27(). (same as above) 536 // (ALA) (any ALA without insertion code) 537 // (ALA). (same as above) 538 // (ALA).* (any ALA) 539 // *(ALA).* (any ALA) 540 // .A (any residue with insertion code A) 541 // *(*).A (same as above) 542 // *(*).* (any residue) 543 // * (any residue with no insertion code) 544 extern int ParseResID ( cpstr ID, int & sn, 545 InsCode inscode, ResName resname ); 546 547 548 // ParseAtomPath(..) parses an atom path string of the following 549 // structure: 550 // /mdl/chn/seq(res).i/atm[elm]:a 551 // where all items may be represented by a wildcard '*' and 552 // mdl - model number (mandatory); at least model #1 is always 553 // present; returned in mdl; on a wildcard mdl is set to 0 554 // chn - chain identifier ( mandatory); returned in chn; on a 555 // wildcard chn is set to '*' 556 // seq - residue sequence number (mandatory); returned in sn; 557 // on a wild card ParseAtomPath(..) returns 1 558 // (res) - residue name in round brackets (may be omitted); 559 // returnded in res; on a wildcard res is set to '*' 560 // .i - insert code after a dot; if '.i' or 'i' is missing 561 // then a residue without an insertion code is looked for; 562 // returned in ic; on a wildcard (any insertion code would 563 // do) ic is set to '*' 564 // atm - atom name (mandatory); returned in atm; on a wildcard 565 // atm is set to '*' 566 // [elm] - chemical element code in square brackets; it may 567 // be omitted but could be helpful for e.g. 568 // distinguishing C_alpha and CA; returned in elm; 569 // in a wildcard elm is set to '*' 570 // :a - alternate location indicator after colon; if 571 // ':a' or 'a' is missing then an atom without 572 // alternate location indicator is looked for; returned 573 // in aloc; on a wildcard (any alternate code would do) 574 // aloc is set to '*'. 575 // All spaces are ignored, all identifiers should be in capital 576 // letters (comparisons are case-sensitive). 577 // The atom path string may be incomplete. If DefPath is supplied, 578 // the function will try to get missing elements from there. If 579 // missing items may not be found in DefPath, they are replaced by 580 // wildcards. 581 // ParseAtomPath(..) returns the following bits: 582 // 0 - Ok 583 // APATH_Incomplete - if path contains wildcards. Wildcards for 584 // residue name and chemical element will be 585 // ignored here if sequence number and 586 // atom name, correspondingly, are provided. 587 // APATH_WC_XXXXX - wildcard for different elements 588 // -1 - wrong numerical format for model (fatal) 589 // -2 - wrong numerical format for seqNum (fatal) 590 591 extern int ParseAtomPath ( cpstr ID, 592 int & mdl, 593 ChainID chn, 594 int & sn, 595 InsCode ic, 596 ResName res, 597 AtomName atm, 598 Element elm, 599 AltLoc aloc, 600 PAtomPath DefPath=NULL ); 601 602 603 604 extern int ParseSelectionPath ( cpstr CID, 605 int & iModel, 606 pstr Chains, 607 int & sNum1, 608 InsCode ic1, 609 int & sNum2, 610 InsCode ic2, 611 pstr RNames, 612 pstr ANames, 613 pstr Elements, 614 pstr altLocs ); 615 616 617 618 extern void MakeSelectionPath ( pstr CID, 619 int iModel, 620 cpstr Chains, 621 int sNum1, 622 const InsCode ic1, 623 int sNum2, 624 const InsCode ic2, 625 cpstr RNames, 626 cpstr ANames, 627 cpstr Elements, 628 cpstr altLocs ); 629 630 } // namespace mmdb 631 632 #endif 633 634