//  $Id: mmdb_mmcif_.h $
//  =================================================================
//
//   CCP4 Coordinate Library: support of coordinate-related
//   functionality in protein crystallography applications.
//
//   Copyright (C) Eugene Krissinel 2000-2013.
//
//    This library is free software: you can redistribute it and/or
//    modify it under the terms of the GNU Lesser General Public
//    License version 3, modified in accordance with the provisions
//    of the license to address the requirements of UK law.
//
//    You should have received a copy of the modified GNU Lesser
//    General Public License along with this library. If not, copies
//    may be downloaded from http://www.ccp4.ac.uk/ccp4license.php
//
//    This program is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//  =================================================================
//
//    12.09.13   <-- Date of Last Modification.
//                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//  -----------------------------------------------------------------
//
//  **** Module  :  MMDB_MMCIF <interface>
//       ~~~~~~~~~
//  **** Project :  MacroMolecular Data Base (MMDB)
//       ~~~~~~~~~
//  **** Classes :  mmdb::mmcif::Category ( mmCIF category    )
//       ~~~~~~~~~  mmdb::mmcif::Struct   ( mmCIF structure   )
//                  mmdb::mmcif::Loop     ( mmCIF loop        )
//                  mmdb::mmcif::Data     ( mmCIF data block  )
//                  mmdb::mmcif::File     ( mmCIF file        )
//
//  (C) E.
Krissinel 2000-2013 40 // 41 // ================================================================= 42 // 43 44 45 #ifndef __MMDB_MMCIF__ 46 #define __MMDB_MMCIF__ 47 48 49 #include "mmdb_io_stream.h" 50 51 namespace mmdb { 52 53 namespace mmcif { 54 55 56 // ====================== Category ========================== 57 58 enum MMCIF_ITEM { 59 MMCIF_Category = 0, 60 MMCIF_Struct = 1, 61 MMCIF_Loop = 2, 62 MMCIF_Data = 3 63 }; 64 65 DefineClass(Category); 66 DefineStreamFunctions(Category); 67 68 /// \brief mmcif::Category is a base class for mmcif::Struct and 69 /// mmcif::Loop, implementations of mmCIF's "structure" and 70 /// "loop" categories. 71 /*! 72 This class is not instantiated independently in any applications, 73 however, it provides a few public functions which work for 74 both mmcif::Struct and mmcif::Loop. 75 76 All data in mmCIF hierarchy is addressed using construct 77 "category.tag" plus row number (>=0) for loops. Category names 78 should always start from underscore, while tags normally start 79 with a letter, e.g. "_barrel.id". 80 81 See general principles of working with mmCIF files and mmCIF 82 hierarchies in Section \"\ref mmcif_handler\". 83 */ 84 85 class Category : public io::Stream { 86 87 friend class Data; 88 89 public : 90 91 /// \brief Basic constructor. 92 Category (); 93 94 /// \brief Constructor that assigns category name. 95 /// \param[in] N category name (must start with underscore). 96 Category ( cpstr N ); 97 98 /// \brief Constructor for MMDB data streaming functions. 99 Category ( io::RPStream Object ); 100 101 /// \brief Destructor. 102 ~Category(); 103 104 /// \brief Returns category name. 105 /// \return NULL if name was not set 106 /// \return pointer to character string if name was set GetCategoryName()107 inline pstr GetCategoryName() { return name; } 108 109 /// \brief Sets category name. 110 /// \param N new category name 111 void SetCategoryName ( cpstr N ); 112 113 /// \brief Returns category type. 
114 /// This function may be used when retrieving categories 115 /// (structures and loops) from data blocks (mmcif::Data). 116 /// \return MMCIF_Category for mmcif::Category 117 /// \return MMCIF_Struct for mmcif::Struct 118 /// \return MMCIF_Loop for mmcif::Loop GetCategoryID()119 virtual MMCIF_ITEM GetCategoryID() { return MMCIF_Category; } 120 121 /// \brief Virtual function for writing category's content 122 /// into mmCIF file. 123 /// Default implementation does nothing. WriteMMCIF(io::RFile)124 virtual void WriteMMCIF ( io::RFile ) {} 125 126 /// \brief Virtual function for optimizig data structures. 127 /// Optimized data structures take less RAM and their indexes 128 /// are sorted for quicker access. Sorting is done automatically 129 /// as new data is added to the category. If the 130 /// category is edited (fields/data removed), it may need 131 /// optimization and re-sorting for efficiency.\n\n 132 /// The sorting preserves the order of actual appearance of 133 /// tags in mmCIF file. If a category is created 134 /// programmatically, the order of tags in mmCIF file will be 135 /// the same as order of adding them to the category. 136 virtual void Optimize(); 137 138 /// \brief Sorts category's data for quicker access. 139 /// The sorting is essentially re-indexing of data for quicker 140 /// access. It does not change the order of data in both mmCIF 141 /// hierarchy and mmCIF file. E.g., if tag "serial_no" was 2nd 142 /// one in given category before sorting, it will remain on 2nd 143 /// place after it, therefore no change in tag number passed 144 /// to functions in mmcif::Struct, mmcif::Loop and mmcif::Data. 145 void Sort(); 146 147 /// \brief Returns serial number of a tag in the category. 
148 /// \param[in] ttag tag (or name of a field in category) 149 /// \return \b >=0 : the tag is in given position 150 /// \return \b <0 : the tag was not found, but it could be 151 /// inserted before tag with (-rc-1)th index, where 152 /// 'rc' is the return. 153 int GetTagNo ( cpstr ttag ); 154 155 /// \brief Adds a tag to the category. 156 /// Adding a tag in mmcif::Category does not reserve any 157 /// placeholder for the corresponding value. All tags get 158 /// automatically sorted (reindexed) for quicker access. 159 /// Tags will appear in mmCIF file in order of their addition 160 /// to the category. 161 /// \param[in] ttag tag to be added. 162 /// \return \b >=0 the tag is already in the category, and return 163 /// is its serial number. No changes to the category 164 /// is done 165 /// \return \b <0 the tag was added to the list of tags, and 166 /// return is minus total number of tags in the 167 /// category. 168 int AddTag ( cpstr ttag ); 169 170 /// \brief Returns the total number of tags in the category GetNofTags()171 int GetNofTags() { return nTags; } 172 173 /// \brief Returns tag with the specified serial number. 174 /// The tags are enumerated as 0..GetNofTags()-1. 175 /// \param tagNo tag's serial number 176 /// \return \b NULL: tagNo is outside the range 177 /// of 0..GetNofTags()-1 178 /// \return \b not \b NULL: tag in tagNo'th position 179 pstr GetTag ( int tagNo ); // 0..nTags-1 180 181 /// \brief Prints list of tags to stdout. 182 /// Both sorted and unsorted tags are printed to standard 183 /// output. This function may be used for debugging. 184 void PrintTags(); 185 186 /// \brief Returns true if all tags from the list are found 187 /// in the category. 188 /// The size of the list of tags may be less than the number 189 /// of tags in the category, and order of tags is disregarded. 190 /// \param[in] tagList list of tags to be checked for presence 191 /// in the category. 
The list must end with NULL 192 /// pointer, or your program will crash. 193 /// \return \b true if all tags from the list were found in the 194 /// category 195 /// \return \b false if one or more tags from the list were not 196 /// found in the category. 197 /// 198 /// Example: 199 /// \code 200 /// cpstr tagList[] = {"id","type","date",NULL}; 201 /// mmcif::Struct cifStruct; 202 /// if (cifStruct.CheckTags(tagList)) 203 /// printf ( " all tags are found in category %s\n", 204 /// cifStruct.GetCategoryName() ); 205 /// \endcode 206 /// This function is useful for finding categories in 207 /// "dirty cifs", where category name is not given. 208 bool CheckTags ( cpstr * tagList ); 209 210 /// \brief Deep copy of categories. 211 /// Deep copy duplicates all data and memory allocations, 212 /// producing a genuine clone of the original. Only deep copy 213 /// should be used for copying MMDB objects, a mere assignment 214 /// operator will fail you. 215 /// \param[in] Category a pointer to mmcif::Category, the content of 216 /// which is copied into 'this' category. 217 virtual void Copy ( PCategory Category ); 218 219 /// \brief MMDB stream writer. 220 void write ( io::RFile f ); 221 222 /// \brief MMDB stream reader. 223 void read ( io::RFile f ); 224 225 protected: 226 int nTags; 227 pstr name; 228 psvector tag; 229 ivector index; 230 int nAllocTags; 231 232 void InitCategory (); 233 virtual void FreeMemory (); 234 void ExpandTags ( int nTagsNew ); 235 void PutCategoryName ( cpstr newName ); 236 237 }; 238 239 240 241 // ====================== Struct ============================ 242 243 DefineClass(Struct); 244 DefineStreamFunctions(Struct); 245 246 /// \brief Constants used to specify mmCIF's \"data not given\" and 247 /// \"data not available\" data types. 
248 extern const int CIF_NODATA_DOT; 249 extern const int CIF_NODATA_QUESTION; 250 extern cpstr CIF_NODATA_DOT_FIELD; 251 extern cpstr CIF_NODATA_QUESTION_FIELD; 252 253 /// \brief mmcif::Struct represents mmCIF's \"structure\" category, 254 /// where data follows directly the corresponding tag. 255 /*! 256 mmCIF's \"structure\" category has the following form: 257 \code 258 _structure_name.tag0 value0 259 _structure_name.tag1 value1 260 ........... 261 _structure_name.tagN valueN 262 \endcode 263 mmcif::Struct represents this construct by keeping category name 264 (\"_structure_name\") and associated lists of tags 265 (\"tag0,tag1...tagN\") and their values (\"value0,value1...valueN\"). 266 267 The structure is created automatically when an mmCIF file is read, 268 or it may be created programatically and then pushed into file. 269 270 Access to data is provided via tags. Internally, all values are kept 271 as character fields, and it is only on the retrieval stage that they 272 are converted to other data types (integers, floats or strings). 273 If conversion is not possible, an error code is returned by the 274 corresponding functions, which should be checked by the application. 275 276 See general principles of working with mmCIF files and mmCIF 277 hierarchies, as well as some code samples, in Section 278 \"\ref mmcif_handler\". 279 */ 280 281 class Struct : public Category { 282 283 public : 284 285 /// \brief Basic constructor 286 Struct (); 287 288 /// \brief Constructor that assigns structure name. 289 /// \param[in] N structure name (must start with underscore). 290 Struct ( cpstr N ); 291 292 /// \brief Constructor for MMDB data streaming functions 293 Struct ( io::RPStream Object ); 294 295 /// \brief Destructor 296 ~Struct(); 297 298 /// \brief Adds field to the structure. 299 /// \param[in] F field value 300 /// \param[in] T tag name 301 /// \param[in] Concatenate flag to concatenate existing field 302 /// with the value of \b F. 
If tag \b T is already in 303 /// the structure and \b Concatenate=true, then 304 /// value of \b F is appended to the existing field. 305 /// Otherwise, the field is replaced with the value 306 /// of \b F 307 void AddField ( cpstr F, cpstr T, bool Concatenate=false ); 308 309 /// \brief Returns category type \b MMCIF_Struct. GetCategoryID()310 MMCIF_ITEM GetCategoryID() { return MMCIF_Struct; } 311 312 /// \brief Optimizes structure for RAM and data access speed. 313 /// Optimized data structures take less RAM and their indexes 314 /// are sorted for quicker access. Sorting is done automatically 315 /// as new data is added to the category. If the structure 316 /// is edited (fields/data removed), it may need 317 /// optimization and re-sorting for efficiency.\n\n 318 /// The sorting preserves the order of actual appearance of 319 /// tags in mmCIF file. If a structure is created 320 /// programmatically, the order of tags in mmCIF file will be 321 /// the same as order of adding them to the structure. 322 void Optimize(); 323 324 /// \brief Returns value of field corresponding to tag in the 325 /// specified position. 326 /// Tag positions are defined by the order of their appearance in 327 /// mmCIF file (if structure was read from a file), or by the 328 /// order of their addition to the structure (if structure was 329 /// created programmatically). Tags are numbered as 330 /// 0...GetNofTags()-1. 
331 /// \param[in] tagNo tag number (position in the structure) 332 /// \return \b NULL: tag does not exist 333 /// \return \b CIF_NODATA_DOT_FIELD the field contains 334 /// \"data not given\" value 335 /// \return \b CIF_NODATA_QUESTION_FIELD the field contains 336 /// \"data not available\" value 337 /// \return \b not \b NULL: string value of the field 338 pstr GetField ( int tagNo ); // 0..nTags-1 339 340 /// \brief Fetches value, corresponding to the given tag, as 341 /// a string 342 /// \param[out] S pointer to string, which will point to newly 343 /// allocated character string, containing value 344 /// associated with tag \b TName. If tag or value 345 /// is not found, or if value corresponds to 346 /// mmCIF's \"data not given\" or 347 /// \"data not available\", \b S returns NULL. 348 /// \param[in] TName character string with tag name 349 /// \param[in] Remove flag to remove the tag and its value from 350 /// structure after it is read. 351 /// \return \b CIFRC_NoTag: tag is not found 352 /// \return \b CIFRC_NoField: value is not found 353 /// \return \b CIFRC_Ok: success. If \b S returns NULL, then 354 /// the value corresponds to either 355 /// \"data not available\" or 356 /// \"data not given\". 357 /// \remarks If \b S!=NULL at time of call, the function will 358 /// try to dispose the string it points on. This allows a slick 359 /// re-use of the same pointer in consequitive calls. This also 360 /// means that one should never pass unallocated pointer to 361 /// this function. 
Safe use assumes the following patern: 362 /// \code 363 /// mmcif::Struct mmCIFStruct; 364 /// pstr S; // this is merely "char *S" 365 /// int rc; 366 /// 367 /// S = NULL; // null pointer before first use 368 /// rc = mmCIFStruct.GetString ( S,"id" ); 369 /// if (rc) CreateCopy ( S,"*** data not found" ); 370 /// if (!S) CreateCopy ( S,"*** data not given or not available" ); 371 /// printf ( " rc=%i, S='%s'\n",rc,S ); 372 /// 373 /// rc = mmCIFStruct.GetString ( S,"property" ); 374 /// if (rc) CreateCopy ( S,"*** data not found" ); 375 /// if (!S) CreateCopy ( S,"*** data not given or not available" ); 376 /// printf ( " rc=%i, S='%s'\n",rc,S ); 377 /// 378 /// // etc etc etc 379 /// 380 /// delete[] S; // application is responsible for final 381 /// // disposal of memory 382 /// \endcode 383 int GetString ( pstr & S, cpstr TName, bool Remove=false ); 384 385 /// \brief Returns pointer to value associated with given tag. 386 /// \param[in] TName character string with tag name 387 /// \param[out] RC return code: 388 /// \arg \b CIFRC_NoTag: tag is not found 389 /// \arg \b CIFRC_NoField: value is not found 390 /// \arg \b CIFRC_Ok: success. If function returns NULL, then 391 /// the value corresponds to either 392 /// \"data not available\" or 393 /// \"data not given\". 394 /// \return \b NULL: either tag or value is not found, or the 395 /// value is \"data not available\" or \"data not given\". 396 /// Read return code \b RC in order to interpret NULL return. 397 /// \return \b not \b NULL: pointer (\c char \c *) to value 398 /// associated with \b TName. 399 /// \remarks Never try to dispose memory pointed by the return 400 /// value, or your program will crash. 401 pstr GetString ( cpstr TName, int & RC ); // NULL if TName 402 // is not there 403 404 /// \brief Deletes field associated with given tag. 
405 /// \param[in] TName character string with tag name 406 /// \return \b >=0: field deleted 407 /// \return \b <0: either field or tag is not found 408 int DeleteField ( cpstr TName ); // <0 the field was not there 409 410 /// \brief Fetches value, corresponding to the given tag, as 411 /// a real number. 412 /// \param[out] R reference to real number to accept the value. 413 /// In case of failure, \b R returns zero. 414 /// \param[in] TName character string with tag name 415 /// \param[in] Remove flag to remove the tag and its value from 416 /// structure after it is read. 417 /// \return \b CIFRC_NoTag: tag is not found 418 /// \return \b CIFRC_NoField: field is not found 419 /// \return \b CIFRC_WrongFormat: value is not a real or integer 420 /// number. 421 /// \return \b CIFRC_NoData: value is either 422 /// \"data not available\" or 423 /// \"data not given\". 424 /// \return \b CIFRC_Ok: success. 425 int GetReal ( realtype & R, cpstr TName, bool Remove=false ); 426 427 /// \brief Fetches value, corresponding to the given tag, as 428 /// an integer number. 429 /// \param[out] I reference to integer number to accept the 430 /// value. In case of failure, \b I returns zero, except 431 /// when value is \"data not available\" or 432 /// \"data not given\", when I returns \c MinInt4. 433 /// \param[in] TName character string with tag name 434 /// \param[in] Remove flag to remove the tag and its value from 435 /// structure after it is read. 436 /// \return \arg \b CIFRC_NoTag: tag is not found 437 /// \return \b CIFRC_NoField: field is not found 438 /// \return \b CIFRC_WrongFormat: value is not an integer number. 439 /// \return \b CIFRC_NoData: value is either 440 /// \"data not available\" or 441 /// \"data not given\". 442 /// \return \b CIFRC_Ok: success. 443 int GetInteger ( int & I, cpstr TName, bool Remove=false ); 444 445 /// \brief Sets string value for given tag. 446 /// \param[in] S character string with value to be set. 
447 /// If \b S==NULL, the \"data not given\" value 448 /// will be set. If \b S==\"\" (empty string), the 449 /// \"data not available\" value is stored. 450 /// \param[in] TName character string with tag name. If tag 451 /// is not found, it will be added to the structure. 452 /// \param[in] NonBlankOnly flag to treat white-space-only 453 /// strings: 454 /// \arg \b false: set as is 455 /// \arg \b true: set \"data not available\" value instead. 456 void PutString ( cpstr S, cpstr TName, 457 bool NonBlankOnly=false ); 458 459 /// \brief Sets current date in format YYYY-MM-DD as a value 460 /// for given tag. 461 /// \param[in] T character string with tag name. If tag 462 /// is not found, it will be added to the structure. 463 void PutDate ( cpstr T ); 464 465 /// \brief Sets \"data not given\" or \"data not available\" 466 /// values for given tag. 467 /// \param[in] NoDataType can be either 468 /// \arg \b CIF_NODATA_DOT for \"data not given\" 469 /// \arg \b CIF_NODATA_QUESTION for \"data not available\" 470 /// \param[in] T character string with tag name. If tag 471 /// is not found, it will be added to the structure. 472 void PutNoData ( int NoDataType, cpstr T ); 473 474 /// \brief Sets float-point value for given tag. 475 /// \param[in] R real number with value to be set. 476 /// \param[in] TName character string with tag name. If tag 477 /// is not found, it will be added to the structure. 478 /// \param[in] prec float-point precision; g-format with given 479 /// precision will be used 480 void PutReal ( realtype R, cpstr TName, int prec=8 ); 481 482 /// \brief Sets float-point value for given tag. 483 /// \param[in] R real number with value to be set. 484 /// \param[in] TName character string with tag name. If tag 485 /// is not found, it will be added to the structure. 486 /// \param[in] format format string to convert \b R. 487 void PutReal ( realtype R, cpstr TName, cpstr format ); 488 489 /// \brief Sets integer value for given tag. 
490 /// \param[in] I integer number with value to be set. 491 /// \param[in] TName character string with tag name. If tag 492 /// is not found, it will be added to the structure. 493 void PutInteger ( int I, cpstr TName ); 494 495 /// \brief Writes structure data in mmCIF format into file. 496 /// \param[in] FName character string with file name. 497 /// \param[in] gzipMode flag to controll compression of files: 498 /// \arg \b GZM_NONE: do not compress 499 /// \arg \b GZM_CHECK: check file name suffix and compress 500 /// (or not) accordingly 501 /// \arg \b GZM_ENFORCE_GZIP: force gzip compression despite 502 /// suffix 503 /// \arg \b GZM_ENFORCE_COMPRESS: force using compress despite 504 /// suffix 505 /// \return \b true: success 506 /// \return \b false: can not open file for writing. 507 /// \remarks This function does not create a valid mmCIF file 508 /// as \"data_XXX\" record will be missing. It may be used for 509 /// debugging though. 510 bool WriteMMCIFStruct ( cpstr FName, 511 io::GZ_MODE gzipMode=io::GZM_CHECK ); 512 513 /// \brief Writes structure into given file. 514 /// \param f reference to MMDB's file class. The file should be 515 /// opened and closed by application. 516 /// \remarks There is a very limited use of this function on 517 /// application level. It is primarily used by mmcif::Data class. 518 void WriteMMCIF ( io::RFile f ); 519 520 /// \brief Deep copy of structures. 521 /// Deep copy duplicates all data and memory allocations, 522 /// producing a genuine clone of the original. Only deep copy 523 /// should be used for copying MMDB objects, a mere assignment 524 /// operator will fail you. 525 /// \param[in] Struct a pointer to mmcif::Struct, the content of 526 /// which is copied into 'this' structure. 527 void Copy ( PCategory Struct ); 528 529 /// \brief MMDB stream writer. 530 void write ( io::RFile f ); 531 532 /// \brief MMDB stream reader. 
533 void read ( io::RFile f ); 534 535 protected: 536 psvector field; 537 538 void InitStruct(); 539 void FreeMemory(); 540 541 }; 542 543 544 545 // ====================== Loop ============================== 546 547 DefineClass(Loop); 548 DefineStreamFunctions(Loop); 549 550 /// \brief mmcif::Loop represents mmCIF's \"loop\" category, which keeps 551 /// rows of data values associated with tags. 552 /*! 553 mmCIF's \"loop\" category has the following form: 554 \code 555 loop_ 556 _loop_name.tag0 value0 557 _loop_name.tag1 value1 558 ........... 559 _loop_name.tagN valueN 560 value00 value10 ... valueN0 561 value01 value11 ... valueN1 562 ........... 563 value0M value1M ... valueNM 564 \endcode 565 mmcif::Loop represents this construct by keeping category name 566 (\"_loop_name\") and associated lists of tags 567 (\"tag0,tag1...tagN\") and data vectors 568 (\"[value00...value0M],[value10...value1M]...[valueN0...valueNM]\"). 569 570 The loop object is created automatically when an mmCIF file is read, 571 or it may be created programatically and then pushed into file. 572 573 Access to data is provided via tags and data indexes. Internally, 574 all values are kept as character fields, and it is only on the 575 retrieval stage that they are converted to other data types 576 (integers, floats or strings). If conversion is not possible, an 577 error code is returned by the corresponding functions, which should 578 be checked by the application. 579 580 The following code gives an example of creating mmCIF loop category 581 and populating it with data: 582 \code 583 mmcif::Loop loop; 584 char S[100]; 585 int i; 586 587 // Specify loop name: 588 loop.SetCategoryName ( "_sample_loop" ); 589 590 // Create loop structure, i.e., list of tags first: 591 loop.AddLoopTag ( "id" ); 592 loop.AddLoopTag ( "name" ); 593 loop.AddLoopTag ( "value" ); 594 595 // Now populate it with data. This my be done in 2 ways. 
596 // Here is how you write loop data in stream fashion, 597 // value-after-value: 598 for (i=0;i<3;i++) { 599 loop.AddInteger ( i ); 600 sprintf ( S,"1st_way-%i",i ); 601 loop.AddString ( S ); 602 loop.AddReal ( 2.5*(i+1) ); 603 } 604 605 // Here is how you populate loop data using direct-access 606 // functions: 607 for (i=3;i<6;i++) { 608 loop.PutReal ( 2.5*(i+1),"value",i ); 609 loop.PutInteger ( i,"id" ); 610 sprintf ( S,"2nd way: %i",i ); 611 loop.PutString ( S,"name" ); 612 } 613 614 loop.WriteMMCIFLoop ( "sample_loop.cif" ); 615 616 \endcode 617 618 The resulting file \b sample_loop.cif will contain: 619 620 \code 621 622 loop_ 623 _sample_loop.id 624 _sample_loop.name 625 _sample_loop.value 626 0 1st_way-0 2.5 627 1 1st_way-1 5.0 628 2 1st_way-2 7.5 629 3 "2nd way: 3" 10.0 630 4 "2nd way: 4" 12.5 631 5 "2nd way: 5" 15.0 632 633 \endcode 634 635 See general principles of working with mmCIF files and mmCIF 636 hierarchies, as well as some code samples, in Section 637 \"\ref mmcif_handler\". 638 */ 639 640 class Loop : public Category { 641 642 friend class Data; 643 644 public : 645 646 /// \brief Basic constructor 647 Loop (); 648 649 /// \brief Constructor that assigns structure name. 650 /// \param[in] N structure name (must start with underscore). 651 Loop ( cpstr N ); 652 653 /// \brief Constructor for MMDB data streaming functions 654 Loop ( io::RPStream Object ); 655 656 /// \brief Destructor 657 ~Loop(); 658 659 /// \brief Adds tag to the loop. 660 /// The tag is appended to the list of existing tags. The order 661 /// of tags cannot be changed. 662 /// \param[in] T tag name 663 /// \param[in] Remove flag to remove all fields in the loop. 664 void AddLoopTag ( cpstr T, bool Remove=true ); 665 666 /// \brief Sets string value at current loop position. 667 /// When \b mmcif::Loop::Add[Data] functions use internal loop 668 /// pointer. 
When category is created or cleared (by using 669 /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to 670 /// 0th row and 0th column (tag). After each call to 671 /// \b mmcif::Loop::Add[Data] function, internal pointer advances 672 /// to next column (tag), and wraps over to next row, 0th tag, 673 /// if list of tags is exhausted. Any remaining fields in last 674 /// row will be populated with \"data not given\" value. 675 /// \param[in] S character string with value to be set. 676 /// If \b S==NULL, the \"data not given\" value 677 /// will be set. If \b S==\"\" (empty string), the 678 /// \"data not available\" value is stored. 679 /// \param[in] NonBlankOnly flag to treat white-space-only 680 /// strings: 681 /// \arg \b false: set as is 682 /// \arg \b true: set \"data not available\" value instead. 683 void AddString ( cpstr S, bool NonBlankOnly=false ); 684 685 /// \brief Sets \"data not given\" or \"data not available\" at 686 /// current loop position. 687 /// When \b mmcif::Loop::Add[Data] functions use internal loop 688 /// pointer. When category is created or cleared (by using 689 /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to 690 /// 0th row and 0th column (tag). After each call to 691 /// \b mmcif::Loop::Add[Data] function, internal pointer advances 692 /// to next column (tag), and wraps over to next row, 0th tag, 693 /// if list of tags is exhausted. Any remaining fields in last 694 /// row will be populated with \"data not given\" value. 695 /// \param[in] NoDataType integer key specifying which type of 696 /// data absence should be set as a value: 697 /// \arg \b CIF_NODATA_DOT for \"data not given\" 698 /// \arg \b CIF_NODATA_QUESTION for \"data not available\" 699 void AddNoData ( int NoDataType ); 700 701 /// \brief Sets float-point value at current loop position. 702 /// When \b mmcif::Loop::Add[Data] functions use internal loop 703 /// pointer. 
When category is created or cleared (by using 704 /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to 705 /// 0th row and 0th column (tag). After each call to 706 /// \b mmcif::Loop::Add[Data] function, internal pointer advances 707 /// to next column (tag), and wraps over to next row, 0th tag, 708 /// if list of tags is exhausted. Any remaining fields in last 709 /// row will be populated with \"data not given\" value. 710 /// \param[in] R real number with value to be set. 711 /// \param[in] prec float-point precision; g-format with given 712 /// precision will be used 713 void AddReal ( realtype R, int prec=8 ); 714 715 /// \brief Sets float-point value at current loop position in 716 /// given format. 717 /// When \b mmcif::Loop::Add[Data] functions use internal loop 718 /// pointer. When category is created or cleared (by using 719 /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to 720 /// 0th row and 0th column (tag). After each call to 721 /// \b mmcif::Loop::Add[Data] function, internal pointer advances 722 /// to next column (tag), and wraps over to next row, 0th tag, 723 /// if list of tags is exhausted. Any remaining fields in last 724 /// row will be populated with \"data not given\" value. 725 /// \brief Sets float-point value for given tag. 726 /// \param[in] R real number with value to be set. 727 /// \param[in] format format string to convert \b R. 728 void AddReal ( realtype R, cpstr format ); 729 730 /// \brief Sets integer value at current loop position in given 731 /// format. 732 /// When \b mmcif::Loop::Add[Data] functions use internal loop 733 /// pointer. When category is created or cleared (by using 734 /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to 735 /// 0th row and 0th column (tag). After each call to 736 /// \b mmcif::Loop::Add[Data] function, internal pointer advances 737 /// to next column (tag), and wraps over to next row, 0th tag, 738 /// if list of tags is exhausted. 
Any remaining fields in last 739 /// row will be populated with \"data not given\" value. 740 /// \param[in] I integer number with value to be set. 741 void AddInteger ( int I ); 742 743 /// \brief Returns current length of the loop (i.e. the number 744 /// of rows). 745 /// \return number of data rows in the loop. GetLoopLength()746 int GetLoopLength() { return nRows; } 747 748 /// \brief Returns string pointer on the field corresponding to 749 /// tag in the specified position, in the specified row. 750 /// Tag positions are defined by the order of their appearance in 751 /// mmCIF file (if loop was read from a file), or by the 752 /// order of their addition to the loop (if loop was 753 /// created programmatically). 754 /// \param[in] rowNo row number (0...GetLoopLength()-1) 755 /// \param[in] tagNo tag number (0...GetNofTags()-1) 756 /// \return \b NULL: tag or row do not exist 757 /// \return \b CIF_NODATA_DOT_FIELD the field contains 758 /// \"data not given\" value 759 /// \return \b CIF_NODATA_QUESTION_FIELD the field contains 760 /// \"data not available\" value 761 /// \return \b not \b NULL: string value of the field 762 /// \remarks Never try to dispose memory pointed by the return 763 /// value, or your program will crash. 764 pstr GetField ( int rowNo, int tagNo ); 765 766 /// \brief Fetches value, corresponding to the given tag, in 767 /// the given row, as a string 768 /// \param[out] S pointer to string, which will point to newly 769 /// allocated character string, containing value 770 /// associated with tag \b TName and row \b nrow. 771 /// If tag, row or value 772 /// is not found, or if value corresponds to 773 /// mmCIF's \"data not given\" or 774 /// \"data not available\", \b S returns NULL. 775 /// \param[in] TName character string with tag name 776 /// \param[in] nrow row number (0...GetLoopLength()-1) 777 /// \param[in] Remove flag to remove the field from 778 /// structure after it is read. 
/// \return \b CIFRC_NoTag: tag is not found
/// \return \b CIFRC_WrongIndex: row is not found
/// \return \b CIFRC_NoField: value is not found
/// \return \b CIFRC_Ok: success. If \b S returns NULL, then
///               the value corresponds to either
///               \"data not available\" or
///               \"data not given\".
/// \remarks If \b S!=NULL at time of call, the function will
///  try to dispose the string it points to. This allows a slick
///  re-use of the same pointer in consecutive calls. This also
///  means that one should never pass an uninitialized pointer to
///  this function. Safe use assumes the following pattern:
///  \code
///   mmcif::Loop mmCIFLoop;
///   pstr        S;  // this is merely "char *S"
///   int         rc;
///
///     S  = NULL; // null pointer before first use
///     rc = mmCIFLoop.GetString ( S,"id",1 );
///     if (rc)  CreateCopy ( S,"*** data not found" );
///     if (!S)  CreateCopy ( S,"*** data not given or not available" );
///     printf ( " rc=%i, S='%s'\n",rc,S );
///
///     rc = mmCIFLoop.GetString ( S,"property",0 );
///     if (rc)  CreateCopy ( S,"*** data not found" );
///     if (!S)  CreateCopy ( S,"*** data not given or not available" );
///     printf ( " rc=%i, S='%s'\n",rc,S );
///
///     // etc etc etc
///
///     delete[] S;  // application is responsible for final
///                  // disposal of memory
///  \endcode
int GetString ( pstr & S, cpstr TName, int nrow,
                bool Remove=false );

/// \brief Returns pointer to value associated with given tag,
///        in the given row of the loop.
/// \param[in] TName character string with tag name
/// \param[in] nrow  row number (0...GetLoopLength()-1)
/// \param[out] RC return code:
///    \arg \b CIFRC_NoTag: tag is not found
///    \arg \b CIFRC_WrongIndex: row is not found
///    \arg \b CIFRC_NoField: value is not found
///    \arg \b CIFRC_Ok: success. If function returns NULL, then
///               the value corresponds to either
///               \"data not available\" or
///               \"data not given\".
/// \return \b NULL: either tag, row or value is not found, or the
///       value is \"data not available\" or \"data not given\".
///       Read return code \b RC in order to interpret NULL return.
/// \return \b not \b NULL: pointer (\c char \c *) to value
///       associated with \b TName.
/// \remarks Never try to dispose memory pointed by the return
/// value, or your program will crash.
pstr GetString ( cpstr TName, int nrow, int & RC );

/// \brief Copies value, associated with given tag,
///        in the given row, into specified buffer.
///        Terminating null character is appended.
/// \param[out] buf character string to accept the value
/// \param[in] maxlength maximum number of bytes to copy
/// \param[in] TName character string with tag name
/// \param[in] nrow  row number (0...GetLoopLength()-1)
/// \param[out] RC return code:
///    \arg \b CIFRC_NoTag: tag is not found
///    \arg \b CIFRC_WrongIndex: row is not found
///    \arg \b CIFRC_NoField: value is not found
///    \arg \b CIFRC_Ok: success.
/// \remarks Destination string \b buf is not modified if
/// \b RC!=CIFRC_Ok .
void CopyString ( pstr buf, int maxlength,
                  cpstr TName, int nrow, int & RC );

/// \brief Deletes field associated with given tag in
///        the given row.
/// \param[in] TName character string with tag name
/// \param[in] nrow  row number (0...GetLoopLength()-1)
/// \return \b >=0: field deleted
/// \return \b <0: either field or tag is not found
int DeleteField ( cpstr TName, int nrow );

/// \brief Deletes all fields in given row.
/// \param[in] nrow  row number (0...GetLoopLength()-1)
/// \return \b CIFRC_Ok: fields deleted
/// \return \b CIFRC_WrongIndex: row not found
/// \remarks Note that this function deletes just the fields, but
/// not the row. If you wish the row to be deleted, call
/// mmcif::Loop::Optimize() function after this one.
int DeleteRow ( int nrow );

/// \brief Fetches value, corresponding to the given tag,
///        in the given row, as a real number.
/// \param[out] R reference to real number to accept the value.
///        In case of failure, \b R returns zero.
/// \param[in] TName character string with tag name
/// \param[in] nrow  row number (0...GetLoopLength()-1)
/// \param[in] Remove flag to remove the field from
///        the loop after it is read.
/// \return \b CIFRC_NoTag: tag is not found
/// \return \b CIFRC_WrongIndex: row not found
/// \return \b CIFRC_NoField: field is not found
/// \return \b CIFRC_WrongFormat: value is not a real or integer
///            number.
/// \return \b CIFRC_NoData: value is either
///            \"data not available\" or
///            \"data not given\".
/// \return \b CIFRC_Ok: success.
int GetReal ( realtype & R, cpstr TName, int nrow,
              bool Remove=false );

/// \brief Copies value, associated with given tag,
///        in the given row, into specified destination as
///        a real number.
/// \param[out] R reference to real number to accept the value
/// \param[in] TName character string with tag name
/// \param[in] nrow  row number (0...GetLoopLength()-1)
/// \param[out] RC return code:
///    \arg \b CIFRC_NoTag: tag is not found
///    \arg \b CIFRC_WrongIndex: row is not found
///    \arg \b CIFRC_NoField: value is not found
///    \arg \b CIFRC_Ok: success.
/// \remarks Destination \b R is set 0 if \b RC!=CIFRC_Ok.
void CopyReal ( realtype & R, cpstr TName, int nrow, int & RC );

/// \brief Copies value, associated with given tag,
///        in the given row, into specified destination as
///        an integer number.
/// \param[out] I reference to integer number to accept the value
/// \param[in] TName character string with tag name
/// \param[in] nrow  row number (0...GetLoopLength()-1)
/// \param[out] RC return code:
///    \arg \b CIFRC_NoTag: tag is not found
///    \arg \b CIFRC_WrongIndex: row is not found
///    \arg \b CIFRC_NoField: value is not found
///    \arg \b CIFRC_Ok: success.
/// \remarks Destination \b I is set 0 if \b RC!=CIFRC_Ok.
void CopyInteger ( int & I, cpstr TName, int nrow, int & RC );

/// \brief Fetches value, corresponding to the given tag,
///        in the given row, as an integer number.
/// \param[out] I reference to integer number to accept the value.
///        In case of failure, \b I returns zero.
/// \param[in] TName character string with tag name
/// \param[in] nrow  row number (0...GetLoopLength()-1)
/// \param[in] Remove flag to remove the field from
///        the loop after it is read.
/// \return \b CIFRC_NoTag: tag is not found
/// \return \b CIFRC_WrongIndex: row not found
/// \return \b CIFRC_NoField: field is not found
/// \return \b CIFRC_WrongFormat: value is not a real or integer
///            number.
/// \return \b CIFRC_NoData: value is either
///            \"data not available\" or
///            \"data not given\".
/// \return \b CIFRC_Ok: success.
int GetInteger ( int & I, cpstr TName, int nrow,
                 bool Remove=false );

/// \brief Fetches set of values, corresponding to the given
///        tag, in the given range of rows, as a vector of
///        strings.
/// \param[out] S reference to string vector to accept
///        the values. If \b S==NULL , the vector will be
///        allocated with starting index of \b i1.
/// \param[in] TName character string with tag name
/// \param[in] i1  minimum row number to fetch, the actual
///        index will be calculated as \b max(0,min(i1,i2))
/// \param[in] i2  maximum row number to fetch, the actual
///        index will be calculated as
///        \b min(GetLoopLength()-1,max(i1,i2))
/// \param[in] Remove flag to remove fetched fields from
///        the loop after they are read.
/// \return \b CIFRC_NoTag: tag is not found
/// \return \b CIFRC_WrongIndex: invalid range of rows
/// \return \b CIFRC_Ok: success.
///
/// For safe use, \b S should be pre-allocated by calling
/// process. Only elements \b S[i1] to \b S[i2] will contain
/// fetched data, others remain untouched. The calling
/// process is responsible for the disposal of \b S. Example:
/// \code
///  mmcif::Loop loop;
///  psvector    S;  // equivalent to char **S
///  int         i,i1,i2,rc,n;
///
///    // ... get loop data
///
///    n  = loop.GetLoopLength();
///    i1 = 5;  i2 = n - 5;  // could be wrong!
///
///    // allocate vector of strings
///    GetVectorMemory ( S,n,0 );  // "0" for starting index
///    for (i=0;i<n;i++)
///      S[i] = NULL;  // initialize NULL string pointers
///
///    rc = loop.GetSVector ( S,"name",i1,i2 );
///    printf ( " Fetched with return code rc=%i\n",rc );
///    // you may want a more thorough treatment of
///    // the return code here
///    for (i=i1;i<=i2;i++)
///      if (S[i]) printf ( " %4i. name='%s'\n",i,S[i] );
///           else printf ( " %4i. name is not available\n",i );
///
///    // S[] may be re-used for as many fetches as necessary
///    // without cleaning or disposals
///
///    // dispose of vector of strings
///    for (i=0;i<n;i++)
///      if (S[i])  delete[] S[i];
///    FreeVectorMemory ( S,0 );  // "0" for starting index
///
/// \endcode
int GetSVector ( psvector & S, cpstr TName,
                 int i1=0, int i2=MaxInt4,
                 bool Remove=false );

/// \brief Fetches set of values, corresponding to the given
///        tag, in the given range of rows, as a vector of
///        float-point numbers.
/// \param[out] R reference to float-point vector to accept
///        the values. If \b R==NULL , the vector will be
///        allocated with starting index of \b i1.
/// \param[in] TName character string with tag name
/// \param[in] i1  minimum row number to fetch, the actual
///        index will be calculated as \b max(0,min(i1,i2))
/// \param[in] i2  maximum row number to fetch, the actual
///        index will be calculated as
///        \b min(GetLoopLength()-1,max(i1,i2))
/// \param[in] Remove flag to remove fetched fields from
///        the loop after they are read.
/// \return \b CIFRC_NoTag: tag is not found
/// \return \b CIFRC_WrongIndex: invalid range of rows
/// \return \b CIFRC_Ok: success.
///
/// For safe use, \b R should be pre-allocated by calling
/// process. Only elements \b R[i1] to \b R[i2] will contain
/// fetched data, others remain untouched. The calling
/// process is responsible for the disposal of \b R. Example:
/// \code
///  mmcif::Loop loop;
///  rvector     R;  // equivalent to realtype *R
///  int         i,i1,i2,rc,n;
///
///    // ... get loop data
///
///    n  = loop.GetLoopLength();
///    i1 = 5;  i2 = n - 5;  // could be wrong!
///
///    // allocate a vector of real numbers
///    GetVectorMemory ( R,n,0 );  // "0" for starting index
///    // no need to initialize unless required for the
///    // application
///
///    rc = loop.GetRVector ( R,"value",i1,i2 );
///    printf ( " Fetched with return code rc=%i\n",rc );
///    // you may want a more thorough treatment of
///    // the return code here
///    for (i=i1;i<=i2;i++)
///      printf ( " value[%4i] = %15.7g\n",i,R[i] );
///
///    // R[] may be re-used for as many fetches as necessary
///    // without cleaning or disposals
///
///    // dispose of the vector
///    FreeVectorMemory ( R,0 );  // "0" for starting index
///
/// \endcode
int GetRVector ( rvector & R, cpstr TName,
                 int i1=0, int i2=MaxInt4,
                 bool Remove=false );

/// \brief Fetches set of values, corresponding to the given
///        tag, in the given range of rows, as a vector of
///        integer numbers.
/// \param[out] I reference to integer vector to accept
///        the values. If \b I==NULL , the vector will be
///        allocated with starting index of \b i1.
/// \param[in] TName character string with tag name
/// \param[in] i1  minimum row number to fetch, the actual
///        index will be calculated as \b max(0,min(i1,i2))
/// \param[in] i2  maximum row number to fetch, the actual
///        index will be calculated as
///        \b min(GetLoopLength()-1,max(i1,i2))
/// \param[in] Remove flag to remove fetched fields from
///        the loop after they are read.
/// \return \b CIFRC_NoTag: tag is not found
/// \return \b CIFRC_WrongIndex: invalid range of rows
/// \return \b CIFRC_Ok: success.
///
/// For safe use, \b I should be pre-allocated by calling
/// process. Only elements \b I[i1] to \b I[i2] will contain
/// fetched data, others remain untouched. The calling
/// process is responsible for the disposal of \b I.
/// See example in mmcif::Loop::GetRVector documentation
/// for details.
int GetIVector ( ivector & I, cpstr TName,
                 int i1=0, int i2=MaxInt4,
                 bool Remove=false );

/// \brief Sets string value for given tag and row.
/// \param[in] S character string with value to be set.
///        If \b S==NULL, the \"data not given\" value
///        will be set. If \b S==\"\" (empty string), the
///        \"data not available\" value is stored.
/// \param[in] T character string with tag name. If tag
///        is not found, it will be added, and all data in
///        the loop will be reindexed accordingly.
/// \param[in] nrow row number. If the row does not exist then
///        it will be created, along with all other rows
///        between GetLoopLength()-1 and \b nrow as
///        necessary. All newly created fields will be
///        initialised with \"data not given\" value.
void PutString ( cpstr S, cpstr T, int nrow );

/// \brief Sets \"data not given\" or \"data not available\"
///        values for given tag and row.
/// \param[in] NoDataType can be either
///   \arg \b CIF_NODATA_DOT for \"data not given\"
///   \arg \b CIF_NODATA_QUESTION for \"data not available\"
/// \param[in] T character string with tag name. If tag
///        is not found, it will be added, and all data in
///        the loop will be reindexed accordingly.
/// \param[in] nrow row number. If the row does not exist then
///        it will be created, along with all other rows
///        between GetLoopLength()-1 and \b nrow as
///        necessary. All newly created fields will be
///        initialised with \"data not given\" value.
void PutNoData ( int NoDataType, cpstr T, int nrow );

/// \brief Sets float-point value for given tag and row,
///        formatted with the given precision.
/// \param[in] R real number with value to be set.
/// \param[in] T character string with tag name. If tag
///        is not found, it will be added, and all data in
///        the loop will be reindexed accordingly.
/// \param[in] nrow row number. If the row does not exist then
///        it will be created, along with all other rows
///        between GetLoopLength()-1 and \b nrow as
///        necessary. All newly created fields will be
///        initialised with \"data not given\" value.
/// \param[in] prec float-point precision; g-format with given
///        precision will be used
void PutReal ( realtype R, cpstr T, int nrow, int prec=8 );

/// \brief Sets float-point value for given tag and row,
///        formatted with the given format string.
/// \param[in] R real number with value to be set.
/// \param[in] T character string with tag name. If tag
///        is not found, it will be added, and all data in
///        the loop will be reindexed accordingly.
/// \param[in] nrow row number. If the row does not exist then
///        it will be created, along with all other rows
///        between GetLoopLength()-1 and \b nrow as
///        necessary. All newly created fields will be
///        initialised with \"data not given\" value.
/// \param[in] format format string to convert \b R.
void PutReal ( realtype R, cpstr T, int nrow, cpstr format );

/// \brief Sets integer value for given tag and row.
/// \param[in] I integer number with value to be set.
/// \param[in] T character string with tag name. If tag
///        is not found, it will be added, and all data in
///        the loop will be reindexed accordingly.
/// \param[in] nrow row number. If the row does not exist then
///        it will be created, along with all other rows
///        between GetLoopLength()-1 and \b nrow as
///        necessary. All newly created fields will be
///        initialised with \"data not given\" value.
void PutInteger ( int I, cpstr T, int nrow );

/// \brief Sets a set of string values for the given tag and
///        range of rows.
/// \param[in] S string vector with values to store in the loop
/// \param[in] T character string with tag name. If tag
///        is not found, it will be added, and all data in
///        the loop will be reindexed accordingly.
/// \param[in] i1 minimum data index in \b S to set in the loop
/// \param[in] i2 maximum data index in \b S to set in the loop.
///
/// The data will be set in rows \b i1 to \b i2 (inclusive) in
/// the loop. If range \b [i1,i2] is not contained in the loop,
/// all missing rows will be created and initialised to
/// \"data not given\" value. Example:
/// \code
///  mmcif::Loop loop("_sample_loop");
///  pstr        S[100];
///  int         i;
///
///    // initialize vector of strings
///    for (i=0;i<100;i++)  {
///      S[i] = new char[20];
///      sprintf ( S[i],"value i=%i",i );
///    }
///
///    // put data in loop
///    loop.PutSVector ( S,"made_up_string_value",0,99 );
///
///    // dispose of vector of strings
///    for (i=0;i<100;i++)
///      if (S[i])  delete[] S[i];
///
/// \endcode
void PutSVector ( psvector S, cpstr T, int i1, int i2 );

/// \brief Sets a set of float-point values for the given tag and
///        range of rows.
/// \param[in] R vector of real numbers to store in the loop
/// \param[in] T character string with tag name. If tag
///        is not found, it will be added, and all data in
///        the loop will be reindexed accordingly.
/// \param[in] i1 minimum data index in \b R to set in the loop
/// \param[in] i2 maximum data index in \b R to set in the loop
/// \param[in] prec float-point precision; g-format with given
///        precision will be used.
///
/// The data will be set in rows \b i1 to \b i2 (inclusive) in
/// the loop. If range \b [i1,i2] is not contained in the loop,
/// all missing rows will be created and initialised to
/// \"data not given\" value.
void PutRVector ( rvector R, cpstr T, int i1, int i2,
                  int prec=8 );

/// \brief Sets a set of integer values for the given tag and
///        range of rows.
/// \param[in] I vector of integers to store in the loop
/// \param[in] T character string with tag name. If tag
///        is not found, it will be added, and all data in
///        the loop will be reindexed accordingly.
/// \param[in] i1 minimum data index in \b I to set in the loop
/// \param[in] i2 maximum data index in \b I to set in the loop.
///
/// The data will be set in rows \b i1 to \b i2 (inclusive) in
/// the loop. If range \b [i1,i2] is not contained in the loop,
/// all missing rows will be created and initialised to
/// \"data not given\" value.
void PutIVector ( ivector I, cpstr T, int i1, int i2 );

/// \brief Returns category type \b MMCIF_Loop.
MMCIF_ITEM GetCategoryID() { return MMCIF_Loop; }

/// \brief Optimizes loop for RAM and data access speed.
///
/// Optimized data structures take less RAM and their indexes
/// are sorted for quicker access. Sorting is done automatically
/// as new data is added to the category. If the structure
/// is edited (fields/data removed), it may need
/// optimization and re-sorting for efficiency.\n\n
/// The sorting preserves the order of actual appearance of
/// tags and rows in mmCIF file. If a loop is created
/// programmatically, the order of tags and rows in mmCIF file
/// will be the same as order of adding them to the loop.
void Optimize();

/// \brief Writes loop data in mmCIF format into file.
/// \param[in] FName character string with file name.
/// \param[in] gzipMode flag to control compression of files:
///  \arg \b GZM_NONE: do not compress
///  \arg \b GZM_CHECK: check file name suffix and compress
///                     (or not) accordingly
///  \arg \b GZM_ENFORCE_GZIP: force gzip compression despite
///                     suffix
///  \arg \b GZM_ENFORCE_COMPRESS: force using compress despite
///                     suffix
/// \return \b true: success
/// \return \b false: can not open file for writing.
/// \remarks This function does not create a valid mmCIF file
/// as \"data_XXX\" record will be missing. It may be used for
/// debugging though.
bool WriteMMCIFLoop ( cpstr FName,
                      io::GZ_MODE gzipMode=io::GZM_CHECK );

/// \brief Writes loop data into given file.
/// \param f reference to MMDB's file class. The file should be
/// opened and closed by application.
/// \remarks There is a very limited use of this function on
/// application level. It is primarily used by mmcif::Data class.
void WriteMMCIF ( io::RFile f );

/// \brief Deep copy of loops.
///
/// Deep copy duplicates all data and memory allocations,
/// producing a genuine clone of the original. Only deep copy
/// should be used for copying MMDB objects, a mere assignment
/// operator will fail you.
/// \param[in] Loop a pointer to mmcif::Loop, the content of
///        which is copied into 'this' loop.
void Copy ( PCategory Loop );

/// \brief MMDB stream writer.
void write ( io::RFile f );

/// \brief MMDB stream reader.
void read ( io::RFile f );

protected:
int      nRows;   // number of data rows, as reported by GetLoopLength()
psmatrix field;   // loop data fields -- presumably indexed [row][tag]; TODO confirm
int      iColumn,nAllocRows;

void InitLoop     ();
void FreeMemory   ();
void DeleteFields ();
void ExpandRows   ( int nRowsNew );

};



//  ======================  Data  =============================


// CIFW are warnings which may be issued on reading the CIF file.
// Each of them means actually a CIF syntax error.

enum CIF_WARNING  {
  CIFW_UnrecognizedItems = 0x00000020,
  CIFW_MissingField      = 0x00000040,
  CIFW_EmptyLoop         = 0x00000080,
  CIFW_UnexpectedEOF     = 0x00000100,
  CIFW_LoopFieldMissing  = 0x00000200,
  CIFW_NotAStructure     = 0x00000400,
  CIFW_NotALoop          = 0x00000800,
  CIFW_DuplicateTag      = 0x00001000
};

// CIFRC are return codes from procedures of extracting data from
// the read CIF file. Negative returns reflect unsuccessful and
// not accomplished operation.
enum CIF_RC  {
  CIFRC_Loop           =  2,
  CIFRC_Structure      =  1,
  CIFRC_Ok             =  0,
  CIFRC_StructureNoTag = -1,
  CIFRC_LoopNoTag      = -2,
  CIFRC_NoCategory     = -3,
  CIFRC_WrongFormat    = -4,
  CIFRC_NoTag          = -5,
  CIFRC_NotAStructure  = -6,
  CIFRC_NotALoop       = -7,
  CIFRC_WrongIndex     = -8,
  CIFRC_NoField        = -9,
  CIFRC_Created        = -12,
  CIFRC_CantOpenFile   = -13,
  CIFRC_NoDataLine     = -14,
  CIFRC_NoData         = -15
};

//
//  Functional flags:
//  ~~~~~~~~~~~~~~~~~
//
//  CIFFL_PrintWarnings      when reading CIF file, all warning
//                           messages will be printed. If the flag
//                           is off, the warnings will be bit-encoded
//                           in the return code
//  CIFFL_StopOnWarnings     reading CIF file will stop at first
//                           warning issued
//  CIFFL_SuggestCategories  allows reading CIF file with loops having
//                           no categories. Hidden category names
//                           will be automatically generated for
//                           internal consistency of the system.
//                           These names will not appear in output.
//                           As these names are hidden, they cannot
//                           be used to access data. It is therefore
//                           assumed that all tags in all loops without
//                           categories are unique. Simply specify ""
//                           for category when accessing such data
//                           (it cannot be accessed through mmcif::Loop,
//                           but only through mmcif::Data functions
//                           taking both Category and Tag; note that
//                           CIFFL_SuggestCategories flag must be on
//                           while accessing such data).
//  CIFFL_SuggestTags        allows for identical tags in a category
//                           (including a hidden category). Hidden
//                           suffixes to tag names will be generated
//                           for internal consistency. At present,
//                           only data for first non-unique tag may be
//                           accessed.
//
enum CIF_FLAG  {
  CIFFL_PrintWarnings     = 0x00000001,
  CIFFL_StopOnWarnings    = 0x00000002,
  CIFFL_SuggestCategories = 0x00000004,
  CIFFL_SuggestTags       = 0x00000008
};

DefineClass(Data);
DefineStreamFunctions(Data);


/// \brief mmcif::Data represents mmCIF's \"data\" category, which keeps
/// structures and loops and is a mandatory element of an mmCIF file.
/*!
mmCIF's \"data\" category has the following form:
\code
data_DataName

_structure1.tag1  value1
..........

loop_
..........

\endcode
In the above example, all structures and loops that follow \b data_
keyword until next \b data_ or end of file are part of data category
with name \b DataName.

mmcif::Data represents this construct by keeping a list of mmcif::Struct
and mmcif::Loop class instances associated with the corresponding
categories in the data block.

The data object is created automatically when an mmCIF file is read,
or it may be created programmatically and then pushed into file.

Access to data is provided via category (structures and loops) names,
tags and data indexes (in case of loops). Alternatively, pointers to
contained structures and loops may be obtained first, and used for
fetching data using mmcif::Struct's and mmcif::Loop's interface
functions.

The following code gives an example of creating mmCIF's data category
and populating it:
\code
mmcif::Data data;

  // Specify data name:
  data.PutDataName ( "Sample_Data" );

  // the following statement:
  data.PutInteger ( 12345,"_category1","id" );
  // creates structure "_category1" with tag "id" and assigns it
  // the integer value of 12345.

  data.PutString ( "a name","_category1","name" );

  // Loops may be created quite similarly:
  data.PutLoopInteger ( 12345   ,"_loop1","id"  ,2 );
  data.PutLoopString  ( "a name","_loop1","name",0 );

  // push data into a file
  data.WriteMMCIFData ( "sample.cif" );

\endcode

The resulting file \b sample.cif will contain:

\code
data_Sample_Data

_category1.id   12345
_category1.name "a name"

loop_
_loop1.id
_loop1.name
.       "a name"
.       .
12345   .
\endcode

The same result may be achieved differently:

\code
mmcif::Data    data;
mmcif::PStruct mmCIFStruct;  // equivalent to mmcif::Struct *mmCIFStruct
mmcif::PLoop   mmCIFLoop;    // equivalent to mmcif::Loop   *mmCIFLoop

  // Specify data name:
  data.PutDataName ( "Sample_Data" );

  // create new mmCIF's structure in the data block:
  data.AddStructure ( "_category1",mmCIFStruct );
  if (mmCIFStruct)  {
    mmCIFStruct->PutInteger ( 12345   ,"id"   );
    mmCIFStruct->PutString  ( "a name","name" );
  }

  // similarly for the loop:
  data.AddLoop ( "_loop1",mmCIFLoop );
  if (mmCIFLoop)  {
    mmCIFLoop->PutInteger ( 12345   ,"id"  ,2 );
    mmCIFLoop->PutString  ( "a name","name",0 );
  }

  // push data into a file
  data.WriteMMCIFData ( "sample.cif" );

\endcode

See general principles of working with mmCIF files and mmCIF
hierarchies, as well as some code samples, in Section
\"\ref mmcif_handler\".
*/

class Data : public io::Stream  {

friend class File;

public :

/// \brief Basic constructor.
Data ();

/// \brief Constructor that assigns data block name.
/// \param[in] N data block name.
Data ( cpstr N );

/// \brief Constructor for MMDB data streaming functions.
Data ( io::RPStream Object );

/// \brief Destructor.
~Data();


// -------- General I/O functions

/// \brief Sets flag to print warnings on reading mmCIF files.
/// \param[in] SPW flag to print warnings:
///    \arg \b true : warnings will be printed to stdout
///    \arg \b false : warnings will not be printed but returned
///                    in return code (default)
void SetPrintWarnings ( bool SPW );

/// \brief Sets flag to stop on warning when reading an mmCIF file.
/// \param[in] SOW flag to stop on warning:
///    \arg \b true : reading will stop on first warning encountered
///    \arg \b false : warnings will not stop reading (default)
void SetStopOnWarning ( bool SOW );

/// \brief Sets optional flag(s) for reading mmCIF files.
///        By default, no flags are set.
/// \param[in] F flag or logical \"or\" of several flags to be set:
///  \arg \b CIFFL_PrintWarnings  toggles printing warning messages
///             at reading an mmCIF file, in stdout. If this
///             flag is not set (default), the warnings will
///             be returned in the bit-encoded return code
///  \arg \b CIFFL_StopOnWarnings  if set, reading an mmCIF file
///             will stop at first warning issued
///  \arg \b CIFFL_SuggestCategories  allows for reading of mmCIF
///             files with loops and structures having no
///             category names (\"dirty CIFs\"). If this flag is
///             set, then hidden category names will be
///             automatically generated. These names will not
///             appear in the output. As these names are hidden,
///             they cannot be used to access data. In order to
///             access data in such categories, consider whether
///             they are structures or loops. In case of an
///             unnamed structure, simply specify \"\" (empty
///             string) for structure name in all access
///             functions ( note that \b CIFFL_SuggestCategories
///             flag must be on while accessing such data). In
///             case of a loop, first use the mmcif::Data::FindLoop
///             function to retrieve pointer on the hidden loop,
///             and then use mmcif::Loop's interface functions to
///             fetch data from the loop.
///  \arg \b CIFFL_SuggestTags  allows for duplicate tags in a
///             category (structure or loop, including hidden
///             categories). This may help reading \"dirty CIFs\".
///             At present, only data for first non-unique tag
///             may be accessed.
void SetFlag ( CIF_FLAG F );

/// \brief Removes optional flag(s) for reading mmCIF files.
///        By default, no flags are set.
/// \param[in] F flag or logical \"or\" of several flags to be
///            removed (unset):
///  \arg \b CIFFL_PrintWarnings  no warnings will be printed in
///             stdout, but rather returned in the bit-encoded
///             return code
///  \arg \b CIFFL_StopOnWarnings  warnings will not stop reading
///  \arg \b CIFFL_SuggestCategories  loops without names will
///             count as errors and stop reading
///  \arg \b CIFFL_SuggestTags  duplicate tags in structures and
///             loops will count as errors and stop reading.
///
/// See more detail flag description in mmcif::Data::SetFlag().
void RemoveFlag ( CIF_FLAG F );

/// \brief Returns bit-encoded warnings issued at last file read.
/// \return an integer number, which is an or-superposition of
///         warning flags:
///  \arg \b CIFW_UnrecognizedItems: unrecognized items were found
///  \arg \b CIFW_MissingField: expected data field not found
///  \arg \b CIFW_EmptyLoop: loop category was defined but has no
///                          data
///  \arg \b CIFW_UnexpectedEOF: mmCIF construct finished prematurely
///  \arg \b CIFW_LoopFieldMissing: loop category has wrong number
///                          of data fields
///  \arg \b CIFW_NotAStructure: attempt to use a category name,
///                          which was once defined as a structure,
///                          as a loop
///  \arg \b CIFW_NotALoop: attempt to use a category name, which was
///                          once defined as a loop, as a structure
///  \arg \b CIFW_DuplicateTag: duplicate tag was found in a
///                          structure or loop
inline int GetWarnings() { return Warning; }

/// \brief Sets category names and tags that are to be ignored
///        on file read.
/// \param[in] cats list of categories, terminated by NULL
/// \param[in] tags list of tags, terminated by NULL.
///
/// This special function is to aid reading corrupt mmCIF files.
/// The input lists should be of equal length 'n', and specify
/// 'n' \"wrong fields\" that should be ignored on input. E.g.,
/// ith \"wrong field\" is identified as \"cats[i].tags[i]\".
/// If \"wrong field\" belongs to a loop, then the whole corresponding
/// column is assumed to be absent. This corrects for mmCIF errors
/// when defined tags in loops or structures do not have actual data
/// associated with them.
///
/// In order to remove settings, call SetWrongFields(NULL,NULL).
///
/// Example:
/*!
\code
// assume data for "_category.item1" and "_category.item2"
// are missing from the file to be read
mmcif::Data data;
cpstr cats[] = { "_category", "_category", NULL };
cpstr tags[] = { "item1"    , "item2"    , NULL };

   data.SetWrongFields ( cats,tags );
   data.ReadMMCIFData  ( "corrupt.cif" );
\endcode
*/
void SetWrongFields ( cpstr *cats, cpstr *tags );

/// \brief Reads mmCIF data block from file.
/// \param FName character null-terminated string with file name
/// \param gzipMode flag to read compressed files:
///  \arg \b GZM_NONE: do not assume any compression
///  \arg \b GZM_CHECK: check compression type by file extension
///  \arg \b GZM_ENFORCE: same as \b GZM_ENFORCE_GZIP
///  \arg \b GZM_ENFORCE_GZIP: assume gzip compression (*.gz files)
///  \arg \b GZM_ENFORCE_COMPRESS: assume compression with 'compress'
///                                (*.Z files).
/// \return \b CIFRC_Ok: no errors
/// \return \b negative: there were errors
/// \return \b positive: there were warnings.
///
/// This function will read 1st data block from the specified file.
/// In case of non-zero return, use GetCIFMessage() function to
/// print the corresponding error message or warning:
/*!
1622 \code 1623 mmcif::Data data; 1624 char errLog[500]; 1625 int rc; 1626 rc = data.ReadMMCIFData ( "myfile.cif" ); 1627 if (rc<0) 1628 printf ( " There was an error:\n %s\n", 1629 GetCIFMessage(errLog,rc) ); 1630 else if (rc>0) 1631 printf ( " There were warnings:\n %s\n", 1632 GetCIFMessage(errLog,rc) ); 1633 else 1634 printf ( " mmCIF file has be read in successfully.\n" ); 1635 \endcode 1636 */ 1637 int ReadMMCIFData ( cpstr FName, 1638 io::GZ_MODE gzipMode=io::GZM_CHECK ); 1639 1640 /// \brief Reads sequential mmCIF data blocks from file. 1641 /// \param f reference to an io::File object opened on a file 1642 /// \param S buffer string which represents a sliding read window. 1643 /// The string should be at least 500 characters long, 1644 /// initialized with empty-string value before first read, 1645 /// and passed unchanged between the reads 1646 /// \param lcount line counter, should be set zero before first 1647 /// read and passed unchanged between the reads. 1648 /// \return \b CIFRC_Ok: no errors 1649 /// \return \b negative: there were errors 1650 /// \return \b positive: there were warnings. 1651 /// 1652 /// This function will read 1st data block from the current position 1653 /// of the file. The function is useful if a file contains more than 1654 /// a single data block, which should be read sequentially. 1655 /// 1656 /// \note Alternatively, files with multiple data blocks can be 1657 /// read using mmcif::File class. 1658 /// 1659 /// In case of non-zero return, use GetCIFMessage() function to 1660 /// print the corresponding error message or warning: 1661 /*!
1662 \code 1663 mmcif::Data mmCIFData; 1664 CFile f; 1665 char S[1000]; 1666 int rc,lcount; 1667 1668 // open file first 1669 f.assign ( "/path/example.cif" ); 1670 if (!f.reset(true)) { 1671 printf ( " *** cannot open file '%s' for reading.\n", 1672 f.FileName() ); 1673 return -1; 1674 } 1675 1676 lcount = 0; // global line counter through the file 1677 S[0] = char(0); // buffer string 1678 while (!f.FileEnd()) { 1679 1680 rc = mmCIFData.ReadMMCIFData ( f,S,lcount ); 1681 1682 if (rc!=CIFRC_Ok) { // error or warning 1683 if ((rc<0) && (!f.FileEnd())) { // error 1684 printf ( " *** error reading file %s:\n" 1685 " %s\n",f.FileName(),GetCIFMessage(S,rc) ); 1686 return rc; 1687 } else if (rc>0) { // warning; NOTE(review): GetCIFMessage(S,rc) below appears to reuse the read buffer S for the message, while S is documented as to be passed unchanged between the reads -- confirm, or use a separate message buffer 1688 printf ( " ... warning on reading file %s:\n" 1689 " %s\n",f.FileName(),GetCIFMessage(S,rc) ); 1690 } 1691 } else { 1692 // fetch needful values from the data block 1693 // ........ 1694 } 1695 1696 } 1697 1698 f.shut(); // close file 1699 1700 // NOTE: do not delete mmcif::Struct/mmcif::Loop 1701 // classes obtained from mmcif::Data. If you do, get a crash. 1702 // All these structures are containers that dispose their 1703 // content automatically. 1704 \endcode 1705 */ 1706 int ReadMMCIFData ( io::RFile f, pstr S, int & lcount ); 1707 1708 /// \brief Writes mmCIF data block into file. 1709 /// \param FName character null-terminated string with file name 1710 /// \param gzipMode flag to read compressed files: 1711 /// \arg \b GZM_NONE: do not compress 1712 /// \arg \b GZM_CHECK: compress according to file extension 1713 /// \arg \b GZM_ENFORCE: same as \b GZM_ENFORCE_GZIP 1714 /// \arg \b GZM_ENFORCE_GZIP: compress with gzip 1715 /// \arg \b GZM_ENFORCE_COMPRESS: compression with 'compress'. 1716 /// \return \b true: no errors 1717 /// \return \b false: file cannot be open for writing. 1718 bool WriteMMCIFData ( cpstr FName, 1719 io::GZ_MODE gzipMode=io::GZM_CHECK ); 1720 1721 /// \brief Writes (next) mmCIF data block into file.
1722 /// \param f reference to an io::File object opened on a file. 1723 /// 1724 /// This function allows for sequential write of mmCIF data blocks 1725 /// into a file. 1726 /// 1727 /// \note Alternatively, files with multiple data blocks can be 1728 /// created using mmcif::File class. 1729 /// 1730 /// Example: 1731 /*! 1732 \code 1733 io::File f; 1734 mmcif::Data cifData; 1735 1736 // open file first 1737 f.assign ( "/path/example.cif" ); 1738 if (!f.rewrite()) { 1739 printf ( " *** cannot open file '%s' for writing.\n", 1740 f.FileName() ); 1741 return -1; 1742 } 1743 1744 cifData.PutDataName ( "name1" ); 1745 // fill cifData with all data needed 1746 cifData.WriteMMCIF ( f ); // first data block written 1747 1748 cifData.FreeMemory ( 0 ); // reset data block to empty 1749 cifData.PutDataName ( "name2" ); 1750 // fill cifData with all data needed 1751 cifData.WriteMMCIF ( f ); // second data block written 1752 1753 // add as many data blocks as needed 1754 1755 // now close the file 1756 f.shut(); 1757 1758 \endcode 1759 1760 */ 1761 void WriteMMCIF ( io::RFile f ); 1762 1763 1764 // -------- Retrieving data 1765 1766 /// \brief Returns the number of categories (structures and loops) 1767 /// in data block. GetNumberOfCategories()1768 inline int GetNumberOfCategories () { return nCategories; } 1769 1770 /// \brief Retrieves pointer to category (a structure or a loop) by 1771 /// category number. 1772 /// \param categoryNo category number to retrieve. Categories are 1773 /// numbered from 0 to GetNumberOfCategories()-1. 1774 /// \return pointer to category, if \b categoryNo is in the right 1775 /// range, or \b NULL otherwise. 1776 /// 1777 /// \note The category type (structure or loop) is returned by 1778 /// function mmcif::Category::GetCategoryID(). 1779 /// \note The application should never attempt to deallocate 1780 /// the category returned. It will be properly disposed of by 1781 /// mmcif::Data's destructor.
1782 PCategory GetCategory ( int categoryNo ); // 0..nCategories-1 1783 1784 /// \brief Retrieves mmCIF structure with given name. 1785 /// \param CName character string with name of the structure (must 1786 /// start with underscore). 1787 /// \return pointer to structure if structure with given name was 1788 /// found, and \b NULL otherwise. 1789 /// \note The application should never attempt to deallocate 1790 /// the structure returned. It will be properly disposed of by 1791 /// mmcif::Data's destructor. 1792 PStruct GetStructure ( cpstr CName ); 1793 1794 /// \brief Retrieves mmCIF loop with given name. 1795 /// \param CName character string with name of the loop (must 1796 /// start with underscore). 1797 /// \return pointer to loop if loop with given name was 1798 /// found, and \b NULL otherwise. 1799 /// \note The application should never attempt to deallocate 1800 /// the loop returned. It will be properly disposed of by 1801 /// mmcif::Data's destructor. 1802 PLoop GetLoop ( cpstr CName ); 1803 1804 /// \brief Finds loop containing all tags from the tag list 1805 /// provided. 1806 /// \param tagList list of tags to be looked for. The list should 1807 /// be terminated by empty string \"\". The order of tags 1808 /// is not significant. 1809 /// \return pointer to loop if loop with given tags was found, and 1810 /// \b NULL otherwise. 1811 /// 1812 /// The function will look for the first loop that includes all tags 1813 /// from the list. The list does not have to include all tags for 1814 /// that loop in order for the function to succeed. This function is 1815 /// useful for reading \"dirty cifs\" that may contain loops without 1816 /// a name. 1817 PLoop FindLoop ( cpstr * tagList ); 1818 1819 /// \brief Retrieves data block name into dynamically-allocated 1820 /// string. 1821 /// \param dname pointer reference to a string that accepts data 1822 /// block name.
If \b dname is not \b NULL, it is treated 1823 /// as a pre-allocated string, which is disposed before 1824 /// copying. The application is responsible for deallocating 1825 /// \b dname. 1826 /// \param Remove flag to remove name from the data block. 1827 void GetDataName ( pstr & dname, bool Remove=false ); 1828 1829 /// \brief Returns data block name. GetDataName()1830 inline pstr GetDataName() { return name; } 1831 1832 // CheckData(..) returns positive value if the field is in the 1833 // file: 1834 // CIFRC_Structure category CName is a structure 1835 // CIFRC_Loop category CName is a loop 1836 // Negative returns mean: 1837 // CIFRC_StructureNoTag category CName is present, 1838 // it is a structure, but it does not 1839 // have tag TName 1840 // CIFRC_LoopNoTag category CName is present, 1841 // it is a loop, but it does not have 1842 // tag TName 1843 // CIFRC_NoCategory category CName is not present. 1844 // If TName is set to NULL then only the CName is checked and 1845 // possible returns are CIFRC_Structure, CIFRC_Loop and 1846 // CIFRC_NoCategory. 1847 int CheckData ( cpstr CName, cpstr TName ); 1848 1849 int DeleteCategory ( cpstr CName ); 1850 int DeleteStructure ( cpstr CName ); 1851 int DeleteLoop ( cpstr CName ); 1852 1853 // Optimize() optimizes memory allocation of the CIF data. It is 1854 // a good idea to call it once after extraction of data (GetXXXXXX 1855 // functions) with Remove flag set on has been completed. 1856 void Optimize(); 1857 1858 // GetString(..), GetReal(..) and GetInteger(..) return 0 if the 1859 // requested field was found and successfully converted.
Negative 1860 // returns mean: 1861 // CIFRC_WrongFormat the field was found but failed to convert 1862 // due to improper numeric format 1863 // CIFRC_NoTag category CName was found, but it does not 1864 // have tag TName 1865 // CIFRC_NoCategory category CName was not found 1866 // CIFRC_NotAStructure category CName was found, but it is 1867 // a loop rather than a structure. 1868 // GetString(..) will try to dispose Dest unless it is assigned 1869 // NULL value before the call. The string will then be dynamically 1870 // allocated and copied. 1871 // If Remove is set to true, the field will be removed after 1872 // extraction. 1873 int GetString ( pstr & Dest, cpstr CName, cpstr TName, 1874 bool Remove=false ); 1875 pstr GetString ( cpstr CName, cpstr TName, int & RC ); 1876 int DeleteField ( cpstr CName, cpstr TName ); 1877 int GetReal ( realtype & R, cpstr CName, 1878 cpstr TName, bool Remove=false ); 1879 int GetInteger ( int & I, cpstr CName, cpstr TName, 1880 bool Remove=false ); 1881 1882 // GetLoopLength(..) returns CIFRC_NotALoop if the category CName 1883 // is not a loop, CIFRC_NoCategory if the category CName is not 1884 // found. Non-negative returns give the length of the loop (may be 1885 // 0 if the loop is empty). 1886 int GetLoopLength ( cpstr CName ); 1887 1888 // GetLoopString(..), GetLoopReal(..) and GetLoopInteger(..) act 1889 // like GetString(..), GetReal(..) and GetInteger(..) above for 1890 // nrow-th element of the 'loop_' (indexed like 0..N-1 where N 1891 // is obtained through GetLoopLength(..)). They will return 1892 // CIFRC_WrongIndex if nrow is out of range. 1893 // If Remove is set to true, the field will be removed after 1894 // extraction.
1895 int GetLoopString ( pstr & Dest, cpstr CName, 1896 cpstr TName, int nrow, 1897 bool Remove=false ); 1898 pstr GetLoopString ( cpstr CName, cpstr TName, 1899 int nrow, int & RC ); 1900 int DeleteLoopField ( cpstr CName, cpstr TName, 1901 int nrow ); 1902 int GetLoopReal ( realtype & R, cpstr CName, 1903 cpstr TName, int nrow, 1904 bool Remove=false ); 1905 int GetLoopInteger ( int & I, cpstr CName, 1906 cpstr TName, int nrow, 1907 bool Remove=false ); 1908 1909 // GetLoopSVector(..), GetLoopRVector(..) and GetLoopIVector(..) 1910 // read CIF 'loop_' data into allocated vectors of strings, reals 1911 // and integers, correspondingly. The vectors may be deallocated 1912 // prior to call and assigned NULL, in which case they will be 1913 // allocated with offsets of i1, which is also the lower index of 1914 // the 'loop_' data transferred into it. The upper vector index is 1915 // given by i2 or by the loop's length whichever is less. If 1916 // vectors are not assigned NULL prior to the call, it is assumed 1917 // that they are properly (i1-offset, i2-i1+1 length) allocated. 1918 // The return codes are same as those of GetLoopString(..), 1919 // GetLoopReal(..) and GetLoopInteger(..). 1920 int GetLoopSVector ( psvector & S, cpstr CName, 1921 cpstr TName, int i1=0, int i2=MaxInt4, 1922 bool Remove=false ); 1923 int GetLoopRVector ( rvector & R, cpstr CName, 1924 cpstr TName, int i1=0, int i2=MaxInt4, 1925 bool Remove=false ); 1926 int GetLoopIVector ( ivector & I, cpstr CName, 1927 cpstr TName, int i1=0, int i2=MaxInt4, 1928 bool Remove=false ); 1929 1930 1931 // -------- Storing data 1932 1933 // Unless the data are to be added to the existing CIF structure, 1934 // FreeMemory() should be called once before creating a new 1935 // CIF data set. 1936 void FreeMemory ( int key ); 1937 1938 void PutDataName ( cpstr dname ); // stores name for 'data_' 1939 // record 1940 1941 // PutString(..), PutReal(..) and PutInteger(..)
will put the 1942 // values given into the specified category (CName) under the 1943 // specified tag (TName). The category, tag and field are created 1944 // automatically; the field will be replaced silently if identical 1945 // CName.TName is specified in two calls. Calls of these functions 1946 // may follow in random order; however CIF file will have all tags 1947 // grouped by categories and categories will follow in the order 1948 // of first appearance in PutString(..), PutReal(..) or 1949 // PutInteger(..). 1950 // Return code - one of CIFRC_Ok or CIFRC_NotAStructure 1951 int PutNoData ( int NoDataType, cpstr CName, 1952 cpstr TName ); 1953 int PutString ( cpstr S, cpstr CName, 1954 cpstr TName, bool Concatenate=false ); 1955 int PutDate ( cpstr CName, cpstr TName ); 1956 int PutReal ( realtype R, cpstr CName, cpstr TName, 1957 int prec=8 ); 1958 int PutInteger ( int I, cpstr CName, cpstr TName ); 1959 1960 // If loop category CName is not present in the CIF data 1961 // structure, AddLoop(..) creates an empty one and returns 1962 // its pointer in Loop. If loop category CName is already in 1963 // the CIF data structure, its pointer is returned, and any 1964 // data which might be contained in it, remains untouched. 1965 // To stuff the loop with data, first the data tags have to 1966 // be specified by calling Loop->AddLoopTag(..). After all 1967 // tags are given, the data comes as a stream of calls 1968 // Loop->AddString(..), Loop->AddReal(..) and 1969 // Loop->AddInteger(..) which should provide data for every 1970 // tag in sequence in strictly the same order as the tags 1971 // were given. This essentially reflects reading a CIF loop 1972 // from a file. 1973 // Alternatively, the loop data may be stored with PutLoopXXX() 1974 // functions given below, although this way may be less 1975 // efficient (but more flexible). 1976 // AddLoop(..)
may return 1977 // CIFRC_Ok category was present 1978 // CIFRC_Created category was not present but it has 1979 // been created; the category is empty 1980 // CIFRC_NotALoop category was present as a structure, but 1981 // has been replaced for a loop; 1982 // the category is empty. 1983 int AddLoop ( cpstr CName, PLoop & cifLoop ); 1984 int AddStructure ( cpstr CName, PStruct & cifStruct ); 1985 1986 // PutLoopString(..), PutLoopReal(..) and PutLoopInteger(..) act 1987 // like PutString(..), PutReal(..) and PutInteger(..) above for 1988 // nrow-th element of the 'loop_' CName (indexed beginning from 0). 1989 // In consecutive calls, given values of nrow do not have to be 1990 // ordered; the most efficient way is to start with HIGHEST value 1991 // for nrow in the loop and move down to 0. The least efficient way 1992 // is to start with nrow=0 and move up. 1993 // These functions allow to form loops in arbitrary way. 1994 // The functions may return CIFRC_Ok or CIFRC_NotALoop. 1995 int PutLoopNoData ( int NoDataType, cpstr CName, 1996 cpstr TName, int nrow ); 1997 int PutLoopString ( cpstr S, cpstr CName, 1998 cpstr TName, int nrow ); 1999 int PutLoopReal ( realtype R, cpstr CName, 2000 cpstr TName, int nrow, 2001 int prec=8 ); 2002 int PutLoopInteger ( int I, cpstr CName, cpstr TName, 2003 int nrow ); 2004 2005 // PutLoopSVector(..), PutLoopRVector(..) and PutLoopIVector(..) 2006 // put vectors of values into specified loop fields. Parameters i1 2007 // and i2 give the range of indices of values which are to be 2008 // transferred. To transfer an entire vector allocated as [0..N-1] 2009 // i1 should be set to 0 and i2 - to N-1. Note that the loop is 2010 // always indexed as starting from 0 on, therefore negative i1 and 2011 // i2 are not allowed, and specifying i1>0 will leave first i1 2012 // elements of the CIF loop for the corresponding tag undefined 2013 // (will be output like '?'). 2014 // These functions allow to form loops in arbitrary way.
2015 int PutLoopSVector ( psvector S, cpstr CName, 2016 cpstr TName, int i1, int i2 ); 2017 int PutLoopRVector ( rvector R, cpstr CName, 2018 cpstr TName, int i1, int i2, 2019 int prec=8 ); 2020 int PutLoopIVector ( ivector I, cpstr CName, 2021 cpstr TName, int i1, int i2 ); 2022 2023 int RenameCategory ( cpstr CName, cpstr newCName ); 2024 2025 // -------- 2026 2027 void Copy ( PData Data ); 2028 int CopyCategory ( PData Data, cpstr CName, 2029 cpstr newCName=NULL ); 2030 2031 void PrintCategories(); // for debugging only 2032 2033 void write ( io::RFile f ); 2034 void read ( io::RFile f ); 2035 2036 protected: 2037 pstr name; 2038 int nCategories; 2039 PPCategory Category; 2040 ivector index; 2041 int flags; 2042 int Warning; 2043 int loopNo; // used locally for suggesting categories 2044 int tagNo; // used locally for suggesting tags 2045 psvector WrongCat; 2046 psvector WrongTag; 2047 int nWrongFields; 2048 2049 void InitData (); 2050 void FreeWrongFields (); 2051 bool CheckWrongField ( cpstr C, cpstr T ); 2052 void Sort (); 2053 2054 // GetCategoryNo searches for index of category cname 2055 // in Category[].
Return: 2056 // >=0 : position of the category found 2057 // <0 : the category was not found, it could be inserted before 2058 // (-RC-1)th element, where RC is the return value 2059 int GetCategoryNo ( cpstr cname ); 2060 int AddCategory ( cpstr cname ); 2061 int DeleteCategory ( int CatNo ); 2062 2063 void GetDataItem ( io::RFile f, pstr S, pstr & L, pstr & p, 2064 int & lcount, int & llen ); 2065 void GetLoop ( io::RFile f, pstr S, pstr & L, pstr & p, 2066 int & lcount, int & llen ); 2067 int GetField ( io::RFile f, pstr S, pstr & L, pstr & p, 2068 int & lcount, int & llen ); 2069 2070 }; 2071 2072 2073 2074 // ======================== File ============================= 2075 2076 DefineClass(File); 2077 DefineStreamFunctions(File); 2078 /// \brief mmcif::File handles mmCIF files with multiple data blocks (mmcif::Data); data blocks are retrieved by number (0..nData-1) or by name. 2079 class File : public io::Stream { 2080 2081 public : 2082 int nData; 2083 ivector index; 2084 PPData data; 2085 2086 File (); 2087 File ( cpstr FName, io::GZ_MODE gzipMode=io::GZM_CHECK ); 2088 File ( io::RPStream Object ); 2089 ~File(); 2090 SetPrintWarnings(bool SPW)2091 void SetPrintWarnings ( bool SPW ) { PrintWarnings = SPW; } SetStopOnWarning(bool SOW)2092 void SetStopOnWarning ( bool SOW ) { StopOnWarning = SOW; } 2093 2094 int ReadMMCIFFile ( cpstr FName, 2095 io::GZ_MODE gzipMode=io::GZM_CHECK ); 2096 int WriteMMCIFFile ( cpstr FName, 2097 io::GZ_MODE gzipMode=io::GZM_CHECK ); 2098 GetNofData()2099 int GetNofData() { return nData; } 2100 PData GetCIFData ( int dataNo ); // 0..nData-1 2101 PData GetCIFData ( cpstr DName ); 2102 int AddCIFData ( cpstr DName ); 2103 int DeleteCIFData ( cpstr DName ); 2104 int DeleteCIFData ( int dataNo ); 2105 int GetCIFDataNo ( cpstr DName ); 2106 2107 void WriteMMCIF ( io::RFile f ); 2108 2109 void Copy ( PFile File ); 2110 2111 void write ( io::RFile f ); 2112 void read ( io::RFile f ); 2113 2114 protected: 2115 int nAllocData; 2116 bool PrintWarnings; 2117 bool StopOnWarning; 2118 2119 void InitFile (); 2120 void FreeMemory (); 2121 void Sort (); 2122 void ExpandData ( int nDataNew
); 2123 2124 }; 2125 2126 2127 extern pstr GetMMCIFInputBuffer ( int & LineNo ); 2128 2129 // isCIF will return 2130 // -1 if file FName does not exist 2131 // 0 if file FName is likely a CIF file ( 'data_' is present ) 2132 // 1 if file FName is not a CIF file ( 'data_' is absent ) 2133 extern int isCIF ( cpstr FName, io::GZ_MODE gzipMode=io::GZM_CHECK ); 2134 extern int isCIF ( io::RFile f ); 2135 // GetCIFMessage(M,RC) is used with the return code RC of the mmCIF read functions to print the corresponding error message or warning (see the examples for mmcif::Data::ReadMMCIFData()); M is presumably the caller's buffer that receives the message -- confirm against the implementation. 2136 pstr GetCIFMessage ( pstr M, int RC ); 2137 2138 2139 } // namespace mmcif 2140 2141 } // namespace mmdb 2142 2143 2144 #endif 2145 2146 2147