1 //  $Id: mmdb_mmcif_.h $
2 //  =================================================================
3 //
4 //   CCP4 Coordinate Library: support of coordinate-related
5 //   functionality in protein crystallography applications.
6 //
7 //   Copyright (C) Eugene Krissinel 2000-2013.
8 //
9 //    This library is free software: you can redistribute it and/or
10 //    modify it under the terms of the GNU Lesser General Public
11 //    License version 3, modified in accordance with the provisions
12 //    of the license to address the requirements of UK law.
13 //
14 //    You should have received a copy of the modified GNU Lesser
15 //    General Public License along with this library. If not, copies
16 //    may be downloaded from http://www.ccp4.ac.uk/ccp4license.php
17 //
18 //    This program is distributed in the hope that it will be useful,
19 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
20 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 //    GNU Lesser General Public License for more details.
22 //
23 //  =================================================================
24 //
25 //    12.09.13   <--  Date of Last Modification.
26 //                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27 //  -----------------------------------------------------------------
28 //
29 //  **** Module  :  MMDB_MMCIF <interface>
30 //       ~~~~~~~~~
31 //  **** Project :  MacroMolecular Data Base (MMDB)
32 //       ~~~~~~~~~
33 //  **** Classes :  mmdb::mmcif::Category ( mmCIF category    )
34 //       ~~~~~~~~~  mmdb::mmcif::Struct   ( mmCIF structure   )
35 //                  mmdb::mmcif::Loop     ( mmCIF loop        )
36 //                  mmdb::mmcif::Data     ( mmCIF data block  )
37 //                  mmdb::mmcif::File     ( mmCIF file        )
38 //
39 //  (C) E. Krissinel 2000-2013
40 //
41 //  =================================================================
42 //
43 
44 
45 #ifndef __MMDB_MMCIF__
46 #define __MMDB_MMCIF__
47 
48 
49 #include "mmdb_io_stream.h"
50 
51 namespace mmdb  {
52 
53   namespace mmcif  {
54 
55 
56     //  ======================  Category  ==========================
57 
/// \brief Identifies the kind of an item in the mmCIF hierarchy.
///
/// Values of this type are returned by Category::GetCategoryID() and
/// its overrides, which allows telling apart objects that are held
/// through base-class pointers. The numeric values are fixed
/// explicitly.
enum MMCIF_ITEM  {
  MMCIF_Category = 0,  ///< generic category (mmcif::Category)
  MMCIF_Struct   = 1,  ///< "structure" category (mmcif::Struct)
  MMCIF_Loop     = 2,  ///< "loop" category (mmcif::Loop)
  MMCIF_Data     = 3   ///< presumably a data block (mmcif::Data)
};
64 
65     DefineClass(Category);
66     DefineStreamFunctions(Category);
67 
68     /// \brief mmcif::Category is a base class for mmcif::Struct and
69     ///        mmcif::Loop, implementations of mmCIF's "structure" and
70     ///        "loop" categories.
71     /*!
72     This class is not instantiated independently in any applications,
73     however, it provides a few public functions which work for
74     both mmcif::Struct and mmcif::Loop.
75 
76     All data in mmCIF hierarchy is addressed using construct
77     "category.tag" plus row number (>=0) for loops. Category names
78     should always start from underscore, while tags normally start
79     with a letter, e.g. "_barrel.id".
80 
81     See general principles of working with mmCIF files and mmCIF
82     hierarchies in Section \"\ref mmcif_handler\".
83     */
84 
85     class Category : public io::Stream  {
86 
87       friend class Data;
88 
89       public :
90 
91         /// \brief Basic constructor.
92         Category ();
93 
94         /// \brief Constructor that assigns category name.
95         /// \param[in] N category name (must start with underscore).
96         Category ( cpstr N );
97 
98         /// \brief Constructor for MMDB data streaming functions.
99         Category ( io::RPStream Object );
100 
101         /// \brief Destructor.
102         ~Category();
103 
104         /// \brief Returns category name.
105         /// \return NULL if name was not set
106         /// \return pointer to character string if name was set
GetCategoryName()107         inline pstr   GetCategoryName() { return name; }
108 
109         /// \brief Sets category name.
110         /// \param N new category name
111         void   SetCategoryName ( cpstr N );
112 
113         /// \brief Returns category type.
114         /// This function may be used when retrieving categories
115         /// (structures and loops) from data blocks (mmcif::Data).
116         /// \return MMCIF_Category for mmcif::Category
117         /// \return MMCIF_Struct   for mmcif::Struct
118         /// \return MMCIF_Loop     for mmcif::Loop
GetCategoryID()119         virtual MMCIF_ITEM GetCategoryID() { return MMCIF_Category; }
120 
121         /// \brief Virtual function for writing category's content
122         ///        into mmCIF file.
123         /// Default implementation does nothing.
WriteMMCIF(io::RFile)124         virtual void WriteMMCIF ( io::RFile ) {}
125 
126         /// \brief Virtual function for optimizig data structures.
127         /// Optimized data structures take less RAM and their indexes
128         /// are sorted for quicker access. Sorting is done automatically
129         /// as new data is added to the category. If the
130         /// category is edited (fields/data removed), it may need
131         /// optimization and re-sorting for efficiency.\n\n
132         /// The sorting preserves the order of actual appearance of
133         /// tags in mmCIF file. If a category is created
134         /// programmatically, the order of tags in mmCIF file will be
135         /// the same as order of adding them to the category.
136         virtual void Optimize();
137 
138         /// \brief Sorts category's data for quicker access.
139         /// The sorting is essentially re-indexing of data for quicker
140         /// access. It does not change the order of data in both mmCIF
141         /// hierarchy and mmCIF file. E.g., if tag "serial_no" was 2nd
142         /// one in given category before sorting, it will remain on 2nd
143         /// place after it, therefore no change in tag number passed
144         /// to functions in mmcif::Struct, mmcif::Loop and mmcif::Data.
145         void  Sort();
146 
147         /// \brief Returns serial number of a tag in the category.
148         /// \param[in]  ttag tag (or name of a field in category)
149         /// \return \b >=0 : the tag is in given position
150         /// \return \b <0 : the tag was not found, but it could be
151         ///         inserted before tag with (-rc-1)th index, where
152         ///         'rc' is the return.
153         int   GetTagNo ( cpstr ttag );
154 
155         /// \brief Adds a tag to the category.
156         /// Adding a tag in mmcif::Category does not reserve any
157         /// placeholder for the corresponding value. All tags get
158         /// automatically sorted (reindexed) for quicker access.
159         /// Tags will appear in mmCIF file in order of their addition
160         /// to the category.
161         /// \param[in]  ttag tag to be added.
162         /// \return \b >=0 the tag is already in the category, and return
163         ///             is its serial number. No changes to the category
164         ///             is done
165         /// \return \b <0  the tag was added to the list of tags, and
166         ///             return is minus total number of tags in the
167         ///             category.
168         int   AddTag ( cpstr ttag );
169 
170         /// \brief Returns the total number of tags in the category
GetNofTags()171         int   GetNofTags() { return nTags; }
172 
173         /// \brief Returns tag with the specified serial number.
174         /// The tags are enumerated as 0..GetNofTags()-1.
175         /// \param tagNo tag's serial number
176         /// \return \b NULL: tagNo is outside the range
177         ///                  of 0..GetNofTags()-1
178         /// \return \b not \b NULL: tag in tagNo'th position
179         pstr  GetTag ( int tagNo );  // 0..nTags-1
180 
181         /// \brief Prints list of tags to stdout.
182         /// Both sorted and unsorted tags are printed to standard
183         /// output. This function may be used for debugging.
184         void  PrintTags();
185 
186         /// \brief Returns true if all tags from the list are found
187         ///        in the category.
188         /// The size of the list of tags may be less than the number
189         /// of tags in the category, and order of tags is disregarded.
190         /// \param[in] tagList  list of tags to be checked for presence
191         ///                 in the category. The list must end with NULL
192         ///                 pointer, or your program will crash.
193         /// \return \b true  if all tags from the list were found in the
194         ///               category
195         /// \return \b false if one or more tags from the list were not
196         ///               found in the category.
197         ///
198         /// Example:
199         /// \code
200         ///  cpstr tagList[] = {"id","type","date",NULL};
201         ///  mmcif::Struct cifStruct;
202         ///  if (cifStruct.CheckTags(tagList))
203         ///    printf ( " all tags are found in category %s\n",
204         ///             cifStruct.GetCategoryName() );
205         /// \endcode
206         /// This function is useful for finding categories in
207         /// "dirty cifs", where category name is not given.
208         bool CheckTags ( cpstr * tagList );
209 
210         /// \brief Deep copy of categories.
211         /// Deep copy duplicates all data and memory allocations,
212         /// producing a genuine clone of the original. Only deep copy
213         /// should be used for copying MMDB objects, a mere assignment
214         /// operator will fail you.
215         /// \param[in] Category a pointer to mmcif::Category, the content of
216         ///                 which is copied into 'this' category.
217         virtual void Copy ( PCategory Category );
218 
219         /// \brief MMDB stream writer.
220         void  write ( io::RFile f );
221 
222         /// \brief MMDB stream reader.
223         void  read  ( io::RFile f );
224 
225       protected:
226         int      nTags;
227         pstr     name;
228         psvector tag;
229         ivector  index;
230         int      nAllocTags;
231 
232         void          InitCategory    ();
233         virtual void  FreeMemory      ();
234         void          ExpandTags      ( int nTagsNew );
235         void          PutCategoryName ( cpstr newName );
236 
237     };
238 
239 
240 
241     //  ======================  Struct  ============================
242 
243     DefineClass(Struct);
244     DefineStreamFunctions(Struct);
245 
246     /// \brief Constants used to specify mmCIF's \"data not given\" and
247     /// \"data not available\" data types.
248     extern const int CIF_NODATA_DOT;
249     extern const int CIF_NODATA_QUESTION;
250     extern cpstr     CIF_NODATA_DOT_FIELD;
251     extern cpstr     CIF_NODATA_QUESTION_FIELD;
252 
253     /// \brief mmcif::Struct represents mmCIF's \"structure\" category,
254     ///        where data follows directly the corresponding tag.
255     /*!
256     mmCIF's \"structure\" category has the following form:
257     \code
258     _structure_name.tag0   value0
259     _structure_name.tag1   value1
260     ...........
261     _structure_name.tagN   valueN
262     \endcode
263     mmcif::Struct represents this construct by keeping category name
264     (\"_structure_name\") and associated lists of tags
265     (\"tag0,tag1...tagN\") and their values (\"value0,value1...valueN\").
266 
267     The structure is created automatically when an mmCIF file is read,
268     or it may be created programatically and then pushed into file.
269 
270     Access to data is provided via tags. Internally, all values are kept
271     as character fields, and it is only on the retrieval stage that they
272     are converted to other data types (integers, floats or strings).
273     If conversion is not possible, an error code is returned by the
274     corresponding functions, which should be checked by the application.
275 
276     See general principles of working with mmCIF files and mmCIF
277     hierarchies, as well as some code samples, in Section
278     \"\ref mmcif_handler\".
279     */
280 
281     class Struct : public Category  {
282 
283       public :
284 
285         /// \brief Basic constructor
286         Struct ();
287 
288         /// \brief Constructor that assigns structure name.
289         /// \param[in] N structure name (must start with underscore).
290         Struct ( cpstr N );
291 
292         /// \brief Constructor for MMDB data streaming functions
293         Struct ( io::RPStream Object );
294 
295         /// \brief Destructor
296         ~Struct();
297 
298         /// \brief Adds field to the structure.
299         /// \param[in] F field value
300         /// \param[in] T tag name
301         /// \param[in] Concatenate flag to concatenate existing field
302         ///            with the value of \b F. If tag \b T is already in
303         ///            the structure and \b Concatenate=true, then
304         ///            value of \b F is appended to the existing field.
305         ///            Otherwise, the field is replaced with the value
306         ///            of \b F
307         void AddField ( cpstr F, cpstr T, bool Concatenate=false );
308 
309         /// \brief Returns category type \b MMCIF_Struct.
GetCategoryID()310         MMCIF_ITEM  GetCategoryID()  { return MMCIF_Struct; }
311 
312         /// \brief Optimizes structure for RAM and data access speed.
313         /// Optimized data structures take less RAM and their indexes
314         /// are sorted for quicker access. Sorting is done automatically
315         /// as new data is added to the category. If the structure
316         /// is edited (fields/data removed), it may need
317         /// optimization and re-sorting for efficiency.\n\n
318         /// The sorting preserves the order of actual appearance of
319         /// tags in mmCIF file. If a structure is created
320         /// programmatically, the order of tags in mmCIF file will be
321         /// the same as order of adding them to the structure.
322         void Optimize();
323 
324         /// \brief Returns value of field corresponding to tag in the
325         ///        specified position.
326         /// Tag positions are defined by the order of their appearance in
327         /// mmCIF file (if structure was read from a file), or by the
328         /// order of their addition to the structure (if structure was
329         /// created programmatically). Tags are numbered as
330         /// 0...GetNofTags()-1.
331         /// \param[in] tagNo tag number (position in the structure)
332         /// \return \b NULL: tag does not exist
333         /// \return \b CIF_NODATA_DOT_FIELD the field contains
334         ///            \"data not given\" value
335         /// \return \b CIF_NODATA_QUESTION_FIELD the field contains
336         ///            \"data not available\" value
337         /// \return \b not \b NULL: string value of the field
338         pstr GetField ( int tagNo );  // 0..nTags-1
339 
340         /// \brief Fetches value, corresponding to the given tag, as
341         ///        a string
342         /// \param[out] S pointer to string, which will point to newly
343         ///               allocated character string, containing value
344         ///               associated with tag \b TName. If tag or value
345         ///               is not found, or if value corresponds to
346         ///               mmCIF's \"data not given\" or
347         ///               \"data not available\", \b S returns NULL.
348         /// \param[in] TName character string with tag name
349         /// \param[in] Remove flag to remove the tag and its value from
350         ///               structure after it is read.
351         /// \return \b CIFRC_NoTag: tag is not found
352         /// \return \b CIFRC_NoField: value is not found
353         /// \return \b CIFRC_Ok: success. If \b S returns NULL, then
354         ///                the value corresponds to either
355         ///                \"data not available\" or
356         ///                \"data not given\".
357         /// \remarks If \b S!=NULL at time of call, the function will
358         ///  try to dispose the string it points on. This allows a slick
359         ///  re-use of the same pointer in consequitive calls. This also
360         ///  means that one should never pass unallocated pointer to
361         ///  this function. Safe use assumes the following patern:
362         ///  \code
363         ///  mmcif::Struct mmCIFStruct;
364         ///  pstr S;  // this is merely "char *S"
365         ///  int  rc;
366         ///
367         ///    S  = NULL; // null pointer before first use
368         ///    rc = mmCIFStruct.GetString ( S,"id" );
369         ///    if (rc)  CreateCopy ( S,"*** data not found" );
370         ///    if (!S)  CreateCopy ( S,"*** data not given or not available" );
371         ///    printf ( " rc=%i, S='%s'\n",rc,S );
372         ///
373         ///    rc = mmCIFStruct.GetString ( S,"property" );
374         ///    if (rc)  CreateCopy ( S,"*** data not found" );
375         ///    if (!S)  CreateCopy ( S,"*** data not given or not available" );
376         ///    printf ( " rc=%i, S='%s'\n",rc,S );
377         ///
378         ///    // etc etc etc
379         ///
380         ///    delete[] S;  // application is responsible for final
381         ///                 // disposal of memory
382         ///  \endcode
383         int  GetString ( pstr & S, cpstr TName, bool Remove=false );
384 
385         /// \brief Returns pointer to value associated with given tag.
386         /// \param[in] TName character string with tag name
387         /// \param[out] RC return code:
388         ///    \arg \b CIFRC_NoTag: tag is not found
389         ///    \arg \b CIFRC_NoField: value is not found
390         ///    \arg \b CIFRC_Ok: success. If function returns NULL, then
391         ///                the value corresponds to either
392         ///                \"data not available\" or
393         ///                \"data not given\".
394         /// \return \b NULL: either tag or value is not found, or the
395         ///    value is \"data not available\" or \"data not given\".
396         ///    Read return code \b RC in order to interpret NULL return.
397         /// \return \b not \b NULL: pointer (\c char \c *) to value
398         ///    associated with \b TName.
399         /// \remarks Never try to dispose memory pointed by the return
400         /// value, or your program will crash.
401         pstr GetString ( cpstr TName, int & RC ); // NULL if TName
402                                                          // is not there
403 
404         /// \brief Deletes field associated with given tag.
405         /// \param[in] TName character string with tag name
406         /// \return \b >=0: field deleted
407         /// \return \b <0: either field or tag is not found
408         int  DeleteField ( cpstr TName );  // <0 the field was not there
409 
410         /// \brief Fetches value, corresponding to the given tag, as
411         ///        a real number.
412         /// \param[out] R reference to real number to accept the value.
413         ///        In case of failure, \b R returns zero.
414         /// \param[in] TName character string with tag name
415         /// \param[in] Remove flag to remove the tag and its value from
416         ///               structure after it is read.
417         /// \return \b CIFRC_NoTag: tag is not found
418         /// \return \b CIFRC_NoField: field is not found
419         /// \return \b CIFRC_WrongFormat: value is not a real or integer
420         ///            number.
421         /// \return \b CIFRC_NoData: value is either
422         ///            \"data not available\" or
423         ///            \"data not given\".
424         /// \return \b CIFRC_Ok: success.
425         int  GetReal ( realtype & R, cpstr TName, bool Remove=false );
426 
427         /// \brief Fetches value, corresponding to the given tag, as
428         ///        an integer number.
429         /// \param[out] I reference to integer number to accept the
430         ///        value. In case of failure, \b I returns zero, except
431         ///        when value is \"data not available\" or
432         ///        \"data not given\", when I returns \c MinInt4.
433         /// \param[in] TName character string with tag name
434         /// \param[in] Remove flag to remove the tag and its value from
435         ///               structure after it is read.
436         /// \return \arg \b CIFRC_NoTag: tag is not found
437         /// \return \b CIFRC_NoField: field is not found
438         /// \return \b CIFRC_WrongFormat: value is not an integer number.
439         /// \return \b CIFRC_NoData: value is either
440         ///            \"data not available\" or
441         ///            \"data not given\".
442         /// \return \b CIFRC_Ok: success.
443         int  GetInteger ( int & I, cpstr TName, bool Remove=false );
444 
445         /// \brief Sets string value for given tag.
446         /// \param[in] S character string with value to be set.
447         ///            If \b S==NULL, the \"data not given\" value
448         ///            will be set. If \b S==\"\" (empty string), the
449         ///            \"data not available\" value is stored.
450         /// \param[in] TName character string with tag name. If tag
451         ///            is not found, it will be added to the structure.
452         /// \param[in] NonBlankOnly flag to treat white-space-only
453         ///            strings:
454         ///   \arg \b false: set as is
455         ///   \arg \b true:  set \"data not available\" value instead.
456         void PutString   ( cpstr S, cpstr TName,
457                            bool NonBlankOnly=false );
458 
459         /// \brief Sets current date in format YYYY-MM-DD as a value
460         ///        for given tag.
461         /// \param[in] T character string with tag name. If tag
462         ///            is not found, it will be added to the structure.
463         void PutDate     ( cpstr T );
464 
465         /// \brief Sets \"data not given\" or \"data not available\"
466         ///        values for given tag.
467         /// \param[in] NoDataType can be either
468         ///   \arg \b CIF_NODATA_DOT for \"data not given\"
469         ///   \arg \b CIF_NODATA_QUESTION for \"data not available\"
470         /// \param[in] T character string with tag name. If tag
471         ///            is not found, it will be added to the structure.
472         void PutNoData   ( int NoDataType, cpstr T  );
473 
474         /// \brief Sets float-point value for given tag.
475         /// \param[in] R real number with value to be set.
476         /// \param[in] TName character string with tag name. If tag
477         ///            is not found, it will be added to the structure.
478         /// \param[in] prec float-point precision; g-format with given
479         ///            precision will be used
480         void PutReal     ( realtype R, cpstr TName, int prec=8 );
481 
482         /// \brief Sets float-point value for given tag.
483         /// \param[in] R real number with value to be set.
484         /// \param[in] TName character string with tag name. If tag
485         ///            is not found, it will be added to the structure.
486         /// \param[in] format format string to convert \b R.
487         void PutReal     ( realtype R, cpstr TName, cpstr format );
488 
489         /// \brief Sets integer value for given tag.
490         /// \param[in] I integer number with value to be set.
491         /// \param[in] TName character string with tag name. If tag
492         ///            is not found, it will be added to the structure.
493         void PutInteger  ( int      I, cpstr TName );
494 
495         /// \brief Writes structure data in mmCIF format into file.
496         /// \param[in] FName character string with file name.
497         /// \param[in] gzipMode flag to controll compression of files:
498         ///  \arg \b GZM_NONE: do not compress
499         ///  \arg \b GZM_CHECK: check file name suffix and compress
500         ///                     (or not) accordingly
501         ///  \arg \b GZM_ENFORCE_GZIP: force gzip compression despite
502         ///                     suffix
503         ///  \arg \b GZM_ENFORCE_COMPRESS: force using compress despite
504         ///                     suffix
505         ///  \return \b true: success
506         ///  \return \b false: can not open file for writing.
507         /// \remarks This function does not create a valid mmCIF file
508         /// as \"data_XXX\" record will be missing. It may be used for
509         /// debugging though.
510         bool WriteMMCIFStruct ( cpstr FName,
511                                 io::GZ_MODE gzipMode=io::GZM_CHECK );
512 
513         /// \brief Writes structure into given file.
514         /// \param f reference to MMDB's file class. The file should be
515         /// opened and closed by application.
516         /// \remarks There is a very limited use of this function on
517         /// application level. It is primarily used by mmcif::Data class.
518         void    WriteMMCIF ( io::RFile f  );
519 
520         /// \brief Deep copy of structures.
521         /// Deep copy duplicates all data and memory allocations,
522         /// producing a genuine clone of the original. Only deep copy
523         /// should be used for copying MMDB objects, a mere assignment
524         /// operator will fail you.
525         /// \param[in] Struct a pointer to mmcif::Struct, the content of
526         ///                 which is copied into 'this' structure.
527         void Copy ( PCategory Struct );
528 
529         /// \brief MMDB stream writer.
530         void write ( io::RFile f );
531 
532         /// \brief MMDB stream reader.
533         void read  ( io::RFile f );
534 
535       protected:
536         psvector field;
537 
538         void InitStruct();
539         void FreeMemory();
540 
541     };
542 
543 
544 
545     //  ======================  Loop  ==============================
546 
547     DefineClass(Loop);
548     DefineStreamFunctions(Loop);
549 
/// \brief mmcif::Loop represents mmCIF's \"loop\" category, which keeps
///        rows of data values associated with tags.
/*!
mmCIF's \"loop\" category has the following form:
\code
loop_
_loop_name.tag0   value0
_loop_name.tag1   value1
...........
_loop_name.tagN   valueN
value00 value10 ... valueN0
value01 value11 ... valueN1
...........
value0M value1M ... valueNM
\endcode
mmcif::Loop represents this construct by keeping category name
(\"_loop_name\") and associated lists of tags
(\"tag0,tag1...tagN\") and data vectors
(\"[value00...value0M],[value10...value1M]...[valueN0...valueNM]\").

The loop object is created automatically when an mmCIF file is read,
or it may be created programmatically and then pushed into file.

Access to data is provided via tags and data indexes. Internally,
all values are kept as character fields, and it is only on the
retrieval stage that they are converted to other data types
(integers, floats or strings). If conversion is not possible, an
error code is returned by the corresponding functions, which should
be checked by the application.

The following code gives an example of creating mmCIF loop category
and populating it with data:
\code
mmcif::Loop loop;
char       S[100];
int        i;

  // Specify loop name:
  loop.SetCategoryName ( "_sample_loop" );

  // Create loop structure, i.e., list of tags first:
  loop.AddLoopTag ( "id"    );
  loop.AddLoopTag ( "name"  );
  loop.AddLoopTag ( "value" );

  // Now populate it with data. This may be done in 2 ways.
  // Here is how you write loop data in stream fashion,
  // value-after-value:
  for (i=0;i<3;i++)  {
    loop.AddInteger ( i );
    sprintf ( S,"1st_way-%i",i );
    loop.AddString ( S );
    loop.AddReal ( 2.5*(i+1) );
  }

  // Here is how you populate loop data using direct-access
  // functions:
  for (i=3;i<6;i++)  {
    loop.PutReal ( 2.5*(i+1),"value",i );
    loop.PutInteger ( i,"id" );
    sprintf ( S,"2nd way: %i",i );
    loop.PutString ( S,"name" );
  }

  loop.WriteMMCIFLoop ( "sample_loop.cif" );

\endcode

The resulting file \b sample_loop.cif will contain:

\code

loop_
_sample_loop.id
_sample_loop.name
_sample_loop.value
0   1st_way-0     2.5
1   1st_way-1     5.0
2   1st_way-2     7.5
3   "2nd way: 3"  10.0
4   "2nd way: 4"  12.5
5   "2nd way: 5"  15.0

\endcode

See general principles of working with mmCIF files and mmCIF
hierarchies, as well as some code samples, in Section
\"\ref mmcif_handler\".
*/
639 
640     class Loop : public Category  {
641 
642       friend class Data;
643 
644       public :
645 
646         /// \brief Basic constructor
647         Loop ();
648 
649         /// \brief Constructor that assigns structure name.
650         /// \param[in] N structure name (must start with underscore).
651         Loop ( cpstr N );
652 
653         /// \brief Constructor for MMDB data streaming functions
654         Loop ( io::RPStream Object );
655 
656         /// \brief Destructor
657         ~Loop();
658 
659         /// \brief Adds tag to the loop.
660         /// The tag is appended to the list of existing tags. The order
661         /// of tags cannot be changed.
662         /// \param[in] T tag name
663         /// \param[in] Remove flag to remove all fields in the loop.
664         void  AddLoopTag ( cpstr T, bool Remove=true );
665 
666         /// \brief Sets string value at current loop position.
667         /// When \b mmcif::Loop::Add[Data] functions use internal loop
668         /// pointer. When category is created or cleared (by using
669         /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to
670         /// 0th row and 0th column (tag). After each call to
671         /// \b mmcif::Loop::Add[Data] function, internal pointer advances
672         /// to next column (tag), and wraps over to next row, 0th tag,
673         /// if list of tags is exhausted. Any remaining fields in last
674         /// row will be populated with \"data not given\" value.
675         /// \param[in] S character string with value to be set.
676         ///            If \b S==NULL, the \"data not given\" value
677         ///            will be set. If \b S==\"\" (empty string), the
678         ///            \"data not available\" value is stored.
679         /// \param[in] NonBlankOnly flag to treat white-space-only
680         ///            strings:
681         ///   \arg \b false: set as is
682         ///   \arg \b true:  set \"data not available\" value instead.
683         void  AddString ( cpstr S, bool NonBlankOnly=false );
684 
685         /// \brief Sets \"data not given\" or \"data not available\" at
686         ///        current loop position.
687         /// The \b mmcif::Loop::Add[Data] functions use an internal loop
688         /// pointer. When the loop is created or cleared (by using
689         /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to
690         /// 0th row and 0th column (tag). After each call to an
691         /// \b mmcif::Loop::Add[Data] function, the pointer advances
692         /// to the next column (tag), and wraps over to the next row, 0th
693         /// tag, if the list of tags is exhausted. Any remaining fields in
694         /// the last row will be populated with \"data not given\" value.
695         /// \param[in] NoDataType integer key specifying which type of
696         ///            data absence should be set as a value:
697         ///   \arg \b CIF_NODATA_DOT for \"data not given\"
698         ///   \arg \b CIF_NODATA_QUESTION for \"data not available\"
699         void  AddNoData ( int NoDataType );
700 
701         /// \brief Sets floating-point value at current loop position.
702         /// The \b mmcif::Loop::Add[Data] functions use an internal loop
703         /// pointer. When the loop is created or cleared (by using
704         /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to
705         /// 0th row and 0th column (tag). After each call to an
706         /// \b mmcif::Loop::Add[Data] function, the pointer advances
707         /// to the next column (tag), and wraps over to the next row, 0th
708         /// tag, if the list of tags is exhausted. Any remaining fields in
709         /// the last row will be populated with \"data not given\" value.
710         /// \param[in] R real number with value to be set.
711         /// \param[in] prec floating-point precision; g-format with given
712         ///            precision will be used
713         void  AddReal ( realtype R, int prec=8 );
714 
715         /// \brief Sets floating-point value at current loop position in
716         ///        given format.
717         /// The \b mmcif::Loop::Add[Data] functions use an internal loop
718         /// pointer. When the loop is created or cleared (by using
719         /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to
720         /// 0th row and 0th column (tag). After each call to an
721         /// \b mmcif::Loop::Add[Data] function, the pointer advances
722         /// to the next column (tag), and wraps over to the next row, 0th
723         /// tag, if the list of tags is exhausted. Any remaining fields in
724         /// the last row will be populated with \"data not given\" value.
725         /// The value is converted to text with the supplied format string.
726         /// \param[in] R real number with value to be set.
727         /// \param[in] format format string to convert \b R.
728         void  AddReal ( realtype R, cpstr format );
729 
730         /// \brief Sets integer value at current loop
731         ///        position.
732         /// The \b mmcif::Loop::Add[Data] functions use an internal loop
733         /// pointer. When the loop is created or cleared (by using
734         /// \b mmcif::Loop::AddLoopTag ( T,true )) the pointer is set to
735         /// 0th row and 0th column (tag). After each call to an
736         /// \b mmcif::Loop::Add[Data] function, the pointer advances
737         /// to the next column (tag), and wraps over to the next row, 0th
738         /// tag, if the list of tags is exhausted. Any remaining fields in
739         /// the last row will be populated with \"data not given\" value.
740         /// \param[in] I integer number with value to be set.
741         void  AddInteger ( int I );
742 
743         /// \brief Returns current length of the loop (i.e. the number
744         ///        of rows).
745         /// \return number of data rows in the loop (the \b nRows member).
GetLoopLength()746         int   GetLoopLength() { return nRows; }
747 
748         /// \brief Returns pointer to the string held in the field
749         ///        at the specified tag position, in the specified row.
750         /// Tag positions are defined by the order of their appearance in
751         /// mmCIF file (if loop was read from a file), or by the
752         /// order of their addition to the loop (if loop was
753         /// created programmatically).
754         /// \param[in] rowNo row number (0...GetLoopLength()-1)
755         /// \param[in] tagNo tag number (0...GetNofTags()-1)
756         /// \return \b NULL: tag or row does not exist
757         /// \return \b CIF_NODATA_DOT_FIELD the field contains
758         ///            \"data not given\" value
759         /// \return \b CIF_NODATA_QUESTION_FIELD the field contains
760         ///            \"data not available\" value
761         /// \return \b not \b NULL: string value of the field
762         /// \remarks Never try to dispose of the memory pointed to by the
763         /// return value, or your program will crash.
764         pstr  GetField ( int rowNo, int tagNo );
765 
766         /// \brief Fetches value, corresponding to the given tag, in
767         ///        the given row, as a string
768         /// \param[in,out] S pointer to string, which will point to newly
769         ///               allocated character string, containing value
770         ///               associated with tag \b TName and row \b nrow.
771         ///               If tag, row or value
772         ///               is not found, or if value corresponds to
773         ///               mmCIF's \"data not given\" or
774         ///               \"data not available\", \b S returns NULL.
775         /// \param[in] TName character string with tag name
776         /// \param[in] nrow  row number (0...GetLoopLength()-1)
777         /// \param[in] Remove flag to remove the field from
778         ///               the loop after it is read.
779         /// \return \b CIFRC_NoTag: tag is not found
780         /// \return \b CIFRC_WrongIndex: row is not found
781         /// \return \b CIFRC_NoField: value is not found
782         /// \return \b CIFRC_Ok: success. If \b S returns NULL, then
783         ///                the value corresponds to either
784         ///                \"data not available\" or
785         ///                \"data not given\".
786         /// \remarks If \b S!=NULL at time of call, the function will
787         ///  try to dispose of the string it points to. This allows a
788         ///  slick re-use of the same pointer in consecutive calls. It also
789         ///  means that one should never pass an uninitialised pointer to
790         ///  this function. Safe use assumes the following pattern:
791         ///  \code
792         ///  mmcif::Loop mmCIFLoop;
793         ///  pstr S;  // this is merely "char *S"
794         ///  int  rc;
795         ///
796         ///    S  = NULL; // null pointer before first use
797         ///    rc = mmCIFLoop.GetString ( S,"id",1 );
798         ///    if (rc)  CreateCopy ( S,"*** data not found" );
799         ///    if (!S)  CreateCopy ( S,"*** data not given or not available" );
800         ///    printf ( " rc=%i, S='%s'\n",rc,S );
801         ///
802         ///    rc = mmCIFLoop.GetString ( S,"property",0 );
803         ///    if (rc)  CreateCopy ( S,"*** data not found" );
804         ///    if (!S)  CreateCopy ( S,"*** data not given or not available" );
805         ///    printf ( " rc=%i, S='%s'\n",rc,S );
806         ///
807         ///    // etc etc etc
808         ///
809         ///    delete[] S;  // application is responsible for final
810         ///                 // disposal of memory
811         ///  \endcode
812         int   GetString ( pstr & S, cpstr TName, int nrow,
813                                            bool Remove=false );
814 
815         /// \brief Returns pointer to value associated with given tag,
816         ///        in the given row of the loop.
817         /// \param[in] TName character string with tag name
818         /// \param[in] nrow  row number (0...GetLoopLength()-1)
819         /// \param[out] RC return code:
820         ///    \arg \b CIFRC_NoTag: tag is not found
821         ///    \arg \b CIFRC_WrongIndex: row is not found
822         ///    \arg \b CIFRC_NoField: value is not found
823         ///    \arg \b CIFRC_Ok: success. If function returns NULL, then
824         ///                the value corresponds to either
825         ///                \"data not available\" or
826         ///                \"data not given\".
827         /// \return \b NULL: either tag, row or value is not found, or the
828         ///    value is \"data not available\" or \"data not given\".
829         ///    Read return code \b RC in order to interpret NULL return.
830         /// \return \b not \b NULL: pointer (\c char \c *) to value
831         ///    associated with \b TName.
832         /// \remarks Never try to dispose of the memory pointed to by the
833         /// return value, or your program will crash.
834         pstr  GetString    ( cpstr TName, int nrow, int & RC );
835 
836         /// \brief Copies value, associated with given tag,
837         ///        in the given row, into specified buffer.
838         ///  Terminating NUL character is appended.
839         /// \param[out] buf character buffer to accept the value
840         /// \param[in] maxlength maximum number of bytes to copy
841         /// \param[in] TName character string with tag name
842         /// \param[in] nrow  row number (0...GetLoopLength()-1)
843         /// \param[out] RC return code:
844         ///    \arg \b CIFRC_NoTag: tag is not found
845         ///    \arg \b CIFRC_WrongIndex: row is not found
846         ///    \arg \b CIFRC_NoField: value is not found
847         ///    \arg \b CIFRC_Ok: success.
848         /// \remarks Destination string \b buf is not modified if
849         /// \b RC!=CIFRC_Ok .
850         void  CopyString   ( pstr  buf,   int maxlength,
851                              cpstr TName, int nrow, int & RC );
852 
853         /// \brief Deletes the field associated with the given tag in
854         ///          the given row.
855         /// \param[in] TName character string with tag name
856         /// \param[in] nrow  row number (0...GetLoopLength()-1)
857         /// \return \b >=0: field deleted
858         /// \return \b <0: either field or tag is not found
859         int   DeleteField  ( cpstr TName, int nrow );
860 
861         /// \brief Deletes all fields in given row.
862         /// \param[in] nrow  row number (0...GetLoopLength()-1)
863         /// \return \b CIFRC_Ok: fields deleted
864         /// \return \b CIFRC_WrongIndex: row not found
865         /// \remarks Note that this function deletes just the fields, but
866         /// not the row. If you wish the row to be deleted, call
867         /// mmcif::Loop::Optimize() function after this one.
868         int   DeleteRow    ( int nrow );
869 
870         /// \brief Fetches value, corresponding to the given tag,
871         ///        in the given row, as a real number.
872         /// \param[out] R reference to real number to accept the value.
873         ///        In case of failure, \b R returns zero.
874         /// \param[in] TName character string with tag name
875         /// \param[in] nrow  row number (0...GetLoopLength()-1)
876         /// \param[in] Remove flag to remove the field from
877         ///               the loop after it is read.
878         /// \return \b CIFRC_NoTag: tag is not found
879         /// \return \b CIFRC_WrongIndex: row is not found
880         /// \return \b CIFRC_NoField: field is not found
881         /// \return \b CIFRC_WrongFormat: value is not a real or integer
882         ///            number.
883         /// \return \b CIFRC_NoData: value is either
884         ///            \"data not available\" or
885         ///            \"data not given\".
886         /// \return \b CIFRC_Ok: success.
887         int   GetReal ( realtype & R, cpstr TName, int nrow,
888                                            bool Remove=false );
889 
890         /// \brief Copies value, associated with given tag,
891         ///        in the given row, into specified destination as
892         ///        a real number.
893         /// \param[out] R reference to real number to accept the value
894         /// \param[in] TName character string with tag name
895         /// \param[in] nrow  row number (0...GetLoopLength()-1)
896         /// \param[out] RC return code:
897         ///    \arg \b CIFRC_NoTag: tag is not found
898         ///    \arg \b CIFRC_WrongIndex: row is not found
899         ///    \arg \b CIFRC_NoField: value is not found
900         ///    \arg \b CIFRC_Ok: success.
901         /// \remarks Destination \b R is set to 0 if \b RC!=CIFRC_Ok.
902         void  CopyReal ( realtype & R, cpstr TName, int nrow, int & RC );
903 
904         /// \brief Copies value, associated with given tag,
905         ///        in the given row, into specified destination as
906         ///        an integer number.
907         /// \param[out] I reference to integer number to accept the value
908         /// \param[in] TName character string with tag name
909         /// \param[in] nrow  row number (0...GetLoopLength()-1)
910         /// \param[out] RC return code:
911         ///    \arg \b CIFRC_NoTag: tag is not found
912         ///    \arg \b CIFRC_WrongIndex: row is not found
913         ///    \arg \b CIFRC_NoField: value is not found
914         ///    \arg \b CIFRC_Ok: success.
915         /// \remarks Destination \b I is set to 0 if \b RC!=CIFRC_Ok.
916         void  CopyInteger ( int & I, cpstr TName, int nrow, int & RC );
917 
918         /// \brief Fetches value, corresponding to the given tag,
919         ///        in the given row, as an integer number.
920         /// \param[out] I reference to integer number to accept the value.
921         ///        In case of failure, \b I returns zero.
922         /// \param[in] TName character string with tag name
923         /// \param[in] nrow  row number (0...GetLoopLength()-1)
924         /// \param[in] Remove flag to remove the field from
925         ///               the loop after it is read.
926         /// \return \b CIFRC_NoTag: tag is not found
927         /// \return \b CIFRC_WrongIndex: row is not found
928         /// \return \b CIFRC_NoField: field is not found
929         /// \return \b CIFRC_WrongFormat: value is not a real or integer
930         ///            number.
931         /// \return \b CIFRC_NoData: value is either
932         ///            \"data not available\" or
933         ///            \"data not given\".
934         /// \return \b CIFRC_Ok: success.
935         int   GetInteger   ( int & I, cpstr TName, int nrow,
936                                            bool Remove=false );
937 
938         /// \brief Fetches set of values, corresponding to the given
939         ///        tag, in the given range of rows, as a vector of
940         ///        strings.
941         /// \param[out] S reference to string vector to accept
942         ///        the values. If \b S==NULL , the vector will be
943         ///        allocated with starting index of \b i1.
944         /// \param[in] TName character string with tag name
945         /// \param[in] i1  minimum row number to fetch, the actual
946         ///            index will be calculated as \b max(0,min(i1,i2))
947         /// \param[in] i2  maximum row number to fetch, the actual
948         ///            index will be calculated as
949         ///            \b min(GetLoopLength()-1,max(i1,i2))
950         /// \param[in] Remove flag to remove fetched fields from
951         ///               the loop after they are read.
952         /// \return \b CIFRC_NoTag: tag is not found
953         /// \return \b CIFRC_WrongIndex: invalid range of rows
954         /// \return \b CIFRC_Ok: success.
955         ///
956         /// For safe use, \b S should be pre-allocated by the calling
957         /// process. Only elements \b S[i1] to \b S[i2] will contain
958         /// fetched data, others remain untouched. The calling
959         /// process is responsible for the disposal of \b S. Example:
960         /// \code
961         /// mmcif::Loop loop;
962         /// psvector   S;  // equivalent to char **S
963         /// int        i,i1,i2,rc,n;
964         ///
965         ///    // ... get loop data
966         ///
967         ///    n  = loop.GetLoopLength();
968         ///    i1 = 5;  i2 = n - 5;  // could be wrong!
969         ///
970         ///    //  allocate vector of strings
971         ///    GetVectorMemory ( S,n,0 );  // "0" for starting index
972         ///    for (i=0;i<n;i++)
973         ///      S[i] = NULL;  // initialize NULL string pointers
974         ///
975         ///    rc = loop.GetSVector ( S,"name",i1,i2 );
976         ///    printf ( " Fetched with return code rc=%i\n",rc );
977         ///        // you may want a more thorough treatment of
978         ///        // the return code here
979         ///    for (i=i1;i<=i2;i++)
980         ///      if (S[i])  printf ( " %4i. name='%s'\n",i,S[i] );
981         ///           else  printf ( " %4i. name is not available\n",i );
982         ///
983         ///    //  S[] may be re-used for as many fetches as necessary
984         ///    //  without cleaning or disposals
985         ///
986         ///    //  dispose of vector of strings
987         ///    for (i=0;i<n;i++)
988         ///      if (S[i])  delete[] S[i];
989         ///    FreeVectorMemory ( S,0 );  // "0" for starting index
990         ///
991         /// \endcode
992         int  GetSVector ( psvector & S, cpstr TName,
993                           int i1=0, int i2=MaxInt4,
994                           bool Remove=false );
995 
996         /// \brief Fetches set of values, corresponding to the given
997         ///        tag, in the given range of rows, as a vector of
998         ///        floating-point numbers.
999         /// \param[out] R reference to floating-point vector to accept
1000         ///        the values. If \b R==NULL , the vector will be
1001         ///        allocated with starting index of \b i1.
1002         /// \param[in] TName character string with tag name
1003         /// \param[in] i1  minimum row number to fetch, the actual
1004         ///            index will be calculated as \b max(0,min(i1,i2))
1005         /// \param[in] i2  maximum row number to fetch, the actual
1006         ///            index will be calculated as
1007         ///            \b min(GetLoopLength()-1,max(i1,i2))
1008         /// \param[in] Remove flag to remove fetched fields from
1009         ///               the loop after they are read.
1010         /// \return \b CIFRC_NoTag: tag is not found
1011         /// \return \b CIFRC_WrongIndex: invalid range of rows
1012         /// \return \b CIFRC_Ok: success.
1013         ///
1014         /// For safe use, \b R should be pre-allocated by the calling
1015         /// process. Only elements \b R[i1] to \b R[i2] will contain
1016         /// fetched data, others remain untouched. The calling
1017         /// process is responsible for the disposal of \b R. Example:
1018         /// \code
1019         /// mmcif::Loop loop;
1020         /// rvector    R;  // equivalent to realtype *R
1021         /// int        i,i1,i2,rc,n;
1022         ///
1023         ///    // ... get loop data
1024         ///
1025         ///    n  = loop.GetLoopLength();
1026         ///    i1 = 5;  i2 = n - 5;  // could be wrong!
1027         ///
1028         ///    //  allocate a vector of real numbers
1029         ///    GetVectorMemory ( R,n,0 );  // "0" for starting index
1030         ///    // no need to initialize unless required for the
1031         ///    // application
1032         ///
1033         ///    rc = loop.GetRVector ( R,"value",i1,i2 );
1034         ///    printf ( " Fetched with return code rc=%i\n",rc );
1035         ///        // you may want a more thorough treatment of
1036         ///        // the return code here
1037         ///    for (i=i1;i<=i2;i++)
1038         ///      printf ( " value[%4i] = %15.7g\n",i,R[i] );
1039         ///
1040         ///    //  R[] may be re-used for as many fetches as necessary
1041         ///    //  without cleaning or disposals
1042         ///
1043         ///    //  dispose of the vector
1044         ///    FreeVectorMemory ( R,0 );  // "0" for starting index
1045         ///
1046         /// \endcode
1047         int  GetRVector ( rvector  & R, cpstr TName,
1048                           int i1=0, int i2=MaxInt4,
1049                           bool Remove=false );
1050 
1051         /// \brief Fetches set of values, corresponding to the given
1052         ///        tag, in the given range of rows, as a vector of
1053         ///        integer numbers.
1054         /// \param[out] I reference to integer vector to accept
1055         ///        the values. If \b I==NULL , the vector will be
1056         ///        allocated with starting index of \b i1.
1057         /// \param[in] TName character string with tag name
1058         /// \param[in] i1  minimum row number to fetch, the actual
1059         ///            index will be calculated as \b max(0,min(i1,i2))
1060         /// \param[in] i2  maximum row number to fetch, the actual
1061         ///            index will be calculated as
1062         ///            \b min(GetLoopLength()-1,max(i1,i2))
1063         /// \param[in] Remove flag to remove fetched fields from
1064         ///               the loop after they are read.
1065         /// \return \b CIFRC_NoTag: tag is not found
1066         /// \return \b CIFRC_WrongIndex: invalid range of rows
1067         /// \return \b CIFRC_Ok: success.
1068         ///
1069         /// For safe use, \b I should be pre-allocated by the calling
1070         /// process. Only elements \b I[i1] to \b I[i2] will contain
1071         /// fetched data, others remain untouched. The calling
1072         /// process is responsible for the disposal of \b I.
1073         /// See example in mmcif::Loop::GetRVector documentation
1074         /// for details.
1075         int  GetIVector ( ivector  & I, cpstr TName,
1076                           int i1=0, int i2=MaxInt4,
1077                           bool Remove=false );
1078 
1079         /// \brief Sets string value for the given tag and row.
1080         /// \param[in] S character string with value to be set.
1081         ///            If \b S==NULL, the \"data not given\" value
1082         ///            will be set. If \b S==\"\" (empty string), the
1083         ///            \"data not available\" value is stored.
1084         /// \param[in] T character string with tag name. If the tag
1085         ///            is not found, it will be added, and all data in
1086         ///            the loop will be reindexed accordingly.
1087         /// \param[in] nrow  row number. If the row does not exist then
1088         ///            it will be created, along with all other rows
1089         ///            between GetLoopLength()-1 and \b nrow as
1090         ///            necessary. All newly created fields will be
1091         ///            initialised with \"data not given\" value.
1092         void  PutString ( cpstr S, cpstr T, int nrow );
1093 
1094         /// \brief Sets \"data not given\" or \"data not available\"
1095         ///        value for the given tag and row.
1096         /// \param[in] NoDataType can be either
1097         ///   \arg \b CIF_NODATA_DOT for \"data not given\"
1098         ///   \arg \b CIF_NODATA_QUESTION for \"data not available\"
1099         /// \param[in] T character string with tag name. If the tag
1100         ///            is not found, it will be added, and all data in
1101         ///            the loop will be reindexed accordingly.
1102         /// \param[in] nrow  row number. If the row does not exist then
1103         ///            it will be created, along with all other rows
1104         ///            between GetLoopLength()-1 and \b nrow as
1105         ///            necessary. All newly created fields will be
1106         ///            initialised with \"data not given\" value.
1107         void  PutNoData ( int NoDataType, cpstr T, int nrow );
1108 
1109         /// \brief Sets floating-point value for the given tag and row.
1110         /// \param[in] R real number with value to be set.
1111         /// \param[in] T character string with tag name. If the tag
1112         ///            is not found, it will be added, and all data in
1113         ///            the loop will be reindexed accordingly.
1114         /// \param[in] nrow  row number. If the row does not exist then
1115         ///            it will be created, along with all other rows
1116         ///            between GetLoopLength()-1 and \b nrow as
1117         ///            necessary. All newly created fields will be
1118         ///            initialised with \"data not given\" value.
1119         /// \param[in] prec floating-point precision; g-format with given
1120         ///            precision will be used
1121         void  PutReal ( realtype R, cpstr T, int nrow, int prec=8 );
1122 
1123         /// \brief Sets formatted floating-point value for the given tag and row.
1124         /// \param[in] R real number with value to be set.
1125         /// \param[in] T character string with tag name. If the tag
1126         ///            is not found, it will be added, and all data in
1127         ///            the loop will be reindexed accordingly.
1128         /// \param[in] nrow  row number. If the row does not exist then
1129         ///            it will be created, along with all other rows
1130         ///            between GetLoopLength()-1 and \b nrow as
1131         ///            necessary. All newly created fields will be
1132         ///            initialised with \"data not given\" value.
1133         /// \param[in] format format string to convert \b R.
1134         void  PutReal ( realtype R, cpstr T, int nrow, cpstr format );
1135 
1136         /// \brief Sets integer value for the given tag and row.
1137         /// \param[in] I integer number with value to be set.
1138         /// \param[in] T character string with tag name. If the tag
1139         ///            is not found, it will be added, and all data in
1140         ///            the loop will be reindexed accordingly.
1141         /// \param[in] nrow  row number. If the row does not exist then
1142         ///            it will be created, along with all other rows
1143         ///            between GetLoopLength()-1 and \b nrow as
1144         ///            necessary. All newly created fields will be
1145         ///            initialised with \"data not given\" value.
1146         void  PutInteger ( int I, cpstr T, int nrow );
1147 
1148         /// \brief Stores a vector of string values for the given tag
1149         ///        and range of rows.
1150         /// \param[in] S string vector with values to store in the loop
1151         /// \param[in] T character string with tag name. If the tag
1152         ///            is not found, it will be added, and all data in
1153         ///            the loop will be reindexed accordingly.
1154         /// \param[in] i1  minimum data index in \b S to set in the loop
1155         /// \param[in] i2  maximum data index in \b S to set in the loop.
1156         ///
1157         /// The data will be set in rows \b i1 to \b i2 (inclusive) in
1158         /// the loop. If range \b [i1,i2] is not contained in the loop,
1159         /// all missing rows will be created and initialised to
1160         /// \"data not given\" value. Example:
1161         /// \code
1162         /// mmcif::Loop loop("_sample_loop");
1163         /// pstr       S[100];
1164         /// int        i;
1165         ///
1166         ///    //  initialize vector of strings
1167         ///    for (i=0;i<100;i++)  {
1168         ///      S[i] = new char[20];
1169         ///      sprintf ( S[i],"value i=%i",i );
1170         ///    }
1171         ///
1172         ///    //  put data in loop
1173         ///    loop.PutSVector ( S,"made_up_string_value",0,99 );
1174         ///
1175         ///    //  dispose of vector of strings
1176         ///    for (i=0;i<100;i++)
1177         ///      if (S[i])  delete[] S[i];
1178         ///
1179         /// \endcode
1180         void  PutSVector   ( psvector S, cpstr T, int i1, int i2 );
1181 
1182         /// \brief Stores a vector of floating-point values for the given
1183         ///        tag and range of rows.
1184         /// \param[in] R vector of real numbers to store in the loop
1185         /// \param[in] T character string with tag name. If the tag
1186         ///            is not found, it will be added, and all data in
1187         ///            the loop will be reindexed accordingly.
1188         /// \param[in] i1  minimum data index in \b R to set in the loop
1189         /// \param[in] i2  maximum data index in \b R to set in the loop
1190         /// \param[in] prec floating-point precision; g-format with given
1191         ///            precision will be used.
1192         ///
1193         /// The data will be set in rows \b i1 to \b i2 (inclusive) in
1194         /// the loop. If range \b [i1,i2] is not contained in the loop,
1195         /// all missing rows will be created and initialised to
1196         /// \"data not given\" value.
1197         void  PutRVector   ( rvector  R, cpstr T, int i1, int i2,
1198                                                        int prec=8 );
1199 
1200         /// \brief Stores a vector of integer values for the given tag
1201         ///        and range of rows.
1202         /// \param[in] I vector of integers to store in the loop
1203         /// \param[in] T character string with tag name. If the tag
1204         ///            is not found, it will be added, and all data in
1205         ///            the loop will be reindexed accordingly.
1206         /// \param[in] i1  minimum data index in \b I to set in the loop
1207         /// \param[in] i2  maximum data index in \b I to set in the loop.
1208         ///
1209         /// The data will be set in rows \b i1 to \b i2 (inclusive) in
1210         /// the loop. If range \b [i1,i2] is not contained in the loop,
1211         /// all missing rows will be created and initialised to
1212         /// \"data not given\" value.
1213         void  PutIVector   ( ivector  I, cpstr T, int i1, int i2 );
1214 
1215         /// \brief Identifies this category as a loop: always returns \b MMCIF_Loop.
GetCategoryID()1216         MMCIF_ITEM  GetCategoryID() { return MMCIF_Loop; }
1217 
1218         /// \brief Optimizes loop for RAM and data access speed.
1219         /// Optimized data structures take less RAM and their indexes
1220         /// are sorted for quicker access. Sorting is done automatically
1221         /// as new data is added to the loop. If the loop
1222         /// is edited (fields/data removed), it may need
1223         /// optimization and re-sorting for efficiency.\n\n
1224         /// The sorting preserves the order of actual appearance of
1225         /// tags and rows in mmCIF file. If a loop is created
1226         /// programmatically, the order of tags and rows in mmCIF file
1227         /// will be the same as the order of adding them to the loop.
1228         void  Optimize();
1229 
1230         /// \brief Writes loop data in mmCIF format into file.
1231         /// \param[in] FName character string with file name.
1232         /// \param[in] gzipMode flag to control compression of files:
1233         ///  \arg \b GZM_NONE: do not compress
1234         ///  \arg \b GZM_CHECK: check file name suffix and compress
1235         ///                     (or not) accordingly
1236         ///  \arg \b GZM_ENFORCE_GZIP: force gzip compression despite
1237         ///                     suffix
1238         ///  \arg \b GZM_ENFORCE_COMPRESS: force using compress despite
1239         ///                     suffix
1240         /// \return \b true: success
1241         /// \return \b false: cannot open file for writing.
1242         /// \remarks This function does not create a valid mmCIF file
1243         /// as \"data_XXX\" record will be missing. It may be used for
1244         /// debugging though.
1245         bool WriteMMCIFLoop ( cpstr FName,
1246                               io::GZ_MODE gzipMode=io::GZM_CHECK );
1247 
1248         /// \brief Writes loop data into given file.
1249         /// \param f reference to MMDB's file class. The file should be
1250         /// opened and closed by application.
1251         /// \remarks There is a very limited use of this function on
1252         /// application level. It is primarily used by mmcif::Data class.
1253         void  WriteMMCIF ( io::RFile f );
1254 
1255         /// \brief Deep copy of loops.
1256         /// Deep copy duplicates all data and memory allocations,
1257         /// producing a genuine clone of the original. Only deep copy
1258         /// should be used for copying MMDB objects, a mere assignment
1259         /// operator will fail you.
1260         /// \param[in] Loop a pointer to mmcif::Loop, the content of
1261         ///                 which is copied into 'this' loop.
1262         void  Copy ( PCategory Loop );
1263 
1264         /// \brief MMDB stream writer.
1265         void write ( io::RFile f );
1266 
1267         /// \brief MMDB stream reader.
1268         void read  ( io::RFile f );
1269 
1270       protected:
1271         int      nRows;
1272         psmatrix field;
1273         int      iColumn,nAllocRows;
1274 
1275         void  InitLoop     ();
1276         void  FreeMemory   ();
1277         void  DeleteFields ();
1278         void  ExpandRows   ( int nRowsNew );
1279 
1280     };
1281 
1282 
1283 
1284     //  ======================  Data  =============================
1285 
1286 
/// \brief Warnings that may be issued on reading a CIF file.
///
/// Each of them actually means a CIF syntax error. Every value occupies
/// its own single bit, so several warnings can be or-ed together into
/// one integer, as returned by mmcif::Data::GetWarnings().
enum CIF_WARNING  {
  CIFW_UnrecognizedItems = 0x00000020,  ///< unrecognized items were found
  CIFW_MissingField      = 0x00000040,  ///< expected data field not found
  CIFW_EmptyLoop         = 0x00000080,  ///< loop category was defined but has no data
  CIFW_UnexpectedEOF     = 0x00000100,  ///< mmCIF construct finished prematurely
  CIFW_LoopFieldMissing  = 0x00000200,  ///< loop category has wrong number of data fields
  CIFW_NotAStructure     = 0x00000400,  ///< category once defined as a structure is used as a loop
  CIFW_NotALoop          = 0x00000800,  ///< category once defined as a loop is used as a structure
  CIFW_DuplicateTag      = 0x00001000   ///< duplicate tag was found in a structure or loop
};
1300 
/// \brief Return codes from procedures of extracting data from the
///        read CIF file.
///
/// Negative returns reflect unsuccessful and not accomplished operation.
/// The positive codes are returned by mmcif::Data::CheckData() to report
/// the type of an existing category. Values -10 and -11 are not assigned
/// in this enumeration.
enum CIF_RC  {
  CIFRC_Loop           =  2,   ///< category is present and is a loop
  CIFRC_Structure      =  1,   ///< category is present and is a structure
  CIFRC_Ok             =  0,   ///< no errors: field was found and successfully converted
  CIFRC_StructureNoTag = -1,   ///< category is a structure, but it does not have the tag
  CIFRC_LoopNoTag      = -2,   ///< category is a loop, but it does not have the tag
  CIFRC_NoCategory     = -3,   ///< category was not found
  CIFRC_WrongFormat    = -4,   ///< field was found but failed to convert due to improper numeric format
  CIFRC_NoTag          = -5,   ///< category was found, but it does not have the tag
  CIFRC_NotAStructure  = -6,   ///< category was found, but it is a loop rather than a structure
  CIFRC_NotALoop       = -7,   ///< category is not a loop
  CIFRC_WrongIndex     = -8,   ///< loop row index is out of range
  CIFRC_NoField        = -9,   ///< NOTE(review): not documented here; presumably the field holds no data - confirm
  CIFRC_Created        = -12,  ///< NOTE(review): not documented here; negative although the name suggests success - confirm
  CIFRC_CantOpenFile   = -13,  ///< NOTE(review): not documented here; presumably the file could not be opened - confirm
  CIFRC_NoDataLine     = -14,  ///< NOTE(review): not documented here; presumably no 'data_' line was found - confirm
  CIFRC_NoData         = -15   ///< NOTE(review): not documented here; presumably no data was found - confirm
};
1322 
/// \brief Functional flags that control reading of CIF files.
///
/// Each flag occupies its own single bit. Flags are switched on and off
/// with mmcif::Data::SetFlag() and mmcif::Data::RemoveFlag().
enum CIF_FLAG  {

  /// When reading CIF file, all warning messages will be printed.
  /// If the flag is off, the warnings will be bit-encoded in the
  /// return code.
  CIFFL_PrintWarnings     = 0x00000001,

  /// Reading CIF file will stop at first warning issued.
  CIFFL_StopOnWarnings    = 0x00000002,

  /// Allows reading CIF file with loops having no categories. Hidden
  /// category names will be automatically generated for internal
  /// consistency of the system. These names will not appear in output.
  /// As these names are hidden, they cannot be used to access data.
  /// It is therefore assumed that all tags in all loops without
  /// categories are unique. Simply specify "" for category when
  /// accessing such data (it cannot be accessed through mmcif::Loop,
  /// but only through mmcif::Data functions taking both Category and
  /// Tag; note that CIFFL_SuggestCategories flag must be on while
  /// accessing such data).
  CIFFL_SuggestCategories = 0x00000004,

  /// Allows for identical tags in a category (including a hidden
  /// category). Hidden suffixes to tag names will be generated for
  /// internal consistency. At present, only data for first non-unique
  /// tag may be accessed.
  CIFFL_SuggestTags       = 0x00000008
};
1361 
1362     DefineClass(Data);
1363     DefineStreamFunctions(Data);
1364 
1365 
1366     /// \brief mmcif::Data represents mmCIF's \"data\" category, which keeps
1367     ///        structures and loops and is mandatory element of mmCIF file.
1368     /*!
1369     mmCIF's \"data\" category has the following form:
1370     \code
1371     data_DataName
1372 
1373     _structure1.tag1  value1
1374     ..........
1375 
1376     loop_
1377     ..........
1378 
1379     \endcode
1380     In the above example, all structures and loops that follow \b data_
1381     keyword until next \b data_ or end of file are part of data category
1382     with name \b DataName.
1383 
1384     mmcif::Data represents this construct by keeping a list of mmcif::Struct
1385     and mmcif::Loop class instances associated with the corresponding
1386     categories in the data block.
1387 
1388     The data object is created automatically when an mmCIF file is read,
1389     or it may be created programmatically and then pushed into a file.
1390 
1391     Access to data is provided via category (structures and loops) names,
1392     tags and data indexes (in case of loops). Alternatively, pointers to
1393     contained structures and loops may be obtained first, and used for
1394     fetching data using mmcif::Struct's and mmcif::Loop's interface
1395     functions.
1396 
1397     The following code gives an example of creating mmCIF's data category
1398     and populating it:
1399     \code
1400     mmcif::Data data;
1401 
1402       // Specify data name:
1403       data.PutDataName ( "Sample_Data" );
1404 
1405       // the following statement:
1406       data.PutInteger ( 12345,"_category1","id" );
1407       // creates structure "_category1" with tag "id" and assigns it
1408       // the integer value of 12345.
1409 
1410       data.PutString ( "a name","_category1","name" );
1411 
1412       // Loops may be created quite similarly:
1413       data.PutLoopInteger ( 12345   ,"_loop1","id"  ,2 );
1414       data.PutLoopString  ( "a name","_loop1","name",0 );
1415 
1416       // push data into a file
1417       data.WriteMMCIFData ( "sample.cif" );
1418 
1419     \endcode
1420 
1421     The resulting file \b sample.cif will contain:
1422 
1423     \code
1424     data_Sample_Data
1425 
1426     _category1.id   12345
1427     _category1.name "a name"
1428 
1429     loop_
1430     _loop1.id
1431     _loop1.name
1432     .      "a name"
1433     .      .
1434     12345  .
1435     \endcode
1436 
1437     The same result may be achieved differently:
1438 
1439     \code
1440     mmcif::Data    data;
1441     mmcif::PStruct mmCIFStruct;  // equivalent to mmcif::Struct *mmCIFStruct
1442     mmcif::PLoop   mmCIFLoop;    // equivalent to mmcif::Loop   *mmCIFLoop
1443 
1444       // Specify data name:
1445       data.PutDataName ( "Sample_Data" );
1446 
1447       // create new mmCIF's structure in the data block:
1448       data.AddStructure ( "_category1",mmCIFStruct );
1449       if (mmCIFStruct)  {
1450         mmCIFStruct->PutInteger ( 12345   ,"id"   );
1451         mmCIFStruct->PutString  ( "a name","name" );
1452       }
1453 
1454       // similarly for the loop:
1455       data.AddLoop ( "_loop1",mmCIFLoop );
1456       if (mmCIFLoop)  {
1457         mmCIFLoop->PutInteger ( 12345   ,"id"  ,2 );
1458         mmCIFLoop->PutString  ( "a name","name",0 );
1459       }
1460 
1461       // push data into a file
1462       data.WriteMMCIFData ( "sample.cif" );
1463 
1464     \endcode
1465 
1466     See general principles of working with mmCIF files and mmCIF
1467     hierarchies, as well as some code samples, in Section
1468     \"\ref mmcif_handler\".
1469     */
1470 
1471     class Data : public io::Stream  {
1472 
1473       friend class File;
1474 
1475       public :
1476 
1477         /// \brief Basic constructor.
1478         Data ();
1479 
1480         /// \brief Constructor that assigns data block name.
1481         /// \param[in] N data block name.
1482         Data ( cpstr N );
1483 
1484         /// \brief Constructor for MMDB data streaming functions.
1485         Data ( io::RPStream Object );
1486 
1487         /// \brief Destructor.
1488         ~Data();
1489 
1490 
1491         // -------- General I/O functions
1492 
1493         /// \brief Sets flag to print warnings on reading mmCIF files.
1494         /// \param[in] SPW flag to print warnings:
1495         ///    \arg \b true : warnings will be printed to stdout
1496         ///    \arg \b false : warnings will not be printed but returned
1497         ///                    in return code (default)
1498         void  SetPrintWarnings ( bool SPW );
1499 
1500         /// \brief Sets flag to stop on warning when reading an mmCIF file.
1501         /// \param[in] SOW flag to stop on warning:
1502         ///    \arg \b true : reading will stop on first warning encountered
1503         ///    \arg \b false : warnings will not stop reading (default)
1504         void  SetStopOnWarning ( bool SOW );
1505 
1506         /// \brief Sets optional flag(s) for reading mmCIF files.
1507         /// By default, no flags are set.
1508         /// \param[in] F flag or logical \"or\" of several flags to be set:
1509         ///  \arg \b CIFFL_PrintWarnings  toggles printing warning messages
1510         ///               at reading an mmCIF file, in stdout. If this
1511         ///               flag is not set (default), the warnings will
1512         ///               be returned in the bit-encoded return code
1513         ///  \arg \b CIFFL_StopOnWarnings  if set, reading an mmCIF file
1514         ///               will stop at first warning issued
1515         ///  \arg \b CIFFL_SuggestCategories  allows for reading of mmCIF
1516         ///               files with loops and structures having no
1517         ///               category names (\"dirty CIFs\"). If this flag is
1518         ///               set, then hidden category names will be
1519         ///               automatically generated. These names will not
1520         ///               appear in the output. As these names are hidden,
1521         ///               they cannot be used to access data. In order to
1522         ///               access data in such categories, consider whether
1523         ///               they are structures or loops. In case of an
1524         ///               unnamed structure, simply specify \"\" (empty
1525         ///               string) for structure name in all access
1526         ///               functions ( note that \b CIFFL_SuggestCategories
1527         ///               flag must be on while accessing such data). In
1528         ///               case of a loop, first use the mmcif::Data::FindLoop
1529         ///               function to retrieve pointer on the hidden loop,
1530         ///               and then use mmcif::Loop's interface function to
1531         ///               fetch data from the loop.
1532         ///  \arg \b CIFFL_SuggestTags  allows for duplicate tags in a
1533         ///               category (structure or loop, including hidden
1534         ///               categories). This may help reading \"dirty CIFs\".
1535         ///               At present, only data for first non-unique tag
1536         ///               may be accessed.
1537         void  SetFlag ( CIF_FLAG F );
1538 
1539         /// \brief Removes optional flag(s) for reading mmCIF files.
1540         /// By default, no flags are set.
1541         /// \param[in] F flag or logical \"or\" of several flags to be
1542         ///              removed (unset):
1543         ///  \arg \b CIFFL_PrintWarnings  no warnings will be printed in
1544         ///               stdout, but rather returned in the bit-encoded
1545         ///               return code
1546         ///  \arg \b CIFFL_StopOnWarnings  warnings will not stop reading
1547         ///  \arg \b CIFFL_SuggestCategories  loops without names will
1548         ///               count as errors and stop reading
1549         ///  \arg \b CIFFL_SuggestTags  duplicate tags in structures and
1550         ///               loops will count as errors and stop reading.
1551         ///
1552         /// See more detailed flag description in mmcif::Data::SetFlag().
1553         void  RemoveFlag ( CIF_FLAG F );
1554 
1555         /// \brief Returns bit-encoded warnings issued at last file read.
1556         /// \return an integer number, which is an or-superposition of
1557         ///         warning flags:
1558         /// \arg \b CIFW_UnrecognizedItems: unrecognized items were found
1559         /// \arg \b CIFW_MissingField: expected data field not found
1560         /// \arg \b CIFW_EmptyLoop: loop category was defined but has no
1561         ///                         data
1562         /// \arg \b CIFW_UnexpectedEOF: mmCIF construct finished prematurely
1563         /// \arg \b CIFW_LoopFieldMissing: loop category has wrong number
1564         ///                         of data fields
1565         /// \arg \b CIFW_NotAStructure: attempt to use a category name,
1566         ///                         which was once defined as a structure,
1567         ///                         as a loop
1568         /// \arg \b CIFW_NotALoop: attempt to use a category name, which was
1569         ///                         once defined as a loop, as a structure
1570         /// \arg \b CIFW_DuplicateTag: duplicate tag was found in a
1571         ///                         structure or loop
GetWarnings()1572         inline int  GetWarnings() { return Warning; }
1573 
1574         /// \brief Sets category names and tags that are to be ignored
1575         ///        on file read.
1576         /// \param[in] cats list of categories, terminated by NULL
1577         /// \param[in] tags list of tags, terminated by NULL.
1578         ///
1579         /// This special function is to aid reading corrupt mmCIF files.
1580         /// The input lists should be of equal length 'n', and specify
1581         /// 'n' \"wrong fields\" that should be ignored on input. E.g.,
1582         /// ith \"wrong field\" is identified as \"cats[i].tags[i]\".
1583         /// If \"wrong field\" belongs to a loop, then all the corresponding
1584         /// column is assumed to be absent. This corrects for mmCIF errors
1585         /// when defined tags in loops or structures do not have actual data
1586         /// associated with them.
1587         ///
1588         /// In order to remove settings, call SetWrongFields(NULL,NULL).
1589         ///
1590         /// Example:
1591         /*!
1592         \code
1593         // assume data for "_category.item1" and "_category.item2"
1594         // missed in a file to-be-read
1595         mmcif::Data data;
1596         cpstr cats[] = { "_category", "_category", NULL };
1597         cpstr tags[] = { "item1"    , "item2"    , NULL };
1598 
1599            data.SetWrongFields ( cats,tags );
1600            data.ReadMMCIFData  ( "corrupt.cif" );
1601         \endcode
1602         */
1603         void  SetWrongFields ( cpstr *cats, cpstr *tags );
1604 
1605         /// \brief Reads mmCIF data block from file.
1606         /// \param FName character null-terminated string with file name
1607         /// \param gzipMode flag to read compressed files:
1608         /// \arg \b GZM_NONE: do not assume any compression
1609         /// \arg \b GZM_CHECK: check compression type by file extension
1610         /// \arg \b GZM_ENFORCE: same as \b GZM_ENFORCE_GZIP
1611         /// \arg \b GZM_ENFORCE_GZIP: assume gzip compression (*.gz files)
1612         /// \arg \b GZM_ENFORCE_COMPRESS: assume compression with 'compress'
1613         ///         (*.Z files).
1614         /// \return \b CIFRC_Ok: no errors
1615         /// \return \b negative: there were errors
1616         /// \return \b positive: there were warnings.
1617         ///
1618         /// This function will read 1st data block from the specified file.
1619         /// In case of non-zero return, use GetCIFMessage() function to
1620         /// print the corresponding error message or warning:
1621         /*!
1622         \code
1623         mmcif::Data data;
1624         char       errLog[500];
1625         int        rc;
1626            rc = data.ReadMMCIFData  ( "myfile.cif" );
1627            if (rc<0)
1628              printf ( " There was an error:\n %s\n",
1629                       GetCIFMessage(errLog,rc) );
1630            else if (rc>0)
1631              printf ( " There were warnings:\n %s\n",
1632                       GetCIFMessage(errLog,rc) );
1633            else
1634              printf ( " mmCIF file has been read in successfully.\n" );
1635         \endcode
1636         */
1637         int  ReadMMCIFData ( cpstr FName,
1638                              io::GZ_MODE gzipMode=io::GZM_CHECK );
1639 
1640         /// \brief Reads sequential mmCIF data blocks from file.
1641         /// \param f reference to an io::File object opened on a file
1642         /// \param S buffer string which represents a sliding read window.
1643         ///          The string should be at least 500 characters long,
1644         ///          initialized with empty-string value before first read,
1645         ///          and passed unchanged between the reads
1646         /// \param lcount line counter, should be set zero before first
1647         ///          read and passed unchanged between the reads.
1648         /// \return \b CIFRC_Ok: no errors
1649         /// \return \b negative: there were errors
1650         /// \return \b positive: there were warnings.
1651         ///
1652         /// This function will read 1st data block from the current position
1653         /// of the file. The function is useful if a file contains more than
1654         /// a single data block, which should be read sequentially.
1655         ///
1656         /// \note Alternatively, files with multiple data blocks can be
1657         /// read using mmcif::File class.
1658         ///
1659         /// In case of non-zero return, use GetCIFMessage() function to
1660         /// print the corresponding error message or warning:
1661         /*!
1662         \code
1663       mmcif::Data    mmCIFData;
1664       io::File      f;
1665       char          S[1000];
1666       int           rc,lcount;
1667 
1668         // open file first
1669         f.assign ( "/path/example.cif" );
1670         if (!f.reset(true))  {
1671           printf ( " *** cannot open file '%s' for reading.\n",
1672                    f.FileName() );
1673           return -1;
1674         }
1675 
1676         lcount = 0;         // global line counter through the file
1677         S[0]   = char(0);   // buffer string
1678         while (!f.FileEnd())  {
1679 
1680           rc = mmCIFData.ReadMMCIFData ( f,S,lcount );
1681 
1682           if (rc!=CIFRC_Ok)  {  // error or warning
1683             if ((rc<0) && (!f.FileEnd()))  { // error
1684               printf ( " *** error reading file %s:\n"
1685                        "     %s\n",f.FileName(),GetCIFMessage(S,rc) );
1686               return rc;
1687             } else if (rc>0)  { // warning
1688               printf ( " ... warning on reading file %s:\n"
1689                        "     %s\n",f.FileName(),GetCIFMessage(S,rc) );
1690             }
1691           } else  {
1692             // fetch needful values from the data block
1693             // ........
1694           }
1695 
1696         }
1697 
1698         f.shut();  // close file
1699 
1700         // NOTE: do not delete mmcif::Struct/mmcif::Loop
1701         // classes obtained from mmcif::Data. If you do, get a crash.
1702         // All these structures are containers that dispose their
1703         // content automatically.
1704         \endcode
1705         */
1706         int  ReadMMCIFData ( io::RFile f, pstr S, int & lcount );
1707 
1708         /// \brief Writes mmCIF data block into file.
1709         /// \param FName character null-terminated string with file name
1710         /// \param gzipMode flag to write compressed files:
1711         /// \arg \b GZM_NONE: do not compress
1712         /// \arg \b GZM_CHECK: compress according to file extension
1713         /// \arg \b GZM_ENFORCE: same as \b GZM_ENFORCE_GZIP
1714         /// \arg \b GZM_ENFORCE_GZIP: compress with gzip
1715         /// \arg \b GZM_ENFORCE_COMPRESS: compression with 'compress'.
1716         /// \return \b true: no errors
1717         /// \return \b false: file cannot be opened for writing.
1718         bool WriteMMCIFData   ( cpstr FName,
1719                                 io::GZ_MODE gzipMode=io::GZM_CHECK );
1720 
1721         /// \brief Writes (next) mmCIF data block into file.
1722         /// \param f reference to an io::File object opened on a file.
1723         ///
1724         /// This function allows for sequential write of mmCIF data blocks
1725         /// into a file.
1726         ///
1727         /// \note Alternatively, files with multiple data blocks can be
1728         /// created using mmcif::File class.
1729         ///
1730         /// Example:
1731         /*!
1732       \code
1733       io::File       f;
1734       mmcif::Data  cifData;
1735 
1736         // open file first
1737         f.assign ( "/path/example.cif" );
1738         if (!f.rewrite())  {
1739           printf ( " *** cannot open file '%s' for writing.\n",
1740                    f.FileName() );
1741           return -1;
1742         }
1743 
1744         cifData.PutDataName ( "name1" );
1745         // fill cifData with all data needed
1746         cifData.WriteMMCIF ( f ); // first data block written
1747 
1748         cifData.FreeMemory  ( 0 );  // reset data block to empty
1749         cifData.PutDataName ( "name2" );
1750         // fill cifData with all data needed
1751         cifData.WriteMMCIF ( f );  // second data block written
1752 
1753         // add as many data blocks as needed
1754 
1755         // now close the file
1756         f.shut();
1757 
1758       \endcode
1759 
1760         */
1761         void  WriteMMCIF ( io::RFile f );
1762 
1763 
1764         // -------- Retrieving data
1765 
1766         /// \brief Returns the number of categories (structures and loops)
1767         ///        in data block.
GetNumberOfCategories()1768         inline int   GetNumberOfCategories ()  { return nCategories; }
1769 
1770         /// \brief Retrieves pointer to category (a structure or a loop) by
1771         ///        category number.
1772         /// \param categoryNo category number to retrieve. Categories are
1773         ///        numbered from 0 to GetNumberOfCategories()-1.
1774         /// \return pointer to category, if \b categoryNo is in the right
1775         ///        range, or \b NULL otherwise.
1776         ///
1777         /// \note The category type (structure or loop) is returned by
1778         /// function mmcif::Category::GetCategoryID().
1779         /// \note The application should never attempt to deallocate
1780         /// the category returned. It will be properly disposed of by
1781         /// mmcif::Data's destructor.
1782         PCategory GetCategory ( int categoryNo ); // 0..nCategories-1
1783 
1784         /// \brief Retrieves mmCIF structure with given name.
1785         /// \param CName character string with name of the structure (must
1786         ///        start with underscore).
1787         /// \return pointer to structure if structure with given name was
1788         ///        found, and \b NULL otherwise.
1789         /// \note The application should never attempt to deallocate
1790         /// the structure returned. It will be properly disposed of by
1791         /// mmcif::Data's destructor.
1792         PStruct GetStructure  ( cpstr CName );
1793 
1794         /// \brief Retrieves mmCIF loop with given name.
1795         /// \param CName character string with name of the loop (must
1796         ///        start with underscore).
1797         /// \return pointer to loop if loop with given name was
1798         ///        found, and \b NULL otherwise.
1799         /// \note The application should never attempt to deallocate
1800         /// the loop returned. It will be properly disposed of by
1801         /// mmcif::Data's destructor.
1802         PLoop GetLoop ( cpstr CName );
1803 
1804         /// \brief Finds loop containing all tags from the tag list
1805         ///        provided.
1806         /// \param tagList list of tags to be looked for. The list should
1807         ///        be terminated by empty string \"\". The order of tags
1808         ///        is not significant.
1809         /// \return pointer to loop if loop with given tags was found, and
1810         ///         \b NULL otherwise.
1811         ///
1812         /// The function will look for first loop that includes all tags
1813         /// from the list. The list does not have to include all tags for
1814         /// that loop in order for function to succeed. This function is
1815         /// useful for reading \"dirty cifs\" that may contain loops without
1816         /// a name.
1817         PLoop FindLoop ( cpstr * tagList );
1818 
1819         /// \brief Retrieves data block name into dynamically-allocated
1820         ///        string.
1821         /// \param dname pointer reference to a string that accepts data
1822         ///        block name. If \b dname is not \b NULL, it is treated
1823         ///        as a pre-allocated string, which is disposed before
1824         ///        copying. The application is responsible for deallocating
1825         ///        \b dname.
1826         /// \param Remove flag to remove name from the data block.
1827         void GetDataName ( pstr & dname, bool Remove=false );
1828 
1829         /// \brief Returns data block name.
GetDataName()1830         inline pstr GetDataName()  { return name; }
1831 
1832         //   CheckData(..) returns positive value if the field is in the
1833         // file:
1834         //   CIFRC_Structure  category CName is a structure
1835         //   CIFRC_Loop       category CName is a loop
1836         // Negative returns mean:
1837         //   CIFRC_StructureNoTag  category CName is present,
1838         //                        it is a structure, but it does not
1839         //                        have tag TName
1840         //   CIFRC_LoopNoTag       category CName is present,
1841         //                        it is a loop, but it does not have
1842         //                        tag TName
1843         //   CIFRC_NoCategory      category CName is not present.
1844         // If TName is set to NULL then only the CName is checked and
1845         // possible returns are CIFRC_Structure, CIFRC_Loop and
1846         // CIFRC_NoCategory.
1847         int  CheckData       ( cpstr CName, cpstr TName );
1848 
1849         int  DeleteCategory  ( cpstr CName );
1850         int  DeleteStructure ( cpstr CName );
1851         int  DeleteLoop      ( cpstr CName );
1852 
1853         //   Optimize() optimizes the CIF data in memory allocation. It is
1854         // a good idea to call it once after extraction of data (GetXXXXXX
1855         // functions) with Remove flag set on has been completed.
1856         void Optimize();
1857 
1858         //   GetString(..), GetReal(..) and GetInteger(..) return 0 if the
1859         // requested field was found and successfully converted. Negative
1860         // returns mean:
1861         //    CIFRC_WrongFormat   the field was found but failed to convert
1862         //                        due to improper numeric format
1863         //    CIFRC_NoTag         category CName was found, but it does not
1864         //                        have tag TName
1865         //    CIFRC_NoCategory    category CName was not found
1866         //    CIFRC_NotAStructure category CName was found, but it is
1867         //                        a loop rather than a structure.
1868         //   GetString(..) will try to dispose Dest unless it is assigned
1869         // NULL value before the call. The string will be then dynamically
1870         // allocated and copied.
1871         //   If Remove is set to true, the field will be removed after
1872         // extraction.
1873         int  GetString   ( pstr & Dest, cpstr CName, cpstr TName,
1874                                         bool Remove=false );
1875         pstr GetString   ( cpstr CName, cpstr TName, int & RC );
1876         int  DeleteField ( cpstr CName, cpstr TName );
1877         int  GetReal     ( realtype & R, cpstr CName,
1878                            cpstr TName, bool Remove=false );
1879         int  GetInteger  ( int & I, cpstr CName, cpstr TName,
1880                                     bool Remove=false );
1881 
1882         //   GetLoopLength(..) returns CIFRC_NotALoop if the category CName
1883         // is not a loop, CIFRC_NoCategory if the category CName is not
1884         // found. Non-negative returns give the length of the loop (may be
1885         // 0 if the loop is empty).
1886         int  GetLoopLength ( cpstr CName );
1887 
1888         //   GetLoopString(..), GetLoopReal(..) and GetLoopInteger(..) act
1889         // like GetString(..), GetReal(..) and GetInteger(..) above for
1890         // nrow-th element of the 'loop_' (indexed like 0..N-1 where N
1891         // is obtained through GetLoopLength(..)). They will return
1892         // CIFRC_WrongIndex if nrow is out of range.
1893         //   If Remove is set to true, the field will be removed after
1894         // extraction.
1895         int  GetLoopString   ( pstr & Dest, cpstr CName,
1896                                             cpstr TName, int nrow,
1897                                             bool Remove=false );
1898         pstr GetLoopString   ( cpstr CName, cpstr TName,
1899                                int nrow, int & RC );
1900         int  DeleteLoopField ( cpstr CName, cpstr TName,
1901                                int nrow );
1902         int  GetLoopReal     ( realtype & R, cpstr CName,
1903                                              cpstr TName, int nrow,
1904                                              bool Remove=false );
1905         int  GetLoopInteger  ( int & I, cpstr CName,
1906                                         cpstr TName, int nrow,
1907                                         bool Remove=false );
1908 
1909         //   GetLoopSVector(..), GetLoopRVector(..) and GetLoopIVector(..)
1910         // read CIF 'loop_' data into allocated vectors of strings, reals
1911         // and integers, respectively. The vectors may be deallocated
1912         // prior to the call and assigned NULL, in which case they will be
1913         // allocated with offsets of i1, which is also the lower index of
1914         // the 'loop_' data transferred into them. The upper vector index is
1915         // given by i2 or by the loop's length, whichever is less. If
1916         // vectors are not assigned NULL prior to the call, it is assumed
1917         // that they are properly (i1-offset, i2-i1+1 length) allocated.
1918         //   The return codes are the same as those of GetLoopString(..),
1919         // GetLoopReal(..) and GetLoopInteger(..).
             //   NOTE(review): with the default arguments (i1=0, i2=MaxInt4)
             // the upper index is presumably always limited by the loop's
             // length, i.e. the whole column is read -- confirm that MaxInt4
             // is the largest 4-byte integer.
1920         int  GetLoopSVector ( psvector & S, cpstr CName,
1921                               cpstr TName, int i1=0, int i2=MaxInt4,
1922                               bool Remove=false );
1923         int  GetLoopRVector ( rvector  & R, cpstr CName,
1924                               cpstr TName, int i1=0, int i2=MaxInt4,
1925                               bool Remove=false );
1926         int  GetLoopIVector ( ivector  & I, cpstr CName,
1927                               cpstr TName, int i1=0, int i2=MaxInt4,
1928                               bool Remove=false );
1929 
1930 
1931         // -------- Storing data
1932 
1933         //   Unless the data are to be added to the existing CIF structure,
1934         // FreeMemory() should be called once before creating a new
1935         // CIF data set.
             //   NOTE(review): the meaning of the key argument is not
             // documented here -- confirm against the implementation.
1936         void FreeMemory ( int key );
1937 
1938         void PutDataName ( cpstr dname ); // stores name for 'data_'
1939                                           // record
1940 
1941         //   PutString(..), PutReal(..) and PutInteger(..) will put the
1942         // values given into the specified category (CName) under the
1943         // specified tag (TName). The category, tag and field are created
1944         // automatically; the field will be replaced silently if identical
1945         // CName.TName is specified in two calls. Calls of these functions
1946         // may follow in random order; however CIF file will have all tags
1947         // grouped by categories and categories will follow in the order
1948         // of first appearance in PutString(..), PutReal(..) or
1949         // PutInteger(..).
1950         //   Return code - one of CIFRC_Ok or CIFRC_NotAStruct
             //   NOTE(review): the items below are not described by the text
             // above and are inferred from names only -- confirm each:
             //     PutNoData   presumably stores a "no data" marker whose
             //                 kind is selected by NoDataType;
             //     Concatenate presumably appends S to an existing field
             //                 value instead of replacing it;
             //     PutDate     presumably stores the current date;
             //     prec        presumably the output precision used when
             //                 the real value is written.
1951         int  PutNoData   ( int NoDataType, cpstr CName,
1952                            cpstr TName );
1953         int  PutString   ( cpstr S, cpstr CName,
1954                            cpstr TName, bool Concatenate=false );
1955         int  PutDate     ( cpstr CName, cpstr TName );
1956         int  PutReal     ( realtype R, cpstr CName, cpstr TName,
1957                                        int prec=8 );
1958         int  PutInteger  ( int I, cpstr CName, cpstr TName );
1959 
1960         //   If loop category CName is not present in the CIF data
1961         // structure, AddLoop(..) creates an empty one and returns
1962         // its pointer in cifLoop. If loop category CName is already in
1963         // the CIF data structure, its pointer is returned, and any
1964         // data which might be contained in it remain untouched.
1965         //   To stuff the loop with data, first the data tags have to
1966         // be specified by calling  cifLoop->AddLoopTag(..). After all
1967         // tags are given, the data comes as a stream of calls
1968         // cifLoop->AddString(..), cifLoop->AddReal(..) and
1969         // cifLoop->AddInteger(..) which should provide data for every
1970         // tag in sequence in strictly the same order as the tags
1971         // were given. This essentially reflects reading a CIF loop
1972         // from a file.
1973         //   Alternatively, the loop data may be stored with PutLoopXXX()
1974         // functions given below, although this way may be less
1975         // efficient (but more flexible).
1976         //   AddLoop(..) may return
1977         //     CIFRC_Ok       category was present
1978         //     CIFRC_Created  category was not present but it has
1979         //                    been created; the category is empty
1980         //     CIFRC_NotALoop category was present as a structure, but
1981         //                    has been replaced by a loop;
1982         //                    the category is empty.
             //   NOTE(review): AddStructure(..) is presumably the counterpart
             // of AddLoop(..) for structure categories, returning the
             // pointer in cifStruct; its return codes are not listed here --
             // confirm against the implementation.
1983         int  AddLoop      ( cpstr CName, PLoop   & cifLoop   );
1984         int  AddStructure ( cpstr CName, PStruct & cifStruct );
1985 
1986         //   PutLoopString(..), PutLoopReal(..) and PutLoopInteger(..) act
1987         // like PutString(..), PutReal(..) and PutInteger(..) above for
1988         // nrow-th element of the 'loop_' CName (indexed beginning from 0).
1989         // In consecutive calls, given values of nrow do not have to be
1990         // ordered; the most efficient way is to start with HIGHEST value
1991         // for nrow in the loop and move down to 0. The least efficient way
1992         // is to start with nrow=0 and move up.
1993         //   These functions allow loops to be formed in an arbitrary way.
1994         //   The functions may return CIFRC_Ok or CIFRC_NotALoop.
             //   NOTE(review): PutLoopNoData(..) is not covered by the text
             // above; it presumably mirrors PutNoData(..) for row nrow --
             // confirm.
1995         int  PutLoopNoData  ( int NoDataType, cpstr CName,
1996                                               cpstr TName, int nrow );
1997         int  PutLoopString  ( cpstr S,   cpstr CName,
1998                                               cpstr TName, int nrow );
1999         int  PutLoopReal    ( realtype R, cpstr CName,
2000                                           cpstr TName, int nrow,
2001                                           int  prec=8 );
2002         int  PutLoopInteger ( int I, cpstr CName, cpstr TName,
2003                                      int nrow );
2004 
2005         //   PutLoopSVector(..), PutLoopRVector(..) and PutLoopIVector(..)
2006         // put vectors of values into specified loop fields. Parameters i1
2007         // and i2 give the range of indices of values which are to be
2008         // transferred. To transfer an entire vector allocated as [0..N-1]
2009         // i1 should be set to 0 and i2 - to N-1. Note that the loop is
2010         // always indexed as starting from 0 on, therefore negative i1 and
2011         // i2 are not allowed, and specifying i1>0 will leave first i1
2012         // elements of the CIF loop for the corresponding tag undefined
2013         // (will be output like '?').
2014         //   These functions allow loops to be formed in an arbitrary way.
             //   NOTE(review): the return codes are not stated here;
             // presumably the same as for PutLoopString(..) and friends
             // (CIFRC_Ok or CIFRC_NotALoop) -- confirm.
2015         int  PutLoopSVector ( psvector S, cpstr CName,
2016                               cpstr TName, int i1, int i2 );
2017         int  PutLoopRVector ( rvector  R, cpstr CName,
2018                               cpstr TName, int i1, int i2,
2019                               int prec=8 );
2020         int  PutLoopIVector ( ivector  I, cpstr CName,
2021                               cpstr TName, int i1, int i2 );
2022 
             //   NOTE(review): presumably renames category CName to newCName;
             // the return codes and the behaviour when newCName already
             // exists are not visible here -- confirm.
2023         int  RenameCategory ( cpstr CName, cpstr newCName );
2024 
2025         // --------
2026 
             //   NOTE(review): Copy(..) presumably makes this object a copy
             // of Data, and CopyCategory(..) presumably copies the single
             // category CName from Data, optionally under the new name
             // newCName (NULL presumably keeps the original name) -- confirm
             // both, and the CopyCategory(..) return codes, against the
             // implementation.
2027         void Copy         ( PData Data );
2028         int  CopyCategory ( PData Data, cpstr CName,
2029                                         cpstr newCName=NULL );
2030 
2031         void PrintCategories();  // for debugging only
2032 
             //   NOTE(review): write(..)/read(..) presumably store/restore the
             // object's state to/from the file f -- confirm.
2033         void write ( io::RFile f );
2034         void read  ( io::RFile f );
2035 
2036       protected:
             //   Category[] is the array searched by GetCategoryNo(..).
             //   NOTE(review): the remaining descriptions are inferred from
             // names only -- confirm each against the implementation:
             //     name          presumably the 'data_' name stored by
             //                   PutDataName(..);
             //     nCategories   presumably the number of entries in
             //                   Category[];
             //     index         presumably an index vector maintained by
             //                   Sort();
             //     flags,Warning purpose not visible here;
             //     WrongCat, WrongTag, nWrongFields
             //                   presumably parallel lists of category and
             //                   tag names (nWrongFields entries) consulted
             //                   by CheckWrongField(..) and released by
             //                   FreeWrongFields().
2037         pstr       name;
2038         int        nCategories;
2039         PPCategory Category;
2040         ivector    index;
2041         int        flags;
2042         int        Warning;
2043         int        loopNo;  // used locally for suggesting categories
2044         int        tagNo;   // used locally for suggesting tags
2045         psvector   WrongCat;
2046         psvector   WrongTag;
2047         int        nWrongFields;
2048 
2049         void  InitData        ();
2050         void  FreeWrongFields ();
2051         bool  CheckWrongField ( cpstr C, cpstr T );
2052         void  Sort            ();
2053 
2054         //   GetCategoryNo searches for index of category cname
2055         // in Category[]. Return:
2056         //    >=0 : position of the category found
2057         //     <0 : the category was not found, it could be inserted before
2058         //          (-RC-1)th element, where RC is the return value
             //   Example: RC=-1 means "not found, insert before element 0";
             // RC=-3 means "not found, insert before element 2".
             //   NOTE(review): AddCategory(..) and DeleteCategory(..) are not
             // described here; what their int results mean is not visible
             // -- confirm against the implementation.
2059         int  GetCategoryNo  ( cpstr cname );
2060         int  AddCategory    ( cpstr cname );
2061         int  DeleteCategory ( int  CatNo );
2062 
             //   NOTE(review): GetDataItem(..), GetLoop(..) and GetField(..)
             // share one parameter list and are presumably the parsing
             // helpers used while reading a CIF file from f. Judging by the
             // names, S/L/p are presumably line buffer and cursor pointers,
             // and lcount/llen a line counter and buffer length -- confirm
             // against the implementation.
2063         void GetDataItem    ( io::RFile f, pstr S, pstr & L, pstr & p,
2064                                         int & lcount, int & llen );
2065         void GetLoop        ( io::RFile f, pstr S, pstr & L, pstr & p,
2066                                         int & lcount, int & llen );
2067         int  GetField       ( io::RFile f, pstr S, pstr & L, pstr & p,
2068                                         int & lcount, int & llen );
2069 
2070     };
2071 
2072 
2073 
2074     //  ========================  File  =============================
2075 
         //   NOTE(review): these macros are defined elsewhere. DefineClass
         // presumably introduces the pointer/reference typedefs for File
         // (PFile is used by File::Copy below), and DefineStreamFunctions
         // presumably declares the stream helper functions for the class
         // -- confirm against the macro definitions.
2076     DefineClass(File);
2077     DefineStreamFunctions(File);
2078 
2079     class File : public io::Stream  {
             //   Represents an mmCIF file as a collection of data blocks
             // (objects of class Data). Blocks can be addressed by position
             // (0..nData-1) or by name.
2080 
2081       public :
             //   nData is the block count reported by GetNofData().
             //   NOTE(review): data is presumably the array of nData block
             // pointers and index presumably an index vector maintained by
             // Sort() -- confirm. All three are public, so callers can
             // modify them directly.
2082         int      nData;
2083         ivector  index;
2084         PPData   data;
2085 
             //   NOTE(review): File(FName,gzipMode) presumably reads the
             // named file on construction -- confirm. Both File(cpstr,..)
             // and File(io::RPStream) are callable with a single argument
             // and are not declared explicit, so they allow implicit
             // conversions.
2086         File ();
2087         File ( cpstr FName, io::GZ_MODE gzipMode=io::GZM_CHECK );
2088         File ( io::RPStream Object );
2089         ~File();
2090 
             //   Setters for the protected PrintWarnings and StopOnWarning
             // flags.
SetPrintWarnings(bool SPW)2091         void  SetPrintWarnings ( bool SPW ) { PrintWarnings = SPW; }
SetStopOnWarning(bool SOW)2092         void  SetStopOnWarning ( bool SOW ) { StopOnWarning = SOW; }
2093 
             //   Read/write the file FName; gzipMode defaults to
             // io::GZM_CHECK.
             //   NOTE(review): the int results are presumably CIFRC_XXX codes
             // that GetCIFMessage(..) can describe -- confirm.
2094         int   ReadMMCIFFile  ( cpstr FName,
2095                                io::GZ_MODE gzipMode=io::GZM_CHECK );
2096         int   WriteMMCIFFile ( cpstr FName,
2097                                io::GZ_MODE gzipMode=io::GZM_CHECK );
2098 
             //   GetNofData() returns nData.
             //   NOTE(review): GetNofData() is not declared const, so it
             // cannot be called on a const File. What the lookups below
             // return when the block is not found is not visible here --
             // confirm.
GetNofData()2099         int   GetNofData()  { return nData; }
2100         PData GetCIFData     ( int   dataNo );  // 0..nData-1
2101         PData GetCIFData     ( cpstr DName  );
2102         int   AddCIFData     ( cpstr DName  );
2103         int   DeleteCIFData  ( cpstr DName  );
2104         int   DeleteCIFData  ( int   dataNo );
2105         int   GetCIFDataNo   ( cpstr DName  );
2106 
2107         void  WriteMMCIF     ( io::RFile f  );
2108 
2109         void  Copy  ( PFile File );
2110 
2111         void  write ( io::RFile f );
2112         void  read  ( io::RFile f );
2113 
2114       protected:
             //   PrintWarnings and StopOnWarning are set through
             // SetPrintWarnings(..) and SetStopOnWarning(..).
             //   NOTE(review): nAllocData is presumably the allocated
             // capacity of data[], grown by ExpandData(..) -- confirm.
2115         int  nAllocData;
2116         bool PrintWarnings;
2117         bool StopOnWarning;
2118 
2119         void  InitFile   ();
2120         void  FreeMemory ();
2121         void  Sort       ();
2122         void  ExpandData ( int nDataNew );
2123 
2124     };
2125 
2126 
2127     extern pstr GetMMCIFInputBuffer ( int & LineNo );
         //   NOTE(review) on GetMMCIFInputBuffer(..) above: judging by the
         // names, it presumably returns the reader's current input buffer
         // and reports the corresponding line number in LineNo -- confirm
         // against the implementation.
2128 
2129     //  isCIF will return
2130     //    -1   if file FName does not exist
2131     //     0   if file FName is likely a CIF file ( 'data_' is present )
2132     //     1   if file FName is not a CIF file ( 'data_' is absent )
         //  Note that 0 is the "is CIF" answer, so the result must be compared
         //  with 0 explicitly rather than used as a boolean.
         //  NOTE(review): the codes are worded for the FName overload; whether
         //  the io::RFile overload can return -1 is not visible here --
         //  confirm.
2133     extern int isCIF ( cpstr FName, io::GZ_MODE gzipMode=io::GZM_CHECK );
2134     extern int isCIF ( io::RFile f );
2135 
         //  NOTE(review): GetCIFMessage(..) presumably writes a text
         //  description of the return code RC into M and returns M; the
         //  required size of M is not visible here -- confirm.
2136     pstr GetCIFMessage ( pstr M, int RC );
2137 
2138 
2139   }  // namespace mmcif
2140 
2141 }  // namespace mmdb
2142 
2143 
2144 #endif
2145 
2146 
2147