1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //         Dan Holloway <dan@hollywood.gso.uri.edu>
10 //         Reza Nekovei <reza@intcomm.net>
11 //
12 // This library is free software; you can redistribute it and/or
13 // modify it under the terms of the GNU Lesser General Public
14 // License as published by the Free Software Foundation; either
15 // version 2.1 of the License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
25 //
26 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
27 
28 // (c) COPYRIGHT URI/MIT 1994-1999
29 // Please read the full copyright statement in the file COPYRIGHT_URI.
30 //
31 // Authors:
32 //      jhrg,jimg       James Gallagher <jgallagher@gso.uri.edu>
33 //      dan             Dan Holloway <dan@hollywood.gso.uri.edu>
34 //      reza            Reza Nekovei <reza@intcomm.net>
35 
36 // Abstract base class for the variables in a dataset. This is used to store
37 // the type-invariant information that describes a variable as given in the
38 // DODS API.
39 //
40 // jhrg 9/6/94
41 
42 #ifndef _basetype_h
43 #define _basetype_h 1
44 
45 #include <vector>
46 #include <stack>
47 #include <iostream>
48 #include <string>
49 
50 #include "AttrTable.h"
51 
52 #include "InternalErr.h"
53 
54 #include "dods-datatypes.h"
55 #include "Type.h"
56 
57 #include "DapObj.h"
58 
59 using namespace std;
60 
61 class Crc32;
62 
63 namespace libdap
64 {
65 
// Forward declarations. Each of these types appears in the BaseType
// interface below only as a pointer or a reference, so this header does not
// need their full definitions.
class ConstraintEvaluator;

class DDS;
class Marshaller;
class UnMarshaller;

class Constructor;

class DMR;
class D4Group;
class XMLWriter;
class D4StreamMarshaller;
class D4StreamUnMarshaller;

class D4Attributes;
82 
83 /** This defines the basic data type features for the DODS data access
84     protocol (DAP) data types. All the DAP type classes (Float64, Array,
85     etc.) subclass it. This class is an abstract one; no variables will ever
86     be stored as BaseType instances, only as instances of its child classes.
87 
88     These classes and their methods give a user the capacity to set up
89     sophisticated data types. They do <i>not</i> provide sophisticated ways to
90     access and use this data. On the server side, in many cases, the class
91     instances will have no data in them at all until the
92     <tt>serialize</tt> function
93     is called to send data to the client. On the client side, most DAP
94     application programs will unpack the data promptly into whatever local
95     data structure the programmer deems the most useful.
96 
97     In order to use these classes on the server side of a DAP
98     client/server connection, you must write a <tt>read</tt> method
99     for each of the data types you expect to encounter in the
100     application. This function, whose purpose is to read data from a
101     local source into the class instance data buffer, is called in
102     <tt>serialize</tt>, when the data is about to be sent to the
103     client.  The <tt>read</tt> function may be called earlier, in the
104     case of data subset requests (constraint expressions) whose
105     evaluation requires it. (For example, the constraint expression
106     ``<tt>a,b&b>c</tt>'' requires that <tt>c</tt> be read even though
107     it will not be sent.)
108 
109     For some data types, the <tt>read</tt> function must be aware of
110     the constraints
111     to be returned. These cautions are outlined where they occur.
112 
113 	@note This class is ued by both DAP2 and DAP4.
114 
115     @brief The basic data type for the DODS DAP types.  */
116 
117 class BaseType : public DapObj
118 {
119 private:
120     string d_name;  // name of the instance
121     Type d_type;   // instance's type
122     string d_dataset; // name of the dataset used to create this BaseType
123 
124     bool d_is_read;  // true if the value has been read
125     bool d_is_send;  // Is the variable in the projection?
126 
127     // d_parent points to the Constructor or Vector which holds a particular
128     // variable. It is null for simple variables. The Vector and Constructor
129     // classes must maintain this variable.
130     BaseType *d_parent;
131 
132     // Attributes for this variable. Added 05/20/03 jhrg
133     AttrTable d_attr;
134 
135     D4Attributes *d_attributes;
136 
137     bool d_is_dap4;         // True if this is a DAP4 variable, false ... DAP2
138 
139     // These are non-empty only for DAP4 variables. Added 9/27/12 jhrg
140 
141 protected:
142     // These were/are used for DAP2 CEs, but not for DAP4 ones
143     bool d_in_selection; // Is the variable in the selection?
144     bool d_is_synthesized; // true if the variable is synthesized
145 
146     void m_duplicate(const BaseType &bt);
147 
148 public:
149     typedef stack<BaseType *> btp_stack;
150 
151     // These ctors assume is_dap4 is false
152     BaseType(const string &n, const Type &t, bool is_dap4 = false);
153     BaseType(const string &n, const string &d, const Type &t, bool is_dap4 = false);
154 
155     BaseType(const BaseType &copy_from);
156     virtual ~BaseType();
157 
158     virtual string toString();
159 
160     virtual void transform_to_dap4(D4Group *root, Constructor *container);
161     virtual std::vector<BaseType *> *transform_to_dap2(AttrTable *parent_attr_table);
162 
163     virtual void dump(ostream &strm) const ;
164 
165     BaseType &operator=(const BaseType &rhs);
166 
167     /**
168      * Remove any read or set data in the private data of the variable,
169      * setting read_p() to false. Used to clear any dynamically allocated
170      * storage that holds (potentially large) data. For the simple types,
171      * this no-op version is all that's needed. Vector and some other classes
172      * define a special version and have serialize() implementations that
173      * call it to free data as soon as possible after sending it.
174      *
175      * @note Added 7/5/15 jhrg
176      * @note Any specialization of this should make sure to reset the read_p
177      * property.
178      */
clear_local_data()179     virtual void clear_local_data() { set_read_p(false); }
180 
is_dap4()181     virtual bool is_dap4() const { return d_is_dap4; }
set_is_dap4(const bool v)182     virtual void set_is_dap4(const bool v) { d_is_dap4 = v;}
183 
184     /** Clone this instance. Allocate a new instance and copy \c *this into
185 	it. This method must perform a deep copy.
186 
187         @note This method should \e not copy data values, but must copy all
188         other fields in the object.
189 	@return A newly allocated copy of \c this. */
190     virtual BaseType *ptr_duplicate() = 0;
191 
192     virtual string name() const;
193     virtual void set_name(const string &n);
194     virtual std::string FQN() const;
195 
196     virtual Type type() const;
197     virtual void set_type(const Type &t);
198     virtual string type_name() const;
199 
200     virtual string dataset() const ;
201 
202     /**
203      * @brief How many elements are in this variable.
204      * @todo change the return type to int64_t
205      * @return The number of elements; 1 for scalars
206      */
length()207     virtual int length() const { return 1; }
208 
209     /**
210      * @brief Set the number of elements for this variable
211      * @todo change param type to int64_t
212      * @param l The number of elements
213      */
set_length(int)214     virtual void set_length(int) { }
215 
216     virtual bool is_simple_type() const;
217     virtual bool is_vector_type() const;
218     virtual bool is_constructor_type() const;
219 
220     virtual bool synthesized_p();
221     virtual void set_synthesized_p(bool state);
222 
223     virtual int element_count(bool leaves = false);
224 
225     virtual bool read_p();
226     virtual void set_read_p(bool state);
227 
228     virtual bool send_p();
229     virtual void set_send_p(bool state);
230 
231     virtual AttrTable &get_attr_table();
232     virtual void set_attr_table(const AttrTable &at);
233 
234     // DAP4 attributes
235     virtual D4Attributes *attributes();
236     virtual void set_attributes(D4Attributes *);
237     virtual void set_attributes_nocopy(D4Attributes *);
238 
239     virtual bool is_in_selection();
240     virtual void set_in_selection(bool state);
241 
242     virtual void set_parent(BaseType *parent);
243     virtual BaseType *get_parent() const;
244 
245     virtual void transfer_attributes(AttrTable *at);
246 
247     // I put this comment here because the version in BaseType.cc does not
248     // include the exact_match or s variables since they are not used. Doxygen
249     // was gaging on the comment.
250 
251     /** Returns a pointer to the contained variable in a composite class. The
252         composite classes are those made up of aggregated simple data types.
253         Array, Grid, and Structure are composite types, while Int and Float are
254         simple types. This function is only used by composite classes. The
255         BaseType implementation always returns null.
256 
257         Several of the subclasses provide alternate access methods
258         that make sense for that particular data type. For example,
259         the Array class defines a <tt>*var(int i)</tt> method that
260         returns the ith entry in the Array data, and the Structure
261         provides a <tt>*var(Vars_iter)</tt> function using a
262         pseudo-index to access the different members of the structure.
263 
264         @brief Returns a pointer to a member of a constructor class.
265         @param name The name of the class member.  Defaults to ""
266         @param exact_match True if only interested in variables whose
267         full names match \e n exactly. If false, returns the first
268         variable whose name matches \e name. For example, if \e name
269         is \c x and \c point.x is a variable, then var("x", false)
270         would return a BaseType pointer to \c point.x. If \e
271         exact_match was <tt>true</tt> then \e name would need to be \c
272         "point.x" for var to return that pointer. This feature
273         simplifies constraint expressions for datasets which have
274         complex, nested, constructor variables. Defaults to true.
275         @param s Record the path to \e name. Defaults to null, in
276         which case it is not used.
277         @return A pointer to the member named in the \e n argument. If
278         no name is given, the function returns the first (only)
279         variable. For example, an Array has only one variable, while a
280         Structure can have many. */
281     virtual BaseType *var(const string &name = "", bool exact_match = true, btp_stack *s = 0);
282     virtual BaseType *var(const string &name, btp_stack &s);
283 
284     virtual void add_var(BaseType *bt, Part part = nil);
285     virtual void add_var_nocopy(BaseType *bt, Part part = nil);
286 
287     virtual bool read();
288 
289     virtual bool check_semantics(string &msg, bool all = false);
290 
291     virtual bool ops(BaseType *b, int op);
292     virtual bool d4_ops(BaseType *b, int op);
293 
294     virtual unsigned int width(bool constrained = false) const;
295 
296     virtual void print_decl(FILE *out, string space = "    ",
297                             bool print_semi = true,
298                             bool constraint_info = false,
299                             bool constrained = false);
300 
301     virtual void print_xml(FILE *out, string space = "    ",
302                            bool constrained = false);
303 
304     virtual void print_decl(ostream &out, string space = "    ",
305                             bool print_semi = true,
306                             bool constraint_info = false,
307                             bool constrained = false);
308 
309     virtual void print_xml(ostream &out, string space = "    ",
310                            bool constrained = false);
311 
312     virtual void print_xml_writer(XMLWriter &xml, bool constrained = false);
313 
314     virtual void print_dap4(XMLWriter &xml, bool constrained = false);
315 
316     /** @name Abstract Methods */
317     //@{
318 #if 0
319     /** Return the number of bytes that are required to hold the
320 	instance's value. In the case of simple types such as Int32,
321 	this is the size of one Int32 (four bytes). For a String or
322 	Url type, <tt>width(bool constrained = false)</tt> returns the number of bytes needed
323 	for a <tt>String *</tt> variable, not the bytes needed for all
324 	the characters, since that value cannot be determined from
325 	type information alone. For Structure, and other constructor
326 	types size() returns the number of bytes needed to store
327 	pointers to the C++ objects.
328 
329 	@brief Returns the size of the class instance data. */
330     virtual unsigned int width(bool constrained = false) = 0;
331 #endif
332     /** Reads the class data into the memory referenced by <i>val</i>.
333 	The caller should either allocate enough storage to <i>val</i>
334 	to hold the class data or set \c *val to null. If <i>*val</i>
335 	is NULL, memory will be allocated by this function with
336 	<tt>new()</tt>. If the memory is allocated this way, the
337 	caller is responsible for deallocating that memory. Array and
338 	values for simple types are stored as C would store an array.
339 
340     @deprecated Use value() in the leaf classes.
341 
342 	@brief Reads the class data.
343 
344 	@param val A pointer to a pointer to the memory into which the
345 	class data will be copied. If the value pointed to is NULL,
346 	memory will be allocated to hold the data, and the pointer
347 	value modified accordingly. The calling program is responsible
348 	for deallocating the memory references by this pointer.
349 
350 	@return The size (in bytes) of the information copied to <i>val</i>.
351     */
352     virtual unsigned int buf2val(void **val) = 0;
353 
354     /** Store the value pointed to by <i>val</i> in the object's
355 	internal buffer. This function does not perform any checks, so
356 	users must be sure that the thing pointed to can actually be
357 	stored in the object's buffer.
358 
359 	Only simple objects (Int, Float, Byte, and so on) and arrays
360 	of these simple objects may be stored using this function. To
361 	put data into more complex constructor types, use the
362 	functions provided by that class.
363 
364     @deprecated Use set_value() in the leaf classes.
365 
366 	@brief Loads class data.
367 
368 	@param val A pointer to the data to be inserted into the class
369 	data buffer.
370 
371 	@param reuse A boolean value, indicating whether the class
372 	internal data storage can be reused or not. If this argument
373 	is TRUE, the class buffer is assumed to be large enough to
374 	hold the incoming data, and it is <i>not</i> reallocated. If
375 	FALSE, new storage is allocated. If the internal buffer has
376 	not been allocated at all, this argument has no effect. This
377 	is currently used only in the Vector class.
378 
379 	@return The size (in bytes) of the information copied from
380 	<i>val</i>.
381 	@see Grid
382 	@see Vector::val2buf */
383     virtual unsigned int val2buf(void *val, bool reuse = false) = 0;
384 
385     /** Similar to using serialize() and deserialize() together in one object.
386         Data are read as for serialize and those values are stored in the
387         objects as deserialize() does but does not write and then read data
388         to/from a stream.
389 
390         This method is defined by the various data type classes. It calls the
391         read() abstract method. Unlike serialize(), this method does not
392         clear the memory use to hold the data values, so the caller should
393         make sure to delete the DDS or the variable as soon as possible.
394 
395         @param eval Use this as the constraint expression evaluator.
396         @param dds The Data Descriptor Structure object corresponding
397         to this dataset. See <i>The DODS User Manual</i> for
398         information about this structure. */
399     virtual void intern_data(ConstraintEvaluator &eval, DDS &dds);
400 
401     /** Sends the data from the indicated (local) dataset through the
402 	connection identified by the Marshaller parameter. If the
403 	data is not already incorporated into the DDS object, read the
404 	data from the dataset. Once the data are sent (written to the
405 	Marshaller), they are deleted from the object and the object
406 	state is reset so that they will be read again if the read()
407 	method is called.
408 
409 	This function is only used on the server side of the
410 	client/server connection, and is generally only called from
411 	the ResponseBuilder functions. It has no BaseType
412 	implementation; each datatype child class supplies its own
413 	implementation.
414 
415 	@brief Move data to the net, then remove them from the object.
416 
417         @param eval Use this as the constraint expression evaluator.
418 	@param dds The Data Descriptor Structure object corresponding
419 	to this dataset. See <i>The DODS User Manual</i> for
420 	information about this structure.
421 	@param m A marshaller used to serialize data types
422 	@param ce_eval A boolean value indicating whether to evaluate
423 	the DODS constraint expression that may accompany this
424 	dataset. The constraint expression is stored in the <i>dds</i>.
425 	@return This method always returns true. Older versions used
426 	the return value to signal success or failure.
427 
428 	@note We changed the default behavior of this method so that it
429 	calls BaseType::clear_local_data() once the values are sent. This,
430 	combined with the behavior that read() is called by this method
431 	just before data are sent, means that data for any given variable
432 	remain in memory for the shortest time possible. Furthermore, since
433 	variables are serialized one at a time, no more than one variable's
434 	data will be in memory at any given time when using the default
435 	behavior. Some code - code that uses intern_data() or server functions -
436 	might alter this default behavior. Only Array (i.e. Vector), Sequence,
437 	D4Sequence and D4Opaque types actually hold data in dynamically allocated
438 	memory, so sonly those types have the new/changed behavior.
439 	This change was made on 7/5/15.
440 
441 	@exception InternalErr.
442 	@exception Error.
443 	@see DDS */
444     virtual bool serialize(ConstraintEvaluator &eval, DDS &dds, Marshaller &m, bool ce_eval = true);
445 
446 #if 0
447     /**
448      * Provide a way to get the old behavior of serialize() - calling this
449      * method will serialize the BaseType object's data but _not_ delete its
450      * data storage.
451      *
452      * @note This method's behavior differs only for Array (i.e. Vector), Sequence,
453      * D4Sequence and D4Opaque types; the other types do not use dynamic memory to
454      * hold data values.
455      *
456      * @param eval Use this as the constraint expression evaluator.
457      * @param dds The Data Descriptor Structure object corresponding
458      * to this dataset. See <i>The DODS User Manual</i> for
459      * information about this structure.
460      * @param m A marshaller used to serialize data types
461      * @param ce_eval A boolean value indicating whether to evaluate
462      * the DODS constraint expression that may accompany this
463      * @return This method always returns true. Older versions used
464      * the return value to signal success or failure.
465      * @param
466      */
467     virtual bool serialize_no_release(ConstraintEvaluator &eval, DDS &dds, Marshaller &m, bool ce_eval = true) {
468         return serialize(eval, dds, m, ce_eval);
469     }
470 #endif
471 
472     /**
473      * @brief include the data for this variable in the checksum
474      * DAP4 includes a checksum with every data response. This method adds the
475      * variable's data to that checksum.
476      * @param checksum A Crc32 instance that holds the current checksum.
477      */
478     virtual void compute_checksum(Crc32 &checksum) = 0;
479 
480     virtual void intern_data(/*Crc32 &checksum, DMR &dmr, ConstraintEvaluator &eval*/);
481 
482     /**
483      * @brief The DAP4 serialization method.
484      * Serialize a variable's values for DAP4. This does not write the DMR
485      * persistent representation but does write that part of the binary
486      * data blob that holds a variable's data. Once a variable's data are
487      * serialized, that memory is reclaimed (by calling BaseType::clear_local_data())
488      *
489      * @param m
490      * @param dmr
491      * @param eval
492      * @param filter True if there is one variable that should be 'filtered'
493      * @exception Error or InternalErr
494      */
495     virtual void serialize(D4StreamMarshaller &m, DMR &dmr, bool filter = false);
496 
497 #if 0
498     /**
499      * @brief Variation on the DAP4 serialization method - retain data after serialization
500      * Serialize a variable's values for DAP4. This does not write the DMR
501      * persistent representation but does write that part of the binary
502      * data blob that holds a variable's data. Once a variable's data are
503      * serialized, that memory is reclaimed (by calling BaseType::clear_local_data())
504      *
505      * @note This version does not delete the storage of Array, D4Sequence or
506      * D4Opaque variables, as it the case with serialize(). For other types,
507      * this method and serialize have the same beavior (since those types do
508      * not us dynamic memory to hold data values).
509      *
510      * @param m
511      * @param dmr
512      * @param eval
513      * @param filter True if there is one variable that should be 'filtered'
514      * @exception Error or InternalErr
515      */
516     virtual void serialize_no_release(D4StreamMarshaller &m, DMR &dmr, bool filter = false) {
517         serialize(m, dmr, filter);
518     }
519 #endif
520 
521     /** Receives data from the network connection identified by the
522 	<tt>source</tt> parameter. The data is put into the class data
523 	buffer according to the input <tt>dds</tt>.
524 
525 	This function is only used on the client side of the DODS
526 	client/server connection.
527 
528 	@brief Receive data from the net.
529 
530 	@param um An UnMarshaller that knows how to deserialize data types
531 	@param dds The Data Descriptor Structure object corresponding
532 	to this dataset. See <i>The DODS User Manual</i> for
533 	information about this structure. This would have been
534 	received from the server in an earlier transmission.
535 	@param reuse A boolean value, indicating whether the class
536 	internal data storage can be reused or not. If this argument
537 	is TRUE, the class buffer is assumed to be large enough to
538 	hold the incoming data, and it is <i>not</i> reallocated. If
539 	FALSE, new storage is allocated. If the internal buffer has
540 	not been allocated at all, this argument has no effect.
541 	@return Always returns TRUE.
542 	@exception Error when a problem reading from the UnMarshaller is
543 	found.
544 	@see DDS */
545     virtual bool deserialize(UnMarshaller &um, DDS *dds, bool reuse = false);
546 
547     /**
548      * The DAP4 deserialization method.
549      * @param um
550      * @param dmr
551      * @exception Error or InternalErr
552      */
553     virtual void deserialize(D4StreamUnMarshaller &um, DMR &dmr);
554 
555     /** Prints the value of the variable, with its declaration. This
556 	function is primarily intended for debugging DODS
557 	applications. However, it can be overloaded and used to do
558 	some useful things. Take a look at the asciival and writeval
559 	clients, both of which overload this to output the values of
560 	variables in different ways.
561 
562 	@brief Prints the value of the variable.
563 
564 	@param out The output stream on which to print the value.
565 	@param space This value is passed to the print_decl()
566 	function, and controls the leading spaces of the output.
567 	@param print_decl_p A boolean value controlling whether the
568 	variable declaration is printed as well as the value. */
569 
570     virtual void print_val(FILE *out, string space = "",
571                            bool print_decl_p = true);
572 
573     /** Prints the value of the variable, with its declaration. This
574 	function is primarily intended for debugging DODS
575 	applications. However, it can be overloaded and used to do
576 	some useful things. Take a look at the asciival and writeval
577 	clients, both of which overload this to output the values of
578 	variables in different ways.
579 
580 	@brief Prints the value of the variable.
581 
582 	@param out The output ostream on which to print the value.
583 	@param space This value is passed to the print_decl()
584 	function, and controls the leading spaces of the output.
585 	@param print_decl_p A boolean value controlling whether the
586 	variable declaration is printed as well as the value. */
587     virtual void print_val(ostream &out, string space = "",
588                            bool print_decl_p = true) = 0;
589     //@}
590 };
591 
592 } // namespace libdap
593 
594 #endif // _basetype_h
595