1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2011 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 #include <signal.h>
28 #include <unistd.h>
29 #include <sys/stat.h>
30 
31 #ifdef HAVE_UUID_UUID_H
32 #include <uuid/uuid.h>  // used to build CID header value for data ddx
33 #elif defined(HAVE_UUID_H)
34 #include <uuid.h>
35 #else
36 #error "Could not find UUID library header"
37 #endif
38 
39 #ifndef WIN32
40 #include <sys/wait.h>
41 #else
42 #include <io.h>
43 #include <fcntl.h>
44 #include <process.h>
45 #endif
46 
47 #include <iostream>
48 #include <string>
49 #include <sstream>
50 #include <fstream>
51 
52 #include <cstring>
53 #include <ctime>
54 
55 //#define DODS_DEBUG
56 
57 #include "DAS.h"
58 #include "DDS.h"
59 #include "ConstraintEvaluator.h"
60 #include "DDXParserSAX2.h"
61 #include "Ancillary.h"
62 #include "ResponseBuilder.h"
63 #include "XDRStreamMarshaller.h"
64 #include "XDRFileUnMarshaller.h"
65 
66 //#include "DAPCache3.h"
67 //#include "ResponseCache.h"
68 
69 #include "debug.h"
70 #include "mime_util.h"	// for last_modified_time() and rfc_822_date()
71 #include "escaping.h"
72 #include "util.h"
73 
74 #ifndef WIN32
75 #include "SignalHandler.h"
76 #include "EventHandler.h"
77 #include "AlarmHandler.h"
78 #endif
79 
80 #define CRLF "\r\n"             // Change here, expr-test.cc
81 
82 using namespace std;
83 using namespace libdap;
84 
85 /** Called when initializing a ResponseBuilder that's not going to be passed
86  command line arguments. */
initialize()87 void ResponseBuilder::initialize()
88 {
89     // Set default values. Don't use the C++ constructor initialization so
90     // that a subclass can have more control over this process.
91     d_dataset = "";
92     d_dap2ce = "";
93     d_dap2_btp_func_ce = "";
94     d_timeout = 0;
95 
96     d_default_protocol = DAP_PROTOCOL_VERSION;
97 }
98 
~ResponseBuilder()99 ResponseBuilder::~ResponseBuilder()
100 {
101 	// If an alarm was registered, delete it. The register code in SignalHandler
102 	// always deletes the old alarm handler object, so only the one returned by
103 	// remove_handler needs to be deleted at this point.
104 	delete dynamic_cast<AlarmHandler*>(SignalHandler::instance()->remove_handler(SIGALRM));
105 }
106 
107 /** Return the entire constraint expression in a string.  This
108  includes both the projection and selection clauses, but not the
109  question mark.
110 
111  @brief Get the constraint expression.
112  @return A string object that contains the constraint expression. */
get_ce() const113 string ResponseBuilder::get_ce() const
114 {
115     return d_dap2ce;
116 }
117 
118 /** Set the constraint expression. This will filter the CE text removing
119  * any 'WWW' escape characters except space. Spaces are left in the CE
120  * because the CE parser uses whitespace to delimit tokens while some
121  * datasets have identifiers that contain spaces. It's possible to use
122  * double quotes around identifiers too, but most client software doesn't
123  * know about that.
124  *
125  * @@brief Set the CE
126  * @param _ce The constraint expression
127  */
set_ce(string _ce)128 void ResponseBuilder::set_ce(string _ce)
129 {
130     d_dap2ce = www2id(_ce, "%", "%20");
131 }
132 
133 /** The ``dataset name'' is the filename or other string that the
134  filter program will use to access the data. In some cases this
135  will indicate a disk file containing the data.  In others, it
136  may represent a database query or some other exotic data
137  access method.
138 
139  @brief Get the dataset name.
140  @return A string object that contains the name of the dataset. */
get_dataset_name() const141 string ResponseBuilder::get_dataset_name() const
142 {
143     return d_dataset;
144 }
145 
146 /** Set the dataset name, which is a string used to access the dataset
147  * on the machine running the server. That is, this is typically a pathname
148  * to a data file, although it doesn't have to be. This is not
149  * echoed in error messages (because that would reveal server
150  * storage patterns that data providers might want to hide). All WWW-style
151  * escapes are replaced except for spaces.
152  *
153  * @brief Set the dataset pathname.
154  * @param ds The pathname (or equivalent) to the dataset.
155  */
set_dataset_name(const string ds)156 void ResponseBuilder::set_dataset_name(const string ds)
157 {
158     d_dataset = www2id(ds, "%", "%20");
159 }
160 #if 0
/** Set the server's timeout value. A value of zero (the default) means no
 timeout.

 @note This definition is inside an `#if 0` region and is currently not
 compiled.

 @see To establish a timeout, call establish_timeout(ostream &)
 @param t Server timeout in seconds. Default is zero (no timeout). */
void ResponseBuilder::set_timeout(int t)
{
    d_timeout = t;
}
170 
/** Get the server's timeout value.

 @note This definition is inside an `#if 0` region and is currently not
 compiled.

 @return The value stored in d_timeout. */
int ResponseBuilder::get_timeout() const
{
    return d_timeout;
}
176 
/** Use values of this instance to establish a timeout alarm for the server.
 If the timeout value is zero, do nothing. When WIN32 is defined the body is
 empty.

 @note This definition is inside an `#if 0` region and is currently not
 compiled.

 @param stream Passed to the constructor of the new AlarmHandler.
*/
void ResponseBuilder::establish_timeout(ostream &stream) const
{
#ifndef WIN32
    if (d_timeout > 0) {
        SignalHandler *sh = SignalHandler::instance();
        // register_handler() hands back the handler that was previously
        // registered for SIGALRM; it is deleted here.
        EventHandler *old_eh = sh->register_handler(SIGALRM, new AlarmHandler(stream));
        delete old_eh;
        // Arm the alarm with the configured number of seconds.
        alarm(d_timeout);
    }
#endif
}
191 #endif
192 
193 /**
194  *  Split the CE so that the server functions that compute new values are
195  *  separated into their own string and can be evaluated separately from
196  *  the rest of the CE (which can contain simple and slicing projection
197  *  as well as other types of function calls).
198  */
199 void
split_ce(ConstraintEvaluator & eval,const string & expr)200 ResponseBuilder::split_ce(ConstraintEvaluator &eval, const string &expr)
201 {
202     string ce;
203     if (!expr.empty())
204         ce = expr;
205     else
206         ce = d_dap2ce;
207 
208     string btp_function_ce = "";
209     string::size_type pos = 0;
210     DBG(cerr << "ce: " << ce << endl);
211 
212     string::size_type first_paren = ce.find("(", pos);
213     string::size_type closing_paren = ce.find(")", pos);
214     while (first_paren != string::npos && closing_paren != string::npos) {
215         // Maybe a BTP function; get the name of the potential function
216         string name = ce.substr(pos, first_paren-pos);
217         DBG(cerr << "name: " << name << endl);
218         // is this a BTP function
219         btp_func f;
220         if (eval.find_function(name, &f)) {
221             // Found a BTP function
222             if (!btp_function_ce.empty())
223                 btp_function_ce += ",";
224             btp_function_ce += ce.substr(pos, closing_paren+1-pos);
225             ce.erase(pos, closing_paren+1-pos);
226             if (ce[pos] == ',')
227                 ce.erase(pos, 1);
228         }
229         else {
230             pos = closing_paren + 1;
231             // exception?
232             if (pos < ce.length() && ce.at(pos) == ',')
233                 ++pos;
234         }
235 
236         first_paren = ce.find("(", pos);
237         closing_paren = ce.find(")", pos);
238     }
239 
240     DBG(cerr << "Modified constraint: " << ce << endl);
241     DBG(cerr << "BTP Function part: " << btp_function_ce << endl);
242 
243     d_dap2ce = ce;
244     d_dap2_btp_func_ce = btp_function_ce;
245 }
246 
247 #if 0
/** This function formats and prints an ASCII representation of a
 DAS on the given output stream.  This has the effect of sending the DAS
 object back to the client program.

 @note This is the DAP2 attribute response.
 @note This definition is inside an `#if 0` region and is currently not
 compiled.

 @brief Send a DAS.

 @param out The output stream to which the DAS is to be sent.
 @param das The DAS object to be sent.
 @param with_mime_headers If true, send MIME headers before the DAS.
 @return void
 @see DAS
 @deprecated */
void ResponseBuilder::send_das(ostream &out, DAS &das, bool with_mime_headers) const
{
    // The headers always advertise version "2.0" here.
    if (with_mime_headers)
        set_mime_text(out, dods_das, x_plain, last_modified_time(d_dataset), "2.0");

    das.print(out);

    out << flush;
}
271 
/** Send the DAP2 DAS response to the given stream. This version of
 * send_das() uses the DDS object, assuming that it contains attribute
 * information. If there is a constraint expression associated with this
 * instance of ResponseBuilder, then it will be applied. This means
 * that CEs that contain server functions will populate the response cache
 * even if the server's initial request is for a DAS. This is different
 * from the older behavior of libdap where CEs were never evaluated for
 * the DAS response. This does not actually change the resulting DAS,
 * just the behavior 'under the covers'.
 *
 * @note This definition is inside an `#if 0` region and is currently not
 * compiled. It refers to d_btp_func_ce and responseCache(), while the
 * compiled functions in this file use d_dap2_btp_func_ce and no cache.
 * NOTE(review): confirm those names still exist before re-enabling.
 *
 * @param out Send the response to this ostream
 * @param dds Use this DDS object
 * @param eval A Constraint Evaluator to use for any CE bound to this
 * ResponseBuilder instance
 * @param constrained Should the result be constrained
 * @param with_mime_headers Should MIME headers be sent to out?
 */
void ResponseBuilder::send_das(ostream &out, DDS &dds, ConstraintEvaluator &eval, bool constrained, bool with_mime_headers)
{
    // Set up the alarm.
    establish_timeout(out);
    dds.set_timeout(d_timeout);

    // Unconstrained: print the attributes of the DDS as is. The headers
    // advertise version "2.0" on this path only.
    if (!constrained) {
        if (with_mime_headers)
            set_mime_text(out, dods_das, x_plain, last_modified_time(d_dataset), "2.0");

        dds.print_das(out);
        out << flush;

        return;
    }

    split_ce(eval);

    // If there are functions, parse them and eval.
    // Use that DDS and parse the non-function ce
    // Serialize using the second ce and the second dds
    if (!d_btp_func_ce.empty()) {
        DDS *fdds = 0;
        string cache_token = "";

        if (responseCache()) {
            DBG(cerr << "Using the cache for the server function CE" << endl);
            fdds = responseCache()->read_cached_dataset(dds, d_btp_func_ce, this, &eval, cache_token);
        }
        else {
            DBG(cerr << "Cache not found; (re)calculating" << endl);
            eval.parse_constraint(d_btp_func_ce, dds);
            fdds = eval.eval_function_clauses(dds);
        }

        if (with_mime_headers)
            set_mime_text(out, dods_das, x_plain, last_modified_time(d_dataset), dds.get_dap_version());

        // NOTE(review): if this throws, fdds is not deleted and
        // unlock_and_close() is not called.
        fdds->print_das(out);

        if (responseCache())
        	responseCache()->unlock_and_close(cache_token);

        delete fdds;
    }
    else {
        DBG(cerr << "Simple constraint" << endl);

        eval.parse_constraint(d_dap2ce, dds); // Throws Error if the ce doesn't parse.

        if (with_mime_headers)
            set_mime_text(out, dods_das, x_plain, last_modified_time(d_dataset), dds.get_dap_version());

        dds.print_das(out);
    }

    out << flush;
}
347 
/** This function formats and prints an ASCII representation of a
 DDS on the given output stream. Either an entire DDS or a constrained DDS
 may be sent. This function looks in the local cache and uses a DDS object
 there if it's valid. Otherwise, if the request CE contains server functions
 that build data for the response, the resulting DDS will be cached.

 @note This definition is inside an `#if 0` region and is currently not
 compiled. It refers to d_btp_func_ce and responseCache(), while the
 compiled functions in this file use d_dap2_btp_func_ce and no cache.
 NOTE(review): confirm those names still exist before re-enabling.

 @brief Transmit a DDS.
 @param out The output stream to which the DDS is to be sent.
 @param dds The DDS to send back to a client.
 @param eval A reference to the ConstraintEvaluator to use.
 @param constrained If this argument is true, evaluate the
 constraint expression bound to this instance of ResponseBuilder and send
 the `constrained DDS' back to the client.
 @param with_mime_headers If true, send MIME headers before the DDS.
 @return void
 @see DDS */
void ResponseBuilder::send_dds(ostream &out, DDS &dds, ConstraintEvaluator &eval, bool constrained,
        bool with_mime_headers)
{
    // Unconstrained: print the whole DDS; no alarm is set on this path.
    if (!constrained) {
        if (with_mime_headers)
            set_mime_text(out, dods_dds, x_plain, last_modified_time(d_dataset), dds.get_dap_version());

        dds.print(out);
        out << flush;
        return;
    }

    // Set up the alarm.
    establish_timeout(out);
    dds.set_timeout(d_timeout);

    // Split constraint into two halves
    split_ce(eval);

    // If there are functions, parse them and eval.
    // Use that DDS and parse the non-function ce
    // Serialize using the second ce and the second dds
    if (!d_btp_func_ce.empty()) {
        string cache_token = "";
        DDS *fdds = 0;

        if (responseCache()) {
            DBG(cerr << "Using the cache for the server function CE" << endl);
            fdds = responseCache()->read_cached_dataset(dds, d_btp_func_ce, this, &eval, cache_token);
        }
        else {
            DBG(cerr << "Cache not found; (re)calculating" << endl);
            eval.parse_constraint(d_btp_func_ce, dds);
            fdds = eval.eval_function_clauses(dds);
        }

        // Server functions might mark variables to use their read()
        // methods. Clear that so the CE in d_dap2ce will control what is
        // sent. If that is empty (there was only a function call) all
        // of the variables in the intermediate DDS (i.e., the function
        // result) will be sent.
        fdds->mark_all(false);

        // NOTE(review): if anything from here to the delete throws, fdds is
        // not deleted and unlock_and_close() is not called.
        eval.parse_constraint(d_dap2ce, *fdds);

        if (with_mime_headers)
            set_mime_text(out, dods_dds, x_plain, last_modified_time(d_dataset), dds.get_dap_version());

        fdds->print_constrained(out);

        if (responseCache())
        	responseCache()->unlock_and_close(cache_token);

        delete fdds;
    }
    else {
        DBG(cerr << "Simple constraint" << endl);

        eval.parse_constraint(d_dap2ce, dds); // Throws Error if the ce doesn't parse.

        if (with_mime_headers)
            set_mime_text(out, dods_dds, x_plain, last_modified_time(d_dataset), dds.get_dap_version());

        dds.print_constrained(out);
    }

    out << flush;
}
434 #endif
435 
436 /**
437  * Build/return the BLOB part of the DAP2 data response.
438  */
dataset_constraint(ostream & out,DDS & dds,ConstraintEvaluator & eval,bool ce_eval)439 void ResponseBuilder::dataset_constraint(ostream &out, DDS & dds, ConstraintEvaluator & eval, bool ce_eval)
440 {
441     DBG(cerr << "Inside dataset_constraint" << endl);
442 
443     dds.print_constrained(out);
444     out << "Data:\n";
445     out << flush;
446 
447     XDRStreamMarshaller m(out);
448 
449     try {
450         // Send all variables in the current projection (send_p())
451         for (DDS::Vars_iter i = dds.var_begin(); i != dds.var_end(); i++)
452             if ((*i)->send_p()) {
453                 (*i)->serialize(eval, dds, m, ce_eval);
454             }
455     }
456     catch (Error & e) {
457         throw;
458     }
459 }
460 
461 /** Send the data in the DDS object back to the client program. The data is
462  encoded using a Marshaller, and enclosed in a MIME document which is all sent
463  to \c data_stream.
464 
465  @note This is the DAP2 data response.
466 
467  @brief Transmit data.
468  @param dds A DDS object containing the data to be sent.
469  @param eval A reference to the ConstraintEvaluator to use.
470  @param data_stream Write the response to this stream.
471  @param anc_location A directory to search for ancillary files (in
472  addition to the CWD).  This is used in a call to
473  get_data_last_modified_time().
474  @param with_mime_headers If true, include the MIME headers in the response.
475  Defaults to true.
476  @return void */
send_data(ostream & data_stream,DDS & dds,ConstraintEvaluator & eval,bool with_mime_headers)477 void ResponseBuilder::send_data(ostream &data_stream, DDS &dds, ConstraintEvaluator &eval, bool with_mime_headers)
478 {
479     // Split constraint into two halves
480     split_ce(eval);
481 
482     // If there are functions, parse them and eval.
483     // Use that DDS and parse the non-function ce
484     // Serialize using the second ce and the second dds
485     if (!d_dap2_btp_func_ce.empty()) {
486         DBG(cerr << "Found function(s) in CE: " << d_btp_func_ce << endl);
487         string cache_token = "";
488         DDS *fdds = 0;
489 
490         // The BES code caches the function result
491             eval.parse_constraint(d_dap2_btp_func_ce, dds);
492             fdds = eval.eval_function_clauses(dds);
493 
494         DBG(fdds->print_constrained(cerr));
495 
496         // Server functions might mark variables to use their read()
497         // methods. Clear that so the CE in d_dap2ce will control what is
498         // sent. If that is empty (there was only a function call) all
499         // of the variables in the intermediate DDS (i.e., the function
500         // result) will be sent.
501         fdds->mark_all(false);
502 
503         eval.parse_constraint(d_dap2ce, *fdds);
504 
505         fdds->tag_nested_sequences(); // Tag Sequences as Parent or Leaf node.
506 
507         if (fdds->get_response_limit() != 0 && fdds->get_request_size(true) > fdds->get_response_limit()) {
508             string msg = "The Request for " + long_to_string(dds.get_request_size(true) / 1024)
509                     + "KB is too large; requests for this user are limited to "
510                     + long_to_string(dds.get_response_limit() / 1024) + "KB.";
511             throw Error(msg);
512         }
513 
514         if (with_mime_headers)
515             set_mime_binary(data_stream, dods_data, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
516 
517         DBG(cerr << "About to call dataset_constraint" << endl);
518         dataset_constraint(data_stream, *fdds, eval, false);
519 
520         delete fdds;
521     }
522     else {
523 
524 	DBG(cerr << "Simple constraint" << endl);
525 
526 	eval.parse_constraint(d_dap2ce, dds); // Throws Error if the ce doesn't parse.
527 
528 	dds.tag_nested_sequences(); // Tag Sequences as Parent or Leaf node.
529 
530 	if (dds.get_response_limit() != 0 && dds.get_request_size(true) > dds.get_response_limit()) {
531 		string msg = "The Request for " + long_to_string(dds.get_request_size(true) / 1024)
532 				+ "KB is too large; requests for this user are limited to "
533 				+ long_to_string(dds.get_response_limit() / 1024) + "KB.";
534 		throw Error(msg);
535 	}
536 
537 	if (with_mime_headers)
538 		set_mime_binary(data_stream, dods_data, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
539 
540 	dataset_constraint(data_stream, dds, eval);
541     }
542 
543 	data_stream << flush;
544 }
545