1 // -*- mode: c++; c-basic-offset:4 -*-
2
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5
6 // Copyright (c) 2011 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25 #include "config.h"
26
27 #include <signal.h>
28 #include <unistd.h>
29 #include <sys/stat.h>
30
31 #ifdef HAVE_UUID_UUID_H
32 #include <uuid/uuid.h> // used to build CID header value for data ddx
33 #elif defined(HAVE_UUID_H)
34 #include <uuid.h>
35 #else
36 #error "Could not find UUID library header"
37 #endif
38
39 #ifndef WIN32
40 #include <sys/wait.h>
41 #else
42 #include <io.h>
43 #include <fcntl.h>
44 #include <process.h>
45 #endif
46
47 #include <iostream>
48 #include <string>
49 #include <sstream>
50 #include <fstream>
51
52 #include <cstring>
53 #include <ctime>
54
55 //#define DODS_DEBUG
56
57 #include "DAS.h"
58 #include "DDS.h"
59 #include "ConstraintEvaluator.h"
60 #include "DDXParserSAX2.h"
61 #include "Ancillary.h"
62 #include "ResponseBuilder.h"
63 #include "XDRStreamMarshaller.h"
64 #include "XDRFileUnMarshaller.h"
65
66 //#include "DAPCache3.h"
67 //#include "ResponseCache.h"
68
69 #include "debug.h"
70 #include "mime_util.h" // for last_modified_time() and rfc_822_date()
71 #include "escaping.h"
72 #include "util.h"
73
74 #ifndef WIN32
75 #include "SignalHandler.h"
76 #include "EventHandler.h"
77 #include "AlarmHandler.h"
78 #endif
79
80 #define CRLF "\r\n" // Change here, expr-test.cc
81
82 using namespace std;
83 using namespace libdap;
84
85 /** Called when initializing a ResponseBuilder that's not going to be passed
86 command line arguments. */
initialize()87 void ResponseBuilder::initialize()
88 {
89 // Set default values. Don't use the C++ constructor initialization so
90 // that a subclass can have more control over this process.
91 d_dataset = "";
92 d_dap2ce = "";
93 d_dap2_btp_func_ce = "";
94 d_timeout = 0;
95
96 d_default_protocol = DAP_PROTOCOL_VERSION;
97 }
98
~ResponseBuilder()99 ResponseBuilder::~ResponseBuilder()
100 {
101 // If an alarm was registered, delete it. The register code in SignalHandler
102 // always deletes the old alarm handler object, so only the one returned by
103 // remove_handler needs to be deleted at this point.
104 delete dynamic_cast<AlarmHandler*>(SignalHandler::instance()->remove_handler(SIGALRM));
105 }
106
107 /** Return the entire constraint expression in a string. This
108 includes both the projection and selection clauses, but not the
109 question mark.
110
111 @brief Get the constraint expression.
112 @return A string object that contains the constraint expression. */
get_ce() const113 string ResponseBuilder::get_ce() const
114 {
115 return d_dap2ce;
116 }
117
118 /** Set the constraint expression. This will filter the CE text removing
119 * any 'WWW' escape characters except space. Spaces are left in the CE
120 * because the CE parser uses whitespace to delimit tokens while some
121 * datasets have identifiers that contain spaces. It's possible to use
122 * double quotes around identifiers too, but most client software doesn't
123 * know about that.
124 *
125 * @@brief Set the CE
126 * @param _ce The constraint expression
127 */
set_ce(string _ce)128 void ResponseBuilder::set_ce(string _ce)
129 {
130 d_dap2ce = www2id(_ce, "%", "%20");
131 }
132
133 /** The ``dataset name'' is the filename or other string that the
134 filter program will use to access the data. In some cases this
135 will indicate a disk file containing the data. In others, it
136 may represent a database query or some other exotic data
137 access method.
138
139 @brief Get the dataset name.
140 @return A string object that contains the name of the dataset. */
get_dataset_name() const141 string ResponseBuilder::get_dataset_name() const
142 {
143 return d_dataset;
144 }
145
146 /** Set the dataset name, which is a string used to access the dataset
147 * on the machine running the server. That is, this is typically a pathname
148 * to a data file, although it doesn't have to be. This is not
149 * echoed in error messages (because that would reveal server
150 * storage patterns that data providers might want to hide). All WWW-style
151 * escapes are replaced except for spaces.
152 *
153 * @brief Set the dataset pathname.
154 * @param ds The pathname (or equivalent) to the dataset.
155 */
set_dataset_name(const string ds)156 void ResponseBuilder::set_dataset_name(const string ds)
157 {
158 d_dataset = www2id(ds, "%", "%20");
159 }
160 #if 0
161 /** Set the server's timeout value. A value of zero (the default) means no
162 timeout.
163
164 @see To establish a timeout, call establish_timeout(ostream &)
165 @param t Server timeout in seconds. Default is zero (no timeout). */
166 void ResponseBuilder::set_timeout(int t)
167 {
168 d_timeout = t;
169 }
170
171 /** Get the server's timeout value. */
172 int ResponseBuilder::get_timeout() const
173 {
174 return d_timeout;
175 }
176
177 /** Use values of this instance to establish a timeout alarm for the server.
178 If the timeout value is zero, do nothing.
179 */
180 void ResponseBuilder::establish_timeout(ostream &stream) const
181 {
182 #ifndef WIN32
183 if (d_timeout > 0) {
184 SignalHandler *sh = SignalHandler::instance();
185 EventHandler *old_eh = sh->register_handler(SIGALRM, new AlarmHandler(stream));
186 delete old_eh;
187 alarm(d_timeout);
188 }
189 #endif
190 }
191 #endif
192
193 /**
194 * Split the CE so that the server functions that compute new values are
195 * separated into their own string and can be evaluated separately from
196 * the rest of the CE (which can contain simple and slicing projection
197 * as well as other types of function calls).
198 */
199 void
split_ce(ConstraintEvaluator & eval,const string & expr)200 ResponseBuilder::split_ce(ConstraintEvaluator &eval, const string &expr)
201 {
202 string ce;
203 if (!expr.empty())
204 ce = expr;
205 else
206 ce = d_dap2ce;
207
208 string btp_function_ce = "";
209 string::size_type pos = 0;
210 DBG(cerr << "ce: " << ce << endl);
211
212 string::size_type first_paren = ce.find("(", pos);
213 string::size_type closing_paren = ce.find(")", pos);
214 while (first_paren != string::npos && closing_paren != string::npos) {
215 // Maybe a BTP function; get the name of the potential function
216 string name = ce.substr(pos, first_paren-pos);
217 DBG(cerr << "name: " << name << endl);
218 // is this a BTP function
219 btp_func f;
220 if (eval.find_function(name, &f)) {
221 // Found a BTP function
222 if (!btp_function_ce.empty())
223 btp_function_ce += ",";
224 btp_function_ce += ce.substr(pos, closing_paren+1-pos);
225 ce.erase(pos, closing_paren+1-pos);
226 if (ce[pos] == ',')
227 ce.erase(pos, 1);
228 }
229 else {
230 pos = closing_paren + 1;
231 // exception?
232 if (pos < ce.length() && ce.at(pos) == ',')
233 ++pos;
234 }
235
236 first_paren = ce.find("(", pos);
237 closing_paren = ce.find(")", pos);
238 }
239
240 DBG(cerr << "Modified constraint: " << ce << endl);
241 DBG(cerr << "BTP Function part: " << btp_function_ce << endl);
242
243 d_dap2ce = ce;
244 d_dap2_btp_func_ce = btp_function_ce;
245 }
246
247 #if 0
248 /** This function formats and prints an ASCII representation of a
249 DAS on stdout. This has the effect of sending the DAS object
250 back to the client program.
251
252 @note This is the DAP2 attribute response.
253
254 @brief Send a DAS.
255
256 @param out The output stream to which the DAS is to be sent.
257 @param das The DAS object to be sent.
258 @param with_mime_headers If true (the default) send MIME headers.
259 @return void
260 @see DAS
261 @deprecated */
262 void ResponseBuilder::send_das(ostream &out, DAS &das, bool with_mime_headers) const
263 {
264 if (with_mime_headers)
265 set_mime_text(out, dods_das, x_plain, last_modified_time(d_dataset), "2.0");
266
267 das.print(out);
268
269 out << flush;
270 }
271
272 /** Send the DAP2 DAS response to the given stream. This version of
273 * send_das() uses the DDS object, assuming that it contains attribute
274 * information. If there is a constraint expression associated with this
275 * instance of ResponseBuilder, then it will be applied. This means
276 * that CEs that contain server functions will populate the response cache
277 * even if the server's initial request is for a DAS. This is different
278 * from the older behavior of libdap where CEs were never evaluated for
279 * the DAS response. This does not actually change the resulting DAS,
280 * just the behavior 'under the covers'.
281 *
282 * @param out Send the response to this ostream
283 * @param dds Use this DDS object
284 * @param eval A Constraint Evaluator to use for any CE bound to this
285 * ResponseBuilder instance
286 * @param constrained Should the result be constrained
287 * @param with_mime_headers Should MIME headers be sent to out?
288 */
289 void ResponseBuilder::send_das(ostream &out, DDS &dds, ConstraintEvaluator &eval, bool constrained, bool with_mime_headers)
290 {
291 // Set up the alarm.
292 establish_timeout(out);
293 dds.set_timeout(d_timeout);
294
295 if (!constrained) {
296 if (with_mime_headers)
297 set_mime_text(out, dods_das, x_plain, last_modified_time(d_dataset), "2.0");
298
299 dds.print_das(out);
300 out << flush;
301
302 return;
303 }
304
305 split_ce(eval);
306
307 // If there are functions, parse them and eval.
308 // Use that DDS and parse the non-function ce
309 // Serialize using the second ce and the second dds
310 if (!d_btp_func_ce.empty()) {
311 DDS *fdds = 0;
312 string cache_token = "";
313
314 if (responseCache()) {
315 DBG(cerr << "Using the cache for the server function CE" << endl);
316 fdds = responseCache()->read_cached_dataset(dds, d_btp_func_ce, this, &eval, cache_token);
317 }
318 else {
319 DBG(cerr << "Cache not found; (re)calculating" << endl);
320 eval.parse_constraint(d_btp_func_ce, dds);
321 fdds = eval.eval_function_clauses(dds);
322 }
323
324 if (with_mime_headers)
325 set_mime_text(out, dods_das, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
326
327 fdds->print_das(out);
328
329 if (responseCache())
330 responseCache()->unlock_and_close(cache_token);
331
332 delete fdds;
333 }
334 else {
335 DBG(cerr << "Simple constraint" << endl);
336
337 eval.parse_constraint(d_dap2ce, dds); // Throws Error if the ce doesn't parse.
338
339 if (with_mime_headers)
340 set_mime_text(out, dods_das, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
341
342 dds.print_das(out);
343 }
344
345 out << flush;
346 }
347
/** This function formats and prints an ASCII representation of a
    DDS to the given output stream. Either an entire DDS or a constrained
    DDS may be sent. This function looks in the local cache and uses a DDS
    object there if it's valid. Otherwise, if the request CE contains server
    functions that build data for the response, the resulting DDS will be
    cached.

    @brief Transmit a DDS.
    @param out The output stream to which the DDS is to be sent.
    @param dds The DDS to send back to a client.
    @param eval A reference to the ConstraintEvaluator to use.
    @param constrained If true, evaluate the constraint expression bound to
    this instance of ResponseBuilder and send the `constrained DDS' back to
    the client.
    @param with_mime_headers If true (default) send MIME headers.
    @return void
    @see DDS */
366 void ResponseBuilder::send_dds(ostream &out, DDS &dds, ConstraintEvaluator &eval, bool constrained,
367 bool with_mime_headers)
368 {
369 if (!constrained) {
370 if (with_mime_headers)
371 set_mime_text(out, dods_dds, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
372
373 dds.print(out);
374 out << flush;
375 return;
376 }
377
378 // Set up the alarm.
379 establish_timeout(out);
380 dds.set_timeout(d_timeout);
381
382 // Split constraint into two halves
383 split_ce(eval);
384
385 // If there are functions, parse them and eval.
386 // Use that DDS and parse the non-function ce
387 // Serialize using the second ce and the second dds
388 if (!d_btp_func_ce.empty()) {
389 string cache_token = "";
390 DDS *fdds = 0;
391
392 if (responseCache()) {
393 DBG(cerr << "Using the cache for the server function CE" << endl);
394 fdds = responseCache()->read_cached_dataset(dds, d_btp_func_ce, this, &eval, cache_token);
395 }
396 else {
397 DBG(cerr << "Cache not found; (re)calculating" << endl);
398 eval.parse_constraint(d_btp_func_ce, dds);
399 fdds = eval.eval_function_clauses(dds);
400 }
401
402 // Server functions might mark variables to use their read()
403 // methods. Clear that so the CE in d_dap2ce will control what is
404 // sent. If that is empty (there was only a function call) all
405 // of the variables in the intermediate DDS (i.e., the function
406 // result) will be sent.
407 fdds->mark_all(false);
408
409 eval.parse_constraint(d_dap2ce, *fdds);
410
411 if (with_mime_headers)
412 set_mime_text(out, dods_dds, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
413
414 fdds->print_constrained(out);
415
416 if (responseCache())
417 responseCache()->unlock_and_close(cache_token);
418
419 delete fdds;
420 }
421 else {
422 DBG(cerr << "Simple constraint" << endl);
423
424 eval.parse_constraint(d_dap2ce, dds); // Throws Error if the ce doesn't parse.
425
426 if (with_mime_headers)
427 set_mime_text(out, dods_dds, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
428
429 dds.print_constrained(out);
430 }
431
432 out << flush;
433 }
434 #endif
435
436 /**
437 * Build/return the BLOB part of the DAP2 data response.
438 */
dataset_constraint(ostream & out,DDS & dds,ConstraintEvaluator & eval,bool ce_eval)439 void ResponseBuilder::dataset_constraint(ostream &out, DDS & dds, ConstraintEvaluator & eval, bool ce_eval)
440 {
441 DBG(cerr << "Inside dataset_constraint" << endl);
442
443 dds.print_constrained(out);
444 out << "Data:\n";
445 out << flush;
446
447 XDRStreamMarshaller m(out);
448
449 try {
450 // Send all variables in the current projection (send_p())
451 for (DDS::Vars_iter i = dds.var_begin(); i != dds.var_end(); i++)
452 if ((*i)->send_p()) {
453 (*i)->serialize(eval, dds, m, ce_eval);
454 }
455 }
456 catch (Error & e) {
457 throw;
458 }
459 }
460
461 /** Send the data in the DDS object back to the client program. The data is
462 encoded using a Marshaller, and enclosed in a MIME document which is all sent
463 to \c data_stream.
464
465 @note This is the DAP2 data response.
466
467 @brief Transmit data.
468 @param dds A DDS object containing the data to be sent.
469 @param eval A reference to the ConstraintEvaluator to use.
470 @param data_stream Write the response to this stream.
471 @param anc_location A directory to search for ancillary files (in
472 addition to the CWD). This is used in a call to
473 get_data_last_modified_time().
474 @param with_mime_headers If true, include the MIME headers in the response.
475 Defaults to true.
476 @return void */
send_data(ostream & data_stream,DDS & dds,ConstraintEvaluator & eval,bool with_mime_headers)477 void ResponseBuilder::send_data(ostream &data_stream, DDS &dds, ConstraintEvaluator &eval, bool with_mime_headers)
478 {
479 // Split constraint into two halves
480 split_ce(eval);
481
482 // If there are functions, parse them and eval.
483 // Use that DDS and parse the non-function ce
484 // Serialize using the second ce and the second dds
485 if (!d_dap2_btp_func_ce.empty()) {
486 DBG(cerr << "Found function(s) in CE: " << d_btp_func_ce << endl);
487 string cache_token = "";
488 DDS *fdds = 0;
489
490 // The BES code caches the function result
491 eval.parse_constraint(d_dap2_btp_func_ce, dds);
492 fdds = eval.eval_function_clauses(dds);
493
494 DBG(fdds->print_constrained(cerr));
495
496 // Server functions might mark variables to use their read()
497 // methods. Clear that so the CE in d_dap2ce will control what is
498 // sent. If that is empty (there was only a function call) all
499 // of the variables in the intermediate DDS (i.e., the function
500 // result) will be sent.
501 fdds->mark_all(false);
502
503 eval.parse_constraint(d_dap2ce, *fdds);
504
505 fdds->tag_nested_sequences(); // Tag Sequences as Parent or Leaf node.
506
507 if (fdds->get_response_limit() != 0 && fdds->get_request_size(true) > fdds->get_response_limit()) {
508 string msg = "The Request for " + long_to_string(dds.get_request_size(true) / 1024)
509 + "KB is too large; requests for this user are limited to "
510 + long_to_string(dds.get_response_limit() / 1024) + "KB.";
511 throw Error(msg);
512 }
513
514 if (with_mime_headers)
515 set_mime_binary(data_stream, dods_data, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
516
517 DBG(cerr << "About to call dataset_constraint" << endl);
518 dataset_constraint(data_stream, *fdds, eval, false);
519
520 delete fdds;
521 }
522 else {
523
524 DBG(cerr << "Simple constraint" << endl);
525
526 eval.parse_constraint(d_dap2ce, dds); // Throws Error if the ce doesn't parse.
527
528 dds.tag_nested_sequences(); // Tag Sequences as Parent or Leaf node.
529
530 if (dds.get_response_limit() != 0 && dds.get_request_size(true) > dds.get_response_limit()) {
531 string msg = "The Request for " + long_to_string(dds.get_request_size(true) / 1024)
532 + "KB is too large; requests for this user are limited to "
533 + long_to_string(dds.get_response_limit() / 1024) + "KB.";
534 throw Error(msg);
535 }
536
537 if (with_mime_headers)
538 set_mime_binary(data_stream, dods_data, x_plain, last_modified_time(d_dataset), dds.get_dap_version());
539
540 dataset_constraint(data_stream, dds, eval);
541 }
542
543 data_stream << flush;
544 }
545