1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2002,2003 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include <string>
26 #include <sstream>
27 #include <iterator>
28 
29 //#define DODS_DEBUG
30 
31 #include "D4CEScanner.h"
32 #include "D4ConstraintEvaluator.h"
33 #include "d4_ce_parser.tab.hh"
34 
35 #include "DMR.h"
36 #include "D4Group.h"
37 #include "D4Dimensions.h"
38 #include "D4Maps.h"
39 #include "BaseType.h"
40 #include "Array.h"
41 #include "Constructor.h"
42 #include "D4Sequence.h"
43 
44 #include "D4RValue.h"
45 #include "D4FilterClause.h"
46 
47 #include "escaping.h"
48 #include "parser.h"		// for get_ull()
49 #include "debug.h"
50 
51 // Always define this for a production release.
52 #define PREVENT_XXS_VIA_CE 1
53 #if NDEBUG && !PREVENT_XXS_VIA_CE
54 #error("Never release libdap with PREVENT_XXS_VIA_CE turned off")
55 #endif
56 
57 namespace libdap {
58 
parse(const std::string & expr)59 bool D4ConstraintEvaluator::parse(const std::string &expr)
60 {
61 
62     d_expr = expr;	// set for error messages. See the %initial-action section of .yy
63 
64     DBG(cerr << "Entering D4ConstraintEvaluator::parse: "  << endl);
65     std::istringstream iss(expr);
66     D4CEScanner scanner(iss);
67     D4CEParser parser(scanner, *this /* driver */);
68 
69     if (trace_parsing()) {
70         parser.set_debug_level(1);
71         parser.set_debug_stream(std::cerr);
72     }
73 
74     if(expr.empty())
75         d_dmr->set_ce_empty(true);
76 
77     return parser.parse() == 0;
78 }
79 
80 /**
81  * print an error message. If PREVENT_XXS_VIA_CE is true (it should be), then
82  * id won't be printed. The value of 'ident' is a literal that identifies the
83  * parse rule to help locate the source of the error.
84  * @param ident
85  */
throw_not_found(const string &,const string &)86 void D4ConstraintEvaluator::throw_not_found(const string &/* id */, const string &/* ident */)
87 {
88 #if PREVENT_XXS_VIA_CE
89     throw Error(no_such_variable, string("The constraint expression referenced a variable that was not found in the dataset."));
90 #else
91     throw Error(no_such_variable, d_expr + ": The variable " + id + " was not found in the dataset (" + ident + ").");
92 #endif
93 }
94 
throw_not_array(const string &,const string &)95 void D4ConstraintEvaluator::throw_not_array(const string &/* id */, const string &/* ident */)
96 {
97 #if PREVENT_XXS_VIA_CE
98     throw Error(no_such_variable, string("The constraint expression referenced an Array that was not found in the dataset."));
99 #else
100     throw Error(no_such_variable, d_expr + ": The variable '" + id + "' is not an Array variable (" + ident + ").");
101 #endif
102 }
103 
search_for_and_mark_arrays(BaseType * btp)104 void D4ConstraintEvaluator::search_for_and_mark_arrays(BaseType *btp)
105 {
106     DBG(cerr << "Entering D4ConstraintEvaluator::search_for_and_mark_arrays...(" << btp->name() << ")" << endl);
107 
108     assert(btp->is_constructor_type());
109 
110     Constructor *ctor = static_cast<Constructor*>(btp);
111     for (Constructor::Vars_iter i = ctor->var_begin(), e = ctor->var_end(); i != e; ++i) {
112         switch ((*i)->type()) {
113         case dods_array_c:
114             DBG(cerr << "Found an array: " << (*i)->name() << endl);
115             mark_array_variable(*i);
116             break;
117         case dods_structure_c:
118         case dods_sequence_c:
119             DBG(cerr << "Found a ctor: " << (*i)->name() << endl);
120             search_for_and_mark_arrays(*i);
121             break;
122         default:
123             break;
124         }
125     }
126 }
127 
128 /**
129  * When an identifier is used in a CE, is becomes part of the 'current projection,'
130  * which means it is part of the set of variables to be sent back to the client. This
131  * method sets a flag in the variable (send_p; pronounced 'send predicate') indicating that.
132  *
133  * @note This will check if the variable is an array and set it's slices accordingly
134  * @param btp BaseType pointer to the variable. Must be non-null
135  * @return The BaseType* to the variable; the send_p flag is set as a side effect.
136  */
137 BaseType *
mark_variable(BaseType * btp)138 D4ConstraintEvaluator::mark_variable(BaseType *btp)
139 {
140     assert(btp);
141 
142     DBG(cerr << "In D4ConstraintEvaluator::mark_variable... (" << btp->name() << "; " << btp->type_name() << ")" << endl);
143 
144     btp->set_send_p(true);
145 
146     if (btp->type() == dods_array_c) {
147         mark_array_variable(btp);
148     }
149 
150     // Test for Constructors and marks arrays they contain
151     if (btp->is_constructor_type()) {
152         search_for_and_mark_arrays(btp);
153     }
154     else if (btp->type() == dods_array_c && btp->var() && btp->var()->is_constructor_type()) {
155         search_for_and_mark_arrays(btp->var());
156     }
157 
158     // Now set the parent variables
159     BaseType *parent = btp->get_parent();
160     while (parent) {
161         parent->BaseType::set_send_p(true); // Just set the parent using BaseType's impl.
162         parent = parent->get_parent();
163     }
164 
165     return btp;
166 }
167 
array_uses_shared_dimension(Array * map,D4Dimension * source_dim)168 static bool array_uses_shared_dimension(Array *map, D4Dimension *source_dim)
169 {
170     for (Array::Dim_iter d = map->dim_begin(), e = map->dim_end(); d != e; ++d) {
171         if (source_dim->name() == (*d).name) return true;
172     }
173 
174     return false;
175 }
176 
177 /**
178  * Add an array to the current projection with slicing. Calling this method will result
179  * in the array being returned with anonymous dimensions.
180  *
181  * @note If btp is an array that has shared dimensions and uses '[]' where a shared dimension
182  * is found and if that shared dimension has been sliced, then the slice is used as the array's
183  * slice for that dimension (there must be an easier way to explain that...)
184  *
185  * @param btp
186  * @return The BaseType* to the Array variable; the send_p and slicing information is
187  * set as a side effect.
188  */
189 
190 // Note: If a Map is not part of the current projection, do not include mention of it
191 // in the response DMR (CDMR)
192 BaseType *
mark_array_variable(BaseType * btp)193 D4ConstraintEvaluator::mark_array_variable(BaseType *btp)
194 {
195     assert(btp->type() == dods_array_c);
196 
197     Array *a = static_cast<Array*>(btp);
198 
199     // If an array appears in a CE without the slicing operators ([]) we still have to
200     // call add_constraint(...) for all of it's sdims for them to appear in
201     // the Constrained DMR.
202     if (d_indexes.empty()) {
203         for (Array::Dim_iter d = a->dim_begin(), de = a->dim_end(); d != de; ++d) {
204             D4Dimension *dim = a->dimension_D4dim(d);
205             if (dim) {
206                 a->add_constraint(d, dim);
207             }
208         }
209     }
210     else {
211         // Test that the indexes and dimensions match in number
212         if (d_indexes.size() != a->dimensions())
213             throw Error(malformed_expr, "The index constraint for '" + btp->name() + "' does not match its rank.");
214 
215         Array::Dim_iter d = a->dim_begin();
216         for (vector<index>::iterator i = d_indexes.begin(), e = d_indexes.end(); i != e; ++i) {
217             if ((*i).stride > (unsigned long long) (a->dimension_stop(d, false) - a->dimension_start(d, false)) + 1)
218                 throw Error(malformed_expr,
219                     "For '" + btp->name()
220                         + "', the index stride value is greater than the number of elements in the Array");
221             if (!(*i).rest
222                 && ((*i).stop) > (unsigned long long) (a->dimension_stop(d, false) - a->dimension_start(d, false)) + 1)
223                 throw Error(malformed_expr,
224                     "For '" + btp->name()
225                         + "', the index stop value is greater than the number of elements in the Array");
226 
227             D4Dimension *dim = a->dimension_D4dim(d);
228 
229             // In a DAP4 CE, specifying '[]' as an array dimension slice has two meanings.
230             // It can mean 'all the elements' of the dimension or 'apply the slicing inherited
231             // from the shared dimension'. The latter might be provide 'all the elements'
232             // but regardless, the Array object must record the CE correctly.
233 
234             if (dim && (*i).empty) {
235                 // This case corresponds to a CE that uses the '[]' notation for a
236                 // particular dimension - meaning, use the Shared Dimension size for
237                 // this dimension's 'slice'.
238                 a->add_constraint(d, dim);  // calls set_used_by_projected_var(true) + more
239             }
240             else {
241                 // This case corresponds to a 'local dimension slice' (See sections 8.6.2 and
242                 // 8.7 of the spec as of 4/12/16). When a local dimension slice is used, drop
243                 // the Map(s) that include that dimension. This enables people to constrain
244                 // an Array when some of the Array's dimensions don't use Shared Dimensions
245                 // but others do.
246 
247                 // First apply the constraint to the Array's dimension
248                 a->add_constraint(d, (*i).start, (*i).stride, (*i).rest ? -1 : (*i).stop);
249 
250                 // Then, if the Array has Maps, scan those Maps for any that use dimensions
251                 // that match the name of this particular dimension. If any such Maps are found
252                 // remove them. This ensure that the Array can be constrained using the  'local
253                 // dimension slice' without the constrained DMR containing references to Maps
254                 // that don't exist (or are otherwise nonsensical).
255                 //
256                 // This code came about as a fix for problems discovered during testing of
257                 // local dimension slices. See https://opendap.atlassian.net/browse/HYRAX-98
258                 // jhrg 4/12/16
259                 if (!a->maps()->empty()) {
260                     for (D4Maps::D4MapsIter m = a->maps()->map_begin(), e = a->maps()->map_end(); m != e; ++m) {
261                         if ((*m)->array() == 0)
262                             throw Error(malformed_expr,
263                                 "An array with Maps was found, but one of the Maps was not defined correctly.");
264 
265                         Array *map = const_cast<Array*>((*m)->array()); // Array lacks const iterator support
266                         // Added a test to ensure 'dim' is not null. This could be the case if
267                         // execution gets here and the index *i was not empty. jhrg 4/18/17
268                         if (dim && array_uses_shared_dimension(map, dim)) {
269                             D4Map *map_to_be_removed = *m;
270                             a->maps()->remove_map(map_to_be_removed); // Invalidates the iterator
271                             delete map_to_be_removed;   // removed from container; delete
272                             break; // must leave the for loop because 'm' is now invalid
273                         }
274                     }
275                 }
276             }
277 
278             ++d;
279         }
280     }
281 
282     d_indexes.clear();  // Clear the info so the next slice expression can be parsed.
283 
284     return btp;
285 }
286 
287 /**
288  * Add an array to the current projection with slicing. Calling this method will result
289  * in the array being returned with anonymous dimensions.
290  *
291  * @param id
292  * @return The BaseType* to the Array variable; the send_p and slicing information is
293  * set as a side effect.
294  */
295 D4Dimension *
slice_dimension(const std::string & id,const index & i)296 D4ConstraintEvaluator::slice_dimension(const std::string &id, const index &i)
297 {
298     D4Dimension *dim = dmr()->root()->find_dim(id);
299 
300     if (i.stride > dim->size())
301         throw Error(malformed_expr,
302             "For '" + id + "', the index stride value is greater than the size of the dimension");
303     if (!i.rest && (i.stop > dim->size() - 1))
304         throw Error(malformed_expr, "For '" + id + "', the index stop value is greater than the size of the dimension");
305 
306     dim->set_constraint(i.start, i.stride, i.rest ? dim->size() - 1 : i.stop);
307 
308     return dim;
309 }
310 
make_index(const std::string & i)311 D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i)
312 {
313     unsigned long long v = get_int64(i.c_str());
314     return index(v, 1, v, false, false /*empty*/, "");
315 }
316 
make_index(const std::string & i,const std::string & s,const std::string & e)317 D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i, const std::string &s,
318     const std::string &e)
319 {
320     return index(get_int64(i.c_str()), get_int64(s.c_str()), get_int64(e.c_str()), false, false /*empty*/, "");
321 }
322 
make_index(const std::string & i,unsigned long long s,const std::string & e)323 D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i, unsigned long long s,
324     const std::string &e)
325 {
326     return index(get_int64(i.c_str()), s, get_int64(e.c_str()), false, false /*empty*/, "");
327 }
328 
make_index(const std::string & i,const std::string & s)329 D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i, const std::string &s)
330 {
331     return index(get_int64(i.c_str()), get_int64(s.c_str()), 0, true, false /*empty*/, "");
332 }
333 
make_index(const std::string & i,unsigned long long s)334 D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i, unsigned long long s)
335 {
336     return index(get_uint64(i.c_str()), s, 0, true, false /*empty*/, "");
337 }
338 
/**
 * @brief Format a filter expression for use in an error message.
 *
 * @param op The operator.
 * @param arg1 The left-hand argument.
 * @param arg2 The right-hand argument.
 * @return The text "(arg1 op arg2)." - the trailing period is included.
 */
static std::string expr_msg(const std::string &op, const std::string &arg1, const std::string &arg2)
{
    std::string text("(");
    text += arg1;
    text += " ";
    text += op;
    text += " ";
    text += arg2;
    text += ").";

    return text;
}
343 
344 /**
345  * @brief Return the D4FilterClause constant for an operator
346  *
347  * Here are the strings returned by the parser:
348  *   GREATER ">"
349  *   LESS_EQUAL "<="
350  *   GREATER_EQUAL ">="
351  *   EQUAL "=="
352  *   NOT_EQUAL "!="
353  *   REGEX_MATCH "~="
354  *
355  *   LESS_BBOX "<<"
356  *   GREATER_BBOX ">>"
357  *
358  *   MASK "@="
359  *   ND "ND"
360  */
get_op_code(const std::string & op)361 static D4FilterClause::ops get_op_code(const std::string &op)
362 {
363     DBGN(cerr << "Entering " << __PRETTY_FUNCTION__ << endl << "op: " << op << endl);
364 
365     if (op == "<")
366         return D4FilterClause::less;
367     else if (op == ">")
368         return D4FilterClause::greater;
369     else if (op == "<=")
370         return D4FilterClause::less_equal;
371     else if (op == ">=")
372         return D4FilterClause::greater_equal;
373     else if (op == "==")
374         return D4FilterClause::equal;
375     else if (op == "!=")
376         return D4FilterClause::not_equal;
377     else if (op == "~=")
378         return D4FilterClause::match;
379     else
380         throw Error(malformed_expr, "The opertator '" + op + "' is not supported.");
381 }
382 
383 /**
384  * @brief Add a D4FilterClause
385  *
386  * This method adds a filter clause to the D4Sequence that is on the top of the
387  * parser's stack. If there is not a D4Sequence on the stack, an exception is
388  * thrown. Similarly, if the filter clause parameters are not valid, then an
389  * exception is thrown.
390  *
391  * Filter clause rules: One of the parameters must be a variable in a D4Sequence
392  * and the other must be a constant. The operator must be one of the valid relops.
393  * Note that the D4FilterClause objects use the same numerical codes as the DAP2
394  * parser/evaluator.
395  *
396  * @note The parser will have pushed the Sequence onto the BaseType stack during
397  * the parse, so variables can be looked up using the top_basetype() (which
398  * must be a D4Sequence).
399  *
400  * @param arg1 The first argument; a D4Sequence variable or a constant.
401  * @param arg2 The second argument; a D4Sequence variable or a constant.
402  * @param op The infix relop
403  */
add_filter_clause(const std::string & op,const std::string & arg1,const std::string & arg2)404 void D4ConstraintEvaluator::add_filter_clause(const std::string &op, const std::string &arg1, const std::string &arg2)
405 {
406     DBG(cerr << "Entering: " << __PRETTY_FUNCTION__ << endl);
407 
408     // Check that there really is a D4Sequence associated with this filter clause.
409     D4Sequence *s = dynamic_cast<D4Sequence*>(top_basetype());
410     if (!s)
411         throw Error(malformed_expr,
412             "When a filter expression is used, it must be bound to a Sequence variable: " + expr_msg(op, arg1, arg2));
413 
414     DBG(cerr << "s->name(): " << s->name() << endl);
415 
416     // Check that arg1 and 2 are valid
417     BaseType *a1 = s->var(arg1);
418     BaseType *a2 = s->var(arg2);
419     DBG(cerr << "a1: " << a1 << ", a2: " << a2 << endl);
420 
421     if (a1 && a2)
422         throw Error(malformed_expr,
423             "One of the arguments in a filter expression must be a constant: " + expr_msg(op, arg1, arg2));
424     if (!(a1 || a2))
425         throw Error(malformed_expr,
426             "One of the arguments in a filter expression must be a variable in a Sequence: "
427                 + expr_msg(op, arg1, arg2));
428 
429     // Now we know a1 XOR a2 is true
430     if (a1) {
431         s->clauses().add_clause(new D4FilterClause(get_op_code(op), new D4RValue(a1), D4RValueFactory(arg2)));
432     }
433     else {
434         s->clauses().add_clause(new D4FilterClause(get_op_code(op), D4RValueFactory(arg1), new D4RValue(a2)));
435     }
436 }
437 
438 /**
439  * @brief If the string has surrounding quotes, remove them.
440  *
441  * @param src The source string, passed by reference and modified in place
442  * @return A reference to the sting parameter.
443  */
444 string &
remove_quotes(string & s)445 D4ConstraintEvaluator::remove_quotes(string &s)
446 {
447     if (*s.begin() == '\"' && *(s.end() - 1) == '\"') {
448         s.erase(s.begin());
449         s.erase(s.end() - 1);
450     }
451 
452     return s;
453 }
454 
455 // This method is called from the parser (see d4_ce_parser.yy, down in the code
456 // section). This will be called during the call to D4CEParser::parse(), that
457 // is inside D4ConstraintEvaluator::parse(...)
458 //
459 // Including the value passed in for 'l' allows the CE text to leak into
460 // the error message, a potential XSS attack vector. jhrg 4/15/20
error(const libdap::location &,const std::string & m)461 void D4ConstraintEvaluator::error(const libdap::location &, const std::string &m)
462 {
463     ostringstream oss;
464 #if PREVENT_XXS_VIA_CE
465     oss << "Constraint expression parse error: " << m << ends;
466 #else
467     oss << l << ": " << m << ends;
468 #endif
469     throw Error(malformed_expr, oss.str());
470 }
471 
472 } /* namespace libdap */
473