1
2 // -*- mode: c++; c-basic-offset:4 -*-
3
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6
7 // Copyright (c) 2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26 #include "config.h"
27
28 //#define DODS_DEBUG 1
29 //#define DODS_DEBUG2 1
30
31 #include <cstring>
32 #include <cstdarg>
33
34 #include "BaseType.h"
35 #include "Byte.h"
36 #include "Int16.h"
37 #include "UInt16.h"
38 #include "Int32.h"
39 #include "UInt32.h"
40 #include "Float32.h"
41 #include "Float64.h"
42 #include "Str.h"
43 #include "Url.h"
44 #include "Array.h"
45 #include "Structure.h"
46 #include "Sequence.h"
47 #include "Grid.h"
48
49 #include "DDXParserSAX2.h"
50
51 #include "util.h"
52 #include "mime_util.h"
53 #include "debug.h"
54
55 namespace libdap {
56
// Printable names for the parser states, used only by the debugging output
// (e.g. states[parser->get_state()]), so the table is indexed by the numeric
// value of the state. It must be available whenever either debug macro is
// defined because both DBG and DBG2 statements in this file refer to it.
#if defined(DODS_DEBUG) || defined(DODS_DEBUG2)
static const char *states[] =
{
    "start",

    "dataset",

    "attribute_container",
    "attribute",
    "attribute_value",
    "other_xml_attribute",

    "alias",

    "simple_type",

    "array",
    "dimension",

    "grid",
    "map",

    "structure",
    "sequence",

    "blob href",

    "unknown",
    "error"
};
#endif
88 // Glue the BaseTypeFactory to the enum-based factory defined statically
89 // here.
90
factory(Type t,const string & name)91 BaseType *DDXParser::factory(Type t, const string & name)
92 {
93 switch (t) {
94 case dods_byte_c:
95 return d_factory->NewByte(name);
96
97 case dods_int16_c:
98 return d_factory->NewInt16(name);
99
100 case dods_uint16_c:
101 return d_factory->NewUInt16(name);
102
103 case dods_int32_c:
104 return d_factory->NewInt32(name);
105
106 case dods_uint32_c:
107 return d_factory->NewUInt32(name);
108
109 case dods_float32_c:
110 return d_factory->NewFloat32(name);
111
112 case dods_float64_c:
113 return d_factory->NewFloat64(name);
114
115 case dods_str_c:
116 return d_factory->NewStr(name);
117
118 case dods_url_c:
119 return d_factory->NewUrl(name);
120
121 case dods_array_c:
122 return d_factory->NewArray(name);
123
124 case dods_structure_c:
125 return d_factory->NewStructure(name);
126
127 case dods_sequence_c:
128 return d_factory->NewSequence(name);
129
130 case dods_grid_c:
131 return d_factory->NewGrid(name);
132
133 default:
134 return 0;
135 }
136 }
137
/** Test whether two C strings differ.
    @param name The string to test (an element name at the call sites here)
    @param tag The string to compare it with
    @return True if the strings are different, false if they are equal */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
142
set_state(DDXParser::ParseState state)143 void DDXParser::set_state(DDXParser::ParseState state)
144 {
145 s.push(state);
146 }
147
get_state() const148 DDXParser::ParseState DDXParser::get_state() const
149 {
150 return s.top();
151 }
152
pop_state()153 void DDXParser::pop_state()
154 {
155 s.pop();
156 }
157
158 /** Dump XML attributes to local store so they can be easily manipulated.
159 Attribute names are always folded to lower case.
160 @param attrs The XML attribute array */
transfer_xml_attrs(const xmlChar ** attributes,int nb_attributes)161 void DDXParser::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
162 {
163 if (!attribute_table.empty())
164 attribute_table.clear(); // erase old attributes
165
166 unsigned int index = 0;
167 for (int i = 0; i < nb_attributes; ++i, index += 5) {
168 // Make a value using the attribute name and the prefix, namespace URI
169 // and the value. The prefix might be null.
170 attribute_table.insert(map<string, XMLAttribute>::value_type(
171 string((const char *)attributes[index]),
172 XMLAttribute(attributes + index + 1)));
173
174 DBG(cerr << "Attribute '" << (const char *)attributes[index] << "': "
175 << attribute_table[(const char *)attributes[index]].value << endl);
176 }
177 }
178
transfer_xml_ns(const xmlChar ** namespaces,int nb_namespaces)179 void DDXParser::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
180 {
181 for (int i = 0; i < nb_namespaces; ++i ) {
182 // make a value with the prefix and namespace URI. The prefix might be
183 // null.
184 namespace_table.insert(map<string,string>::value_type(
185 namespaces[i*2] != 0 ? (const char *)namespaces[i*2] : "",
186 (const char *)namespaces[i*2+1]));
187 }
188 }
189
190 /** Is an attribute present? Attribute names are always lower case.
191 @note To use this method, first call transfer_xml_attrs.
192 @param attr The XML attribute
193 @return True if the XML attribute was present in the last tag */
check_required_attribute(const string & attr)194 bool DDXParser::check_required_attribute(const string & attr)
195 {
196 map < string, XMLAttribute >::iterator i = attribute_table.find(attr);
197 if (i == attribute_table.end())
198 ddx_fatal_error(this, "Required attribute '%s' not found.",
199 attr.c_str());
200 return true;
201 }
202
203 /** Is an attribute present? Attribute names are always lower case.
204 @note To use this method, first call transfer_xml_attrs.
205 @param attr The XML attribute
206 @return True if the XML attribute was present in the last/current tag,
207 false otherwise. */
check_attribute(const string & attr)208 bool DDXParser::check_attribute(const string & attr)
209 {
210 return (attribute_table.find(attr) != attribute_table.end());
211 }
212
213 /** Given that an \c Attribute tag has just been read, determine whether the
214 element is a container or a simple type, set the state and, for a simple
215 type record the type and name for use when \c value elements are found.
216
217 @note Modified to discriminate between OtherXML and the older DAP2.0
218 attribute types (container, Byte, ...).
219
220 @param attrs The array of XML attribute values */
process_attribute_element(const xmlChar ** attrs,int nb_attributes)221 void DDXParser::process_attribute_element(const xmlChar **attrs, int nb_attributes)
222 {
223 // These methods set the state to parser_error if a problem is found.
224 transfer_xml_attrs(attrs, nb_attributes);
225
226 bool error = !(check_required_attribute(string("name"))
227 && check_required_attribute(string("type")));
228 if (error)
229 return;
230
231 if (attribute_table["type"].value == "Container") {
232 set_state(inside_attribute_container);
233
234 AttrTable *child;
235 AttrTable *parent = at_stack.top();
236
237 child = parent->append_container(attribute_table["name"].value);
238 at_stack.push(child); // save.
239 DBG2(cerr << "Pushing at" << endl);
240 }
241 else if (attribute_table["type"].value == "OtherXML") {
242 set_state(inside_other_xml_attribute);
243
244 dods_attr_name = attribute_table["name"].value;
245 dods_attr_type = attribute_table["type"].value;
246 }
247 else {
248 set_state(inside_attribute);
249 // *** Modify parser. Add a special state for inside OtherXML since it
250 // does not use the <value> element.
251
252 dods_attr_name = attribute_table["name"].value;
253 dods_attr_type = attribute_table["type"].value;
254 }
255 }
256
257 /** Given that an \c Alias tag has just been read, set the state and process
258 the alias.
259 @param attrs The XML attribute array */
process_attribute_alias(const xmlChar ** attrs,int nb_attributes)260 void DDXParser::process_attribute_alias(const xmlChar **attrs, int nb_attributes)
261 {
262 transfer_xml_attrs(attrs, nb_attributes);
263 if (check_required_attribute(string("name"))
264 && check_required_attribute(string("attribute"))) {
265 set_state(inside_alias);
266 at_stack.top()->attr_alias(attribute_table["name"].value,
267 attribute_table["attribute"].value);
268 }
269 }
270
271 /** Given that a tag which opens a variable declaration has just been read,
272 create the variable. Once created, push the variable onto the stack of
273 variables, push that variables attribute table onto the attribute table
274 stack and update the state of the parser.
275 @param t The type of variable to create.
276 @param s The next state of the parser.
277 @param attrs the attributes read with the tag */
process_variable(Type t,ParseState s,const xmlChar ** attrs,int nb_attributes)278 void DDXParser::process_variable(Type t, ParseState s, const xmlChar **attrs,
279 int nb_attributes)
280 {
281 transfer_xml_attrs(attrs, nb_attributes);
282
283 set_state(s);
284
285 if (bt_stack.top()->type() == dods_array_c
286 || check_required_attribute("name")) { // throws on error/false
287 BaseType *btp = factory(t, attribute_table["name"].value);
288 if (!btp) {
289 ddx_fatal_error(this, "Internal parser error; could not instantiate the variable '%s'.",
290 attribute_table["name"].value.c_str());
291 }
292 else {
293 // Only run this code if btp is not null! jhrg 9/14/15
294 // Once we make the new variable, we not only load it on to the
295 // BaseType stack, we also load its AttrTable on the AttrTable stack.
296 // The attribute processing software always operates on the AttrTable
297 // at the top of the AttrTable stack (at_stack).
298 bt_stack.push(btp);
299 at_stack.push(&btp->get_attr_table());
300 }
301 }
302 }
303
304 /** Given that a \c dimension tag has just been read, add that information to
305 the array on the top of the BaseType stack.
306 @param attrs The XML attributes included in the \c dimension tag */
process_dimension(const xmlChar ** attrs,int nb_attributes)307 void DDXParser::process_dimension(const xmlChar **attrs, int nb_attributes)
308 {
309 transfer_xml_attrs(attrs, nb_attributes);
310 if (check_required_attribute(string("size"))) {
311 set_state(inside_dimension);
312 Array *ap = dynamic_cast < Array * >(bt_stack.top());
313 if (!ap) {
314 ddx_fatal_error(this, "Parse error: Expected an array variable.");
315 return;
316 }
317
318 ap->append_dim(atoi(attribute_table["size"].value.c_str()),
319 attribute_table["name"].value);
320 }
321 }
322
323 /** Given that a \c blob tag has just been read, extract and save the CID
324 included in the element. */
process_blob(const xmlChar ** attrs,int nb_attributes)325 void DDXParser::process_blob(const xmlChar **attrs, int nb_attributes)
326 {
327 transfer_xml_attrs(attrs, nb_attributes);
328 if (check_required_attribute(string("href"))) {
329 set_state(inside_blob_href);
330 *blob_href = attribute_table["href"].value;
331 }
332 }
333
334 /** Check to see if the current tag is either an \c Attribute or an \c Alias
335 start tag. This method is a glorified macro...
336
337 @param name The start tag name
338 @param attrs The tag's XML attributes
339 @return True if the tag was an \c Attribute or \c Alias tag */
340 inline bool
is_attribute_or_alias(const char * name,const xmlChar ** attrs,int nb_attributes)341 DDXParser::is_attribute_or_alias(const char *name, const xmlChar **attrs,
342 int nb_attributes)
343 {
344 if (strcmp(name, "Attribute") == 0) {
345 process_attribute_element(attrs, nb_attributes);
346 // next state: inside_attribtue or inside_attribute_container
347 return true;
348 }
349 else if (strcmp(name, "Alias") == 0) {
350 process_attribute_alias(attrs, nb_attributes);
351 // next state: inside_alias
352 return true;
353 }
354
355 return false;
356 }
357
358 /** Check to see if the current tag is the start of a variable declaration.
359 If so, process it. A glorified macro...
360 @param name The start tag name
361 @param attrs The tag's XML attributes
362 @return True if the tag was a variable tag */
is_variable(const char * name,const xmlChar ** attrs,int nb_attributes)363 inline bool DDXParser::is_variable(const char *name, const xmlChar **attrs,
364 int nb_attributes)
365 {
366 Type t = get_type(name);
367 //if ((t = is_simple_type(name)) != dods_null_c) {
368 if (is_simple_type(t)) {
369 process_variable(t, inside_simple_type, attrs, nb_attributes);
370 return true;
371 }
372 else if (strcmp(name, "Array") == 0) {
373 process_variable(dods_array_c, inside_array, attrs, nb_attributes);
374 return true;
375 }
376 else if (strcmp(name, "Structure") == 0) {
377 process_variable(dods_structure_c, inside_structure, attrs, nb_attributes);
378 return true;
379 }
380 else if (strcmp(name, "Sequence") == 0) {
381 process_variable(dods_sequence_c, inside_sequence, attrs, nb_attributes);
382 return true;
383 }
384 else if (strcmp(name, "Grid") == 0) {
385 process_variable(dods_grid_c, inside_grid, attrs, nb_attributes);
386 return true;
387 }
388
389 return false;
390 }
391
finish_variable(const char * tag,Type t,const char * expected)392 void DDXParser::finish_variable(const char *tag, Type t, const char *expected)
393 {
394 if (strcmp(tag, expected) != 0) {
395 DDXParser::ddx_fatal_error(this,
396 "Expected an end tag for a %s; found '%s' instead.",
397 expected, tag);
398 return;
399 }
400
401 pop_state();
402
403 BaseType *btp = bt_stack.top();
404
405 bt_stack.pop();
406 at_stack.pop();
407
408 if (btp->type() != t) {
409 DDXParser::ddx_fatal_error(this,
410 "Internal error: Expected a %s variable.",
411 expected);
412 delete btp;
413 return;
414 }
415 // Once libxml2 validates, this can go away. 05/30/03 jhrg
416 if (t == dods_array_c
417 && static_cast<Array*>(btp)->dimensions() == 0) {
418 DDXParser::ddx_fatal_error(this,
419 "No dimension element included in the Array '%s'.",
420 btp->name().c_str());
421 delete btp;
422 return;
423 }
424
425 BaseType *parent = bt_stack.top();
426
427 if (!(parent->is_vector_type() || parent->is_constructor_type())) {
428 DDXParser::ddx_fatal_error(this,
429 "Tried to add the array variable '%s' to a non-constructor type (%s %s).",
430 tag,
431 bt_stack.top()->type_name().c_str(),
432 bt_stack.top()->name().c_str());
433 delete btp;
434 return;
435 }
436
437 parent->add_var_nocopy(btp);
438 }
439
/** @name SAX Parser Callbacks

    These methods are declared static in the class header, so they take no
    implicit 'this' argument and can be registered as callbacks with the SAX
    parser engine. The parser object is instead passed to each callback as
    its first (void pointer) argument. */
445 //@{
446
447 /** Initialize the SAX parser state object. This object is passed to each
448 callback as a void pointer. The initial state is parser_start.
449
450 @param p The SAX parser */
ddx_start_document(void * p)451 void DDXParser::ddx_start_document(void * p)
452 {
453 DDXParser *parser = static_cast<DDXParser*>(p);
454 parser->error_msg = "";
455 parser->char_data = "";
456
457 // init attr table stack.
458 parser->at_stack.push(&parser->dds->get_attr_table());
459
460 // Trick; DDS *should* be a child of Structure. To simplify parsing,
461 // stuff a Structure on the bt_stack and dump the top level variables
462 // there. Once we're done, transfer the variables to the DDS.
463 parser->bt_stack.push(new Structure("dummy_dds"));
464
465 parser->set_state(parser_start);
466
467 DBG2(cerr << "Parser state: " << states[parser->get_state()] << endl);
468 }
469
470 /** Clean up after finishing a parse.
471 @param p The SAX parser */
ddx_end_document(void * p)472 void DDXParser::ddx_end_document(void * p)
473 {
474 DDXParser *parser = static_cast<DDXParser*>(p);
475 DBG2(cerr << "Ending state == " << states[parser->get_state()] <<
476 endl);
477
478 if (parser->get_state() != parser_start)
479 DDXParser::ddx_fatal_error(parser, "The document contained unbalanced tags.");
480
481 // If we've found any sort of error, don't make the DDX; intern() will
482 // take care of the error.
483 if (parser->get_state() == parser_error) {
484 return;
485 }
486
487 // Pop the temporary Structure off the stack and transfer its variables
488 // to the DDS.
489 Constructor *cp = dynamic_cast < Constructor * >(parser->bt_stack.top());
490 if (!cp) {
491 delete parser->bt_stack.top();
492 parser->bt_stack.pop();
493 ddx_fatal_error(parser, "Parse error: Expected a Structure, Sequence or Grid variable.");
494 return;
495 }
496
497 for (Constructor::Vars_iter i = cp->var_begin(); i != cp->var_end(); ++i) {
498 (*i)->set_parent(0); // top-level vars have no parents
499 parser->dds->add_var(*i);
500 }
501
502 delete parser->bt_stack.top();
503 parser->bt_stack.pop();
504 }
505
ddx_sax2_start_element(void * p,const xmlChar * l,const xmlChar * prefix,const xmlChar * URI,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int,const xmlChar ** attributes)506 void DDXParser::ddx_sax2_start_element(void *p,
507 const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
508 int nb_namespaces, const xmlChar **namespaces,
509 int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
510 {
511 DDXParser *parser = static_cast<DDXParser*>(p);
512 const char *localname = (const char *)l;
513
514 DBG2(cerr << "start element: " << localname << ", states: "
515 << states[parser->get_state()]);
516
517 switch (parser->get_state()) {
518 case parser_start:
519 if (strcmp(localname, "Dataset") == 0) {
520 parser->set_state(inside_dataset);
521 parser->root_ns = URI != 0 ? (const char *)URI: "";
522 parser->transfer_xml_attrs(attributes, nb_attributes);
523
524 if (parser->check_required_attribute(string("name")))
525 parser->dds->set_dataset_name(parser->attribute_table["name"].value);
526
527 if (parser->check_attribute("dapVersion"))
528 parser->dds->set_dap_version(parser->attribute_table["dapVersion"].value);
529 }
530 else
531 DDXParser::ddx_fatal_error(parser,
532 "Expected response to start with a Dataset element; found '%s' instead.",
533 localname);
534 break;
535
536 case inside_dataset:
537 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
538 break;
539 else if (parser->is_variable(localname, attributes, nb_attributes))
540 break;
541 else if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0) {
542 parser->process_blob(attributes, nb_attributes);
543 // next state: inside_data_blob
544 }
545 else
546 DDXParser::ddx_fatal_error(parser,
547 "Expected an Attribute, Alias or variable element; found '%s' instead.",
548 localname);
549 break;
550
551 case inside_attribute_container:
552 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
553 break;
554 else
555 DDXParser::ddx_fatal_error(parser,
556 "Expected an Attribute or Alias element; found '%s' instead.",
557 localname);
558 break;
559
560 case inside_attribute:
561 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
562 break;
563 else if (strcmp(localname, "value") == 0)
564 parser->set_state(inside_attribute_value);
565 else
566 ddx_fatal_error(parser,
567 "Expected an 'Attribute', 'Alias' or 'value' element; found '%s' instead.",
568 localname);
569 break;
570
571 case inside_attribute_value:
572 ddx_fatal_error(parser,
573 "Internal parser error; unexpected state, inside value while processing element '%s'.",
574 localname);
575 break;
576
577 case inside_other_xml_attribute:
578 DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname << endl);
579
580 parser->other_xml_depth++;
581
582 // Accumulate the elements here
583
584 parser->other_xml.append("<");
585 if (prefix) {
586 parser->other_xml.append((const char *)prefix);
587 parser->other_xml.append(":");
588 }
589 parser->other_xml.append(localname);
590
591 if (nb_namespaces != 0) {
592 parser->transfer_xml_ns(namespaces, nb_namespaces);
593
594 for (map<string,string>::iterator i = parser->namespace_table.begin();
595 i != parser->namespace_table.end();
596 ++i) {
597 parser->other_xml.append(" xmlns");
598 if (!i->first.empty()) {
599 parser->other_xml.append(":");
600 parser->other_xml.append(i->first);
601 }
602 parser->other_xml.append("=\"");
603 parser->other_xml.append(i->second);
604 parser->other_xml.append("\"");
605 }
606 }
607
608 if (nb_attributes != 0) {
609 parser->transfer_xml_attrs(attributes, nb_attributes);
610 for (XMLAttrMap::iterator i = parser->attr_table_begin();
611 i != parser->attr_table_end();
612 ++i) {
613 parser->other_xml.append(" ");
614 if (!i->second.prefix.empty()) {
615 parser->other_xml.append(i->second.prefix);
616 parser->other_xml.append(":");
617 }
618 parser->other_xml.append(i->first);
619 parser->other_xml.append("=\"");
620 parser->other_xml.append(i->second.value);
621 parser->other_xml.append("\"");
622 }
623 }
624
625 parser->other_xml.append(">");
626 break;
627
628 case inside_alias:
629 ddx_fatal_error(parser,
630 "Internal parser error; unexpected state, inside alias while processing element '%s'.",
631 localname);
632 break;
633
634 case inside_simple_type:
635 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
636 break;
637 else
638 ddx_fatal_error(parser,
639 "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
640 localname);
641 break;
642
643 case inside_array:
644 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
645 break;
646 else if (is_not(localname, "Array")
647 && parser->is_variable(localname, attributes, nb_attributes))
648 break;
649 else if (strcmp(localname, "dimension") == 0) {
650 parser->process_dimension(attributes, nb_attributes);
651 // next state: inside_dimension
652 }
653 else
654 ddx_fatal_error(parser,
655 "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
656 localname);
657 break;
658
659 case inside_dimension:
660 ddx_fatal_error(parser,
661 "Internal parser error; unexpected state, inside dimension while processing element '%s'.",
662 localname);
663 break;
664
665 case inside_structure:
666 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
667 break;
668 else if (parser->is_variable(localname, attributes, nb_attributes))
669 break;
670 else
671 DDXParser::ddx_fatal_error(parser,
672 "Expected an Attribute, Alias or variable element; found '%s' instead.",
673 localname);
674 break;
675
676 case inside_sequence:
677 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
678 break;
679 else if (parser->is_variable(localname, attributes, nb_attributes))
680 break;
681 else
682 DDXParser::ddx_fatal_error(parser,
683 "Expected an Attribute, Alias or variable element; found '%s' instead.",
684 localname);
685 break;
686
687 case inside_grid:
688 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
689 break;
690 else if (strcmp(localname, "Array") == 0)
691 parser->process_variable(dods_array_c, inside_array, attributes, nb_attributes);
692 else if (strcmp(localname, "Map") == 0)
693 parser->process_variable(dods_array_c, inside_map, attributes, nb_attributes);
694 else
695 DDXParser::ddx_fatal_error(parser,
696 "Expected an Attribute, Alias or variable element; found '%s' instead.",
697 localname);
698 break;
699
700 case inside_map:
701 if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
702 break;
703 else if (is_not(localname, "Array") && is_not(localname, "Sequence")
704 && is_not(localname, "Grid")
705 && parser->is_variable(localname, attributes, nb_attributes))
706 break;
707 else if (strcmp(localname, "dimension") == 0) {
708 parser->process_dimension(attributes, nb_attributes);
709 // next state: inside_dimension
710 }
711 else
712 ddx_fatal_error(parser,
713 "Expected an 'Attribute', 'Alias', variable or 'dimension' element; found '%s' instead.",
714 localname);
715 break;
716
717 case inside_blob_href:
718 ddx_fatal_error(parser,
719 "Internal parser error; unexpected state, inside blob href while processing element '%s'.",
720 localname);
721 break;
722
723 case parser_unknown:
724 // *** Never used? If so remove/error
725 parser->set_state(parser_unknown);
726 break;
727
728 case parser_error:
729 break;
730 }
731
732 DBGN(cerr << " ... " << states[parser->get_state()] << endl);
733 }
734
ddx_sax2_end_element(void * p,const xmlChar * l,const xmlChar * prefix,const xmlChar * URI)735 void DDXParser::ddx_sax2_end_element(void *p, const xmlChar *l,
736 const xmlChar *prefix, const xmlChar *URI)
737 {
738 DDXParser *parser = static_cast<DDXParser*>(p);
739 const char *localname = (const char *)l;
740
741 DBG2(cerr << "End element " << localname << " (state "
742 << states[parser->get_state()] << ")" << endl);
743
744 switch (parser->get_state()) {
745 case parser_start:
746 ddx_fatal_error(parser,
747 "Internal parser error; unexpected state, inside start state while processing element '%s'.",
748 localname);
749 break;
750
751 case inside_dataset:
752 if (strcmp(localname, "Dataset") == 0)
753 parser->pop_state();
754 else
755 DDXParser::ddx_fatal_error(parser,
756 "Expected an end Dataset tag; found '%s' instead.",
757 localname);
758 break;
759
760 case inside_attribute_container:
761 if (strcmp(localname, "Attribute") == 0) {
762 parser->pop_state();
763 parser->at_stack.pop(); // pop when leaving a container.
764 }
765 else
766 DDXParser::ddx_fatal_error(parser,
767 "Expected an end Attribute tag; found '%s' instead.",
768 localname);
769 break;
770
771 case inside_attribute:
772 if (strcmp(localname, "Attribute") == 0)
773 parser->pop_state();
774 else
775 DDXParser::ddx_fatal_error(parser,
776 "Expected an end Attribute tag; found '%s' instead.",
777 localname);
778 break;
779
780 case inside_attribute_value:
781 if (strcmp(localname, "value") == 0) {
782 parser->pop_state();
783 AttrTable *atp = parser->at_stack.top();
784 atp->append_attr(parser->dods_attr_name,
785 parser->dods_attr_type, parser->char_data);
786 parser->char_data = ""; // Null this after use.
787 }
788 else
789 DDXParser::ddx_fatal_error(parser,
790 "Expected an end value tag; found '%s' instead.",
791 localname);
792
793 break;
794
795 case inside_other_xml_attribute: {
796 if (strcmp(localname, "Attribute") == 0
797 && parser->root_ns == (const char *)URI) {
798
799 DBGN(cerr << endl << "\t Popping the 'inside_other_xml_attribute' state"
800 << endl);
801
802 parser->pop_state();
803
804 AttrTable *atp = parser->at_stack.top();
805 atp->append_attr(parser->dods_attr_name,
806 parser->dods_attr_type, parser->other_xml);
807
808 parser->other_xml = ""; // Null this after use.
809 }
810 else {
811 DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname
812 << ", depth: " << parser->other_xml_depth << endl);
813 if (parser->other_xml_depth == 0)
814 DDXParser::ddx_fatal_error(parser,
815 "Expected an OtherXML attribute to end! Instead I found '%s'",
816 localname);
817 parser->other_xml_depth--;
818
819 parser->other_xml.append("</");
820 if (prefix) {
821 parser->other_xml.append((const char *)prefix);
822 parser->other_xml.append(":");
823 }
824 parser->other_xml.append(localname);
825 parser->other_xml.append(">");
826 }
827 break;
828 }
829 // Alias is busted in libdap++ 05/29/03 jhrg
830 case inside_alias:
831 parser->pop_state();
832 break;
833
834 case inside_simple_type: {
835 Type t = get_type(localname);
836 if (is_simple_type(t)) {
837 parser->pop_state();
838 BaseType *btp = parser->bt_stack.top();
839 parser->bt_stack.pop();
840 parser->at_stack.pop();
841
842 BaseType *parent = parser->bt_stack.top();
843
844 if (parent->is_vector_type() || parent->is_constructor_type()) {
845 parent->add_var(btp);
846 delete btp;
847 }
848 else {
849 DDXParser::ddx_fatal_error(parser,
850 "Tried to add the simple-type variable '%s' to a non-constructor type (%s %s).",
851 localname,
852 parser->bt_stack.top()->
853 type_name().c_str(),
854 parser->bt_stack.top()->name().
855 c_str());
856 delete btp;
857 }
858 }
859 else {
860 DDXParser::ddx_fatal_error(parser,
861 "Expected an end tag for a simple type; found '%s' instead.",
862 localname);
863 }
864 break;
865 }
866
867 case inside_array:
868 parser->finish_variable(localname, dods_array_c, "Array");
869 break;
870
871 case inside_dimension:
872 if (strcmp(localname, "dimension") == 0)
873 parser->pop_state();
874 else
875 DDXParser::ddx_fatal_error(parser,
876 "Expected an end dimension tag; found '%s' instead.",
877 localname);
878 break;
879
880 case inside_structure:
881 parser->finish_variable(localname, dods_structure_c, "Structure");
882 break;
883
884 case inside_sequence:
885 parser->finish_variable(localname, dods_sequence_c, "Sequence");
886 break;
887
888 case inside_grid:
889 parser->finish_variable(localname, dods_grid_c, "Grid");
890 break;
891
892 case inside_map:
893 parser->finish_variable(localname, dods_array_c, "Map");
894 break;
895
896 case inside_blob_href:
897 if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0)
898 parser->pop_state();
899 else
900 DDXParser::ddx_fatal_error(parser,
901 "Expected an end dataBLOB/blob tag; found '%s' instead.",
902 localname);
903 break;
904
905 case parser_unknown:
906 parser->pop_state();
907 break;
908
909 case parser_error:
910 break;
911 }
912
913
914 DBGN(cerr << " ... " << states[parser->get_state()] << endl);
915 }
916
917 /** Process/accumulate character data. This may be called more than once for
918 one logical clump of data. Only save character data when processing
919 'value' elements; throw away all other characters. */
ddx_get_characters(void * p,const xmlChar * ch,int len)920 void DDXParser::ddx_get_characters(void * p, const xmlChar * ch, int len)
921 {
922 DDXParser *parser = static_cast<DDXParser*>(p);
923
924 switch (parser->get_state()) {
925 case inside_attribute_value:
926 parser->char_data.append((const char *)(ch), len);
927 DBG2(cerr << "Characters: '" << parser->char_data << "'" << endl);
928 break;
929
930 case inside_other_xml_attribute:
931 parser->other_xml.append((const char *)(ch), len);
932 DBG2(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
933 break;
934
935 default:
936 break;
937 }
938 }
939
940 /** Read whitespace that's not really important for content. This is used
941 only for the OtherXML attribute type to preserve formating of the XML.
942 Doing so makes the attribute value far easier to read.
943 */
ddx_ignoreable_whitespace(void * p,const xmlChar * ch,int len)944 void DDXParser::ddx_ignoreable_whitespace(void *p, const xmlChar *ch,
945 int len)
946 {
947 DDXParser *parser = static_cast<DDXParser*>(p);
948
949 switch (parser->get_state()) {
950 case inside_other_xml_attribute:
951 parser->other_xml.append((const char *)(ch), len);
952 break;
953
954 default:
955 break;
956 }
957 }
958
959 /** Get characters in a cdata block. DAP does not use CData, but XML in an
960 OtherXML attribute (the value of that DAP attribute) might use it. This
961 callback also allows CData when the parser is in the 'parser_unknown'
962 state since some future DAP element might use it.
963 */
ddx_get_cdata(void * p,const xmlChar * value,int len)964 void DDXParser::ddx_get_cdata(void *p, const xmlChar *value, int len)
965 {
966 DDXParser *parser = static_cast<DDXParser*>(p);
967
968 switch (parser->get_state()) {
969 case inside_other_xml_attribute:
970 parser->other_xml.append((const char *)(value), len);
971 break;
972
973 case parser_unknown:
974 break;
975
976 default:
977 DDXParser::ddx_fatal_error(parser,
978 "Found a CData block but none are allowed by DAP.");
979
980 break;
981 }
982 }
983
984 /** Handle the standard XML entities.
985
986 @param parser The SAX parser
987 @param name The XML entity. */
ddx_get_entity(void *,const xmlChar * name)988 xmlEntityPtr DDXParser::ddx_get_entity(void *, const xmlChar * name)
989 {
990 return xmlGetPredefinedEntity(name);
991 }
992
993 /** Process an XML fatal error. Note that SAX provides for warnings, errors
994 and fatal errors. This code treats them all as fatal errors since there's
995 typically no way to tell a user about the error since there's often no
996 user interface for this software.
997
998 @param p The SAX parser
999 @param msg A printf-style format string. */
ddx_fatal_error(void * p,const char * msg,...)1000 void DDXParser::ddx_fatal_error(void * p, const char *msg, ...)
1001 {
1002 va_list args;
1003 DDXParser *parser = static_cast<DDXParser*>(p);
1004
1005 parser->set_state(parser_error);
1006
1007 va_start(args, msg);
1008 char str[1024];
1009 vsnprintf(str, 1024, msg, args);
1010 va_end(args);
1011
1012 int line = xmlSAX2GetLineNumber(parser->ctxt);
1013
1014 parser->error_msg += "At line " + long_to_string(line) + ": ";
1015 parser->error_msg += string(str) + string("\n");
1016 }
1017
1018 //@}
1019
cleanup_parse(xmlParserCtxtPtr & context)1020 void DDXParser::cleanup_parse(xmlParserCtxtPtr & context)
1021 {
1022 bool wellFormed = context->wellFormed;
1023 bool valid = context->valid;
1024
1025 context->sax = NULL;
1026 xmlFreeParserCtxt(context);
1027
1028 // If there's an error, there may still be items on the stack at the
1029 // end of the parse.
1030 while (!bt_stack.empty()) {
1031 delete bt_stack.top();
1032 bt_stack.pop();
1033 }
1034
1035 if (!wellFormed) {
1036 throw DDXParseFailed(string("The DDX is not a well formed XML document.\n") + error_msg);
1037 }
1038
1039 if (!valid) {
1040 throw DDXParseFailed(string("The DDX is not a valid document.\n") + error_msg);
1041 }
1042
1043 if (get_state() == parser_error) {
1044 throw DDXParseFailed(string("Error parsing DDX response.\n") + error_msg);
1045 }
1046 }
1047
1048 /** Read a DDX from a C++ input stream and populate a DDS object.
1049 *
1050 * @param in
1051 * @param dds
1052 * @param cid
1053 * @param boundary
1054 */
intern_stream(istream & in,DDS * dest_dds,string & cid,const string & boundary)1055 void DDXParser::intern_stream(istream &in, DDS *dest_dds, string &cid, const string &boundary)
1056 {
1057 // Code example from libxml2 docs re: read from a stream.
1058 if (!in || in.eof())
1059 throw InternalErr(__FILE__, __LINE__, "Input stream not open or read error");
1060
1061 const int size = 1024;
1062 char chars[size + 1];
1063
1064 // int res = fread(chars, 1, 4, in);
1065 in.read(chars, 4);
1066 int res = in.gcount();
1067 if (res > 0) {
1068 chars[4]='\0';
1069 xmlParserCtxtPtr context = xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
1070
1071 if (!context)
1072 throw DDXParseFailed("Error parsing DDX response: Input does not look like XML");
1073
1074 ctxt = context; // need ctxt for error messages
1075 dds = dest_dds; // dump values here
1076 blob_href = &cid; // cid goes here
1077
1078 xmlSAXHandler ddx_sax_parser;
1079 memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1080
1081 ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1082 ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1083 ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1084 ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1085 ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1086 ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1087 ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1088 ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1089 ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1090 ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1091 ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1092 ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1093
1094 context->sax = &ddx_sax_parser;
1095 context->userData = this;
1096 context->validate = true;
1097
1098 in.getline(chars, size); // chars has size+1 elements
1099 res = in.gcount();
1100 chars[res-1] = '\n'; // libxml needs the newline; w/o it the parse will fail
1101 chars[res] = '\0';
1102 while (res > 0 && !is_boundary(chars, boundary)) {
1103 DBG(cerr << "line (" << res << "): " << chars << endl);
1104 xmlParseChunk(ctxt, chars, res, 0);
1105
1106 in.getline(chars, size); // chars has size+1 elements
1107 res = in.gcount();
1108 if (res > 0) {
1109 chars[res-1] = '\n';
1110 chars[res] = '\0';
1111 }
1112 }
1113
1114 // This call ends the parse: The fourth argument of xmlParseChunk is
1115 // the bool 'terminate.'
1116 xmlParseChunk(ctxt, chars, 0, 1);
1117
1118 cleanup_parse(context);
1119 }
1120 else {
1121 throw DDXParseFailed("Error parsing DDX response: Could not read from input stream.");
1122 }
1123 }
1124
1125 /** @brief Read the DDX from a stream instead of a file.
1126 @see DDXParser::intern(). */
intern_stream(FILE * in,DDS * dest_dds,string & cid,const string & boundary)1127 void DDXParser::intern_stream(FILE *in, DDS *dest_dds, string &cid, const string &boundary)
1128 {
1129 // Code example from libxml2 docs re: read from a stream.
1130 if (!in || feof(in) || ferror(in))
1131 throw InternalErr(__FILE__, __LINE__, "Input stream not open or read error");
1132
1133 const int size = 1024;
1134 char chars[size];
1135
1136 int res = fread(chars, 1, 4, in);
1137 if (res > 0) {
1138 chars[4]='\0';
1139 xmlParserCtxtPtr context = xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
1140
1141 if (!context)
1142 throw DDXParseFailed("Error parsing DDX response: Input does not look like XML");
1143
1144 ctxt = context; // need ctxt for error messages
1145 dds = dest_dds; // dump values here
1146 blob_href = &cid; // cid goes here
1147
1148 xmlSAXHandler ddx_sax_parser;
1149 memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1150
1151 ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1152 ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1153 ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1154 ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1155 ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1156 ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1157 ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1158 ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1159 ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1160 ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1161 ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1162 ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1163
1164 context->sax = &ddx_sax_parser;
1165 context->userData = this;
1166 context->validate = true;
1167
1168
1169 while ((fgets(chars, size, in) != 0) && !is_boundary(chars, boundary)) {
1170 DBG(cerr << "line (" << strlen(chars) << "): " << chars << endl);
1171 xmlParseChunk(ctxt, chars, strlen(chars), 0);
1172 }
1173 // This call ends the parse: The fourth argument of xmlParseChunk is
1174 // the bool 'terminate.'
1175 xmlParseChunk(ctxt, chars, 0, 1);
1176
1177 cleanup_parse(context);
1178 }
1179 else {
1180 throw DDXParseFailed("Error parsing DDX response: Could not read from input file.");
1181 }
1182 }
1183
1184
1185 /** Parse a DDX document stored in a file. The XML in the document is parsed
1186 and a binary DDX is built. This implementation stores the result in a DDS
1187 object where each instance of BaseType can hold an AttrTable object.
1188
1189 @param document Read the DDX from this file.
1190 @param dest_dds Value/result parameter; dumps the information to this DDS
1191 instance.
1192 @param cid Value/result parameter; puts the href which references the \c
1193 CID.
1194 @exception DDXParseFailed Thrown if the XML document could not be
1195 read or parsed. */
intern(const string & document,DDS * dest_dds,string & cid)1196 void DDXParser::intern(const string & document, DDS * dest_dds, string &cid)
1197 {
1198 // Create the context pointer explicitly so that we can store a pointer
1199 // to it in the DDXParser instance. This provides a way to generate our
1200 // own error messages *with* line numbers. The messages are pretty
1201 // meaningless otherwise. This means that we use an interface from the
1202 // 'parser internals' header, and not the 'parser' header. However, this
1203 // interface is also used in one of the documented examples, so it's
1204 // probably pretty stable. 06/02/03 jhrg
1205 xmlParserCtxtPtr context = xmlCreateFileParserCtxt(document.c_str());
1206 if (!context)
1207 throw
1208 DDXParseFailed(string
1209 ("Could not initialize the parser with the file: '")
1210 + document + string("'."));
1211
1212 dds = dest_dds; // dump values here
1213 blob_href = &cid;
1214 ctxt = context; // need ctxt for error messages
1215
1216 xmlSAXHandler ddx_sax_parser;
1217 memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1218
1219 ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1220 ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1221 ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1222 ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1223 ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1224 ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1225 ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1226 ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1227 ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1228 ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1229 ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1230 ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1231
1232 context->sax = &ddx_sax_parser;
1233 context->userData = this;
1234 context->validate = false;
1235
1236 xmlParseDocument(context);
1237
1238 cleanup_parse(context);
1239 }
1240
1241 } // namespace libdap
1242