1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 //#define DODS_DEBUG 1
29 //#define DODS_DEBUG2 1
30 
31 #include <cstring>
32 #include <cstdarg>
33 
34 #include "BaseType.h"
35 #include "Byte.h"
36 #include "Int16.h"
37 #include "UInt16.h"
38 #include "Int32.h"
39 #include "UInt32.h"
40 #include "Float32.h"
41 #include "Float64.h"
42 #include "Str.h"
43 #include "Url.h"
44 #include "Array.h"
45 #include "Structure.h"
46 #include "Sequence.h"
47 #include "Grid.h"
48 
49 #include "DDXParserSAX2.h"
50 
51 #include "util.h"
52 #include "mime_util.h"
53 #include "debug.h"
54 
55 namespace libdap {
56 
// Human-readable names for the parser states, used only by the debug output
// macros in this file. The table is indexed with the value returned by
// get_state() (e.g. states[parser->get_state()]), so the order of the entries
// must follow the order of the ParseState enumerators.
// NOTE(review): the enum is declared outside this file's visible part;
// confirm the order there whenever a state is added.
//
// The guard must name DODS_DEBUG2 (it used to be misspelled), otherwise a
// build that defines only DODS_DEBUG2 references an undefined table.
#if defined(DODS_DEBUG) || defined(DODS_DEBUG2)
static const char *states[] =
    {
        "start",

        "dataset",

        "attribute_container",
        "attribute",
        "attribute_value",
        "other_xml_attribute",

        "alias",

        "simple_type",

        "array",
        "dimension",

        "grid",
        "map",

        "structure",
        "sequence",

        "blob href",

        "unknown",
        "error"
    };
#endif
88 // Glue the BaseTypeFactory to the enum-based factory defined statically
89 // here.
90 
factory(Type t,const string & name)91 BaseType *DDXParser::factory(Type t, const string & name)
92 {
93     switch (t) {
94     case dods_byte_c:
95         return d_factory->NewByte(name);
96 
97     case dods_int16_c:
98         return d_factory->NewInt16(name);
99 
100     case dods_uint16_c:
101         return d_factory->NewUInt16(name);
102 
103     case dods_int32_c:
104         return d_factory->NewInt32(name);
105 
106     case dods_uint32_c:
107         return d_factory->NewUInt32(name);
108 
109     case dods_float32_c:
110         return d_factory->NewFloat32(name);
111 
112     case dods_float64_c:
113         return d_factory->NewFloat64(name);
114 
115     case dods_str_c:
116         return d_factory->NewStr(name);
117 
118     case dods_url_c:
119         return d_factory->NewUrl(name);
120 
121     case dods_array_c:
122         return d_factory->NewArray(name);
123 
124     case dods_structure_c:
125         return d_factory->NewStructure(name);
126 
127     case dods_sequence_c:
128         return d_factory->NewSequence(name);
129 
130     case dods_grid_c:
131         return d_factory->NewGrid(name);
132 
133     default:
134         return 0;
135     }
136 }
137 
/** Test whether an element name differs from a given tag.
    @param name The element name to test.
    @param tag The tag name to compare against.
    @return True when the two C strings differ (as reported by strcmp),
    false when they are equal. */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
142 
set_state(DDXParser::ParseState state)143 void DDXParser::set_state(DDXParser::ParseState state)
144 {
145     s.push(state);
146 }
147 
get_state() const148 DDXParser::ParseState DDXParser::get_state() const
149 {
150     return s.top();
151 }
152 
pop_state()153 void DDXParser::pop_state()
154 {
155     s.pop();
156 }
157 
158 /** Dump XML attributes to local store so they can be easily manipulated.
159     Attribute names are always folded to lower case.
160     @param attrs The XML attribute array */
transfer_xml_attrs(const xmlChar ** attributes,int nb_attributes)161 void DDXParser::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
162 {
163     if (!attribute_table.empty())
164         attribute_table.clear(); // erase old attributes
165 
166     unsigned int index = 0;
167     for (int i = 0; i < nb_attributes; ++i, index += 5) {
168         // Make a value using the attribute name and the prefix, namespace URI
169         // and the value. The prefix might be null.
170         attribute_table.insert(map<string, XMLAttribute>::value_type(
171                 string((const char *)attributes[index]),
172                 XMLAttribute(attributes + index + 1)));
173 
174         DBG(cerr << "Attribute '" << (const char *)attributes[index] << "': "
175                 << attribute_table[(const char *)attributes[index]].value << endl);
176     }
177 }
178 
transfer_xml_ns(const xmlChar ** namespaces,int nb_namespaces)179 void DDXParser::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
180 {
181     for (int i = 0; i < nb_namespaces; ++i ) {
182         // make a value with the prefix and namespace URI. The prefix might be
183         // null.
184         namespace_table.insert(map<string,string>::value_type(
185                 namespaces[i*2] != 0 ? (const char *)namespaces[i*2] : "",
186                 (const char *)namespaces[i*2+1]));
187     }
188 }
189 
190 /** Is an attribute present? Attribute names are always lower case.
191     @note To use this method, first call transfer_xml_attrs.
192     @param attr The XML attribute
193     @return True if the XML attribute was present in the last tag */
check_required_attribute(const string & attr)194 bool DDXParser::check_required_attribute(const string & attr)
195 {
196     map < string, XMLAttribute >::iterator i = attribute_table.find(attr);
197     if (i == attribute_table.end())
198         ddx_fatal_error(this, "Required attribute '%s' not found.",
199                         attr.c_str());
200     return true;
201 }
202 
203 /** Is an attribute present? Attribute names are always lower case.
204     @note To use this method, first call transfer_xml_attrs.
205     @param attr The XML attribute
206     @return True if the XML attribute was present in the last/current tag,
207     false otherwise. */
check_attribute(const string & attr)208 bool DDXParser::check_attribute(const string & attr)
209 {
210     return (attribute_table.find(attr) != attribute_table.end());
211 }
212 
213 /** Given that an \c Attribute tag has just been read, determine whether the
214     element is a container or a simple type, set the state and, for a simple
215     type record the type and name for use when \c value elements are found.
216 
217     @note Modified to discriminate between OtherXML and the older DAP2.0
218     attribute types (container, Byte, ...).
219 
220     @param attrs The array of XML attribute values */
process_attribute_element(const xmlChar ** attrs,int nb_attributes)221 void DDXParser::process_attribute_element(const xmlChar **attrs, int nb_attributes)
222 {
223     // These methods set the state to parser_error if a problem is found.
224     transfer_xml_attrs(attrs, nb_attributes);
225 
226     bool error = !(check_required_attribute(string("name"))
227                    && check_required_attribute(string("type")));
228     if (error)
229         return;
230 
231     if (attribute_table["type"].value == "Container") {
232         set_state(inside_attribute_container);
233 
234         AttrTable *child;
235         AttrTable *parent = at_stack.top();
236 
237         child = parent->append_container(attribute_table["name"].value);
238         at_stack.push(child);   // save.
239         DBG2(cerr << "Pushing at" << endl);
240     }
241     else if (attribute_table["type"].value == "OtherXML") {
242         set_state(inside_other_xml_attribute);
243 
244         dods_attr_name = attribute_table["name"].value;
245         dods_attr_type = attribute_table["type"].value;
246     }
247     else {
248         set_state(inside_attribute);
249         // *** Modify parser. Add a special state for inside OtherXML since it
250         // does not use the <value> element.
251 
252         dods_attr_name = attribute_table["name"].value;
253         dods_attr_type = attribute_table["type"].value;
254     }
255 }
256 
257 /** Given that an \c Alias tag has just been read, set the state and process
258     the alias.
259     @param attrs The XML attribute array */
process_attribute_alias(const xmlChar ** attrs,int nb_attributes)260 void DDXParser::process_attribute_alias(const xmlChar **attrs, int nb_attributes)
261 {
262     transfer_xml_attrs(attrs, nb_attributes);
263     if (check_required_attribute(string("name"))
264         && check_required_attribute(string("attribute"))) {
265         set_state(inside_alias);
266         at_stack.top()->attr_alias(attribute_table["name"].value,
267                                    attribute_table["attribute"].value);
268     }
269 }
270 
271 /** Given that a tag which opens a variable declaration has just been read,
272     create the variable. Once created, push the variable onto the stack of
273     variables, push that variables attribute table onto the attribute table
274     stack and update the state of the parser.
275     @param t The type of variable to create.
276     @param s The next state of the parser.
277     @param attrs the attributes read with the tag */
process_variable(Type t,ParseState s,const xmlChar ** attrs,int nb_attributes)278 void DDXParser::process_variable(Type t, ParseState s, const xmlChar **attrs,
279         int nb_attributes)
280 {
281     transfer_xml_attrs(attrs, nb_attributes);
282 
283     set_state(s);
284 
285     if (bt_stack.top()->type() == dods_array_c
286             || check_required_attribute("name")) { // throws on error/false
287         BaseType *btp = factory(t, attribute_table["name"].value);
288         if (!btp) {
289             ddx_fatal_error(this, "Internal parser error; could not instantiate the variable '%s'.",
290                 attribute_table["name"].value.c_str());
291         }
292         else {
293             // Only run this code if btp is not null! jhrg 9/14/15
294             // Once we make the new variable, we not only load it on to the
295             // BaseType stack, we also load its AttrTable on the AttrTable stack.
296             // The attribute processing software always operates on the AttrTable
297             // at the top of the AttrTable stack (at_stack).
298             bt_stack.push(btp);
299             at_stack.push(&btp->get_attr_table());
300         }
301     }
302 }
303 
304 /** Given that a \c dimension tag has just been read, add that information to
305     the array on the top of the BaseType stack.
306     @param attrs The XML attributes included in the \c dimension tag */
process_dimension(const xmlChar ** attrs,int nb_attributes)307 void DDXParser::process_dimension(const xmlChar **attrs, int nb_attributes)
308 {
309     transfer_xml_attrs(attrs, nb_attributes);
310     if (check_required_attribute(string("size"))) {
311         set_state(inside_dimension);
312         Array *ap = dynamic_cast < Array * >(bt_stack.top());
313 		if (!ap) {
314 			ddx_fatal_error(this, "Parse error: Expected an array variable.");
315 			return;
316 		}
317 
318         ap->append_dim(atoi(attribute_table["size"].value.c_str()),
319                        attribute_table["name"].value);
320     }
321 }
322 
323 /** Given that a \c blob tag has just been read, extract and save the CID
324     included in the element. */
process_blob(const xmlChar ** attrs,int nb_attributes)325 void DDXParser::process_blob(const xmlChar **attrs, int nb_attributes)
326 {
327     transfer_xml_attrs(attrs, nb_attributes);
328     if (check_required_attribute(string("href"))) {
329         set_state(inside_blob_href);
330         *blob_href = attribute_table["href"].value;
331     }
332 }
333 
334 /** Check to see if the current tag is either an \c Attribute or an \c Alias
335     start tag. This method is a glorified macro...
336 
337     @param name The start tag name
338     @param attrs The tag's XML attributes
339     @return True if the tag was an \c Attribute or \c Alias tag */
340 inline bool
is_attribute_or_alias(const char * name,const xmlChar ** attrs,int nb_attributes)341 DDXParser::is_attribute_or_alias(const char *name, const xmlChar **attrs,
342         int nb_attributes)
343 {
344     if (strcmp(name, "Attribute") == 0) {
345         process_attribute_element(attrs, nb_attributes);
346         // next state: inside_attribtue or inside_attribute_container
347         return true;
348     }
349     else if (strcmp(name, "Alias") == 0) {
350         process_attribute_alias(attrs, nb_attributes);
351         // next state: inside_alias
352         return true;
353     }
354 
355     return false;
356 }
357 
358 /** Check to see if the current tag is the start of a variable declaration.
359     If so, process it. A glorified macro...
360     @param name The start tag name
361     @param attrs The tag's XML attributes
362     @return True if the tag was a variable tag */
is_variable(const char * name,const xmlChar ** attrs,int nb_attributes)363 inline bool DDXParser::is_variable(const char *name, const xmlChar **attrs,
364         int nb_attributes)
365 {
366     Type t = get_type(name);
367     //if ((t = is_simple_type(name)) != dods_null_c) {
368     if (is_simple_type(t)) {
369         process_variable(t, inside_simple_type, attrs, nb_attributes);
370         return true;
371     }
372     else if (strcmp(name, "Array") == 0) {
373         process_variable(dods_array_c, inside_array, attrs, nb_attributes);
374         return true;
375     }
376     else if (strcmp(name, "Structure") == 0) {
377         process_variable(dods_structure_c, inside_structure, attrs, nb_attributes);
378         return true;
379     }
380     else if (strcmp(name, "Sequence") == 0) {
381         process_variable(dods_sequence_c, inside_sequence, attrs, nb_attributes);
382         return true;
383     }
384     else if (strcmp(name, "Grid") == 0) {
385         process_variable(dods_grid_c, inside_grid, attrs, nb_attributes);
386         return true;
387     }
388 
389     return false;
390 }
391 
finish_variable(const char * tag,Type t,const char * expected)392 void DDXParser::finish_variable(const char *tag, Type t, const char *expected)
393 {
394     if (strcmp(tag, expected) != 0) {
395         DDXParser::ddx_fatal_error(this,
396                                    "Expected an end tag for a %s; found '%s' instead.",
397                                    expected, tag);
398         return;
399     }
400 
401     pop_state();
402 
403     BaseType *btp = bt_stack.top();
404 
405     bt_stack.pop();
406     at_stack.pop();
407 
408     if (btp->type() != t) {
409         DDXParser::ddx_fatal_error(this,
410                                    "Internal error: Expected a %s variable.",
411                                    expected);
412         delete btp;
413         return;
414     }
415     // Once libxml2 validates, this can go away. 05/30/03 jhrg
416     if (t == dods_array_c
417         && static_cast<Array*>(btp)->dimensions() == 0) {
418         DDXParser::ddx_fatal_error(this,
419                                    "No dimension element included in the Array '%s'.",
420                                    btp->name().c_str());
421         delete btp;
422         return;
423     }
424 
425     BaseType *parent = bt_stack.top();
426 
427     if (!(parent->is_vector_type() || parent->is_constructor_type())) {
428         DDXParser::ddx_fatal_error(this,
429                                    "Tried to add the array variable '%s' to a non-constructor type (%s %s).",
430                                    tag,
431                                    bt_stack.top()->type_name().c_str(),
432                                    bt_stack.top()->name().c_str());
433         delete btp;
434         return;
435     }
436 
437     parent->add_var_nocopy(btp);
438 }
439 
440 /** @name SAX Parser Callbacks
441 
    These methods are declared static in the class header. Static member
    functions take no implicit 'this' argument, which allows them to be used
    as callbacks by the SAX parser engine. */
445 //@{
446 
447 /** Initialize the SAX parser state object. This object is passed to each
448     callback as a void pointer. The initial state is parser_start.
449 
450     @param p The SAX parser  */
ddx_start_document(void * p)451 void DDXParser::ddx_start_document(void * p)
452 {
453     DDXParser *parser = static_cast<DDXParser*>(p);
454     parser->error_msg = "";
455     parser->char_data = "";
456 
457     // init attr table stack.
458     parser->at_stack.push(&parser->dds->get_attr_table());
459 
460     // Trick; DDS *should* be a child of Structure. To simplify parsing,
461     // stuff a Structure on the bt_stack and dump the top level variables
462     // there. Once we're done, transfer the variables to the DDS.
463     parser->bt_stack.push(new Structure("dummy_dds"));
464 
465     parser->set_state(parser_start);
466 
467     DBG2(cerr << "Parser state: " << states[parser->get_state()] << endl);
468 }
469 
470 /** Clean up after finishing a parse.
471     @param p The SAX parser  */
ddx_end_document(void * p)472 void DDXParser::ddx_end_document(void * p)
473 {
474     DDXParser *parser = static_cast<DDXParser*>(p);
475     DBG2(cerr << "Ending state == " << states[parser->get_state()] <<
476          endl);
477 
478     if (parser->get_state() != parser_start)
479         DDXParser::ddx_fatal_error(parser, "The document contained unbalanced tags.");
480 
481     // If we've found any sort of error, don't make the DDX; intern() will
482     // take care of the error.
483     if (parser->get_state() == parser_error) {
484         return;
485     }
486 
487     // Pop the temporary Structure off the stack and transfer its variables
488     // to the DDS.
489     Constructor *cp = dynamic_cast < Constructor * >(parser->bt_stack.top());
490     if (!cp) {
491         delete parser->bt_stack.top();
492         parser->bt_stack.pop();
493     	ddx_fatal_error(parser, "Parse error: Expected a Structure, Sequence or Grid variable.");
494 		return;
495     }
496 
497     for (Constructor::Vars_iter i = cp->var_begin(); i != cp->var_end(); ++i) {
498         (*i)->set_parent(0);        // top-level vars have no parents
499         parser->dds->add_var(*i);
500     }
501 
502     delete parser->bt_stack.top();
503     parser->bt_stack.pop();
504 }
505 
ddx_sax2_start_element(void * p,const xmlChar * l,const xmlChar * prefix,const xmlChar * URI,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int,const xmlChar ** attributes)506 void DDXParser::ddx_sax2_start_element(void *p,
507         const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
508         int nb_namespaces, const xmlChar **namespaces,
509         int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
510 {
511     DDXParser *parser = static_cast<DDXParser*>(p);
512     const char *localname = (const char *)l;
513 
514     DBG2(cerr << "start element: " << localname << ", states: "
515          << states[parser->get_state()]);
516 
517     switch (parser->get_state()) {
518     case parser_start:
519         if (strcmp(localname, "Dataset") == 0) {
520             parser->set_state(inside_dataset);
521             parser->root_ns = URI != 0 ? (const char *)URI: "";
522             parser->transfer_xml_attrs(attributes, nb_attributes);
523 
524             if (parser->check_required_attribute(string("name")))
525                 parser->dds->set_dataset_name(parser->attribute_table["name"].value);
526 
527             if (parser->check_attribute("dapVersion"))
528                 parser->dds->set_dap_version(parser->attribute_table["dapVersion"].value);
529         }
530         else
531             DDXParser::ddx_fatal_error(parser,
532                                        "Expected response to start with a Dataset element; found '%s' instead.",
533                                        localname);
534         break;
535 
536     case inside_dataset:
537         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
538             break;
539         else if (parser->is_variable(localname, attributes, nb_attributes))
540             break;
541         else if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0) {
542             parser->process_blob(attributes, nb_attributes);
543             // next state: inside_data_blob
544         }
545         else
546             DDXParser::ddx_fatal_error(parser,
547                                        "Expected an Attribute, Alias or variable element; found '%s' instead.",
548                                        localname);
549         break;
550 
551     case inside_attribute_container:
552         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
553             break;
554         else
555             DDXParser::ddx_fatal_error(parser,
556                                        "Expected an Attribute or Alias element; found '%s' instead.",
557                                        localname);
558         break;
559 
560     case inside_attribute:
561         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
562             break;
563         else if (strcmp(localname, "value") == 0)
564             parser->set_state(inside_attribute_value);
565         else
566             ddx_fatal_error(parser,
567                             "Expected an 'Attribute', 'Alias' or 'value' element; found '%s' instead.",
568                             localname);
569         break;
570 
571     case inside_attribute_value:
572         ddx_fatal_error(parser,
573                         "Internal parser error; unexpected state, inside value while processing element '%s'.",
574                         localname);
575         break;
576 
577     case inside_other_xml_attribute:
578         DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname << endl);
579 
580         parser->other_xml_depth++;
581 
582         // Accumulate the elements here
583 
584         parser->other_xml.append("<");
585         if (prefix) {
586             parser->other_xml.append((const char *)prefix);
587             parser->other_xml.append(":");
588         }
589         parser->other_xml.append(localname);
590 
591         if (nb_namespaces != 0) {
592             parser->transfer_xml_ns(namespaces, nb_namespaces);
593 
594             for (map<string,string>::iterator i = parser->namespace_table.begin();
595                 i != parser->namespace_table.end();
596                 ++i) {
597                 parser->other_xml.append(" xmlns");
598                 if (!i->first.empty()) {
599                     parser->other_xml.append(":");
600                     parser->other_xml.append(i->first);
601                 }
602                 parser->other_xml.append("=\"");
603                 parser->other_xml.append(i->second);
604                 parser->other_xml.append("\"");
605             }
606         }
607 
608         if (nb_attributes != 0) {
609             parser->transfer_xml_attrs(attributes, nb_attributes);
610             for (XMLAttrMap::iterator i = parser->attr_table_begin();
611                 i != parser->attr_table_end();
612                 ++i) {
613                 parser->other_xml.append(" ");
614                 if (!i->second.prefix.empty()) {
615                     parser->other_xml.append(i->second.prefix);
616                     parser->other_xml.append(":");
617                 }
618                 parser->other_xml.append(i->first);
619                 parser->other_xml.append("=\"");
620                 parser->other_xml.append(i->second.value);
621                 parser->other_xml.append("\"");
622             }
623         }
624 
625         parser->other_xml.append(">");
626         break;
627 
628     case inside_alias:
629         ddx_fatal_error(parser,
630                         "Internal parser error; unexpected state, inside alias while processing element '%s'.",
631                         localname);
632         break;
633 
634     case inside_simple_type:
635         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
636             break;
637         else
638             ddx_fatal_error(parser,
639                             "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
640                             localname);
641         break;
642 
643     case inside_array:
644         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
645             break;
646         else if (is_not(localname, "Array")
647                 && parser->is_variable(localname, attributes, nb_attributes))
648             break;
649         else if (strcmp(localname, "dimension") == 0) {
650             parser->process_dimension(attributes, nb_attributes);
651             // next state: inside_dimension
652         }
653         else
654             ddx_fatal_error(parser,
655                             "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
656                             localname);
657         break;
658 
659     case inside_dimension:
660         ddx_fatal_error(parser,
661                         "Internal parser error; unexpected state, inside dimension while processing element '%s'.",
662                         localname);
663         break;
664 
665     case inside_structure:
666         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
667             break;
668         else if (parser->is_variable(localname, attributes, nb_attributes))
669             break;
670         else
671             DDXParser::ddx_fatal_error(parser,
672                                        "Expected an Attribute, Alias or variable element; found '%s' instead.",
673                                        localname);
674         break;
675 
676     case inside_sequence:
677         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
678             break;
679         else if (parser->is_variable(localname, attributes, nb_attributes))
680             break;
681         else
682             DDXParser::ddx_fatal_error(parser,
683                                        "Expected an Attribute, Alias or variable element; found '%s' instead.",
684                                        localname);
685         break;
686 
687     case inside_grid:
688         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
689             break;
690         else if (strcmp(localname, "Array") == 0)
691             parser->process_variable(dods_array_c, inside_array, attributes, nb_attributes);
692         else if (strcmp(localname, "Map") == 0)
693             parser->process_variable(dods_array_c, inside_map, attributes, nb_attributes);
694         else
695             DDXParser::ddx_fatal_error(parser,
696                                        "Expected an Attribute, Alias or variable element; found '%s' instead.",
697                                        localname);
698         break;
699 
700     case inside_map:
701         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
702             break;
703         else if (is_not(localname, "Array") && is_not(localname, "Sequence")
704                  && is_not(localname, "Grid")
705                  && parser->is_variable(localname, attributes, nb_attributes))
706             break;
707         else if (strcmp(localname, "dimension") == 0) {
708             parser->process_dimension(attributes, nb_attributes);
709             // next state: inside_dimension
710         }
711         else
712             ddx_fatal_error(parser,
713                             "Expected an 'Attribute', 'Alias', variable or 'dimension' element; found '%s' instead.",
714                             localname);
715         break;
716 
717     case inside_blob_href:
718         ddx_fatal_error(parser,
719                         "Internal parser error; unexpected state, inside blob href while processing element '%s'.",
720                         localname);
721         break;
722 
723     case parser_unknown:
724         // *** Never used? If so remove/error
725         parser->set_state(parser_unknown);
726         break;
727 
728     case parser_error:
729         break;
730     }
731 
732     DBGN(cerr << " ... " << states[parser->get_state()] << endl);
733 }
734 
ddx_sax2_end_element(void * p,const xmlChar * l,const xmlChar * prefix,const xmlChar * URI)735 void DDXParser::ddx_sax2_end_element(void *p, const xmlChar *l,
736         const xmlChar *prefix, const xmlChar *URI)
737 {
738     DDXParser *parser = static_cast<DDXParser*>(p);
739     const char *localname = (const char *)l;
740 
741     DBG2(cerr << "End element " << localname << " (state "
742          << states[parser->get_state()] << ")" << endl);
743 
744     switch (parser->get_state()) {
745     case parser_start:
746         ddx_fatal_error(parser,
747                         "Internal parser error; unexpected state, inside start state while processing element '%s'.",
748                         localname);
749         break;
750 
751     case inside_dataset:
752         if (strcmp(localname, "Dataset") == 0)
753             parser->pop_state();
754         else
755             DDXParser::ddx_fatal_error(parser,
756                                        "Expected an end Dataset tag; found '%s' instead.",
757                                        localname);
758         break;
759 
760     case inside_attribute_container:
761         if (strcmp(localname, "Attribute") == 0) {
762             parser->pop_state();
763             parser->at_stack.pop();     // pop when leaving a container.
764         }
765         else
766             DDXParser::ddx_fatal_error(parser,
767                                        "Expected an end Attribute tag; found '%s' instead.",
768                                        localname);
769         break;
770 
771     case inside_attribute:
772         if (strcmp(localname, "Attribute") == 0)
773             parser->pop_state();
774         else
775             DDXParser::ddx_fatal_error(parser,
776                                        "Expected an end Attribute tag; found '%s' instead.",
777                                        localname);
778         break;
779 
780     case inside_attribute_value:
781         if (strcmp(localname, "value") == 0) {
782             parser->pop_state();
783             AttrTable *atp = parser->at_stack.top();
784             atp->append_attr(parser->dods_attr_name,
785                              parser->dods_attr_type, parser->char_data);
786             parser->char_data = "";     // Null this after use.
787         }
788         else
789             DDXParser::ddx_fatal_error(parser,
790                                        "Expected an end value tag; found '%s' instead.",
791                                        localname);
792 
793         break;
794 
795     case inside_other_xml_attribute: {
796             if (strcmp(localname, "Attribute") == 0
797                     && parser->root_ns == (const char *)URI) {
798 
799                 DBGN(cerr << endl << "\t Popping the 'inside_other_xml_attribute' state"
800                         << endl);
801 
802                 parser->pop_state();
803 
804                 AttrTable *atp = parser->at_stack.top();
805                 atp->append_attr(parser->dods_attr_name,
806                         parser->dods_attr_type, parser->other_xml);
807 
808                 parser->other_xml = ""; // Null this after use.
809             }
810             else {
811                 DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname
812                         << ", depth: " << parser->other_xml_depth << endl);
813                 if (parser->other_xml_depth == 0)
814                     DDXParser::ddx_fatal_error(parser,
815                                                "Expected an OtherXML attribute to end! Instead I found '%s'",
816                                                localname);
817                 parser->other_xml_depth--;
818 
819                 parser->other_xml.append("</");
820                 if (prefix) {
821                     parser->other_xml.append((const char *)prefix);
822                     parser->other_xml.append(":");
823                 }
824                 parser->other_xml.append(localname);
825                 parser->other_xml.append(">");
826             }
827             break;
828         }
829         // Alias is busted in libdap++ 05/29/03 jhrg
830     case inside_alias:
831         parser->pop_state();
832         break;
833 
834     case inside_simple_type: {
835         Type t = get_type(localname);
836         if (is_simple_type(t)) {
837             parser->pop_state();
838             BaseType *btp = parser->bt_stack.top();
839             parser->bt_stack.pop();
840             parser->at_stack.pop();
841 
842             BaseType *parent = parser->bt_stack.top();
843 
844             if (parent->is_vector_type() || parent->is_constructor_type()) {
845                 parent->add_var(btp);
846                 delete btp;
847             }
848             else {
849                 DDXParser::ddx_fatal_error(parser,
850                                            "Tried to add the simple-type variable '%s' to a non-constructor type (%s %s).",
851                                            localname,
852                                            parser->bt_stack.top()->
853                                            type_name().c_str(),
854                                            parser->bt_stack.top()->name().
855                                            c_str());
856                 delete btp;
857             }
858         }
859         else {
860             DDXParser::ddx_fatal_error(parser,
861                                        "Expected an end tag for a simple type; found '%s' instead.",
862                                        localname);
863         }
864         break;
865     }
866 
867     case inside_array:
868         parser->finish_variable(localname, dods_array_c, "Array");
869         break;
870 
871     case inside_dimension:
872         if (strcmp(localname, "dimension") == 0)
873             parser->pop_state();
874         else
875             DDXParser::ddx_fatal_error(parser,
876                                        "Expected an end dimension tag; found '%s' instead.",
877                                        localname);
878         break;
879 
880     case inside_structure:
881         parser->finish_variable(localname, dods_structure_c, "Structure");
882         break;
883 
884     case inside_sequence:
885         parser->finish_variable(localname, dods_sequence_c, "Sequence");
886         break;
887 
888     case inside_grid:
889         parser->finish_variable(localname, dods_grid_c, "Grid");
890         break;
891 
892     case inside_map:
893         parser->finish_variable(localname, dods_array_c, "Map");
894         break;
895 
896     case inside_blob_href:
897         if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0)
898             parser->pop_state();
899         else
900             DDXParser::ddx_fatal_error(parser,
901                                        "Expected an end dataBLOB/blob tag; found '%s' instead.",
902                                        localname);
903         break;
904 
905     case parser_unknown:
906         parser->pop_state();
907         break;
908 
909     case parser_error:
910         break;
911     }
912 
913 
914     DBGN(cerr << " ... " << states[parser->get_state()] << endl);
915 }
916 
917 /** Process/accumulate character data. This may be called more than once for
918     one logical clump of data. Only save character data when processing
919     'value' elements; throw away all other characters. */
ddx_get_characters(void * p,const xmlChar * ch,int len)920 void DDXParser::ddx_get_characters(void * p, const xmlChar * ch, int len)
921 {
922     DDXParser *parser = static_cast<DDXParser*>(p);
923 
924     switch (parser->get_state()) {
925         case inside_attribute_value:
926             parser->char_data.append((const char *)(ch), len);
927             DBG2(cerr << "Characters: '" << parser->char_data << "'" << endl);
928             break;
929 
930         case inside_other_xml_attribute:
931             parser->other_xml.append((const char *)(ch), len);
932             DBG2(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
933             break;
934 
935         default:
936             break;
937     }
938 }
939 
940 /** Read whitespace that's not really important for content. This is used
941     only for the OtherXML attribute type to preserve formating of the XML.
942     Doing so makes the attribute value far easier to read.
943  */
ddx_ignoreable_whitespace(void * p,const xmlChar * ch,int len)944 void DDXParser::ddx_ignoreable_whitespace(void *p, const xmlChar *ch,
945         int len)
946 {
947     DDXParser *parser = static_cast<DDXParser*>(p);
948 
949     switch (parser->get_state()) {
950          case inside_other_xml_attribute:
951              parser->other_xml.append((const char *)(ch), len);
952              break;
953 
954          default:
955              break;
956     }
957 }
958 
959 /** Get characters in a cdata block. DAP does not use CData, but XML in an
960     OtherXML attribute (the value of that DAP attribute) might use it. This
961     callback also allows CData when the parser is in the 'parser_unknown'
962     state since some future DAP element might use it.
963  */
ddx_get_cdata(void * p,const xmlChar * value,int len)964 void DDXParser::ddx_get_cdata(void *p, const xmlChar *value, int len)
965 {
966     DDXParser *parser = static_cast<DDXParser*>(p);
967 
968     switch (parser->get_state()) {
969          case inside_other_xml_attribute:
970              parser->other_xml.append((const char *)(value), len);
971              break;
972 
973          case parser_unknown:
974              break;
975 
976          default:
977              DDXParser::ddx_fatal_error(parser,
978                                         "Found a CData block but none are allowed by DAP.");
979 
980              break;
981     }
982 }
983 
984 /** Handle the standard XML entities.
985 
986     @param parser The SAX parser
987     @param name The XML entity. */
ddx_get_entity(void *,const xmlChar * name)988 xmlEntityPtr DDXParser::ddx_get_entity(void *, const xmlChar * name)
989 {
990     return xmlGetPredefinedEntity(name);
991 }
992 
993 /** Process an XML fatal error. Note that SAX provides for warnings, errors
994     and fatal errors. This code treats them all as fatal errors since there's
995     typically no way to tell a user about the error since there's often no
996     user interface for this software.
997 
998     @param p The SAX parser
999     @param msg A printf-style format string. */
ddx_fatal_error(void * p,const char * msg,...)1000 void DDXParser::ddx_fatal_error(void * p, const char *msg, ...)
1001 {
1002     va_list args;
1003     DDXParser *parser = static_cast<DDXParser*>(p);
1004 
1005     parser->set_state(parser_error);
1006 
1007     va_start(args, msg);
1008     char str[1024];
1009     vsnprintf(str, 1024, msg, args);
1010     va_end(args);
1011 
1012     int line = xmlSAX2GetLineNumber(parser->ctxt);
1013 
1014     parser->error_msg += "At line " + long_to_string(line) + ": ";
1015     parser->error_msg += string(str) + string("\n");
1016 }
1017 
1018 //@}
1019 
cleanup_parse(xmlParserCtxtPtr & context)1020 void DDXParser::cleanup_parse(xmlParserCtxtPtr & context)
1021 {
1022     bool wellFormed = context->wellFormed;
1023     bool valid = context->valid;
1024 
1025     context->sax = NULL;
1026     xmlFreeParserCtxt(context);
1027 
1028     // If there's an error, there may still be items on the stack at the
1029     // end of the parse.
1030     while (!bt_stack.empty()) {
1031         delete bt_stack.top();
1032         bt_stack.pop();
1033     }
1034 
1035     if (!wellFormed) {
1036         throw DDXParseFailed(string("The DDX is not a well formed XML document.\n") + error_msg);
1037     }
1038 
1039     if (!valid) {
1040         throw DDXParseFailed(string("The DDX is not a valid document.\n") + error_msg);
1041     }
1042 
1043     if (get_state() == parser_error) {
1044         throw DDXParseFailed(string("Error parsing DDX response.\n") + error_msg);
1045     }
1046 }
1047 
1048 /** Read a DDX from a C++ input stream and populate a DDS object.
1049  *
1050  * @param in
1051  * @param dds
1052  * @param cid
1053  * @param boundary
1054  */
intern_stream(istream & in,DDS * dest_dds,string & cid,const string & boundary)1055 void DDXParser::intern_stream(istream &in, DDS *dest_dds, string &cid, const string &boundary)
1056 {
1057     // Code example from libxml2 docs re: read from a stream.
1058     if (!in || in.eof())
1059         throw InternalErr(__FILE__, __LINE__, "Input stream not open or read error");
1060 
1061     const int size = 1024;
1062     char chars[size + 1];
1063 
1064     // int res = fread(chars, 1, 4, in);
1065     in.read(chars, 4);
1066     int res = in.gcount();
1067     if (res > 0) {
1068         chars[4]='\0';
1069         xmlParserCtxtPtr context = xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
1070 
1071         if (!context)
1072             throw DDXParseFailed("Error parsing DDX response: Input does not look like XML");
1073 
1074         ctxt = context;         // need ctxt for error messages
1075         dds = dest_dds;         // dump values here
1076         blob_href = &cid; 	// cid goes here
1077 
1078         xmlSAXHandler ddx_sax_parser;
1079         memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1080 
1081         ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1082         ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1083         ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1084         ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1085         ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1086         ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1087         ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1088         ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1089         ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1090         ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1091         ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1092         ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1093 
1094         context->sax = &ddx_sax_parser;
1095         context->userData = this;
1096         context->validate = true;
1097 
1098         in.getline(chars, size);	// chars has size+1 elements
1099         res = in.gcount();
1100         chars[res-1] = '\n';		// libxml needs the newline; w/o it the parse will fail
1101         chars[res] = '\0';
1102         while (res > 0 && !is_boundary(chars, boundary)) {
1103         	DBG(cerr << "line (" << res << "): " << chars << endl);
1104         	xmlParseChunk(ctxt, chars, res, 0);
1105 
1106         	in.getline(chars, size);	// chars has size+1 elements
1107         	res = in.gcount();
1108         	if (res > 0) {
1109         		chars[res-1] = '\n';
1110         		chars[res] = '\0';
1111         	}
1112         }
1113 
1114         // This call ends the parse: The fourth argument of xmlParseChunk is
1115         // the bool 'terminate.'
1116         xmlParseChunk(ctxt, chars, 0, 1);
1117 
1118         cleanup_parse(context);
1119     }
1120     else {
1121         throw DDXParseFailed("Error parsing DDX response: Could not read from input stream.");
1122     }
1123 }
1124 
1125 /** @brief Read the DDX from a stream instead of a file.
1126     @see DDXParser::intern(). */
intern_stream(FILE * in,DDS * dest_dds,string & cid,const string & boundary)1127 void DDXParser::intern_stream(FILE *in, DDS *dest_dds, string &cid, const string &boundary)
1128 {
1129     // Code example from libxml2 docs re: read from a stream.
1130     if (!in || feof(in) || ferror(in))
1131         throw InternalErr(__FILE__, __LINE__, "Input stream not open or read error");
1132 
1133     const int size = 1024;
1134     char chars[size];
1135 
1136     int res = fread(chars, 1, 4, in);
1137     if (res > 0) {
1138         chars[4]='\0';
1139         xmlParserCtxtPtr context = xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
1140 
1141         if (!context)
1142             throw DDXParseFailed("Error parsing DDX response: Input does not look like XML");
1143 
1144         ctxt = context;         // need ctxt for error messages
1145         dds = dest_dds;         // dump values here
1146         blob_href = &cid; 	// cid goes here
1147 
1148         xmlSAXHandler ddx_sax_parser;
1149         memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1150 
1151         ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1152         ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1153         ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1154         ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1155         ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1156         ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1157         ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1158         ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1159         ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1160         ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1161         ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1162         ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1163 
1164         context->sax = &ddx_sax_parser;
1165         context->userData = this;
1166         context->validate = true;
1167 
1168 
1169         while ((fgets(chars, size, in) != 0) && !is_boundary(chars, boundary)) {
1170             DBG(cerr << "line (" << strlen(chars) << "): " << chars << endl);
1171             xmlParseChunk(ctxt, chars, strlen(chars), 0);
1172         }
1173         // This call ends the parse: The fourth argument of xmlParseChunk is
1174         // the bool 'terminate.'
1175         xmlParseChunk(ctxt, chars, 0, 1);
1176 
1177         cleanup_parse(context);
1178     }
1179     else {
1180         throw DDXParseFailed("Error parsing DDX response: Could not read from input file.");
1181     }
1182 }
1183 
1184 
1185 /** Parse a DDX document stored in a file. The XML in the document is parsed
1186     and a binary DDX is built. This implementation stores the result in a DDS
1187     object where each instance of BaseType can hold an AttrTable object.
1188 
1189     @param document Read the DDX from this file.
1190     @param dest_dds Value/result parameter; dumps the information to this DDS
1191     instance.
1192     @param cid Value/result parameter; puts the href which references the \c
1193     CID.
1194     @exception DDXParseFailed Thrown if the XML document could not be
1195     read or parsed. */
intern(const string & document,DDS * dest_dds,string & cid)1196 void DDXParser::intern(const string & document, DDS * dest_dds, string &cid)
1197 {
1198     // Create the context pointer explicitly so that we can store a pointer
1199     // to it in the DDXParser instance. This provides a way to generate our
1200     // own error messages *with* line numbers. The messages are pretty
1201     // meaningless otherwise. This means that we use an interface from the
1202     // 'parser internals' header, and not the 'parser' header. However, this
1203     // interface is also used in one of the documented examples, so it's
1204     // probably pretty stable. 06/02/03 jhrg
1205     xmlParserCtxtPtr context = xmlCreateFileParserCtxt(document.c_str());
1206     if (!context)
1207         throw
1208         DDXParseFailed(string
1209                        ("Could not initialize the parser with the file: '")
1210                        + document + string("'."));
1211 
1212     dds = dest_dds;             // dump values here
1213     blob_href = &cid;
1214     ctxt = context;             // need ctxt for error messages
1215 
1216     xmlSAXHandler ddx_sax_parser;
1217     memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1218 
1219     ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1220     ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1221     ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1222     ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1223     ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1224     ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1225     ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1226     ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1227     ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1228     ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1229     ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1230     ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1231 
1232     context->sax = &ddx_sax_parser;
1233     context->userData = this;
1234     context->validate = false;
1235 
1236     xmlParseDocument(context);
1237 
1238     cleanup_parse(context);
1239 }
1240 
1241 } // namespace libdap
1242