1 /**
2   This file defines the top-level entry routine called from R and S to parse and convert
3   a DTD into a user-level object.
4 
5   Most of the routines are support routines. We leave them as global symbols (as opposed to static) so that others might be
6   able to utilize them. Some are called from the other files (DocParse, specifically).
7 
8 
9  * See Copyright for the license status of this software.
10 
11  */
12 
13 #include "RSDTD.h"
14 
15 #ifdef USE_S
16 extern char *strdup(const char *);
17 #endif
18 
19 
20 
21 #include "Utils.h" /* for SET_CLASS_NAME */
22 
23 #include <sys/stat.h>
24 
25 /* For reading DTDs directly from text, not files.
26    Copied directly from parser.c in the libxml(-1.7.3) library.
27 */
28 
29 #ifdef FROM_GNOME_XML_DIR
30 #include <gnome-xml/parserInternals.h>
31 #else
32 #include <libxml/parserInternals.h>
33 #endif
34 
35 
/* Number of bytes requested whenever the OLD_SKIP_BLANKS variant of NEXT
   grows the parser input buffer. */
#define INPUT_CHUNK	250
/* The current character: the pending ctxt->token if it is non-zero, otherwise
   the byte under the input cursor. Requires a variable named ctxt in scope. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))

#ifdef OLD_SKIP_BLANKS

/* Hand-written SKIP_BLANKS: advance past blanks with NEXT, give the parser a
   chance to handle a '%' or '&' reference at the cursor, and repeat for as long
   as the current character is still a blank. Note that the expansion already
   ends with a semicolon. */
#define SKIP_BLANKS 							\
    do { 								\
	while (IS_BLANK(CUR)) NEXT;					\
	if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
	if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    } while (IS_BLANK(CUR));

/* Hand-written NEXT: discard a pending token if there is one; otherwise step
   the cursor forward one byte, maintaining the line/column counters, growing
   the buffer by INPUT_CHUNK when the terminating 0 byte is reached and popping
   the input when it cannot grow. Afterwards '%' and '&' at the cursor are
   passed to the reference handlers. */
#define NEXT {								\
    if (ctxt->token != 0) ctxt->token = 0;				\
    else {								\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
	    xmlPopInput(ctxt);						\
    } else {								\
        if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
}}

#else

/* Default: delegate to the library routines. Both expect ctxt in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
#define NEXT xmlNextChar(ctxt)

#endif
72 
73 
74 /* end temporary. */
75 
76 
77 
/* Macro that sets the name of an enumerated value by indexing into an array of
   names based on the value being represented.

     names - suffix of the name table; RS_XML_<names> is the array that is indexed.
     which - the 1-based enumerated value; entry (which - 1) of the table is used.
             The caller is responsible for keeping it within the table.
     obj   - the object that receives the single name.

   The expansion is a plain expression with no trailing semicolon, so that
   `SET_ENUM_NAME(...);' is exactly one statement and is safe inside an
   unbraced if/else.
 */
#define SET_ENUM_NAME(names, which, obj) RS_XML_SetNames(1, RS_XML_##names + (which) - 1, (obj))
82 
83 
84 
85 enum {DTD_ELEMENTS_SLOT, DTD_ENTITIES_SLOT, DTD_NUM_SLOTS};
86 const char *RS_XML(DtdNames)[] = {"elements", "entities"};
87 /**
88   Top-level entry point for reading the DTD.
89    dtdFileName - name of the DTD.
90    externalId  - file identfying the DTD from which its contents are read.
91  */
92 USER_OBJECT_
RS_XML(getDTD)93 RS_XML(getDTD)(USER_OBJECT_ dtdFileName, USER_OBJECT_ externalId,
94   	       USER_OBJECT_ asText, USER_OBJECT_ isURL, USER_OBJECT_ errorFun)
95 {
96  USER_OBJECT_ ans;
97  const char * dtdName = strdup(CHAR_DEREF(STRING_ELT(dtdFileName, 0)));
98  const char * extId = strdup(CHAR_DEREF(STRING_ELT(externalId, 0)));
99  int localAsText = LOGICAL_DATA(asText)[0];
100  xmlParserCtxtPtr ctxt;
101  xmlDtdPtr        dtd;
102 
103 
104  if(localAsText) {
105      ctxt = xmlCreateDocParserCtxt((xmlChar*) extId);
106  } else {
107      if(LOGICAL_DATA(isURL)[0] == 0) {
108 	 struct stat tmp_stat;
109 	 if(extId == NULL || stat(extId, &tmp_stat) < 0) {
110              Rf_error("Can't find file %s", extId);
111 	 }
112      }
113 
114       ctxt = xmlCreateFileParserCtxt(extId);  /* from parser.c xmlSAXParseFile */
115  }
116 
117  if(ctxt == NULL) {
118      Rf_error("error creating XML parser for `%s'", extId);
119  }
120 
121   ctxt->validate = 1;
122 
123 #ifdef RS_XML_SET_STRUCTURED_ERROR  /* Done in R code now. */
124   xmlSetStructuredErrorFunc(errorFun == NULL_USER_OBJECT ? NULL : errorFun, R_xmlStructuredErrorHandler);
125 #endif
126 
127   if(ctxt->myDoc == NULL)
128     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
129 
130 
131   if(localAsText) {
132     xmlCreateIntSubset(ctxt->myDoc, CHAR_TO_XMLCHAR(dtdName), NULL, NULL);
133     while(ctxt->input->cur && ctxt->input->cur[0]) {
134       SKIP_BLANKS;
135       xmlParseMarkupDecl(ctxt);
136     }
137     dtd = ctxt->myDoc->intSubset;
138   }  else {
139        /* Read the file. */
140        /* Added for 2.2.12. May need to be conditional for 1.8.9 */
141     ctxt->sax->internalSubset(ctxt->userData, CHAR_TO_XMLCHAR(dtdName), CHAR_TO_XMLCHAR(extId), CHAR_TO_XMLCHAR(extId));
142        /* Warnings will ensue about not being in internal subset if we don't go to level 2. */
143 #ifdef USE_EXTERNAL_SUBSET
144     ctxt->inSubset = 2;
145     ctxt->sax->externalSubset(ctxt->userData, CHAR_TO_XMLCHAR(dtdName), CHAR_TO_XMLCHAR(extId), CHAR_TO_XMLCHAR(extId));
146     ctxt->inSubset = 0;
147 #endif
148     dtd = ctxt->myDoc->extSubset;
149   }
150 
151 #ifdef RS_XML_SET_STRUCTURED_ERROR
152   xmlSetStructuredErrorFunc(NULL, NULL);
153 #endif
154 
155   if(dtd == NULL) {
156       if(errorFun != NULL_USER_OBJECT) {
157         RSXML_structuredStop(errorFun, NULL);
158       } else
159         return(stop("DTDParseError", "error parsing %s", dtdName));
160 
161       Rf_error("error in DTD %s", extId);
162   }
163 
164   if(localAsText) {
165       /* Don't bother with the internal and external split, just do the internal and return it. */
166     ans = RS_XML(createDTDParts)(dtd, ctxt);
167   } else
168     ans = RS_XML(ConstructDTDList)(ctxt->myDoc, 0, ctxt);
169 
170   return(ans);
171 }
172 
173 
174 const char *RS_XML(DtdTypeNames)[] = {"external", "internal"};
175 /**
176   Create the representation of the DTD contained in the Document pointer,
177   using both the internal and external descriptions and returning a list
178   of the appropriate length. If the external description is empty, then we just
179   return the description of the internal description. Otherwise, we return a named
180   list of length 2 containing descriptions of both.
181  */
182 USER_OBJECT_
RS_XML(ConstructDTDList)183 RS_XML(ConstructDTDList)(xmlDocPtr myDoc, int processInternals, xmlParserCtxtPtr ctxt)
184 {
185  USER_OBJECT_ ans, el, klass;
186  int i;
187  xmlDtdPtr sets[2];
188 
189  int num = processInternals ? 2  : 1;
190  sets[0] = myDoc->extSubset;
191 
192  if(processInternals) {
193    sets[1] = myDoc->intSubset;
194  }
195 
196   PROTECT(ans = NEW_LIST(num));
197   for(i = 0; i < num; i++) {
198     if(sets[i]) {
199       SET_VECTOR_ELT(ans, i, el= RS_XML(createDTDParts)(sets[i], ctxt));
200       PROTECT(klass = NEW_CHARACTER(1));
201       SET_STRING_ELT(klass, 0, mkChar(i==0 ? "ExternalDTD" : "InternalDTD"));
202       SET_CLASS(el, klass);
203       UNPROTECT(1);
204     }
205   }
206    RS_XML(SetNames)(num, RS_XML(DtdTypeNames), ans);
207 
208    UNPROTECT(1);
209 
210 
211  return(processInternals ? ans : VECTOR_ELT(ans, 0));
212 }
213 
214 /**
215   Process the entities and elements of the DTD, returning a list
216   of length 2, irrespective if either is empty.
217  */
218 USER_OBJECT_
RS_XML(createDTDParts)219 RS_XML(createDTDParts)(xmlDtdPtr dtd,  xmlParserCtxtPtr ctxt)
220 {
221  xmlEntitiesTablePtr entities;
222  xmlElementTable *table;
223   USER_OBJECT_ ans;
224   PROTECT(ans = NEW_LIST(DTD_NUM_SLOTS));
225    table = (xmlElementTable*) dtd->elements;
226    if(table)
227       SET_VECTOR_ELT(ans, DTD_ELEMENTS_SLOT,  RS_XML(ProcessElements)(table, ctxt));
228 
229 
230   entities = (xmlEntitiesTablePtr) dtd->entities;
231   if(entities)
232      SET_VECTOR_ELT(ans, DTD_ENTITIES_SLOT, RS_XML(ProcessEntities)(entities, ctxt));
233 
234    RS_XML(SetNames)(DTD_NUM_SLOTS, RS_XML(DtdNames), ans);
235 
236    UNPROTECT(1);
237  return(ans);
238 }
239 
240 
#ifdef LIBXML2
/* State shared between ProcessElements()/ProcessEntities() and the callbacks
   they pass to xmlHashScan(). */
struct ElementTableScanner {
  USER_OBJECT_ dtdEls;    /* list receiving one converted object per table entry */
  USER_OBJECT_ dtdNames;  /* character vector receiving the entry names */
  int counter;            /* index of the next slot to fill; also the number filled so far */
};

/* The name argument of the scanner callbacks is declared const from
   libxml2 version 20908 onwards. */
#if LIBXML_VERSION >= 20908
# define CONST const
#else
# define CONST
#endif

/* Prototypes for the two scanner callbacks. Their return type is void* unless
   NO_XML_HASH_SCANNER_RETURN is defined, in which case it is void. */
#ifndef NO_XML_HASH_SCANNER_RETURN
void *RS_xmlElementTableConverter(void *payload, void *data, CONST xmlChar *name);
void* RS_xmlEntityTableConverter(void *payload, void *data, CONST xmlChar *name);
#else
void RS_xmlElementTableConverter(void *payload, void *data, CONST xmlChar *name);
void RS_xmlEntityTableConverter(void *payload, void *data, CONST xmlChar *name);
#endif


#endif
264 
265 /**
266  Convert the elements into a named list of objects with each element
267  representing an element.
268  */
269 USER_OBJECT_
RS_XML(ProcessElements)270 RS_XML(ProcessElements)(xmlElementTablePtr table, xmlParserCtxtPtr ctxt)
271 {
272  USER_OBJECT_ dtdEls = NULL_USER_OBJECT;
273  int n;
274 #ifdef LIBXML2
275  n = xmlHashSize(table);
276 #else
277  int i;
278  xmlElementPtr xmlEl;
279  n = table->nb_elements;
280 #endif
281 
282   if(n > 0) {
283     USER_OBJECT_ dtdNames = NULL_USER_OBJECT;
284 
285     PROTECT_INDEX ipx;
286     PROTECT_WITH_INDEX(dtdEls = NEW_LIST(n), &ipx);
287     PROTECT(dtdNames = NEW_CHARACTER(n));
288 #ifdef LIBXML2
289  {
290    struct ElementTableScanner scanData;
291    scanData.dtdEls = dtdEls;
292    scanData.dtdNames = dtdNames;
293    scanData.counter = 0;
294 
295    xmlHashScan(table, RS_xmlElementTableConverter, &scanData);
296 
297    SET_LENGTH(dtdEls, scanData.counter);
298    REPROTECT(dtdEls, ipx);
299    SET_LENGTH(dtdNames, scanData.counter);
300  }
301 #else
302       for(i = 0; i < n; i++) {
303      	xmlEl = table->table[i];
304      	SET_VECTOR_ELT(dtdEls, i, RS_XML(createDTDElement)(xmlEl));
305      	SET_STRING_ELT(dtdNames , i, COPY_TO_USER_STRING(xmlEl->name));
306       }
307 #endif
308     SET_NAMES(dtdEls, dtdNames);
309     UNPROTECT(2);
310   }
311 
312   return(dtdEls);
313 }
314 
#ifdef LIBXML2
/* Callback handed to xmlHashScan() by ProcessElements(): converts one element
   declaration (payload) and stores it, together with its name, in the next free
   slot of the ElementTableScanner passed as data.

   libxml2 2.4.21 (and perhaps earlier) declares the scanner type as returning
   void rather than void*; NO_XML_HASH_SCANNER_RETURN selects the void form.
*/
#ifndef NO_XML_HASH_SCANNER_RETURN
void*
#else
void
#endif
RS_xmlElementTableConverter(void *payload, void *data, CONST xmlChar *name)
{
  struct ElementTableScanner *scan = (struct ElementTableScanner *) data;
  int slot = scan->counter;

  SET_VECTOR_ELT(scan->dtdEls, slot, RS_XML(createDTDElement)( payload));
  SET_STRING_ELT(scan->dtdNames, slot, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(name)));

  scan->counter = slot + 1;
#ifndef NO_XML_HASH_SCANNER_RETURN
  return(payload);
#endif
}
#endif
338 
339 /**
340   Process the list of entities and convert them into a named list containing
341   entity descriptions.
342  */
343 USER_OBJECT_
RS_XML(ProcessEntities)344 RS_XML(ProcessEntities)(xmlEntitiesTablePtr table, xmlParserCtxtPtr ctxt)
345 {
346  USER_OBJECT_ dtdEls = NULL_USER_OBJECT;
347  int n;
348 #ifdef LIBXML2
349  n = xmlHashSize(table);
350 #else
351  xmlEntity *xmlEl;
352  int i;
353  n = table->nb_entities;
354 #endif
355   if(n > 0) {
356     USER_OBJECT_ dtdNames;
357 
358     PROTECT_INDEX ipx;
359     PROTECT_WITH_INDEX(dtdEls = NEW_LIST(n), &ipx);
360     PROTECT(dtdNames = NEW_CHARACTER(n));
361 
362 #ifdef LIBXML2
363  {
364    struct ElementTableScanner scanData;
365    scanData.dtdEls = dtdEls;
366    scanData.dtdNames = dtdNames;
367    scanData.counter = 0;
368 
369    xmlHashScan(table, RS_xmlEntityTableConverter, &scanData);
370      /* Reset the length to be the actual number rather than the
371         capacity of the table.
372         See ProcessElements also.
373       */
374 
375    SET_LENGTH(dtdEls, scanData.counter);
376    REPROTECT(dtdEls, ipx);
377    SET_LENGTH(dtdNames, scanData.counter);
378 
379  }
380 #else
381       for(i = 0; i < n; i++) {
382      	xmlEl = table->table +i;
383      	SET_VECTOR_ELT(dtdEls, i, RS_XML(createDTDEntity)(xmlEl));
384      	SET_STRING_ELT(dtdNames, i, COPY_TO_USER_STRING(xmlEl->name));
385       }
386 #endif
387     SET_NAMES(dtdEls, dtdNames);
388     UNPROTECT(2);
389   }
390 
391   return(dtdEls);
392 }
393 
#ifdef LIBXML2

/* Callback handed to xmlHashScan() by ProcessEntities(): converts one entity
   (payload) and stores it, together with its name, in the next free slot of
   the ElementTableScanner passed as data. */
#ifndef NO_XML_HASH_SCANNER_RETURN
void*
#else
void
#endif
RS_xmlEntityTableConverter(void *payload, void *data, CONST xmlChar *name)
{
  struct ElementTableScanner *scan = (struct ElementTableScanner *) data;
  int slot = scan->counter;

  SET_VECTOR_ELT(scan->dtdEls, slot, RS_XML(createDTDEntity)( payload));
  SET_STRING_ELT(scan->dtdNames, slot, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(name)));

  scan->counter = slot + 1;
#ifndef NO_XML_HASH_SCANNER_RETURN
  return(payload);
#endif
}
#endif /* End of LIBXML2 for defining RS_xmlEntityTableConverter */
414 
415 /**
416   Convert an entity definition into a user-level object, handling both internal and system entities.
417 
418   We could have different slots for the two types of entities, but that may make it harder to program.
419   S3/R classes aren't exactly good with inheritance of slots.
420  */
421 
422 /**
423   Indices for the slots of the user-level list representing the entity.
424  */
425 enum { DTD_ENTITY_NAME_SLOT,  DTD_ENTITY_CONTENT_SLOT, DTD_ENTITY_ORIG_SLOT, DTD_ENTITY_NUM_SLOTS};
426 /*
427   Names for the slots of the user-level list representing the entity.
428 */
429 const char *RS_XML(EntityNames)[] = {"name", "value", "original"};
430 
431 USER_OBJECT_
RS_XML(createDTDEntity)432 RS_XML(createDTDEntity)(xmlEntityPtr entity)
433 {
434   USER_OBJECT_ ans;
435   const xmlChar *value;
436   const char *localClassName;
437 
438   PROTECT(ans = NEW_LIST(DTD_ENTITY_NUM_SLOTS));
439 
440   SET_VECTOR_ELT(ans, DTD_ENTITY_NAME_SLOT, NEW_CHARACTER(1));
441   SET_STRING_ELT(VECTOR_ELT(ans, DTD_ENTITY_NAME_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(entity->name)));
442 
443    if(entity->content == NULL) {
444      value = entity->SystemID;
445      localClassName = "XMLSystemEntity";
446    } else {
447      value = entity->content;
448      localClassName = "XMLEntity";
449    }
450 
451   SET_VECTOR_ELT(ans, DTD_ENTITY_CONTENT_SLOT, NEW_CHARACTER(1));
452    SET_STRING_ELT(VECTOR_ELT(ans, DTD_ENTITY_CONTENT_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(value)));
453 
454    if(entity->orig) {
455      SET_VECTOR_ELT(ans, DTD_ENTITY_ORIG_SLOT, NEW_CHARACTER(1));
456       SET_STRING_ELT(VECTOR_ELT(ans, DTD_ENTITY_ORIG_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(entity->orig)));
457    }
458 
459    RS_XML(SetNames)(DTD_ENTITY_NUM_SLOTS, RS_XML(EntityNames), ans);
460 
461    /* Set the class of the specified object based on whether it is a internal or external entity. */
462    SET_CLASS_NAME(localClassName, ans);
463 
464    UNPROTECT(1);
465 
466   return(ans);
467 }
468 
469 
470 
471 
472 
473 
474 enum { DTD_ELEMENT_NAME_SLOT, DTD_ELEMENT_TYPE_SLOT, DTD_ELEMENT_CONTENT_SLOT, DTD_ELEMENT_ATTRIBUTES_SLOT, DTD_ELEMENT_NUM_SLOTS};
475 const char *RS_XML(ElementNames)[] = {"name", "type", "contents","attributes"};
476 const char *RS_XML(ElementTypeNames)[] = {"empty", "any", "mixed","element"};
477 
478 /**
479   Creates the user-level object representing the definition of an element within a DTD,
480   including its attribute definitions, its type, name and finally contents.
481   This is an object of class XMLElementDef.
482  */
483 USER_OBJECT_
RS_XML(createDTDElement)484 RS_XML(createDTDElement)(xmlElementPtr el)
485 {
486  USER_OBJECT_ rel;
487  int type;
488 
489 #ifdef XML_ELEMENT_ETYPE
490  type = el->etype;
491 #else
492  type = el->type;
493 #endif
494 
495    PROTECT(rel =  NEW_LIST(DTD_ELEMENT_NUM_SLOTS));
496 
497  SET_VECTOR_ELT(rel, DTD_ELEMENT_NAME_SLOT, NEW_CHARACTER(1));
498  SET_STRING_ELT(VECTOR_ELT(rel, DTD_ELEMENT_NAME_SLOT), 0, COPY_TO_USER_STRING( XMLCHAR_TO_CHAR( ( el->name ? el->name : (xmlChar*)""))));
499 
500  SET_VECTOR_ELT(rel, DTD_ELEMENT_TYPE_SLOT, NEW_INTEGER(1));
501  INTEGER_DATA(VECTOR_ELT(rel, DTD_ELEMENT_TYPE_SLOT))[0] = el->type;
502  SET_ENUM_NAME(ElementTypeNames, type, VECTOR_ELT(rel, DTD_ELEMENT_TYPE_SLOT));
503 
504 
505  if(el->content != NULL)
506    SET_VECTOR_ELT(rel, DTD_ELEMENT_CONTENT_SLOT, RS_XML(createDTDElementContents)(el->content, el, 1));
507 
508  SET_VECTOR_ELT(rel, DTD_ELEMENT_ATTRIBUTES_SLOT, RS_XML(createDTDElementAttributes)(el->attributes, el));
509 
510  RS_XML(SetNames)(DTD_ELEMENT_NUM_SLOTS, RS_XML(ElementNames), rel);
511 
512  SET_CLASS_NAME("XMLElementDef", rel);
513 
514  UNPROTECT(1);
515  return(rel);
516 }
517 
518 
519 /* Indices for the slots/elements in the list. */
520 enum {DTD_CONTENT_TYPE_SLOT, DTD_CONTENT_OCCURANCE_SLOT, DTD_CONTENT_ELEMENTS_SLOT, DTD_CONTENT_NUM_SLOTS};
521 /* names for the elements */
522 const char *RS_XML(ContentNames)[] = {"type", "ocur", "elements"};
523 /* Names for the enumerated types of the entries in the data */
524 const char *RS_XML(ContentTypeNames)[] = {"PCData", "Element", "Sequence","Or"};
525 const char *RS_XML(OccuranceNames)[] = {"Once", "Zero or One", "Mult","One or More"};
526 
527 /**
528 
529   Create an object representing the DTD element. The returned value is a list
530   with 3 elements. The names are given by the array ContentNames above.  The
531   type and ocur elements are simple named integers identifying that the element
532   is simple parsed character data, an element or a composite element which is
533   either an one of several possible types (that is an OR or |) or an ordered
534   sequence of types. The ocur field indicates whether this element is to be
535   expected in this position exactly once (default qualifier), zero or one
536   (i.e. optional) (?) , any number of times (including omitted) (*) and finally
537   , at least once, but possible more(+)
538 
539 
540   The recursive argument allows the RS_XML(SequenceContent) routine  to use part of this
541   routine.
542 
543 */
544 
545 USER_OBJECT_
RS_XML(createDTDElementContents)546 RS_XML(createDTDElementContents)(xmlElementContentPtr vals, xmlElementPtr el, int recursive)
547 {
548   char *localClassName = NULL;
549   int num = 0;
550   USER_OBJECT_ ans = NULL_USER_OBJECT;
551 
552   PROTECT(ans = NEW_LIST(DTD_CONTENT_NUM_SLOTS));
553 
554   SET_VECTOR_ELT(ans, DTD_CONTENT_TYPE_SLOT, NEW_INTEGER(1));
555   INTEGER_DATA(VECTOR_ELT(ans, DTD_CONTENT_TYPE_SLOT))[0] = vals->type;
556   SET_ENUM_NAME(ContentTypeNames, vals->type, VECTOR_ELT(ans, DTD_CONTENT_TYPE_SLOT));
557 
558   SET_VECTOR_ELT(ans, DTD_CONTENT_OCCURANCE_SLOT, NEW_INTEGER(1));
559   INTEGER_DATA(VECTOR_ELT(ans, DTD_CONTENT_OCCURANCE_SLOT))[0] = vals->ocur;
560   SET_ENUM_NAME(OccuranceNames, vals->ocur, VECTOR_ELT(ans, DTD_CONTENT_OCCURANCE_SLOT));
561 
562 
563   if(vals->type == XML_ELEMENT_CONTENT_SEQ && recursive) {
564        SET_VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT, RS_XML(SequenceContent)(vals, el));
565   } else {
566     num += (vals->c1 != NULL);
567     if(recursive || 1)
568       num += (vals->c2 != NULL);
569 
570     if(num > 0) {
571       SET_VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT, NEW_LIST(num));
572       num = 0;
573       if(vals->c1) {
574         SET_VECTOR_ELT(VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT), num++, RS_XML(createDTDElementContents)(vals->c1, el, 1));
575       }
576 
577       if(recursive || 1) {
578         if(vals->c2) {
579          SET_VECTOR_ELT(VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT), num++, RS_XML(createDTDElementContents)(vals->c2, el, 1));
580         }
581        }
582     } else {
583      if(vals->name) {
584         SET_VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT,  NEW_CHARACTER(1));
585         SET_STRING_ELT(VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(vals->name)));
586       }
587    }
588   }
589   switch(vals->type) {
590     case XML_ELEMENT_CONTENT_SEQ:
591       localClassName = "XMLSequenceContent";
592       break;
593     case XML_ELEMENT_CONTENT_OR:
594       localClassName = "XMLOrContent";
595       break;
596     default:
597       localClassName = "XMLElementContent";
598   }
599 
600       if(localClassName) {
601 	SET_CLASS_NAME(localClassName, ans);
602       }
603 
604 
605   RS_XML(SetNames)(DTD_CONTENT_NUM_SLOTS, RS_XML(ContentNames), ans);
606 
607   UNPROTECT(1);
608   return(ans);
609 }
610 
611 
612 /**
613   Process the DTD element, knowing that it is a sequence definition.
614   Compute the number of elements in the sequence by flattening  out the
615   lob-sided tree and then convert the each element and append it to the list.
616  */
617 USER_OBJECT_
RS_XML(SequenceContent)618 RS_XML(SequenceContent)(xmlElementContentPtr vals, xmlElementPtr el)
619 {
620   xmlElementContentPtr ptr = vals->c2;
621   int ok = 1, n=1, deep = 0;
622   USER_OBJECT_ ans = NULL_USER_OBJECT;
623   USER_OBJECT_ tmp;
624 
625      /* Count the number of elements in this sequence.
626         Descend all the c2's below this one.
627       */
628   while(ptr && ok) {
629     ok = (ptr->type == XML_ELEMENT_CONTENT_SEQ);
630     ptr = ptr->c2;
631     n++;
632   }
633 
634   /* Now build the list and the elements within it.*/
635   PROTECT(ans = NEW_LIST(n));
636   SET_VECTOR_ELT(ans, 0, RS_XML(createDTDElementContents)(vals->c1, el, 1));
637 
638   ptr = vals->c2;
639   n = 1;
640   do {
641       /* Some jumping around here beacuse of the recursion and split types. Should be cleaner. */
642     deep = (ptr->c1  != NULL && ptr->type == XML_ELEMENT_CONTENT_SEQ );
643     tmp = RS_XML(createDTDElementContents)( deep ? ptr->c1 : ptr, el, deep);
644     SET_VECTOR_ELT(ans, n, tmp);
645     ok = (ptr->type == XML_ELEMENT_CONTENT_SEQ);
646     ptr = ptr->c2;
647     n++;
648   } while(ptr && ok);
649 
650   UNPROTECT(1);
651   return(ans);
652 }
653 
654 
655 
656 
657 
658 /**
659    Routine that creates a named list of XMLAttributeDef objects from a collection of
660    attribute definitions associated with the specified XML element definition.
661  */
662 USER_OBJECT_
RS_XML(createDTDElementAttributes)663 RS_XML(createDTDElementAttributes)(xmlAttributePtr vals, xmlElementPtr el)
664 {
665   USER_OBJECT_ ans = NULL_USER_OBJECT;
666   USER_OBJECT_ names;
667   xmlAttributePtr tmp = vals;
668   int n = 0, i;
669 
670   while(tmp) {
671 #ifdef LIBXML2
672     tmp = tmp->nexth;
673 #else
674     tmp = tmp->next;
675 #endif
676     n++;
677   }
678 
679   if(n > 0) {
680     tmp = vals;
681     PROTECT(ans = NEW_LIST(n));
682     PROTECT(names = NEW_CHARACTER(n));
683     for(i=0; i < n; i++) {
684        SET_VECTOR_ELT(ans, i,  RS_XML(createDTDAttribute)(tmp, el));
685        SET_STRING_ELT(names, i, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(tmp->name)));
686 #ifdef LIBXML2
687     tmp = tmp->nexth;
688 #else
689     tmp = tmp->next;
690 #endif
691     }
692     SET_NAMES(ans, names);
693     UNPROTECT(2);
694   }
695 
696   return(ans);
697 }
698 
699 
700 enum {DTD_ATTRIBUTE_NAME_SLOT, DTD_ATTRIBUTE_TYPE_SLOT, DTD_ATTRIBUTE_DEFAULT_SLOT, DTD_ATTRIBUTE_DEFAULT_VALUE_SLOT, DTD_ATTRIBUTE_NUM_SLOTS};
701 
702 /* Names for the possible types of an attribute. */
703 const char *RS_XML(AttributeTypeNames)   [] = {"CDATA","Id", "IDRef", "IDRefs", "Entity","Entities", "NMToken", "NMTokens", "Enumeration", "Notation"};
704 /* Names for the possible modes or default types of an attribute. */
705 const char *RS_XML(AttributeDefaultNames)[] = {"None", "Required", "Implied", "Fixed"};
706 
707 /* Names of the elements within the returned list */
708 const char *RS_XML(AttributeSlotNames)[] = {"name", "type", "defaultType", "defaultValue"};
709 
710 
711 /**
712   Create a user-level version of a DTD attribute within an Attribute list within the DTD.
713   Return a vector of length 4 with elements named
714        Name, Type, Default Type and Default Value.
715    The first is a simple string (character vector of length 1). The next two are enumerated
716    types describing the type of the attribute value and whether it is required, fixed, implied, etc.
717    The final value is the default value
718  */
719 USER_OBJECT_
RS_XML(createDTDAttribute)720 RS_XML(createDTDAttribute)(xmlAttributePtr val, xmlElementPtr el)
721 {
722   USER_OBJECT_ ans;
723   int attrType;
724 
725 #ifdef XML_ATTRIBUTE_ATYPE
726   attrType = val->atype;
727 #else
728   attrType = val->type;
729 #endif
730 
731   PROTECT(ans = NEW_LIST(DTD_ATTRIBUTE_NUM_SLOTS));
732 
733   SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_NAME_SLOT, NEW_CHARACTER(1));
734   SET_STRING_ELT(VECTOR_ELT(ans, DTD_ATTRIBUTE_NAME_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(val->name)));
735 
736  SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_TYPE_SLOT, NEW_INTEGER(1));
737  INTEGER_DATA(VECTOR_ELT(ans, DTD_ATTRIBUTE_TYPE_SLOT))[0] =  val->type;
738  SET_ENUM_NAME(AttributeTypeNames, attrType, VECTOR_ELT(ans, DTD_ATTRIBUTE_TYPE_SLOT));
739 
740  SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_SLOT, NEW_INTEGER(1));
741  INTEGER_DATA(VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_SLOT))[0] =  val->def;
742  SET_ENUM_NAME(AttributeDefaultNames, val->def, VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_SLOT));
743 
744  if(val->type == (xmlElementType)XML_ATTRIBUTE_ENUMERATION) {
745    SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_VALUE_SLOT, RS_XML(AttributeEnumerationList)(val->tree, val, el));
746  } else {
747     SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_VALUE_SLOT, NEW_CHARACTER(1));
748     SET_STRING_ELT(VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_VALUE_SLOT), 0, COPY_TO_USER_STRING( XMLCHAR_TO_CHAR( (val->defaultValue ? val->defaultValue : (xmlChar*)""))));
749  }
750   RS_XML(SetNames)(DTD_ATTRIBUTE_NUM_SLOTS, RS_XML(AttributeSlotNames),  ans);
751 
752 
753   SET_CLASS_NAME("XMLAttributeDef", ans);
754 
755   UNPROTECT(1);
756   return(ans);
757 }
758 
759 
760 /**
761   Return a character vector containing the elements listed in the enumeration of possible
762   values in the attribute. These arise in DTD entries such as
763     <ATTLIST el
764                foo   (true | false)
765     >
766  */
767 USER_OBJECT_
RS_XML(AttributeEnumerationList)768 RS_XML(AttributeEnumerationList)(xmlEnumerationPtr list, xmlAttributePtr attr, xmlElementPtr element)
769 {
770   USER_OBJECT_ ans = NULL_USER_OBJECT;
771   xmlEnumerationPtr tmp = list;
772   int n = 0;
773 
774     /* Count the number of entries in the list/table. */
775   while(tmp) {
776     n++;
777     tmp = tmp->next;
778   }
779 
780        /* Now convert each entry and add it to a list. */
781   if(n > 0) {
782     int i;
783     PROTECT(ans  = NEW_CHARACTER(n));
784     tmp = list;
785     for(i = 0; i < n; i++) {
786       SET_STRING_ELT(ans, i, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(tmp->name)));
787       tmp = tmp->next;
788     }
789     UNPROTECT(1);
790   }
791   return(ans);
792 }
793