1 /**
2 This file defines the top-level entry routine called from R and S to parse and convert
3 a DTD into a user-level object.
4
5 Most of the routines are support routines. We leave them as global symbols (as opposed to static) so that others might be
6 able to utilize them. Some are called from the other files (DocParse, specifically).
7
8
9 * See Copyright for the license status of this software.
10
11 */
12
13 #include "RSDTD.h"
14
15 #ifdef USE_S
16 extern char *strdup(const char *);
17 #endif
18
19
20
21 #include "Utils.h" /* for SET_CLASS_NAME */
22
23 #include <sys/stat.h>
24
25 /* For reading DTDs directly from text, not files.
26 Copied directly from parser.c in the libxml(-1.7.3) library.
27 */
28
29 #ifdef FROM_GNOME_XML_DIR
30 #include <gnome-xml/parserInternals.h>
31 #else
32 #include <libxml/parserInternals.h>
33 #endif
34
35
/* Number of bytes requested whenever the parser input buffer is grown. */
#define INPUT_CHUNK 250

/* The current character: the pending token when one is set, otherwise the
   byte at the input cursor.  Like the macros below, this expects a parser
   context variable named `ctxt' to be in scope where it is used. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))

#ifndef OLD_SKIP_BLANKS

/* Default build: hand the work to the library's own routines. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
#define NEXT        xmlNextChar(ctxt)

#else

/* Legacy build (OLD_SKIP_BLANKS defined): hand-expanded versions.
   NEXT clears a pending token or advances the cursor by one character,
   keeping the line/column counters up to date, growing or popping the input
   when it runs dry and handing '%' and '&' to the reference handlers.
   SKIP_BLANKS applies NEXT while the current character is blank.
   Note that SKIP_BLANKS already ends in a semicolon. */
#define NEXT {                                                              \
    if (ctxt->token != 0) ctxt->token = 0;                                  \
    else {                                                                  \
        if ((*ctxt->input->cur == 0) &&                                     \
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {          \
            xmlPopInput(ctxt);                                              \
        } else {                                                            \
            if (*(ctxt->input->cur) == '\n') {                              \
                ctxt->input->line++; ctxt->input->col = 1;                  \
            } else ctxt->input->col++;                                      \
            ctxt->input->cur++;                                             \
            if (*ctxt->input->cur == 0)                                     \
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
        }                                                                   \
        if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
        if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
    }}

#define SKIP_BLANKS                                                         \
    do {                                                                    \
        while (IS_BLANK(CUR)) NEXT;                                         \
        if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
        if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
    } while (IS_BLANK(CUR));

#endif
72
73
74 /* end temporary. */
75
76
77
/* Macro that sets the name of an enumerated value by indexing into an array of
   names based on the value being represented.

     names - suffix of the name table; it is pasted onto RS_XML_ to form the
             identifier of the array of strings.
     which - the enumerated value; 1-based, so entry (which - 1) of the table
             is used.  The caller must make sure it lies within the table.
     obj   - the object that receives the single name.

   The arguments are parenthesized so that an expression (for example a
   conditional) can be passed safely.  The expansion keeps its historical
   trailing semicolon, so do not use it as the unbraced body of an if/else.
 */
#define SET_ENUM_NAME(names, which, obj) RS_XML_SetNames(1, RS_XML_##names + (which) - 1, (obj));
82
83
84
85 enum {DTD_ELEMENTS_SLOT, DTD_ENTITIES_SLOT, DTD_NUM_SLOTS};
86 const char *RS_XML(DtdNames)[] = {"elements", "entities"};
87 /**
88 Top-level entry point for reading the DTD.
89 dtdFileName - name of the DTD.
90 externalId - file identfying the DTD from which its contents are read.
91 */
92 USER_OBJECT_
RS_XML(getDTD)93 RS_XML(getDTD)(USER_OBJECT_ dtdFileName, USER_OBJECT_ externalId,
94 USER_OBJECT_ asText, USER_OBJECT_ isURL, USER_OBJECT_ errorFun)
95 {
96 USER_OBJECT_ ans;
97 const char * dtdName = strdup(CHAR_DEREF(STRING_ELT(dtdFileName, 0)));
98 const char * extId = strdup(CHAR_DEREF(STRING_ELT(externalId, 0)));
99 int localAsText = LOGICAL_DATA(asText)[0];
100 xmlParserCtxtPtr ctxt;
101 xmlDtdPtr dtd;
102
103
104 if(localAsText) {
105 ctxt = xmlCreateDocParserCtxt((xmlChar*) extId);
106 } else {
107 if(LOGICAL_DATA(isURL)[0] == 0) {
108 struct stat tmp_stat;
109 if(extId == NULL || stat(extId, &tmp_stat) < 0) {
110 Rf_error("Can't find file %s", extId);
111 }
112 }
113
114 ctxt = xmlCreateFileParserCtxt(extId); /* from parser.c xmlSAXParseFile */
115 }
116
117 if(ctxt == NULL) {
118 Rf_error("error creating XML parser for `%s'", extId);
119 }
120
121 ctxt->validate = 1;
122
123 #ifdef RS_XML_SET_STRUCTURED_ERROR /* Done in R code now. */
124 xmlSetStructuredErrorFunc(errorFun == NULL_USER_OBJECT ? NULL : errorFun, R_xmlStructuredErrorHandler);
125 #endif
126
127 if(ctxt->myDoc == NULL)
128 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
129
130
131 if(localAsText) {
132 xmlCreateIntSubset(ctxt->myDoc, CHAR_TO_XMLCHAR(dtdName), NULL, NULL);
133 while(ctxt->input->cur && ctxt->input->cur[0]) {
134 SKIP_BLANKS;
135 xmlParseMarkupDecl(ctxt);
136 }
137 dtd = ctxt->myDoc->intSubset;
138 } else {
139 /* Read the file. */
140 /* Added for 2.2.12. May need to be conditional for 1.8.9 */
141 ctxt->sax->internalSubset(ctxt->userData, CHAR_TO_XMLCHAR(dtdName), CHAR_TO_XMLCHAR(extId), CHAR_TO_XMLCHAR(extId));
142 /* Warnings will ensue about not being in internal subset if we don't go to level 2. */
143 #ifdef USE_EXTERNAL_SUBSET
144 ctxt->inSubset = 2;
145 ctxt->sax->externalSubset(ctxt->userData, CHAR_TO_XMLCHAR(dtdName), CHAR_TO_XMLCHAR(extId), CHAR_TO_XMLCHAR(extId));
146 ctxt->inSubset = 0;
147 #endif
148 dtd = ctxt->myDoc->extSubset;
149 }
150
151 #ifdef RS_XML_SET_STRUCTURED_ERROR
152 xmlSetStructuredErrorFunc(NULL, NULL);
153 #endif
154
155 if(dtd == NULL) {
156 if(errorFun != NULL_USER_OBJECT) {
157 RSXML_structuredStop(errorFun, NULL);
158 } else
159 return(stop("DTDParseError", "error parsing %s", dtdName));
160
161 Rf_error("error in DTD %s", extId);
162 }
163
164 if(localAsText) {
165 /* Don't bother with the internal and external split, just do the internal and return it. */
166 ans = RS_XML(createDTDParts)(dtd, ctxt);
167 } else
168 ans = RS_XML(ConstructDTDList)(ctxt->myDoc, 0, ctxt);
169
170 return(ans);
171 }
172
173
174 const char *RS_XML(DtdTypeNames)[] = {"external", "internal"};
175 /**
176 Create the representation of the DTD contained in the Document pointer,
177 using both the internal and external descriptions and returning a list
178 of the appropriate length. If the external description is empty, then we just
179 return the description of the internal description. Otherwise, we return a named
180 list of length 2 containing descriptions of both.
181 */
182 USER_OBJECT_
RS_XML(ConstructDTDList)183 RS_XML(ConstructDTDList)(xmlDocPtr myDoc, int processInternals, xmlParserCtxtPtr ctxt)
184 {
185 USER_OBJECT_ ans, el, klass;
186 int i;
187 xmlDtdPtr sets[2];
188
189 int num = processInternals ? 2 : 1;
190 sets[0] = myDoc->extSubset;
191
192 if(processInternals) {
193 sets[1] = myDoc->intSubset;
194 }
195
196 PROTECT(ans = NEW_LIST(num));
197 for(i = 0; i < num; i++) {
198 if(sets[i]) {
199 SET_VECTOR_ELT(ans, i, el= RS_XML(createDTDParts)(sets[i], ctxt));
200 PROTECT(klass = NEW_CHARACTER(1));
201 SET_STRING_ELT(klass, 0, mkChar(i==0 ? "ExternalDTD" : "InternalDTD"));
202 SET_CLASS(el, klass);
203 UNPROTECT(1);
204 }
205 }
206 RS_XML(SetNames)(num, RS_XML(DtdTypeNames), ans);
207
208 UNPROTECT(1);
209
210
211 return(processInternals ? ans : VECTOR_ELT(ans, 0));
212 }
213
214 /**
215 Process the entities and elements of the DTD, returning a list
216 of length 2, irrespective if either is empty.
217 */
218 USER_OBJECT_
RS_XML(createDTDParts)219 RS_XML(createDTDParts)(xmlDtdPtr dtd, xmlParserCtxtPtr ctxt)
220 {
221 xmlEntitiesTablePtr entities;
222 xmlElementTable *table;
223 USER_OBJECT_ ans;
224 PROTECT(ans = NEW_LIST(DTD_NUM_SLOTS));
225 table = (xmlElementTable*) dtd->elements;
226 if(table)
227 SET_VECTOR_ELT(ans, DTD_ELEMENTS_SLOT, RS_XML(ProcessElements)(table, ctxt));
228
229
230 entities = (xmlEntitiesTablePtr) dtd->entities;
231 if(entities)
232 SET_VECTOR_ELT(ans, DTD_ENTITIES_SLOT, RS_XML(ProcessEntities)(entities, ctxt));
233
234 RS_XML(SetNames)(DTD_NUM_SLOTS, RS_XML(DtdNames), ans);
235
236 UNPROTECT(1);
237 return(ans);
238 }
239
240
#ifdef LIBXML2

/* State handed to the xmlHashScan() callbacks below: the list being filled,
   the vector of names that goes with it, and the index of the next free
   position in both. */
struct ElementTableScanner {
    USER_OBJECT_ dtdEls;
    USER_OBJECT_ dtdNames;
    int counter;
};

/* The name argument of the scanner callbacks is declared const from
   LIBXML_VERSION 20908 on. */
#if LIBXML_VERSION < 20908
# define CONST
#else
# define CONST const
#endif

/* The callbacks return void when NO_XML_HASH_SCANNER_RETURN is defined and
   void * otherwise. */
#ifdef NO_XML_HASH_SCANNER_RETURN
void RS_xmlElementTableConverter(void *payload, void *data, CONST xmlChar *name);
void RS_xmlEntityTableConverter(void *payload, void *data, CONST xmlChar *name);
#else
void *RS_xmlElementTableConverter(void *payload, void *data, CONST xmlChar *name);
void *RS_xmlEntityTableConverter(void *payload, void *data, CONST xmlChar *name);
#endif

#endif
264
265 /**
266 Convert the elements into a named list of objects with each element
267 representing an element.
268 */
269 USER_OBJECT_
RS_XML(ProcessElements)270 RS_XML(ProcessElements)(xmlElementTablePtr table, xmlParserCtxtPtr ctxt)
271 {
272 USER_OBJECT_ dtdEls = NULL_USER_OBJECT;
273 int n;
274 #ifdef LIBXML2
275 n = xmlHashSize(table);
276 #else
277 int i;
278 xmlElementPtr xmlEl;
279 n = table->nb_elements;
280 #endif
281
282 if(n > 0) {
283 USER_OBJECT_ dtdNames = NULL_USER_OBJECT;
284
285 PROTECT_INDEX ipx;
286 PROTECT_WITH_INDEX(dtdEls = NEW_LIST(n), &ipx);
287 PROTECT(dtdNames = NEW_CHARACTER(n));
288 #ifdef LIBXML2
289 {
290 struct ElementTableScanner scanData;
291 scanData.dtdEls = dtdEls;
292 scanData.dtdNames = dtdNames;
293 scanData.counter = 0;
294
295 xmlHashScan(table, RS_xmlElementTableConverter, &scanData);
296
297 SET_LENGTH(dtdEls, scanData.counter);
298 REPROTECT(dtdEls, ipx);
299 SET_LENGTH(dtdNames, scanData.counter);
300 }
301 #else
302 for(i = 0; i < n; i++) {
303 xmlEl = table->table[i];
304 SET_VECTOR_ELT(dtdEls, i, RS_XML(createDTDElement)(xmlEl));
305 SET_STRING_ELT(dtdNames , i, COPY_TO_USER_STRING(xmlEl->name));
306 }
307 #endif
308 SET_NAMES(dtdEls, dtdNames);
309 UNPROTECT(2);
310 }
311
312 return(dtdEls);
313 }
314
#ifdef LIBXML2
/* Callback for xmlHashScan() over a table of element declarations.
   payload is the element declaration, data the ElementTableScanner being
   filled and name the key under which the declaration is stored.  The
   converted element and its name go into the next free position, and the
   position counter is then advanced.

   Some versions of libxml2 declare the scanner as returning void rather than
   void *; NO_XML_HASH_SCANNER_RETURN selects the void form.
 */
#ifdef NO_XML_HASH_SCANNER_RETURN
void
#else
void *
#endif
RS_xmlElementTableConverter(void *payload, void *data, CONST xmlChar *name)
{
    struct ElementTableScanner *scan = (struct ElementTableScanner *) data;
    int slot = scan->counter;

    SET_VECTOR_ELT(scan->dtdEls, slot, RS_XML(createDTDElement)(payload));
    SET_STRING_ELT(scan->dtdNames, slot, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(name)));

    scan->counter = slot + 1;
#ifndef NO_XML_HASH_SCANNER_RETURN
    return(payload);
#endif
}
#endif
338
339 /**
340 Process the list of entities and convert them into a named list containing
341 entity descriptions.
342 */
343 USER_OBJECT_
RS_XML(ProcessEntities)344 RS_XML(ProcessEntities)(xmlEntitiesTablePtr table, xmlParserCtxtPtr ctxt)
345 {
346 USER_OBJECT_ dtdEls = NULL_USER_OBJECT;
347 int n;
348 #ifdef LIBXML2
349 n = xmlHashSize(table);
350 #else
351 xmlEntity *xmlEl;
352 int i;
353 n = table->nb_entities;
354 #endif
355 if(n > 0) {
356 USER_OBJECT_ dtdNames;
357
358 PROTECT_INDEX ipx;
359 PROTECT_WITH_INDEX(dtdEls = NEW_LIST(n), &ipx);
360 PROTECT(dtdNames = NEW_CHARACTER(n));
361
362 #ifdef LIBXML2
363 {
364 struct ElementTableScanner scanData;
365 scanData.dtdEls = dtdEls;
366 scanData.dtdNames = dtdNames;
367 scanData.counter = 0;
368
369 xmlHashScan(table, RS_xmlEntityTableConverter, &scanData);
370 /* Reset the length to be the actual number rather than the
371 capacity of the table.
372 See ProcessElements also.
373 */
374
375 SET_LENGTH(dtdEls, scanData.counter);
376 REPROTECT(dtdEls, ipx);
377 SET_LENGTH(dtdNames, scanData.counter);
378
379 }
380 #else
381 for(i = 0; i < n; i++) {
382 xmlEl = table->table +i;
383 SET_VECTOR_ELT(dtdEls, i, RS_XML(createDTDEntity)(xmlEl));
384 SET_STRING_ELT(dtdNames, i, COPY_TO_USER_STRING(xmlEl->name));
385 }
386 #endif
387 SET_NAMES(dtdEls, dtdNames);
388 UNPROTECT(2);
389 }
390
391 return(dtdEls);
392 }
393
#ifdef LIBXML2

/* Callback for xmlHashScan() over a table of entity declarations.
   payload is the entity, data the ElementTableScanner being filled and name
   the key under which the entity is stored.  The converted entity and its
   name go into the next free position, and the position counter is then
   advanced.  The return type follows NO_XML_HASH_SCANNER_RETURN in the same
   way as the element converter.
 */
#ifdef NO_XML_HASH_SCANNER_RETURN
void
#else
void *
#endif
RS_xmlEntityTableConverter(void *payload, void *data, CONST xmlChar *name)
{
    struct ElementTableScanner *scan = (struct ElementTableScanner *) data;
    int slot = scan->counter;

    SET_VECTOR_ELT(scan->dtdEls, slot, RS_XML(createDTDEntity)(payload));
    SET_STRING_ELT(scan->dtdNames, slot, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(name)));

    scan->counter = slot + 1;
#ifndef NO_XML_HASH_SCANNER_RETURN
    return(payload);
#endif
}
#endif /* End of LIBXML2 for defining RS_xmlEntityTableConverter */
414
415 /**
416 Convert an entity definition into a user-level object, handling both internal and system entities.
417
418 We could have different slots for the two types of entities, but that may make it harder to program.
419 S3/R classes aren't exactly good with inheritance of slots.
420 */
421
422 /**
423 Indices for the slots of the user-level list representing the entity.
424 */
425 enum { DTD_ENTITY_NAME_SLOT, DTD_ENTITY_CONTENT_SLOT, DTD_ENTITY_ORIG_SLOT, DTD_ENTITY_NUM_SLOTS};
426 /*
427 Names for the slots of the user-level list representing the entity.
428 */
429 const char *RS_XML(EntityNames)[] = {"name", "value", "original"};
430
431 USER_OBJECT_
RS_XML(createDTDEntity)432 RS_XML(createDTDEntity)(xmlEntityPtr entity)
433 {
434 USER_OBJECT_ ans;
435 const xmlChar *value;
436 const char *localClassName;
437
438 PROTECT(ans = NEW_LIST(DTD_ENTITY_NUM_SLOTS));
439
440 SET_VECTOR_ELT(ans, DTD_ENTITY_NAME_SLOT, NEW_CHARACTER(1));
441 SET_STRING_ELT(VECTOR_ELT(ans, DTD_ENTITY_NAME_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(entity->name)));
442
443 if(entity->content == NULL) {
444 value = entity->SystemID;
445 localClassName = "XMLSystemEntity";
446 } else {
447 value = entity->content;
448 localClassName = "XMLEntity";
449 }
450
451 SET_VECTOR_ELT(ans, DTD_ENTITY_CONTENT_SLOT, NEW_CHARACTER(1));
452 SET_STRING_ELT(VECTOR_ELT(ans, DTD_ENTITY_CONTENT_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(value)));
453
454 if(entity->orig) {
455 SET_VECTOR_ELT(ans, DTD_ENTITY_ORIG_SLOT, NEW_CHARACTER(1));
456 SET_STRING_ELT(VECTOR_ELT(ans, DTD_ENTITY_ORIG_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(entity->orig)));
457 }
458
459 RS_XML(SetNames)(DTD_ENTITY_NUM_SLOTS, RS_XML(EntityNames), ans);
460
461 /* Set the class of the specified object based on whether it is a internal or external entity. */
462 SET_CLASS_NAME(localClassName, ans);
463
464 UNPROTECT(1);
465
466 return(ans);
467 }
468
469
470
471
472
473
474 enum { DTD_ELEMENT_NAME_SLOT, DTD_ELEMENT_TYPE_SLOT, DTD_ELEMENT_CONTENT_SLOT, DTD_ELEMENT_ATTRIBUTES_SLOT, DTD_ELEMENT_NUM_SLOTS};
475 const char *RS_XML(ElementNames)[] = {"name", "type", "contents","attributes"};
476 const char *RS_XML(ElementTypeNames)[] = {"empty", "any", "mixed","element"};
477
478 /**
479 Creates the user-level object representing the definition of an element within a DTD,
480 including its attribute definitions, its type, name and finally contents.
481 This is an object of class XMLElementDef.
482 */
483 USER_OBJECT_
RS_XML(createDTDElement)484 RS_XML(createDTDElement)(xmlElementPtr el)
485 {
486 USER_OBJECT_ rel;
487 int type;
488
489 #ifdef XML_ELEMENT_ETYPE
490 type = el->etype;
491 #else
492 type = el->type;
493 #endif
494
495 PROTECT(rel = NEW_LIST(DTD_ELEMENT_NUM_SLOTS));
496
497 SET_VECTOR_ELT(rel, DTD_ELEMENT_NAME_SLOT, NEW_CHARACTER(1));
498 SET_STRING_ELT(VECTOR_ELT(rel, DTD_ELEMENT_NAME_SLOT), 0, COPY_TO_USER_STRING( XMLCHAR_TO_CHAR( ( el->name ? el->name : (xmlChar*)""))));
499
500 SET_VECTOR_ELT(rel, DTD_ELEMENT_TYPE_SLOT, NEW_INTEGER(1));
501 INTEGER_DATA(VECTOR_ELT(rel, DTD_ELEMENT_TYPE_SLOT))[0] = el->type;
502 SET_ENUM_NAME(ElementTypeNames, type, VECTOR_ELT(rel, DTD_ELEMENT_TYPE_SLOT));
503
504
505 if(el->content != NULL)
506 SET_VECTOR_ELT(rel, DTD_ELEMENT_CONTENT_SLOT, RS_XML(createDTDElementContents)(el->content, el, 1));
507
508 SET_VECTOR_ELT(rel, DTD_ELEMENT_ATTRIBUTES_SLOT, RS_XML(createDTDElementAttributes)(el->attributes, el));
509
510 RS_XML(SetNames)(DTD_ELEMENT_NUM_SLOTS, RS_XML(ElementNames), rel);
511
512 SET_CLASS_NAME("XMLElementDef", rel);
513
514 UNPROTECT(1);
515 return(rel);
516 }
517
518
519 /* Indices for the slots/elements in the list. */
520 enum {DTD_CONTENT_TYPE_SLOT, DTD_CONTENT_OCCURANCE_SLOT, DTD_CONTENT_ELEMENTS_SLOT, DTD_CONTENT_NUM_SLOTS};
521 /* names for the elements */
522 const char *RS_XML(ContentNames)[] = {"type", "ocur", "elements"};
523 /* Names for the enumerated types of the entries in the data */
524 const char *RS_XML(ContentTypeNames)[] = {"PCData", "Element", "Sequence","Or"};
525 const char *RS_XML(OccuranceNames)[] = {"Once", "Zero or One", "Mult","One or More"};
526
527 /**
528
529 Create an object representing the DTD element. The returned value is a list
530 with 3 elements. The names are given by the array ContentNames above. The
531 type and ocur elements are simple named integers identifying that the element
532 is simple parsed character data, an element or a composite element which is
533 either an one of several possible types (that is an OR or |) or an ordered
534 sequence of types. The ocur field indicates whether this element is to be
535 expected in this position exactly once (default qualifier), zero or one
536 (i.e. optional) (?) , any number of times (including omitted) (*) and finally
537 , at least once, but possible more(+)
538
539
540 The recursive argument allows the RS_XML(SequenceContent) routine to use part of this
541 routine.
542
543 */
544
545 USER_OBJECT_
RS_XML(createDTDElementContents)546 RS_XML(createDTDElementContents)(xmlElementContentPtr vals, xmlElementPtr el, int recursive)
547 {
548 char *localClassName = NULL;
549 int num = 0;
550 USER_OBJECT_ ans = NULL_USER_OBJECT;
551
552 PROTECT(ans = NEW_LIST(DTD_CONTENT_NUM_SLOTS));
553
554 SET_VECTOR_ELT(ans, DTD_CONTENT_TYPE_SLOT, NEW_INTEGER(1));
555 INTEGER_DATA(VECTOR_ELT(ans, DTD_CONTENT_TYPE_SLOT))[0] = vals->type;
556 SET_ENUM_NAME(ContentTypeNames, vals->type, VECTOR_ELT(ans, DTD_CONTENT_TYPE_SLOT));
557
558 SET_VECTOR_ELT(ans, DTD_CONTENT_OCCURANCE_SLOT, NEW_INTEGER(1));
559 INTEGER_DATA(VECTOR_ELT(ans, DTD_CONTENT_OCCURANCE_SLOT))[0] = vals->ocur;
560 SET_ENUM_NAME(OccuranceNames, vals->ocur, VECTOR_ELT(ans, DTD_CONTENT_OCCURANCE_SLOT));
561
562
563 if(vals->type == XML_ELEMENT_CONTENT_SEQ && recursive) {
564 SET_VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT, RS_XML(SequenceContent)(vals, el));
565 } else {
566 num += (vals->c1 != NULL);
567 if(recursive || 1)
568 num += (vals->c2 != NULL);
569
570 if(num > 0) {
571 SET_VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT, NEW_LIST(num));
572 num = 0;
573 if(vals->c1) {
574 SET_VECTOR_ELT(VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT), num++, RS_XML(createDTDElementContents)(vals->c1, el, 1));
575 }
576
577 if(recursive || 1) {
578 if(vals->c2) {
579 SET_VECTOR_ELT(VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT), num++, RS_XML(createDTDElementContents)(vals->c2, el, 1));
580 }
581 }
582 } else {
583 if(vals->name) {
584 SET_VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT, NEW_CHARACTER(1));
585 SET_STRING_ELT(VECTOR_ELT(ans, DTD_CONTENT_ELEMENTS_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(vals->name)));
586 }
587 }
588 }
589 switch(vals->type) {
590 case XML_ELEMENT_CONTENT_SEQ:
591 localClassName = "XMLSequenceContent";
592 break;
593 case XML_ELEMENT_CONTENT_OR:
594 localClassName = "XMLOrContent";
595 break;
596 default:
597 localClassName = "XMLElementContent";
598 }
599
600 if(localClassName) {
601 SET_CLASS_NAME(localClassName, ans);
602 }
603
604
605 RS_XML(SetNames)(DTD_CONTENT_NUM_SLOTS, RS_XML(ContentNames), ans);
606
607 UNPROTECT(1);
608 return(ans);
609 }
610
611
612 /**
613 Process the DTD element, knowing that it is a sequence definition.
614 Compute the number of elements in the sequence by flattening out the
615 lob-sided tree and then convert the each element and append it to the list.
616 */
617 USER_OBJECT_
RS_XML(SequenceContent)618 RS_XML(SequenceContent)(xmlElementContentPtr vals, xmlElementPtr el)
619 {
620 xmlElementContentPtr ptr = vals->c2;
621 int ok = 1, n=1, deep = 0;
622 USER_OBJECT_ ans = NULL_USER_OBJECT;
623 USER_OBJECT_ tmp;
624
625 /* Count the number of elements in this sequence.
626 Descend all the c2's below this one.
627 */
628 while(ptr && ok) {
629 ok = (ptr->type == XML_ELEMENT_CONTENT_SEQ);
630 ptr = ptr->c2;
631 n++;
632 }
633
634 /* Now build the list and the elements within it.*/
635 PROTECT(ans = NEW_LIST(n));
636 SET_VECTOR_ELT(ans, 0, RS_XML(createDTDElementContents)(vals->c1, el, 1));
637
638 ptr = vals->c2;
639 n = 1;
640 do {
641 /* Some jumping around here beacuse of the recursion and split types. Should be cleaner. */
642 deep = (ptr->c1 != NULL && ptr->type == XML_ELEMENT_CONTENT_SEQ );
643 tmp = RS_XML(createDTDElementContents)( deep ? ptr->c1 : ptr, el, deep);
644 SET_VECTOR_ELT(ans, n, tmp);
645 ok = (ptr->type == XML_ELEMENT_CONTENT_SEQ);
646 ptr = ptr->c2;
647 n++;
648 } while(ptr && ok);
649
650 UNPROTECT(1);
651 return(ans);
652 }
653
654
655
656
657
658 /**
659 Routine that creates a named list of XMLAttributeDef objects from a collection of
660 attribute definitions associated with the specified XML element definition.
661 */
662 USER_OBJECT_
RS_XML(createDTDElementAttributes)663 RS_XML(createDTDElementAttributes)(xmlAttributePtr vals, xmlElementPtr el)
664 {
665 USER_OBJECT_ ans = NULL_USER_OBJECT;
666 USER_OBJECT_ names;
667 xmlAttributePtr tmp = vals;
668 int n = 0, i;
669
670 while(tmp) {
671 #ifdef LIBXML2
672 tmp = tmp->nexth;
673 #else
674 tmp = tmp->next;
675 #endif
676 n++;
677 }
678
679 if(n > 0) {
680 tmp = vals;
681 PROTECT(ans = NEW_LIST(n));
682 PROTECT(names = NEW_CHARACTER(n));
683 for(i=0; i < n; i++) {
684 SET_VECTOR_ELT(ans, i, RS_XML(createDTDAttribute)(tmp, el));
685 SET_STRING_ELT(names, i, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(tmp->name)));
686 #ifdef LIBXML2
687 tmp = tmp->nexth;
688 #else
689 tmp = tmp->next;
690 #endif
691 }
692 SET_NAMES(ans, names);
693 UNPROTECT(2);
694 }
695
696 return(ans);
697 }
698
699
700 enum {DTD_ATTRIBUTE_NAME_SLOT, DTD_ATTRIBUTE_TYPE_SLOT, DTD_ATTRIBUTE_DEFAULT_SLOT, DTD_ATTRIBUTE_DEFAULT_VALUE_SLOT, DTD_ATTRIBUTE_NUM_SLOTS};
701
702 /* Names for the possible types of an attribute. */
703 const char *RS_XML(AttributeTypeNames) [] = {"CDATA","Id", "IDRef", "IDRefs", "Entity","Entities", "NMToken", "NMTokens", "Enumeration", "Notation"};
704 /* Names for the possible modes or default types of an attribute. */
705 const char *RS_XML(AttributeDefaultNames)[] = {"None", "Required", "Implied", "Fixed"};
706
707 /* Names of the elements within the returned list */
708 const char *RS_XML(AttributeSlotNames)[] = {"name", "type", "defaultType", "defaultValue"};
709
710
711 /**
712 Create a user-level version of a DTD attribute within an Attribute list within the DTD.
713 Return a vector of length 4 with elements named
714 Name, Type, Default Type and Default Value.
715 The first is a simple string (character vector of length 1). The next two are enumerated
716 types describing the type of the attribute value and whether it is required, fixed, implied, etc.
717 The final value is the default value
718 */
719 USER_OBJECT_
RS_XML(createDTDAttribute)720 RS_XML(createDTDAttribute)(xmlAttributePtr val, xmlElementPtr el)
721 {
722 USER_OBJECT_ ans;
723 int attrType;
724
725 #ifdef XML_ATTRIBUTE_ATYPE
726 attrType = val->atype;
727 #else
728 attrType = val->type;
729 #endif
730
731 PROTECT(ans = NEW_LIST(DTD_ATTRIBUTE_NUM_SLOTS));
732
733 SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_NAME_SLOT, NEW_CHARACTER(1));
734 SET_STRING_ELT(VECTOR_ELT(ans, DTD_ATTRIBUTE_NAME_SLOT), 0, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(val->name)));
735
736 SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_TYPE_SLOT, NEW_INTEGER(1));
737 INTEGER_DATA(VECTOR_ELT(ans, DTD_ATTRIBUTE_TYPE_SLOT))[0] = val->type;
738 SET_ENUM_NAME(AttributeTypeNames, attrType, VECTOR_ELT(ans, DTD_ATTRIBUTE_TYPE_SLOT));
739
740 SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_SLOT, NEW_INTEGER(1));
741 INTEGER_DATA(VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_SLOT))[0] = val->def;
742 SET_ENUM_NAME(AttributeDefaultNames, val->def, VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_SLOT));
743
744 if(val->type == (xmlElementType)XML_ATTRIBUTE_ENUMERATION) {
745 SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_VALUE_SLOT, RS_XML(AttributeEnumerationList)(val->tree, val, el));
746 } else {
747 SET_VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_VALUE_SLOT, NEW_CHARACTER(1));
748 SET_STRING_ELT(VECTOR_ELT(ans, DTD_ATTRIBUTE_DEFAULT_VALUE_SLOT), 0, COPY_TO_USER_STRING( XMLCHAR_TO_CHAR( (val->defaultValue ? val->defaultValue : (xmlChar*)""))));
749 }
750 RS_XML(SetNames)(DTD_ATTRIBUTE_NUM_SLOTS, RS_XML(AttributeSlotNames), ans);
751
752
753 SET_CLASS_NAME("XMLAttributeDef", ans);
754
755 UNPROTECT(1);
756 return(ans);
757 }
758
759
760 /**
761 Return a character vector containing the elements listed in the enumeration of possible
762 values in the attribute. These arise in DTD entries such as
763 <ATTLIST el
764 foo (true | false)
765 >
766 */
767 USER_OBJECT_
RS_XML(AttributeEnumerationList)768 RS_XML(AttributeEnumerationList)(xmlEnumerationPtr list, xmlAttributePtr attr, xmlElementPtr element)
769 {
770 USER_OBJECT_ ans = NULL_USER_OBJECT;
771 xmlEnumerationPtr tmp = list;
772 int n = 0;
773
774 /* Count the number of entries in the list/table. */
775 while(tmp) {
776 n++;
777 tmp = tmp->next;
778 }
779
780 /* Now convert each entry and add it to a list. */
781 if(n > 0) {
782 int i;
783 PROTECT(ans = NEW_CHARACTER(n));
784 tmp = list;
785 for(i = 0; i < n; i++) {
786 SET_STRING_ELT(ans, i, COPY_TO_USER_STRING(XMLCHAR_TO_CHAR(tmp->name)));
787 tmp = tmp->next;
788 }
789 UNPROTECT(1);
790 }
791 return(ans);
792 }
793