1 /* $Id: sgmlfiles.c,v 1.163 2009/10/21 13:13:47 ht Exp $ */
2
3 #ifndef lint
4 static char vcid[] = "$Id: sgmlfiles.c,v 1.163 2009/10/21 13:13:47 ht Exp $";
5 #endif /* lint */
6
7 /* sgmlfiles.c -- Henry Thompson Tue Jan 30 1996
8 * Basic file processing for XML/NSL
9 */
10
11 /* This version using Richard's XML aware parser */
12
13 #include <assert.h>
14
15 #include "lt-memory.h"
16 #include "nsllib.h"
17 #include "string16.h"
18 #include "stdio16.h"
19 #include "lt-safe.h"
20 #include "lt-umalloc.h"
21 #include "dtd.h"
22 #include "input.h"
23 #include "url.h"
24 #include "xmlparser.h"
25 #include "readddb.h"
26 #include "catalog.h"
27
28 static boolean doctype_init_alloc(NSL_Doctype_I *doctype,
29 int nitems, int ndata, int nattr);
30
31 /* XXX to be removed asap */
32
33 boolean ParseAttributeString( NSL_Doctype_I *doctype, NSL_Item *item,
34 struct attribute *source, int do_ns);
35
36 size_t BufCopyMax=BUFCOPYMAX,BufMargin=BUFMARGIN;
37
38 static NSL_File_I *open_output(FILE16 *fp, NSL_Doctype dtype, NSL_FType type,
39 CharacterEncoding encoding);
40 static NSL_File_I *open_source(InputSource source, NSL_Doctype dtype,
41 NSL_FType type);
42
43 static int CheckFlags(int type);
44
45 /*--- File handling ---*/
46
CheckFlags(int type)47 static int CheckFlags(int type)
48 {
49 if(((type & NSL_read) && (type & NSL_write_flags)) ||
50 ((type & NSL_write) && (type & NSL_read_flags)) ||
51 (type & (NSL_read | NSL_write)) == 0)
52 {
53 LT_ERROR(NEIO,
54 "Bad flag combination when opening file or stream\n");
55 return -1;
56 }
57 return 0;
58 }
59
OpenURL(const char8 * url,NSL_Doctype dtype,NSL_FType type,CharacterEncoding encoding,const char8 * base)60 NSL_File OpenURL(const char8 *url, NSL_Doctype dtype, NSL_FType type,
61 CharacterEncoding encoding, const char8 *base)
62 {
63 char8 *m_url;
64 Entity entity;
65 InputSource source;
66 FILE16 *f16;
67 NSL_File_I *file;
68
69 ECEN(CheckFlags(type));
70
71 if(type & NSL_read)
72 {
73 ECNN(entity = NewExternalEntity(0, 0, url, 0, 0));
74 entity->encoding = encoding;
75 ECNN(source = EntityOpen(entity));
76 return open_source(source, dtype, type);
77 }
78 else
79 {
80 ECNN(f16 = url_open(url, base, "w", &m_url));
81 SetCloseUnderlying(f16, 1);
82 file = open_output(f16, dtype, type, encoding);
83 return file;
84 }
85 }
86
OpenStream(FILE * fp,NSL_Doctype dtype,NSL_FType type,CharacterEncoding encoding,const char8 * name)87 NSL_File OpenStream(FILE *fp, NSL_Doctype dtype, NSL_FType type,
88 CharacterEncoding encoding, const char8 *name)
89 {
90 InputSource source;
91 FILE16 *f16;
92
93 ECEN(CheckFlags(type));
94
95 if(type & NSL_read)
96 {
97 source = SourceFromStream(name, fp);
98 source->entity->encoding = encoding;
99 return open_source(source, dtype, type);
100 }
101 else
102 {
103 ECNN(f16 = MakeFILE16FromFILE(fp, "w"));
104 return open_output(f16, dtype, type, encoding);
105 }
106 }
107
/* Configure the parser attached to `file` for XML (xml != 0) or nSGML
 * (xml == 0) input.  Most flags simply follow `xml`; a few are set to
 * its negation.  The catalog is enabled only for XML.
 */
static void SetMode(NSL_File_I *file, int xml)
{
    Parser parser = file->pstate;
    int nsgml = !xml;

    ParserSetFlag(parser, XMLSyntax, xml);
    ParserSetFlag(parser, XMLPredefinedEntities, xml);
    ParserSetFlag(parser, XMLExternalIDs, xml);
    ParserSetFlag(parser, XMLMiscWFErrors, xml);
    ParserSetFlag(parser, ErrorOnUnquotedAttributeValues, xml);
    ParserSetFlag(parser, NormaliseAttributeValues, xml);
    ParserSetFlag(parser, XMLLessThan, xml);
    ParserSetFlag(parser, IgnoreEntities, nsgml);
    ParserSetFlag(parser, ExpandCharacterEntities, xml);
    ParserSetFlag(parser, ExpandGeneralEntities, xml);

    /* The parser must not check end tags in nSGML mode, because it
       doesn't have real ElementDefinition records. */
    ParserSetFlag(parser, MaintainElementStack, xml);

    /* These stop the parser trying to auto-declare undeclared elements
       and attributes in nSGML mode, which wouldn't work. */
    ParserSetFlag(parser, ErrorOnUndefinedElements, nsgml);
    ParserSetFlag(parser, ErrorOnUndefinedAttributes, nsgml);

    if(xml)
    {
        CatalogEnable(parser);
    }
}
131
132 /* Open a string for input or output. */
133
OpenString(Char * text,NSL_Doctype dtype,NSL_FType type)134 NSL_File OpenString(Char *text, NSL_Doctype dtype, NSL_FType type)
135 {
136 NSL_File_I *file;
137 static Char string[] = {'<','s','t','r','i','n','g','>',0};
138
139 ECEN(CheckFlags(type));
140
141 if(type & NSL_read)
142 {
143 Entity entity;
144 InputSource source;
145
146 entity = NewInternalEntity(string, text, 0, 0, 0, 0);
147 ECNN(source = EntityOpen(entity));
148 /* Don't read prolog yet, we want to set flag first */
149 file = open_source(source, dtype, type|NSL_read_no_consume_prolog);
150 /* So we don't get warnings about xml declaration in internal entity */
151 ParserSetFlag(file->pstate, IgnorePlacementErrors, 1);
152 /* Now we can read the prolog */
153 if(!(type & NSL_read_no_consume_prolog))
154 ReadProlog(file);
155 }
156 else
157 {
158 FILE16 *f16;
159
160 ECNN(f16 = MakeFILE16FromString(text, -1, "w"));
161 file = open_output(f16, dtype, type, CE_unknown);
162 }
163
164 return file;
165 }
166
167 /* Open a string for input and read a single Item from it.
168 */
169
GetItemFromString(const Char * text,NSL_Doctype dtype)170 NSL_Item *GetItemFromString(const Char *text, NSL_Doctype dtype)
171 {
172 NSL_File file;
173 NSL_Item *item;
174
175 ECNN(file = OpenString((Char *)text, dtype, NSL_read));
176 item = GetNextItem(file);
177 SFclose(file);
178 return item;
179 }
180
181 /* Manipulate the base URL of the root entity of a file */
182
GetFileURL(NSL_File file)183 const char8 *GetFileURL(NSL_File file)
184 {
185 return EntityBaseURL(ParserRootEntity(file->pstate));
186 }
187
/* Set the base URL of the root entity of the file's parser to `url`. */
void SetFileURL(NSL_File file, const char8 *url)
{
    Entity root = ParserRootEntity(file->pstate);

    EntitySetBaseURL(root, url);
}
192
193
194 /* Backward compatibility functions.
195 * These are superseded by OpenURL and OpenStream.
196 */
197
SFopen(const char8 * urlname,NSL_Doctype_I * dtype,NSL_FType type)198 NSL_File_I *SFopen(const char8 *urlname, NSL_Doctype_I *dtype,
199 NSL_FType type)
200 {
201 return OpenURL(urlname, dtype, type, CE_unknown, 0);
202 }
203
SFFopen(FILE * fp,NSL_Doctype_I * dtype,NSL_FType type,const char8 * fname)204 NSL_File_I *SFFopen( FILE *fp, NSL_Doctype_I *dtype,
205 NSL_FType type, const char8 *fname)
206 {
207 return OpenStream(fp, dtype, type, CE_unknown, fname);
208 }
209
210 void NSL_Parser_declaration(XBit bit, void *callback_arg);
211
/* Printable values for the standalone attribute of a synthesised XML
   declaration, indexed by the doctype's sdd code. */
static const char *sddNames[] = {
    "unspecified",
    "no",
    "yes"
};
213
214
215
/* Utility that allocates an NSL_File_I and explicitly initialises its
   fields, so that they are filled in sensibly and can be seen to be. */
218
NewNullFile()219 static NSL_File_I * NewNullFile () {
220 NSL_File_I *sf;
221
222 ECNN(sf=tsalloc(NSL_File_I, 1));
223 sf->file16 = NULL;
224 sf->fileToClose = NULL;
225 sf->doctype = NULL;
226 sf->type = NSL_write;
227 sf->cnum = 0;
228 sf->pstate = NULL;
229 sf->currentbase = NULL;
230 sf->_bit1.type=NSL_bad;
231 sf->_bit1.nsc=0;
232 sf->_bit1.nsowned = 0;
233 sf->peekBit=0;
234 sf->currentItemCharPosn = -1;
235 sf->currentBitOffset = -1;
236
237 /* rest only valid for output files */
238 sf->state=_elt_unknown;
239
240 return sf;
241 }
242
/* Sizes of the element-name and attribute-name hash tables that
 * open_source creates with rcreate() for a doctype built on the fly.
 * NELEMENTS and NATTRS are also the entry-count ceilings enforced by
 * xrinsert (which warns once a table is half full).  ELEMENTNAMES and
 * ATTRNAMES are passed as rcreate's second argument - presumably the
 * key-character capacity; confirm against rcreate. */
#define NELEMENTS 1024
#define ELEMENTNAMES 16384
#define NATTRS 1024
#define ATTRNAMES 16384
247
xrinsert(NSL_Doctype_I * doctype,const Char * name,int len,RHashTableHdr * tbl,RHVal val)248 RHTEntry* xrinsert(NSL_Doctype_I *doctype,
249 const Char* name,int len,RHashTableHdr* tbl,RHVal val) {
250 if (!len) {
251 len=Strlen(name);
252 };
253 if ((len+tbl->freekey)>tbl->length) {
254 LT_ERROR(NEMEM,"hash table keychars overflow\n");
255 return NULL;
256 };
257 /* Should make this less left-handed */
258 if (tbl==doctype->attrNames) {
259 if (doctype->attrNameCount++==(NATTRS/2)) {
260 WARN(NEMEM,"attr hash table half full!\n");
261 }
262 else if (doctype->attrNameCount>NATTRS) {
263 LT_ERROR(NEMEM,"attr hash table entry overflow\n");
264 return NULL;
265 };
266 }
267 else if (tbl==doctype->elements) {
268 if (doctype->elementCount++==(NELEMENTS/2)) {
269 WARN(NEMEM,"element hash table half full!\n");
270 }
271 else if (doctype->elementCount>NELEMENTS) {
272 LT_ERROR(NEMEM,"element hash table entry overflow\n");
273 return NULL;
274 };
275 }
276 else {
277 SHOULDNT;
278 };
279 return rinsert(name,len,tbl,val);
280 }
281
282 static int eltSpace=64*(sizeof(NSL_ElementSummary_I)+
283 (4*sizeof(AttributeSummary))+16);
284
/* Allocate a fresh eltSpace-byte chunk for element/attribute summaries.
 * Called when XML overflows the current doc/attr summary table.
 *
 * The first pointer-sized slot of the new chunk stores `oldTbl` as a
 * forwarding pointer to the previous chunk (0 for the first).  The
 * doctype's elementBase, elementPtr and elementLimit are updated to
 * describe the new chunk.
 *
 * Returns the first usable address in the chunk, just past the stored
 * pointer, or NULL if allocation fails.
 */
char *NewEltTable(NSL_Doctype_I *doctype,const char *oldTbl) {
    ECNN(doctype->elementPtr=doctype->elementBase=(char*)salloc(eltSpace));

    /* ground forwarding pointer, then step over it */
    *((const char**)doctype->elementPtr) = oldTbl;
    doctype->elementPtr += sizeof(const char **);

    doctype->elementLimit = doctype->elementBase + eltSpace;

    return (char*)doctype->elementPtr;
}
300
301 /* Open an entity for reading */
302
open_source(InputSource source,NSL_Doctype dtype,NSL_FType type)303 static NSL_File_I *open_source(InputSource source, NSL_Doctype dtype, NSL_FType type)
304 {
305 NSL_File_I *sf = NewNullFile();
306 static NSL_ElementSummary_I rootElt = {0,dc_element,0,0};
307 MarkupLanguage ml;
308
309 sf->type = type;
310 sf->doctype = dtype;
311
312 sf->pstate = NewParser();
313 ParserSetFlag(sf->pstate, ReturnDefaultedAttributes, 0);
314 ParserSetFlag(sf->pstate, MergePCData, 1);
315 ParserSetFlag(sf->pstate, ReturnComments, 0);
316 ParserSetFlag(sf->pstate, TrustSDD, 0); /* we want to decide ourselves */
317 ParserSetFlag(sf->pstate, AllowMultipleElements, 1);
318
319 if(type & NSL_read_strict)
320 /* Need this *before* we do the push, which may parse the XML decl.
321 But must postpone setting other flags, so they aren't undone
322 by SetMode later. */
323 ParserSetFlag(sf->pstate, XMLStrictWFErrors, 1);
324
325 if(ParserPush(sf->pstate, source) != 0)
326 {
327 ParserPerror(sf->pstate, &sf->pstate->xbit);
328 LT_ERROR(NEPARSE, "Error opening source\n");
329 return 0;
330 }
331
332 /* Choose xml mode unless there's an NSL declaration, or we were
333 passed an NSL doctype. */
334
335 ml = source->entity->ml_decl;
336
337 switch(ml)
338 {
339 case ML_xml:
340 SetMode(sf, 1);
341 if(dtype && !dtype->XMLMode)
342 WARN(NWMLMIXUP,
343 "You are trying to read an XML document with an nSGML\n"
344 "doctype. Expect a bus error.\n"
345 "LTG makes no representations about the suitability of this software\n"
346 "and data for any purpose. It is provided \"as is\" without express or\n"
347 "implied warranty. LTG disclaims all warranties with regard to this\n"
348 "software and data, including all implied warranties of merchantability\n"
349 "and fitness, in no event shall LTG be liable for any special, indirect\n"
350 "or consequential damages or any damages whatsoever, action of\n"
351 "contract, negligence or other tortious action, arising out of or in\n"
352 "connection with the use or performance of this software.\n");
353 break;
354 case ML_nsl:
355 SetMode(sf, 0);
356 if(dtype && dtype->XMLMode)
357 WARN(NWMLMIXUP,
358 "You are trying to read an nSGML document with an XML\n"
359 "doctype. Who knows what will happen? Good luck.\n")
360 break;
361 case ML_unspecified:
362 SetMode(sf, !dtype || dtype->XMLMode);
363 break;
364 }
365
366 if(type & NSL_read_no_expand) {
367 ParserSetFlag(sf->pstate, ExpandCharacterEntities, 0);
368 ParserSetFlag(sf->pstate, ExpandGeneralEntities, 0);
369 ParserSetFlag(sf->pstate, MergePCData, 0);
370 };
371
372 if(type & NSL_read_no_normalise_attributes) {
373 ParserSetFlag(sf->pstate, NormaliseAttributeValues, 0);
374 }
375
376 if(type & NSL_read_all_bits) {
377 ParserSetFlag(sf->pstate, ReturnComments, 1);
378 ParserSetFlag(sf->pstate, MergePCData, 0);
379 }
380
381 {
382 int w = (type & NSL_read_declaration_warnings);
383 ParserSetFlag(sf->pstate, WarnOnRedefinitions, w);
384 }
385
386 if(type & NSL_read_relaxed_any) {
387 ParserSetFlag(sf->pstate, RelaxedAny, 1);
388 }
389
390 if(type & NSL_read_allow_undeclared_nsattributes) {
391 ParserSetFlag(sf->pstate, AllowUndeclaredNSAttributes, 1);
392 }
393
394 if(type & NSL_read_strict)
395 {
396 #if 0
397 /* Doesn't work, fix it on output instead */
398 /* Actually, it does work now for XML, but we won't change it */
399 ParserSetFlag(sf->pstate, ReturnDefaultedAttributes, 1);
400 #endif
401 ParserSetFlag(sf->pstate, AllowMultipleElements, 0);
402 ParserSetFlag(sf->pstate, ErrorOnBadCharacterEntities, 1);
403 ParserSetFlag(sf->pstate, ErrorOnUndefinedEntities, 1);
404 ParserSetFlag(sf->pstate, XMLStrictWFErrors, 1);
405 }
406
407 if(type & NSL_read_validate)
408 {
409 if((dtype && !dtype->XMLMode) || ml == ML_nsl)
410 ;
411 else
412 {
413 /* Must set this so that RXP keeps an element stack */
414 ParserSetFlag(sf->pstate, MaintainElementStack, 1);
415 ParserSetFlag(sf->pstate, Validate, 1);
416 }
417 }
418
419 if(type & NSL_read_namespaces)
420 {
421 if(ml == ML_nsl || (dtype && !dtype->XMLMode))
422 /* namespace don't make sense for nSGML */
423 type &= ~NSL_read_namespaces;
424 else
425 ParserSetFlag(sf->pstate, XMLNamespaces, 1);
426 }
427
428 if(type & NSL_read_defaulted_attributes)
429 {
430 if(ml == ML_nsl || (dtype && !dtype->XMLMode))
431 /* won't work for nSGML */
432 type &= ~NSL_read_defaulted_attributes;
433 else
434 ParserSetFlag(sf->pstate, ReturnDefaultedAttributes, 1);
435 }
436
437 ECNF(sf->eltContent.base =
438 sf->eltContent.current = NEWSTACK(StackSize));
439 sf->eltContent.end=sf->eltContent.base+StackSize;
440 PUSH(sf->eltContent,&rootElt); /* fencepost, true if off the top */
441
442 ParserSetDtdCallback(sf->pstate, NSL_Parser_declaration);
443 ParserSetDtdCallbackArg(sf->pstate, sf);
444
445 if(dtype)
446 {
447 FreeDtd(sf->pstate->dtd);
448 sf->pstate->dtd = dtype->rxp_dtd;
449 sf->pstate->have_dtd = 1;
450 }
451 else
452 {
453 if(ml == ML_nsl)
454 {
455 SetMode(sf, 0);
456 ECNN(sf->doctype = dtype =
457 DoctypeFromDdb(source->entity->ddb_filename));
458 /* DoctypeFromDdb allocates an RXP dtd but we already have one */
459 FreeDtd(dtype->rxp_dtd);
460 sf->pstate->have_dtd = 1;
461 }
462 else
463 {
464 /* Create a fake doctype for an XML document */
465 /* This is common code from DoctypeFromDdb */
466 ECNN(sf->doctype=dtype=tsalloc(NSL_Doctype_I,1));
467 dtype->doctype=NULL;
468 dtype->XMLMode = TRUE;
469 dtype->ddb=NULL;
470 dtype->ddbfile=NULL;
471 dtype->seenDTD = FALSE;
472 dtype->sdd = (sddCode)sf->pstate->standalone;
473 ECFN(doctype_init_alloc(dtype, 100, 100, 100));
474
475 ECNN(dtype->attrNames=rcreate(NATTRS,ATTRNAMES));/*(4096,65536));*/
476 ECNN(dtype->elements=rcreate(NELEMENTS,ELEMENTNAMES)); /*1024,16384));*/
477 dtype->attrNameCount=dtype->elementCount=0;
478 ECNN(NewEltTable(dtype,0));
479 dtype->permanentBase=dtype->elementBase;
480 ECNN(dtype->offAttrsBase=salloc(sizeof(OffboardAttrs)*128));
481 dtype->offAttrsIndex=0;
482 dtype->offAttrsLimit=128;
483 dtype->entities=NULL; /* handled separately */
484 dtype->entityBase=NULL;
485 }
486 dtype->rxp_dtd = sf->pstate->dtd;
487 dtype->root_entity = source->entity;
488 dtype->doctypeStatement=NULL;
489 dtype->defaultOutputEncoding = source->entity->encoding;
490 dtype->fallbackEncodingDeclaration = source->entity->encoding_decl;
491 }
492
493 sf->pstate->dtd->doctype = dtype;
494
495 sf->currentbase=NewNullNSLData(dtype);
496
497 if(!(type & NSL_read_no_consume_prolog))
498 ReadProlog(sf);
499
500 return sf;
501 }
502
/* Read and discard bits until the prolog is over.
 *
 * The prolog ends at the first start tag, empty tag, end-of-file or bad
 * bit.  That bit is stashed in sf->peekBit so the next NextBit call
 * returns it.  Every earlier bit is freed.  Always returns 0.
 */
int ReadProlog(NSL_File_I *sf)
{
    for(;;)
    {
        NSL_Bit *bit = NextBit(sf);
        int ends_prolog = (bit->type == NSL_start_bit ||
                           bit->type == NSL_empty_bit ||
                           bit->type == NSL_eof_bit ||
                           bit->type == NSL_bad);

        if(ends_prolog)
        {
            /* We've reached the end of the prolog */
            /* XXX just leave an error to be read later - is that right? */
            sf->peekBit = bit;
            return 0;
        }

        FreeBit(bit);
    }
}
525
SynthesizePIBit(NSL_File_I * sf)526 static boolean SynthesizePIBit(NSL_File_I * sf) {
527 const NSL_Doctype_I * dtype = sf->doctype;
528 Char *buffer;
529
530 ECNF(buffer = salloc(1024 * sizeof(Char)));
531 sf->_bit1.type = NSL_pi_bit;
532 sf->_bit1.value.body = buffer;
533
534 if( dtype->XMLMode ){
535 char sdds[24];
536 char ecs[50];
537 if (dtype->sdd!=sdd_unspecified) {
538 sprintf(sdds," standalone='%s'",sddNames[dtype->sdd]);
539 }
540 else {
541 sdds[0]=0;
542 };
543 if (GetFileEncoding(sf->file16)!=CE_unspecified_ascii_superset) {
544 sprintf(ecs," encoding='%s'",
545 CharacterEncodingName[GetFileEncoding(sf->file16)]);
546 }
547 else {
548 if(dtype->fallbackEncodingDeclaration != CE_unknown)
549 sprintf(ecs," encoding='%s'",
550 CharacterEncodingName[dtype->fallbackEncodingDeclaration]);
551 else
552 ecs[0]=0;
553 };
554 Sprintf(buffer, InternalCharacterEncoding,
555 "xml version='1.0'%s%s",
556 ecs,
557 sdds);
558 } else {
559 Sprintf(buffer, InternalCharacterEncoding,
560 "NSL DDB %s 0", dtype->ddbfile);
561 }
562
563 return TRUE;
564 }
565
566 #include "nsl-ibit.h"
567
/* Turn sf->_bit1 into a doctype bit whose body points at the doctype's
 * doctypeStatement.  The statement is referenced, not copied. */
static void SynthesizeDoctypeBit(NSL_File_I * sf) {
    NSL_Doctype_I * source_doctype = sf->doctype;

    sf->_bit1.value.body = (void *)source_doctype->doctypeStatement;
    sf->_bit1.type = NSL_doctype_bit;
}
574
575
576 /* Set up a file for writing, but don't actually write anything
577 yet.
578 */
579
open_output(FILE16 * f16,NSL_Doctype dtype,NSL_FType type,CharacterEncoding encoding)580 static NSL_File_I *open_output(FILE16 *f16, NSL_Doctype dtype, NSL_FType type,
581 CharacterEncoding encoding)
582 {
583 NSL_File_I *sf = NewNullFile();
584 static NSL_ElementSummary_I rootElt = {0,dc_mixed,0,0};
585
586 sf->type = type;
587 sf->file16 = f16;
588 sf->doctype = dtype;
589 if(encoding)
590 SetFileEncoding(sf->file16, encoding);
591 else if(dtype)
592 SetFileEncoding(sf->file16, dtype->defaultOutputEncoding);
593
594 encoding = GetFileEncoding(sf->file16);
595 if(encoding == CE_UTF_16B || encoding == CE_UTF_16L)
596 /* Byte-order mark */
597 Fprintf(sf->file16, "%c", 0xfeff);
598
599 /* Set up element-only content stack if we're going to pretty-print */
600
601 if((type & NSL_write_style) == NSL_write_canonical ||
602 (type & NSL_write_style) == NSL_write_plain)
603 {
604 sf->eltContent.base=0;
605 }
606 else
607 {
608 if(dtype)
609 {
610 ECNN(sf->eltContent.base =
611 sf->eltContent.current=NEWSTACK(StackSize));
612 /* N.B. now have two pointers to same storage in sf->elementContent,
613 let's hope we eventually free exactly one of them
614 */
615 sf->eltContent.end=sf->eltContent.base+StackSize;
616 PUSH(sf->eltContent,&rootElt); /* fencepost */
617 }
618 else
619 {
620 WARN(NWOFND,
621 "NSL Output file needs doctype for normal or pretty output\n"
622 "but none supplied or defaulted: minimal output will ensue\n");
623 sf->type &= ~NSL_write_style;
624 sf->type |= NSL_write_plain;
625 sf->eltContent.base=0;
626 }
627 }
628
629 /* Print PI and doctype, if we want it */
630
631 if(!(type & NSL_write_no_doctype) && dtype)
632 {
633 static Char newline[] = {'\n',0};
634 /* The call to SynthesizePIBit is made in order
635 * that all output pass through PrintBit.
636 */
637 ECFN(SynthesizePIBit(sf));
638 /* XXX -- use a global to ensure that PrintBit
639 * doesn't output the DOCTYPE statement. Ugly
640 * temporary patch
641 */
642 PrintBit(sf,&(sf->_bit1));
643 FreeBit(&sf->_bit1);
644 PrintTextLiteral(sf, newline);
645
646 /* ... in the same way we synthesise and print
647 * a doctype bit if there is one...
648 */
649 if(dtype->doctypeStatement) {
650 SynthesizeDoctypeBit(sf);
651 PrintBit(sf,&(sf->_bit1));
652 }
653 }
654
655 return sf;
656 }
657
658 /* XML DTD call backs */
659
/* XML DTD callback installed on the parser by open_source.
 *
 * Only comment and processing-instruction bits are expected here; each
 * is traced with COMMENT1 and then freed.  Any other bit type triggers
 * SHOULDNT.  callback_arg is the NSL_File_I registered with the parser
 * and is currently unused.
 */
void NSL_Parser_declaration(XBit bit, void *callback_arg){
    if (bit->type == XBIT_comment) {
        COMMENT1("CB:XBIT_comment %s\n", bit->comment_chars);
        FreeXBit(bit);
    }
    else if (bit->type == XBIT_pi) {
        COMMENT1("CB:XBIT_pi %s", bit->pi_name);
        COMMENT1(" %s\n",bit->pi_chars);
        FreeXBit(bit);
    }
    else {
        SHOULDNT;
    }
}
681
682 /* This should not be called directly now. Use DefineElement[N]. */
DeclareElement(NSL_Doctype_I * doctype,const Char * name,int length,const char * data,ctVals modelType)683 RHTEntry *DeclareElement(NSL_Doctype_I *doctype, const Char *name,
684 int length, const char *data, ctVals modelType) {
685 RHTEntry *res;
686 if (!doctype) return 0; // RXP internally-read document, e.g. catalog
687 NSL_ElementSummary_I *value=(NSL_ElementSummary_I *)doctype->elementPtr;
688 if ((char*)(value+1)>doctype->elementLimit) {
689 ECNN(value=(NSL_ElementSummary_I *)NewEltTable(doctype,
690 doctype->elementBase));
691 };
692 res=xrinsert(doctype,name, length, (RHashTableHdr*)doctype->elements,
693 ((char*)value)-doctype->permanentBase);
694 COMMENT1("Inserting element %s into doctype\n",name);
695 value->numAttr=value->omitStart=value->omitEnd=0;
696 value->contentType=modelType;
697
698 doctype->elementPtr=(char*)(value+1);
699 return res;
700 }
701
702 /* This should not be called directly now. Use DefineAttribute[N]. */
/* Add an attribute summary for element *eltptr to a doctype.
 * This should not be called directly now. Use DefineAttribute[N].
 * Only called in XML mode.
 *
 * doctype              - doctype being extended
 * name, length         - attribute name; inserted into
 *                        doctype->attrNames if not already present
 * declared_value       - stored (narrowed to char) in the summary
 * allowed_values       - stored as an offset from the summary, 0 if NULL
 * allowed_values_count - stored as numAV
 * default_type         - stored (narrowed to char) in the summary
 * default_value        - stored as an offset from the summary, 0 if NULL
 * eltptr               - *eltptr is the owning element summary; its
 *                        numAttr is updated
 * eltName              - must be non-NULL (SHOULDNT otherwise)
 *
 * Summaries that directly follow their element in the summary chunk are
 * counted in elt->numAttr.  Once a summary cannot be placed contiguously
 * the element switches to an "offboard" linked list, and numAttr becomes
 * -1-(index into doctype->offAttrsBase).
 *
 * Returns a pointer to the attribute's name inside the attrNames table,
 * or NULL if a table insertion or allocation fails.
 */
const Char *DeclareAttr(NSL_Doctype_I *doctype, const Char *name,
                        int length,
                        NSL_Attr_Declared_Value declared_value,
                        const Char *allowed_values, int allowed_values_count,
                        NSL_ADefType default_type, const Char *default_value,
                        NSL_ElementSummary_I **eltptr,
                        const Char *eltName) {
    /* Only called in XML mode */
    RHTEntry *attr;
    AttributeSummary *value;
    ASPtr *asp;
    OffboardAttrs *oa;
    NSL_ElementSummary_I *oPtr,*elt=*eltptr;

    if (!eltName) {
        SHOULDNT;
    }

    if (!(attr=rsearch(name,length,doctype->attrNames))) {
        /* attr->keyptr is read below, so a failed insert must stop here */
        ECNN(attr=xrinsert(doctype,name,length,
                           (RHashTableHdr*)doctype->attrNames,1));
    }

    value=(AttributeSummary *)doctype->elementPtr;
    if ((char*)(value+1)>doctype->elementLimit) {
        ECNN(value=(AttributeSummary *)NewEltTable(doctype,
                                                   doctype->elementBase));
    }
    oPtr=(NSL_ElementSummary_I *)doctype->elementPtr;
    doctype->elementPtr=(char*)(value+1);

    /* what value? */
    value->namePtr=((Char*)doctype->attrNames+attr->keyptr)-(Char*)value;
    value->defaultPtr=default_value?default_value-(Char*)value:0;
    value->allowedValuesPtr=allowed_values?allowed_values-(Char*)value:0;
    value->numAV=allowed_values_count;
    value->declaredValue=(char)declared_value;
    value->defaultValueType=(char)default_type;

    if (elt->numAttr<0) {
        /* Some summaries are already offboard, add to them */
        oa=(OffboardAttrs*)doctype->offAttrsBase+(-1-elt->numAttr);
        asp=(ASPtr *)doctype->elementPtr;
        if ((char*)(asp+1)>doctype->elementLimit) {
            ECNN(asp=(ASPtr *)NewEltTable(doctype,doctype->elementBase));
        }
        doctype->elementPtr=(char*)(asp+1);
        asp->next=oa->asp;
        asp->as=value;
        oa->asp=asp;
    }
    else if ((char*)elt!=(char*)((AttributeSummary*)
                                 (oPtr-1)-
                                 elt->numAttr)) {
        /* Not contiguous with pre-existing ASs for this elt, if any,
           either because of intervening allocation or change in table */
        /* Shift to offboard attr sums */
        if (doctype->offAttrsIndex==doctype->offAttrsLimit) {
            /* No more room: reallocate and move offboard table.  Only
               commit the new base and limit once the reallocation has
               succeeded, so a failure leaves the doctype consistent. */
            int newLimit=doctype->offAttrsLimit+128;
            const OffboardAttrs *grown;

            ECNN(grown=(const OffboardAttrs *)srealloc(
                     (void *)doctype->offAttrsBase,
                     sizeof(OffboardAttrs)*newLimit));
            doctype->offAttrsBase=grown;
            doctype->offAttrsLimit=newLimit;
        }
        oa=(OffboardAttrs *)doctype->offAttrsBase+(doctype->offAttrsIndex++);
        asp=(ASPtr *)doctype->elementPtr;
        if ((char*)(asp+1)>doctype->elementLimit) {
            ECNN(asp=(ASPtr *)NewEltTable(doctype,doctype->elementBase));
        }
        doctype->elementPtr=(char*)(asp+1);
        asp->next=0;
        asp->as=value;
        oa->asp=asp;
        oa->oldNumAttr=elt->numAttr;
        elt->numAttr=-doctype->offAttrsIndex; /* Note this is, correctly,
                                                 -1-(the index of the new entry) */
    }
    else {
        elt->numAttr+=1;
    }

    return (const Char*)doctype->attrNames+attr->keyptr;
}
778
779 #if 0
780 void CheckinElement(NSL_Doctype_I *doctype, Char *element_name,
781 const char *element_data, ctVals modelType) {
782 size_t len;
783
784 if (!rsearch(element_name, (len=Strlen(element_name)), doctype->elements)) {
785 DeclareElement(doctype,element_name,len,element_data,modelType);
786 }
787 return;
788 }
789 #endif
790
791 extern NSL_Item *NNI(const NSL_ElementSummary_I *elt,
792 const NSL_Doctype_I *doctype,
793 const Char *name);
794
795 #if 0
796 int CheckInEntity(NSL_Doctype_I *doctype, const char *entity_name, int length){
797 RHTEntry* entry;
798 RHVal value = 0;
799
800 if( !length ){ length = strlen(entity_name); };
801 entry=rsearch(entity_name, length, doctype->entities);
802 if (entry) {
803 COMMENT1("Entity %s already in doctype\n",entity_name);
804 } else {
805 xrinsert(doctype,
806 entity_name, length, (RHashTableHdr*)doctype->entities,value);
807 COMMENT1("Inserting entity %s into doctype\n",entity_name);
808 }
809 return 0;
810 }
811 #endif
812
813 /* ================================================= */
814
815 /* XXX this function is broken, given the XML parser. */
816
SFFreopen(NSL_File_I * file,FILE * filep)817 boolean SFFreopen( NSL_File_I *file, FILE *filep ) {
818 #if 1
819 LT_ERROR(NEUNSUP,
820 "The function SFFreopen has been removed from the library\n");
821 return FALSE;
822 #else
823 ECEF(sfclose(file->filep));
824 file->filep=(FILE *)filep;
825 return TRUE;
826 #endif
827 }
828
/* Close an NSL file and free the file record.
 *
 * For input files the pending peeked bit (if any), the current data
 * record and the parser are freed.  The document entity is freed too,
 * but only when it is not the doctype's root entity.
 * For output files a final newline is forced for the default and fancy
 * styles, output is flushed, and the FILE16 (plus fileToClose, if set)
 * is closed.
 *
 * The doctype is never freed here because another file may share it;
 * callers must call FreeDoctype themselves (or use SFrelease).
 *
 * Returns 0 on success; the EC* macros return early with an error value
 * if a flush, close or free fails.
 */
int SFclose( NSL_File_I *f) {

    if (f->type & NSL_read) {
        Entity docent = 0;

        /* peekBit is 0 once NextBit has consumed it; guard the free as
           SFseek does */
        if (f->peekBit) {
            FreeBit(f->peekBit);
            f->peekBit = 0;
        }
        FreeData(f->currentbase, f->doctype);
        /* Free the document entity only if it's not needed by the dtd.
           This will be true only when we got the doctype from another file.
           Don't free it until after FreeParser, because FreeParser looks
           at the entities. */
        if(f->pstate->document_entity != f->doctype->root_entity)
            docent = f->pstate->document_entity;
        FreeParser(f->pstate);
        if(docent) {
            /* Don't free strings passed in to OpenString */
            docent->text = 0;
            FreeEntity(docent);
        }
    } else {
        if((f->type & NSL_write_style) == NSL_write_default ||
           (f->type & NSL_write_style) == NSL_write_fancy) {
            ForceNewline(f);
        }
        ECEE(ForceOutput(f)); /* will do FlushRe */
        ECEE(Fclose(f->file16));
        if(f->fileToClose)
            ECEE(stdfclose(f->fileToClose)); /* Only close what we opened */
    }
    /* We don't free the doctype, because some other file may use it.
       The caller has to call FreeDoctype. */
    if (f->eltContent.base) {
        ECFE(sfree(f->eltContent.base));
    }
    ECFE(sfree(f));
    return 0;
}
865
866 /* An unsafe (see documentation) version of SFclose which cleans up
867 all heap storage associated with the file.
868 The DOCTYPE is released iff releaseDoctype is TRUE */
869
/* Close `f` with SFclose and, when releaseDoctype is true, also free the
 * doctype that was attached to it.  Returns 0 on success; returns early
 * with SFclose's error if closing fails.
 */
int SFrelease( NSL_File_I *f, boolean releaseDoctype) {
    /* Grab the doctype first: f is freed by SFclose */
    NSL_Doctype_I *attached = f->doctype;

    ECEE(SFclose(f));

    if (!releaseDoctype)
        return 0;

    FreeDoctype(attached);
    return 0;
}
881
882 #if 0
883 void SetFileDoctype( NSL_File_I *f, NSL_Doctype_I *d ) {
884 f->doctype=d;
885 }
886 #endif
887
888 /* Given a NSL_File_I *return the NSL_Doctype_I *associated with it */
889
DoctypeFromFile(NSL_File_I * file)890 NSL_Doctype_I *DoctypeFromFile( NSL_File_I *file) {
891 return (NSL_Doctype_I *)file->doctype;
892 }
893
/* Opens the three standard file streams as NSGML streams */
895
896 NSL_File_I *sgstdin, *sgstdout, *sgstderr;
897
StdFiles(NSL_FType type)898 boolean StdFiles(NSL_FType type) {
899 ECNF(sgstdin=SFFopen(stdin, NULL, NSL_read, "stdin"));
900 ECNF(sgstdout=SFFopen(stdout, DoctypeFromFile(sgstdin), type, "stdout"));
901 ECNF(sgstderr=SFFopen(stderr, NULL, NSL_write_plain|NSL_write_no_doctype,
902 "stderr"));
903 return TRUE;
904 }
905
906 #if 0
907 void MakeFilesSameDoctype( NSL_File_I *ffrom, NSL_File_I *fto ) {
908 fto->doctype=ffrom->doctype;
909 }
910 #endif
911
912 /* Return the byte offset in the file of the current read position */
913 /* XXX only works for root entity; need a better interface */
914 /* Why does this return size_t??? Should be long (like ftell) or
915 off_t (like lseek). */
916
/* Return the position reported by SourceTell for the parser's root
 * source. */
size_t SFtell(NSL_File_I *sf) {
    InputSource root = ParserRootSource(sf->pstate);

    return SourceTell(root);
}
920
921 /* Move read position in file to pos bytes in */
922 /* XXX only works for root entity; need a better interface */
923
/* Reposition the root source of `sf` at offset `pos`.
 *
 * Any peeked bit is discarded and all nested sources are popped until
 * only the parentless one remains.  The parser is then forced into the
 * body state with placement errors ignored, since the true state at the
 * new position is unknown.  Returns the result of SourceSeek.
 */
int SFseek(NSL_File_I *sf,size_t pos) {
    Parser parser = sf->pstate;

    /* Drop any look-ahead; it belongs to the old position */
    if (sf->peekBit != 0) {
        FreeBit(sf->peekBit);
        sf->peekBit = 0;
    }

    /* Unwind to the outermost source */
    while (parser->source->parent) {
        ParserPop(parser);
    }

    parser->state = PS_body; /* What else could we do? */
    ParserSetFlag(parser, IgnorePlacementErrors, 1);

    return SourceSeek(parser->source, pos);
}
940
941 /* Called by NSL_Init */
942
ParseInit(void)943 boolean ParseInit( void ) {
944 init_parser();
945 return TRUE;
946 }
947
948 /* Read and return the next NSL_Bit in the input file */
949
NextBit(NSL_File_I * sf)950 NSL_Bit *NextBit(NSL_File_I *sf) {
951
952 XBit bit;
953 NSL_Bit *nslbit;
954 NSL_BI_Type lastType;
955 int n;
956 NSL_Doctype_I *doctype;
957 boolean XMLMode;
958 char8 *sysid=0;
959 Char *dtdchars=0, *intsubset=0;
960 Char *data;
961 static Char empty_string[] = {0};
962
963 if (sf->peekBit) {
964 NSL_Bit *tmp=sf->peekBit;
965 sf->peekBit=0;
966 return tmp;
967 }
968
969 doctype=(NSL_Doctype_I *)sf->doctype;
970 nslbit = &(sf->_bit1);
971 bit = ReadXBit(sf->pstate);
972
973 sf->currentBitOffset = bit->byte_offset;
974
975 XMLMode=doctype?doctype->XMLMode:FALSE;
976 lastType=nslbit->type;
977
978 nslbit->nsc = 0;
979 nslbit->nsowned = 0;
980
981 switch(bit->type) {
982 case XBIT_error:
983 ParserPerror(sf->pstate, bit);
984 LT_ERROR(NEPARSE, "");
985 nslbit->type = NSL_bad;
986 FreeXBit(bit);
987 break;
988 case XBIT_start:
989 case XBIT_empty:
990 if( bit->type == XBIT_empty ){
991 nslbit->type = NSL_empty_bit;
992 } else {
993 nslbit->type = NSL_start_bit;
994 }
995
996 nslbit->value.item=NNI(bit->element_definition->eltsum,
997 doctype,
998 bit->element_definition->name);
999 nslbit->value.item->prefix=bit->element_definition->prefix;
1000
1001 nslbit->label=nslbit->value.item->label;
1002 nslbit->prefix=nslbit->value.item->prefix;
1003
1004 ParseAttributeString(doctype,nslbit->value.item, bit->attributes,
1005 sf->type & NSL_read_namespaces);
1006
1007 if(sf->type & NSL_read_namespaces)
1008 {
1009 if(bit->ns_element_definition)
1010 {
1011 nslbit->llabel = nslbit->value.item->llabel =
1012 bit->ns_element_definition->name;
1013 nslbit->nsuri = nslbit->value.item->nsuri =
1014 bit->ns_element_definition->namespace->nsname;
1015 }
1016 else
1017 {
1018 nslbit->llabel= nslbit->label;
1019 nslbit->nsuri = 0;
1020 }
1021
1022 nslbit->value.item->ns_dict = nslbit->ns_dict = bit->ns_dict;
1023 nslbit->value.item->nsc = nslbit->nsc = bit->nsc;
1024
1025 /* Take ownership of the ns records */
1026 if(bit->type == XBIT_empty)
1027 {
1028 nslbit->value.item->nsowned = 1;
1029 bit->nsowned = 0;
1030 }
1031 }
1032
1033 if (XMLMode) {
1034 if (bit->type==XBIT_empty) {
1035 nslbit->value.item->type=NSL_empty;
1036 }
1037 } else {
1038 if (nslbit->value.item->defn->contentType==dc_empty) {
1039 nslbit->type=NSL_empty_bit;
1040 nslbit->value.item->type=NSL_empty;
1041 } else if ( nslbit->value.item->type == NSL_empty ){
1042 /* item type may have been set to NSL_empty if we */
1043 /* came across an explicit CONREF attribute value */
1044 nslbit->type = NSL_empty_bit;
1045 }
1046 }
1047
1048 if( nslbit->type == NSL_start_bit ){
1049 PUSH(sf->eltContent,
1050 bit->element_definition->eltsum);
1051 }
1052 else if(sf->eltContent.current - 1 == sf->eltContent.base)
1053 /* See comment below under XBIT_end */
1054 sf->pstate->state = PS_epilog;
1055
1056 break;
1057 case XBIT_pi:
1058 /* A processing instruction */
1059 COMMENT1("%s", xbit_type_name[bit->type]);
1060 COMMENT1(" %s",bit->pi_name);
1061 COMMENT1(" %s\n", bit->pi_chars);
1062 nslbit->type=NSL_pi_bit;
1063 { Char *new;
1064 ECNN(new=salloc((Strlen(bit->pi_name)+1+Strlen(bit->pi_chars)+1)*sizeof(Char)));
1065 if (bit->pi_chars[0]) {
1066 Sprintf(new,InternalCharacterEncoding,"%S %S",bit->pi_name,bit->pi_chars);
1067 }
1068 else {
1069 Sprintf(new,InternalCharacterEncoding,"%S",bit->pi_name);
1070 };
1071 nslbit->value.body = new;
1072 }
1073 #if 0
1074 /* NSL PIs are now handled in the low-level parser */
1075 if (HandlePi(bit,sf) < 0) {
1076 nslbit->type=NSL_bad;
1077 }
1078 #endif
1079 /* Since we copied them, we should free the originals */
1080 FreeXBit(bit);
1081 break;
1082 case XBIT_pcdata:
1083 COMMENT1("%s", xbit_type_name[bit->type]);
1084 COMMENT1(" [%s]\n", bit->pcdata_chars);
1085 if (!(sf->type & NSL_read_all_bits) &&
1086 (TOP(sf->eltContent))->contentType==dc_element) {
1087 /* this assumes any PCData in the wrong place is white . . . */
1088 /* and that eltOnly is true outside the document element */
1089 FreeXBit(bit);
1090 return NextBit(sf);
1091 }
1092 data=bit->pcdata_chars;
1093 if(data[0] == '&' && (sf->type & NSL_read_no_expand))
1094 nslbit->flags = NSL_text_isERef;
1095 else
1096 nslbit->flags = 0;
1097 if (!XMLMode) {
1098 int incr=0;
1099 Char *realdata=data;
1100
1101 if ( data[0] == '\n' ) {
1102 switch (lastType) {
1103 case NSL_start_bit:
1104 case NSL_pi_bit:
1105 /* one RE is ignored after start tag or pi */
1106 if (*(++(data))=='\0') {
1107 /* skip the whole thing */
1108 sfree(realdata);
1109 return NextBit(sf);
1110 } else {
1111 incr=1;
1112 }
1113 break;
1114 default:
1115 break;
1116 }
1117 }
1118
1119 n=Strlen(data);
1120 if (n--==0) {
1121 sfree(realdata);
1122 return NextBit(sf);
1123 }
1124 /* check last char if before end tag */
1125 /* what about PIs? */
1126 if (PeekXBit(sf->pstate)->type==XBIT_end) {
1127 if (n==0 && data[0]=='\n') {
1128 /* empty, try again */
1129 sfree(realdata);
1130 return NextBit(sf);
1131 } else if (data[n]=='\n') {
1132 data[n]='\000';
1133 }
1134 }
1135 if (incr) {
1136 /* o bother, hope this is rare because it's STUPID */
1137 data=Strdup(data);
1138 sfree(realdata);
1139 }
1140 }
1141
1142 nslbit->type = NSL_text_bit;
1143 nslbit->value.body = (Char*)data;
1144 break;
1145
1146 case XBIT_end:
1147 nslbit->type = NSL_end_bit;
1148 COMMENT1("%s", xbit_type_name[bit->type]);
1149 COMMENT1(" %s\n", bit->element_definition->name);
1150 if ((NSL_ElementSummary_I *)TOP(sf->eltContent)!=
1151 bit->element_definition->eltsum) {
1152 char buf[100];
1153 bit->type=XBIT_error;
1154 Sprintf(buf,CE_ISO_8859_1,"unmatched end tag %.70S",
1155 bit->element_definition->name);
1156 bit->error_message=buf;
1157 ParserPerror(sf->pstate, bit);
1158 LT_ERROR(NEPARSE,"")
1159 };
1160 POPNV(sf->eltContent);
1161 nslbit->label = bit->element_definition->name;
1162 nslbit->prefix = bit->element_definition->prefix;
1163 nslbit->value.item = NULL;
1164 if(sf->eltContent.current - 1 == sf->eltContent.base)
1165 /* Have to do this here, because the low-level parser doesn't keep
1166 an element stack when used with NSL. The stack is empty when
1167 there is only one element on it (sigh). */
1168 sf->pstate->state = PS_epilog;
1169
1170 if(sf->type & NSL_read_namespaces)
1171 {
1172 if(bit->ns_element_definition)
1173 {
1174 nslbit->llabel = bit->ns_element_definition->name;
1175 nslbit->nsuri = bit->ns_element_definition->namespace->nsname;
1176 }
1177 else
1178 {
1179 nslbit->llabel= nslbit->label;
1180 nslbit->nsuri = 0;
1181 }
1182
1183 /* Take ownership of the ns records */
1184 nslbit->ns_dict = bit->ns_dict;
1185 nslbit->nsc = bit->nsc;
1186 nslbit->nsowned = 1;
1187 bit->nsowned = 0;
1188 }
1189
1190 FreeXBit(bit);
1191 break;
1192
1193 case XBIT_eof:
1194 nslbit->type = NSL_eof_bit;
1195 COMMENT1("%s\n", xbit_type_name[bit->type]);
1196 break;
1197
1198 case XBIT_comment:
1199 COMMENT1("%s", xbit_type_name[bit->type]);
1200 COMMENT1(" %s\n", bit->comment_chars);
1201 nslbit->type = NSL_comment_bit;
1202 nslbit->value.body = bit->comment_chars;
1203 break;
1204
1205 case XBIT_dtd:
1206 if (XMLMode && !doctype->seenDTD) {
1207 COMMENT1("%s", xbit_type_name[bit->type]);
1208 doctype->seenDTD = TRUE;
1209 {
1210 struct xbit oldbit = *bit;
1211 if (sf->pstate->dtd->internal_part) {
1212 bit = ParseDtd(sf->pstate, sf->pstate->dtd->internal_part);
1213 if (bit->type==XBIT_error) {
1214 ParserPerror(sf->pstate, bit);
1215 LT_ERROR(NEPARSE, "");
1216 nslbit->type = NSL_bad;
1217 FreeXBit(&oldbit);
1218 FreeXBit(bit);
1219 break;
1220 }
1221 }
1222 if ((doctype->sdd!=sdd_yes || sf->type & NSL_read_validate) &&
1223 sf->pstate->dtd->external_part) {
1224 bit = ParseDtd(sf->pstate, sf->pstate->dtd->external_part);
1225 if (bit->type == XBIT_error) {
1226 ParserPerror(sf->pstate, bit);
1227 LT_ERROR(NEPARSE, "");
1228 nslbit->type = NSL_bad;
1229 FreeXBit(&oldbit);
1230 FreeXBit(bit);
1231 break;
1232 }
1233 FreeXBit(bit);
1234 }
1235 *bit = oldbit;
1236 }
1237 } else {
1238 COMMENT1("%s", xbit_type_name[bit->type]);
1239 }
1240 if (sf->pstate->dtd->external_part &&
1241 sf->pstate->dtd->external_part->systemid) {
1242 sysid=salloc(strlen(sf->pstate->dtd->external_part->systemid)+4);
1243 sprintf(sysid,"\"%s\" ",sf->pstate->dtd->external_part->systemid);
1244 }
1245 if (sf->pstate->dtd->internal_part) {
1246 intsubset=salloc((Strlen(sf->pstate->dtd->internal_part->text)+3)*
1247 sizeof(Char));
1248 Sprintf(intsubset, InternalCharacterEncoding,
1249 "[%S]",sf->pstate->dtd->internal_part->text);
1250 }
1251 if (sf->pstate->dtd->external_part &&
1252 sf->pstate->dtd->external_part->publicid) {
1253 dtdchars=salloc((Strlen(sf->pstate->dtd->name)+
1254 9+
1255 strlen8(sf->pstate->dtd->external_part->publicid)+
1256 2+
1257 (sysid?strlen8(sysid):0)+
1258 (intsubset?Strlen(intsubset):0)+
1259 1) * sizeof(Char));
1260 Sprintf(dtdchars, InternalCharacterEncoding, "%S PUBLIC \"%s\" %s%S",
1261 sf->pstate->dtd->name,
1262 sf->pstate->dtd->external_part->publicid,
1263 sysid?sysid:(char8 *)"",
1264 intsubset?intsubset:empty_string);
1265 } else {
1266 dtdchars=salloc((Strlen(sf->pstate->dtd->name)+
1267 1+
1268 (sysid?7:0)+
1269 (sysid?strlen8(sysid):0)+
1270 (intsubset?Strlen(intsubset):0)+
1271 1) * sizeof(Char));
1272 Sprintf(dtdchars, InternalCharacterEncoding, "%S %s%s%S",
1273 sf->pstate->dtd->name,
1274 sysid?"SYSTEM ":"",
1275 sysid?sysid:(char8 *)"",
1276 intsubset?intsubset:empty_string);
1277 }
1278 sfree(sysid);
1279 sfree(intsubset);
1280 FreeXBit(bit);
1281
1282 doctype->doctypeStatement = dtdchars;
1283
1284 nslbit->type = NSL_doctype_bit;
1285 nslbit->value.body = Strdup(dtdchars);
1286 break;
1287
1288 case XBIT_cdsect:
1289 COMMENT1("%s\n", xbit_type_name[bit->type]);
1290 nslbit->type=NSL_text_bit;
1291 nslbit->value.body=bit->cdsect_chars;
1292 if(sf->type & NSL_read_all_bits)
1293 nslbit->flags = NSL_text_isCData;
1294 else
1295 nslbit->flags = 0;
1296 break;
1297
1298 default:
1299 SHOULDNT;
1300 }
1301 return nslbit;
1302 }
1303
1304 /* NB, this should be tied much more closely in with Richard's code,
1305 since his attribute and our NSL_Attr are VERY similar */
1306
/* An AttributeSummary whose listed initializers are all zero.  Nothing in
   the part of the file visible here refers to it -- presumably a default or
   placeholder summary; confirm against its users before removing it. */
static const AttributeSummary ConstAtSum={0,0,0,0,0,0};
1308
PAS1(NSL_Doctype_I * dct,NSL_Item * item,struct attribute * source,int do_ns,NSL_Attr ** prev)1309 static boolean PAS1(NSL_Doctype_I *dct, NSL_Item *item,
1310 struct attribute *source, int do_ns, NSL_Attr **prev) {
1311 NSL_Attr *refvar, *res;
1312 const AttributeSummary *atsum;
1313
1314 /* If no attributes then we are finished */
1315 if (!source) {
1316 item->attr=NULL;
1317 return TRUE;
1318 }
1319
1320 /* Recurse down list of attributes - in effect we are starting at
1321 the end and working backwards (So that order of attributes is
1322 reversed twice, and hence in the same order as in the source
1323 file) */
1324 if (source->next) {
1325 ECFF(PAS1(dct, item, source->next, do_ns, &res));
1326 }
1327
1328 /* Now we process this attribut/value pair */
1329
1330 if(dct->XMLMode)
1331 atsum = source->definition->attrsum;
1332 else
1333 /* In NSL mode the attribute summary itself is returned */
1334 atsum = (AttributeSummary *)source->definition;
1335
1336 ECNF(refvar=AttrFromSpec(atsum, dct));
1337
1338 /* We have an explicit #CONREF attribute, this means that
1339 The item is of type NSL_empty, despite what the DTD says */
1340 if( refvar->deft == NSL_defval_conref ){
1341 item->type = NSL_empty;
1342 }
1343
1344 ECFF(SetAttrValue(refvar, source->value));
1345
1346 if(do_ns)
1347 {
1348 if(source->ns_definition && !source->ns_definition->element)
1349 {
1350 refvar->lname = source->ns_definition->name;
1351 refvar->nsuri = source->ns_definition->namespace->nsname;
1352 }
1353 else
1354 {
1355 refvar->lname = source->definition->name;
1356 refvar->nsuri = 0;
1357 }
1358 }
1359
1360 if( !(source->next) ){
1361 item->attr=refvar;
1362 } else {
1363 res->next=refvar;
1364 }
1365 if (prev) {
1366 *prev=refvar;
1367 }
1368
1369 sfree(source);
1370 return TRUE;
1371 }
1372
/*
 * ParseAttributeString -- attach the attributes in `source` to `item`.
 *
 * Thin wrapper around PAS1 that passes no slot for the resulting
 * attribute; the return value is PAS1's result.  Note that this definition
 * returns int while the prototype near the top of the file is declared
 * boolean.
 */
int ParseAttributeString(NSL_Doctype_I *dct, NSL_Item *item,
                         struct attribute *source, int do_ns) {
    int status;

    status = PAS1(dct, item, source, do_ns, 0);
    return status;
}
1377
DocumentIsNSGML(NSL_Doctype_I * dct)1378 boolean DocumentIsNSGML(NSL_Doctype_I *dct)
1379 {
1380 return !dct->XMLMode;
1381 }
1382
1383 /* ======================================================================== */
1384 /* Moved from sgmldef.c */
1385
DoctypeFromDdb(const char8 * filename)1386 NSL_Doctype_I *DoctypeFromDdb(const char8 *filename) {
1387
1388 NSL_Doctype_I *doctype;
1389 DDBHeader* ddb;
1390 ECNN(ddb=readddb(filename));
1391
1392 checkddb(filename,ddb,FALSE);
1393
1394 ECNN(doctype=tsalloc(NSL_Doctype_I, 1));
1395
1396 doctype->doctype=(char*)(ddb+1);
1397 COMMENT1("[!DOCTYPE %s ",doctype->doctype);
1398 COMMENT1("from DDB %s]\n",filename);
1399
1400 doctype->ddb=ddb;
1401 doctype->ddbfile=filename;
1402
1403 doctype->attrNames=(RHashTableHdr*)(((char*)ddb)+ddb->anameTableOffset);
1404 doctype->elements=(RHashTableHdr*)(((char*)ddb)+ddb->elementTableOffset);
1405 doctype->elementBase=((char*)doctype->elements)+doctype->elements->length;
1406 doctype->permanentBase=doctype->elementBase;
1407 doctype->entities=(RHashTableHdr*)(((char*)ddb)+ddb->entityTableOffset);
1408 doctype->entityBase=((char*)doctype->entities)+doctype->entities->length;
1409 doctype->doctypeStatement = NULL;
1410 doctype->XMLMode=FALSE;
1411 doctype->seenDTD = TRUE;
1412
1413 ECFN(doctype_init_alloc(doctype, 100, 100, 100));
1414
1415 doctype->defaultOutputEncoding = CE_UTF_8; /* XXX is this a good default? */
1416 doctype->fallbackEncodingDeclaration = CE_unknown;
1417 doctype->rxp_dtd = NewDtd();
1418 doctype->root_entity = 0;
1419
1420 return doctype;
1421 }
1422
doctype_init_alloc(NSL_Doctype_I * doctype,int nitems,int ndata,int nattr)1423 static boolean doctype_init_alloc(NSL_Doctype_I *doctype,
1424 int nitems, int ndata, int nattr)
1425 {
1426 if ((doctype->itemstack=Uinit(sizeof(NSL_Item), nitems, 30))!=NULL &&
1427 (doctype->datastack=Uinit(sizeof(NSL_Data), ndata, 30))!=NULL &&
1428 (doctype->attrstack=Uinit(sizeof(NSL_Attr), nattr, 30))!=NULL) {
1429 NameUmalloc(doctype->itemstack, "NSL_Item");
1430 NameUmalloc(doctype->datastack, "NSL_Data");
1431 NameUmalloc(doctype->attrstack, "NSL_Attr");
1432 return TRUE;
1433 } else {
1434 return FALSE;
1435 }
1436 }
1437
LoadDoctype(const char8 * filename)1438 NSL_Doctype_I *LoadDoctype(const char8 *filename)
1439 {
1440 int len;
1441 NSL_File_I *sf;
1442 NSL_Doctype_I *dct;
1443
1444 /* If it's a .ddb file, use DoctypeFromDdb */
1445
1446 len = strlen(filename);
1447 if(len > 4 && strcmp(filename+len-4, ".ddb") == 0)
1448 return DoctypeFromDdb(filename);
1449
1450 /* Otherwise, open as an nSGML/XML file, and get its doctype */
1451
1452 /* Open in no_consume_prolog mode, so we don't attempt to read the first
1453 element (we want it to work for files just containing <!DOCTYPE ... >) */
1454
1455 ECNN(sf = SFopen(filename, 0, NSL_read|NSL_read_no_consume_prolog));
1456
1457 /* If it's a nSGML file, we should now have read the doctype */
1458
1459 dct = sf->doctype;
1460 if(!dct->XMLMode)
1461 {
1462 SFclose(sf);
1463 return dct;
1464 }
1465
1466 /* Otherwise we must read until we get the doctype bit */
1467
1468 while(1)
1469 {
1470 NSL_Bit *bit = NextBit(sf);
1471 switch(bit->type)
1472 {
1473 case NSL_bad:
1474 SFrelease(sf, 1);
1475 return 0;
1476
1477 case NSL_start_bit:
1478 case NSL_empty_bit:
1479 case NSL_eof_bit:
1480 /* We've reached the end of the prolog without seeing <!DOCTYPE>.
1481 Just return the empty doctype. */
1482 case NSL_doctype_bit:
1483 /* We found it. */
1484 FreeBit(bit);
1485 SFclose(sf);
1486 return dct;
1487
1488 default:
1489 /* Ignore other bits (PIs, comments). */
1490 FreeBit(bit);
1491 break;
1492 }
1493 }
1494 }
1495
1496 /* ======================================================================== */
1497 /* Moved from sgmldef.c */
1498
MakeSpec(const NSL_Doctype_I * doctype)1499 const char8 *MakeSpec(const NSL_Doctype_I *doctype) {
1500 char *spstr;
1501
1502 ECNN(spstr=salloc(200));
1503 sprintf(spstr, "<?NSL DDB %s 0>\n",doctype->ddbfile);
1504 return spstr;
1505 }
1506
1507 /* ======================================================================== */
1508
/*
 * CurrentBitOffset -- return the offset recorded in sf->currentBitOffset.
 *
 * A negative stored value is reported through LT_ERROR as "no bit offset
 * yet"; unless LT_ERROR itself diverts control, that negative value is
 * still what gets returned.
 */
extern int CurrentBitOffset( NSL_File_I *sf ){
    if (0 > sf->currentBitOffset) {
        LT_ERROR(NEOFFST,"No bit offset yet\n");
    }

    return sf->currentBitOffset;
}
1515
1516 /* end of file */
1517
1518