1 /* 	$Id: sgmlfiles.c,v 1.163 2009/10/21 13:13:47 ht Exp $	 */
2 
3 #ifndef lint
4 static char vcid[] = "$Id: sgmlfiles.c,v 1.163 2009/10/21 13:13:47 ht Exp $";
5 #endif /* lint */
6 
7 /* sgmlfiles.c	-- Henry Thompson Tue Jan 30 1996
8  * Basic file processing for XML/NSL
9  */
10 
11 /* This version using Richard's XML aware parser */
12 
13 #include <assert.h>
14 
15 #include "lt-memory.h"
16 #include "nsllib.h"
17 #include "string16.h"
18 #include "stdio16.h"
19 #include "lt-safe.h"
20 #include "lt-umalloc.h"
21 #include "dtd.h"
22 #include "input.h"
23 #include "url.h"
24 #include "xmlparser.h"
25 #include "readddb.h"
26 #include "catalog.h"
27 
28 static boolean doctype_init_alloc(NSL_Doctype_I *doctype,
29 				 int nitems, int ndata, int nattr);
30 
31 /* XXX to be removed asap */
32 
33 boolean ParseAttributeString( NSL_Doctype_I *doctype, NSL_Item *item,
34 			      struct attribute *source, int do_ns);
35 
36 size_t BufCopyMax=BUFCOPYMAX,BufMargin=BUFMARGIN;
37 
38 static NSL_File_I *open_output(FILE16 *fp, NSL_Doctype dtype, NSL_FType type,
39 			       CharacterEncoding encoding);
40 static NSL_File_I *open_source(InputSource source, NSL_Doctype dtype,
41 			       NSL_FType type);
42 
43 static int CheckFlags(int type);
44 
45 /*--- File handling ---*/
46 
CheckFlags(int type)47 static int CheckFlags(int type)
48 {
49     if(((type & NSL_read) && (type & NSL_write_flags)) ||
50        ((type & NSL_write) && (type & NSL_read_flags)) ||
51        (type & (NSL_read | NSL_write)) == 0)
52     {
53 	LT_ERROR(NEIO,
54 		    "Bad flag combination when opening file or stream\n");
55 	return -1;
56     }
57     return 0;
58 }
59 
OpenURL(const char8 * url,NSL_Doctype dtype,NSL_FType type,CharacterEncoding encoding,const char8 * base)60 NSL_File OpenURL(const char8 *url, NSL_Doctype dtype, NSL_FType type,
61 		 CharacterEncoding encoding, const char8 *base)
62 {
63     char8 *m_url;
64     Entity entity;
65     InputSource source;
66     FILE16 *f16;
67     NSL_File_I *file;
68 
69     ECEN(CheckFlags(type));
70 
71     if(type & NSL_read)
72     {
73 	ECNN(entity = NewExternalEntity(0, 0, url, 0, 0));
74 	entity->encoding = encoding;
75 	ECNN(source = EntityOpen(entity));
76 	return open_source(source, dtype, type);
77     }
78     else
79     {
80 	ECNN(f16 = url_open(url, base, "w", &m_url));
81 	SetCloseUnderlying(f16, 1);
82 	file = open_output(f16, dtype, type, encoding);
83 	return file;
84     }
85 }
86 
OpenStream(FILE * fp,NSL_Doctype dtype,NSL_FType type,CharacterEncoding encoding,const char8 * name)87 NSL_File OpenStream(FILE *fp, NSL_Doctype dtype, NSL_FType type,
88 		    CharacterEncoding encoding, const char8 *name)
89 {
90     InputSource source;
91     FILE16 *f16;
92 
93     ECEN(CheckFlags(type));
94 
95     if(type & NSL_read)
96     {
97 	source = SourceFromStream(name, fp);
98 	source->entity->encoding = encoding;
99 	return open_source(source, dtype, type);
100     }
101     else
102     {
103 	ECNN(f16 = MakeFILE16FromFILE(fp, "w"));
104 	return open_output(f16, dtype, type, encoding);
105     }
106 }
107 
/* Configure the parser attached to `file` for XML (xml non-zero) or
 * nSGML (xml zero) processing.  Most flags simply follow `xml`; three are
 * its negation.  The catalog is enabled only in XML mode.
 */
static void SetMode(NSL_File_I *file, int xml)
{
    Parser parser = file->pstate;
    int nsgml = !xml;

    /* Syntax and well-formedness behaviour that only applies to XML */
    ParserSetFlag(parser, XMLSyntax, xml);
    ParserSetFlag(parser, XMLPredefinedEntities, xml);
    ParserSetFlag(parser, XMLExternalIDs, xml);
    ParserSetFlag(parser, XMLMiscWFErrors, xml);
    ParserSetFlag(parser, ErrorOnUnquotedAttributeValues, xml);
    ParserSetFlag(parser, NormaliseAttributeValues, xml);
    ParserSetFlag(parser, XMLLessThan, xml);

    /* Entity handling */
    ParserSetFlag(parser, IgnoreEntities, nsgml);
    ParserSetFlag(parser, ExpandCharacterEntities, xml);
    ParserSetFlag(parser, ExpandGeneralEntities, xml);

    /* End tags cannot be checked in nSGML mode: there are no real
       ElementDefinition records for the parser to consult. */
    ParserSetFlag(parser, MaintainElementStack, xml);

    /* In nSGML mode make undeclared names an error, so that the parser
       does not attempt to auto-declare them (which would not work). */
    ParserSetFlag(parser, ErrorOnUndefinedElements, nsgml);
    ParserSetFlag(parser, ErrorOnUndefinedAttributes, nsgml);

    if (xml)
        CatalogEnable(parser);
}
131 
132 /* Open a string for input or output. */
133 
OpenString(Char * text,NSL_Doctype dtype,NSL_FType type)134 NSL_File OpenString(Char *text, NSL_Doctype dtype, NSL_FType type)
135 {
136     NSL_File_I *file;
137     static Char string[] = {'<','s','t','r','i','n','g','>',0};
138 
139     ECEN(CheckFlags(type));
140 
141     if(type & NSL_read)
142     {
143 	Entity entity;
144 	InputSource source;
145 
146 	entity = NewInternalEntity(string, text, 0, 0, 0, 0);
147 	ECNN(source = EntityOpen(entity));
148 	/* Don't read prolog yet, we want to set flag first */
149 	file = open_source(source, dtype, type|NSL_read_no_consume_prolog);
150 	/* So we don't get warnings about xml declaration in internal entity */
151 	ParserSetFlag(file->pstate, IgnorePlacementErrors, 1);
152 	/* Now we can read the prolog */
153 	if(!(type & NSL_read_no_consume_prolog))
154 	    ReadProlog(file);
155     }
156     else
157     {
158 	FILE16 *f16;
159 
160 	ECNN(f16 = MakeFILE16FromString(text, -1, "w"));
161 	file = open_output(f16, dtype, type, CE_unknown);
162     }
163 
164     return file;
165 }
166 
167 /* Open a string for input and read a single Item from it.
168  */
169 
GetItemFromString(const Char * text,NSL_Doctype dtype)170 NSL_Item *GetItemFromString(const Char *text, NSL_Doctype dtype)
171 {
172     NSL_File file;
173     NSL_Item *item;
174 
175     ECNN(file = OpenString((Char *)text, dtype, NSL_read));
176     item = GetNextItem(file);
177     SFclose(file);
178     return item;
179 }
180 
181 /* Manipulate the base URL of the root entity of a file */
182 
GetFileURL(NSL_File file)183 const char8 *GetFileURL(NSL_File file)
184 {
185     return EntityBaseURL(ParserRootEntity(file->pstate));
186 }
187 
/* Set the base URL of the root entity of `file`'s parser to `url`. */
void SetFileURL(NSL_File file, const char8 *url)
{
    Parser parser = file->pstate;

    EntitySetBaseURL(ParserRootEntity(parser), url);
}
192 
193 
194 /* Backward compatibility functions.
195  * These are superseded by OpenURL and OpenStream.
196  */
197 
SFopen(const char8 * urlname,NSL_Doctype_I * dtype,NSL_FType type)198 NSL_File_I *SFopen(const char8 *urlname, NSL_Doctype_I *dtype,
199 		   NSL_FType type)
200 {
201     return OpenURL(urlname, dtype, type, CE_unknown, 0);
202 }
203 
SFFopen(FILE * fp,NSL_Doctype_I * dtype,NSL_FType type,const char8 * fname)204 NSL_File_I *SFFopen( FILE *fp, NSL_Doctype_I *dtype,
205 		     NSL_FType type, const char8 *fname)
206 {
207     return OpenStream(fp, dtype, type, CE_unknown, fname);
208 }
209 
210 void NSL_Parser_declaration(XBit bit, void *callback_arg);
211 
212 static const char* sddNames[]={"unspecified","no","yes"};
213 
214 
215 
216 /* utility to ensure all fields of sf filled in sensibly */
217 /* and seen to be filled in sensibly */
218 
NewNullFile()219 static NSL_File_I * NewNullFile () {
220 	NSL_File_I *sf;
221 
222   ECNN(sf=tsalloc(NSL_File_I, 1));
223   sf->file16 = NULL;
224   sf->fileToClose = NULL;
225   sf->doctype = NULL;
226   sf->type = NSL_write;
227   sf->cnum = 0;
228   sf->pstate = NULL;
229   sf->currentbase = NULL;
230   sf->_bit1.type=NSL_bad;
231   sf->_bit1.nsc=0;
232   sf->_bit1.nsowned = 0;
233   sf->peekBit=0;
234   sf->currentItemCharPosn = -1;
235   sf->currentBitOffset = -1;
236 
237   /* rest only valid for output files */
238   sf->state=_elt_unknown;
239 
240 	return sf;
241 	}
242 
243 #define NELEMENTS 1024
244 #define ELEMENTNAMES 16384
245 #define NATTRS 1024
246 #define ATTRNAMES 16384
247 
xrinsert(NSL_Doctype_I * doctype,const Char * name,int len,RHashTableHdr * tbl,RHVal val)248 RHTEntry* xrinsert(NSL_Doctype_I *doctype,
249 		   const Char* name,int len,RHashTableHdr* tbl,RHVal val) {
250   if (!len) {
251     len=Strlen(name);
252   };
253   if ((len+tbl->freekey)>tbl->length) {
254     LT_ERROR(NEMEM,"hash table keychars overflow\n");
255     return NULL;
256   };
257 				/* Should make this less left-handed */
258   if (tbl==doctype->attrNames) {
259     if (doctype->attrNameCount++==(NATTRS/2)) {
260       WARN(NEMEM,"attr hash table half full!\n");
261     }
262     else if (doctype->attrNameCount>NATTRS) {
263       LT_ERROR(NEMEM,"attr hash table entry overflow\n");
264       return NULL;
265     };
266   }
267   else if (tbl==doctype->elements) {
268     if (doctype->elementCount++==(NELEMENTS/2)) {
269       WARN(NEMEM,"element hash table half full!\n");
270     }
271     else if (doctype->elementCount>NELEMENTS) {
272       LT_ERROR(NEMEM,"element hash table entry overflow\n");
273       return NULL;
274     };
275   }
276   else {
277     SHOULDNT;
278   };
279   return rinsert(name,len,tbl,val);
280 }
281 
282 static int eltSpace=64*(sizeof(NSL_ElementSummary_I)+
283 			 (4*sizeof(AttributeSummary))+16);
284 
/* Allocate a fresh element/attribute summary table of eltSpace bytes and
 * make it the doctype's current one.  Called when the summary table
 * overflows.
 *
 * The first pointer-sized slot of the new table stores `oldTbl` (a link
 * back to the previous table, or 0 for the first one); elementPtr is left
 * pointing just past that slot and elementLimit at the end of the table.
 *
 * Returns the first usable address in the new table, or a null pointer if
 * the allocation fails.  On failure the doctype is left untouched, so the
 * existing table is still reachable.
 */
char *NewEltTable(NSL_Doctype_I *doctype,const char *oldTbl) {
  char *fresh;

  /* Allocate into a local first: assigning straight into the doctype
     would wipe elementBase/elementPtr if salloc returned null. */
  ECNN(fresh=(char*)salloc(eltSpace));

  /* ground forwarding pointer */
  *((const char**)fresh) = oldTbl;

  doctype->elementBase=fresh;
  doctype->elementPtr=fresh+sizeof(const char **);
  doctype->elementLimit=fresh+eltSpace;
  return (char*)doctype->elementPtr;
}
300 
301 /* Open an entity for reading */
302 
open_source(InputSource source,NSL_Doctype dtype,NSL_FType type)303 static NSL_File_I *open_source(InputSource source, NSL_Doctype dtype, NSL_FType type)
304 {
305     NSL_File_I *sf = NewNullFile();
306     static NSL_ElementSummary_I rootElt = {0,dc_element,0,0};
307     MarkupLanguage ml;
308 
309     sf->type = type;
310     sf->doctype = dtype;
311 
312     sf->pstate = NewParser();
313     ParserSetFlag(sf->pstate, ReturnDefaultedAttributes, 0);
314     ParserSetFlag(sf->pstate, MergePCData, 1);
315     ParserSetFlag(sf->pstate, ReturnComments, 0);
316     ParserSetFlag(sf->pstate, TrustSDD, 0); /* we want to decide ourselves */
317     ParserSetFlag(sf->pstate, AllowMultipleElements, 1);
318 
319     if(type & NSL_read_strict)
320 	/* Need this *before* we do the push, which may parse the XML decl.
321 	   But must postpone setting other flags, so they aren't undone
322 	   by SetMode later. */
323 	ParserSetFlag(sf->pstate, XMLStrictWFErrors, 1);
324 
325     if(ParserPush(sf->pstate, source) != 0)
326     {
327 	ParserPerror(sf->pstate, &sf->pstate->xbit);
328 	LT_ERROR(NEPARSE, "Error opening source\n");
329 	return 0;
330     }
331 
332     /* Choose xml mode unless there's an NSL declaration, or we were
333        passed an NSL doctype. */
334 
335     ml = source->entity->ml_decl;
336 
337     switch(ml)
338     {
339     case ML_xml:
340 	SetMode(sf, 1);
341 	if(dtype && !dtype->XMLMode)
342 	    WARN(NWMLMIXUP,
343 		 "You are trying to read an XML document with an nSGML\n"
344 		 "doctype.  Expect a bus error.\n"
345     "LTG makes no representations about the suitability of this software\n"
346     "and data for any purpose.  It is provided \"as is\" without express or\n"
347     "implied warranty.  LTG disclaims all warranties with regard to this\n"
348     "software and data, including all implied warranties of merchantability\n"
349     "and fitness, in no event shall LTG be liable for any special, indirect\n"
350     "or consequential damages or any damages whatsoever, action of\n"
351     "contract, negligence or other tortious action, arising out of or in\n"
352     "connection with the use or performance of this software.\n");
353 	break;
354     case ML_nsl:
355 	SetMode(sf, 0);
356 	if(dtype && dtype->XMLMode)
357 	    WARN(NWMLMIXUP,
358 		 "You are trying to read an nSGML document with an XML\n"
359 		 "doctype.  Who knows what will happen?  Good luck.\n")
360 	break;
361     case ML_unspecified:
362 	SetMode(sf, !dtype || dtype->XMLMode);
363 	break;
364     }
365 
366     if(type & NSL_read_no_expand) {
367 	ParserSetFlag(sf->pstate, ExpandCharacterEntities, 0);
368 	ParserSetFlag(sf->pstate, ExpandGeneralEntities, 0);
369 	ParserSetFlag(sf->pstate, MergePCData, 0);
370     };
371 
372     if(type & NSL_read_no_normalise_attributes) {
373 	ParserSetFlag(sf->pstate, NormaliseAttributeValues, 0);
374     }
375 
376     if(type & NSL_read_all_bits) {
377 	ParserSetFlag(sf->pstate, ReturnComments, 1);
378 	ParserSetFlag(sf->pstate, MergePCData, 0);
379     }
380 
381     {
382 	int w = (type & NSL_read_declaration_warnings);
383 	ParserSetFlag(sf->pstate, WarnOnRedefinitions, w);
384     }
385 
386     if(type & NSL_read_relaxed_any) {
387 	ParserSetFlag(sf->pstate, RelaxedAny, 1);
388     }
389 
390     if(type & NSL_read_allow_undeclared_nsattributes) {
391 	ParserSetFlag(sf->pstate, AllowUndeclaredNSAttributes, 1);
392     }
393 
394     if(type & NSL_read_strict)
395     {
396 #if 0
397 	/* Doesn't work, fix it on output instead */
398 	/* Actually, it does work now for XML, but we won't change it */
399 	ParserSetFlag(sf->pstate, ReturnDefaultedAttributes, 1);
400 #endif
401 	ParserSetFlag(sf->pstate, AllowMultipleElements, 0);
402 	ParserSetFlag(sf->pstate, ErrorOnBadCharacterEntities, 1);
403 	ParserSetFlag(sf->pstate, ErrorOnUndefinedEntities, 1);
404 	ParserSetFlag(sf->pstate, XMLStrictWFErrors, 1);
405     }
406 
407     if(type & NSL_read_validate)
408     {
409 	if((dtype && !dtype->XMLMode) || ml == ML_nsl)
410 	    ;
411 	else
412 	{
413 	    /* Must set this so that RXP keeps an element stack */
414 	    ParserSetFlag(sf->pstate, MaintainElementStack, 1);
415 	    ParserSetFlag(sf->pstate, Validate, 1);
416 	}
417     }
418 
419     if(type & NSL_read_namespaces)
420     {
421 	if(ml == ML_nsl || (dtype && !dtype->XMLMode))
422 	    /* namespace don't make sense for nSGML */
423 	    type &= ~NSL_read_namespaces;
424 	else
425 	    ParserSetFlag(sf->pstate, XMLNamespaces, 1);
426     }
427 
428     if(type & NSL_read_defaulted_attributes)
429     {
430 	if(ml == ML_nsl || (dtype && !dtype->XMLMode))
431 	    /* won't work for nSGML */
432 	    type &= ~NSL_read_defaulted_attributes;
433 	else
434 	    ParserSetFlag(sf->pstate, ReturnDefaultedAttributes, 1);
435     }
436 
437     ECNF(sf->eltContent.base =
438 	 sf->eltContent.current = NEWSTACK(StackSize));
439     sf->eltContent.end=sf->eltContent.base+StackSize;
440     PUSH(sf->eltContent,&rootElt); /* fencepost, true if off the top */
441 
442     ParserSetDtdCallback(sf->pstate, NSL_Parser_declaration);
443     ParserSetDtdCallbackArg(sf->pstate, sf);
444 
445     if(dtype)
446     {
447 	FreeDtd(sf->pstate->dtd);
448 	sf->pstate->dtd = dtype->rxp_dtd;
449 	sf->pstate->have_dtd = 1;
450     }
451     else
452     {
453 	if(ml == ML_nsl)
454 	{
455 	    SetMode(sf, 0);
456 	    ECNN(sf->doctype = dtype =
457 		 DoctypeFromDdb(source->entity->ddb_filename));
458 	    /* DoctypeFromDdb allocates an RXP dtd but we already have one */
459 	    FreeDtd(dtype->rxp_dtd);
460 	    sf->pstate->have_dtd = 1;
461 	}
462 	else
463 	{
464 	    /* Create a fake doctype for an XML document */
465 	    /* This is common code from DoctypeFromDdb */
466 	    ECNN(sf->doctype=dtype=tsalloc(NSL_Doctype_I,1));
467 	    dtype->doctype=NULL;
468 	    dtype->XMLMode = TRUE;
469 	    dtype->ddb=NULL;
470 	    dtype->ddbfile=NULL;
471 	    dtype->seenDTD = FALSE;
472 	    dtype->sdd = (sddCode)sf->pstate->standalone;
473 	    ECFN(doctype_init_alloc(dtype, 100, 100, 100));
474 
475 	    ECNN(dtype->attrNames=rcreate(NATTRS,ATTRNAMES));/*(4096,65536));*/
476 	    ECNN(dtype->elements=rcreate(NELEMENTS,ELEMENTNAMES)); /*1024,16384));*/
477 	    dtype->attrNameCount=dtype->elementCount=0;
478 	    ECNN(NewEltTable(dtype,0));
479 	    dtype->permanentBase=dtype->elementBase;
480 	    ECNN(dtype->offAttrsBase=salloc(sizeof(OffboardAttrs)*128));
481 	    dtype->offAttrsIndex=0;
482 	    dtype->offAttrsLimit=128;
483 	    dtype->entities=NULL; /* handled separately */
484 	    dtype->entityBase=NULL;
485 	}
486 	dtype->rxp_dtd = sf->pstate->dtd;
487 	dtype->root_entity = source->entity;
488 	dtype->doctypeStatement=NULL;
489 	dtype->defaultOutputEncoding = source->entity->encoding;
490 	dtype->fallbackEncodingDeclaration = source->entity->encoding_decl;
491     }
492 
493     sf->pstate->dtd->doctype = dtype;
494 
495     sf->currentbase=NewNullNSLData(dtype);
496 
497     if(!(type & NSL_read_no_consume_prolog))
498 	ReadProlog(sf);
499 
500     return sf;
501 }
502 
/* Consume bits from `sf` until the first one that is a start tag, an
 * empty element, end of file, or an error.  That bit is left in
 * sf->peekBit so that the next NextBit call returns it; every bit before
 * it is freed.  Always returns 0.
 */
int ReadProlog(NSL_File_I *sf)
{
    for(;;)
    {
        NSL_Bit *next = NextBit(sf);
        NSL_BI_Type kind = next->type;

        if(kind == NSL_start_bit || kind == NSL_empty_bit ||
           kind == NSL_eof_bit || kind == NSL_bad)
        {
            /* We've reached the end of the prolog */
            /* XXX an error bit is simply left to be read later - is
               that right? */
            sf->peekBit = next;
            return 0;
        }

        FreeBit(next);
    }
}
525 
SynthesizePIBit(NSL_File_I * sf)526 static boolean SynthesizePIBit(NSL_File_I * sf) {
527     const NSL_Doctype_I * dtype = sf->doctype;
528     Char *buffer;
529 
530     ECNF(buffer = salloc(1024 * sizeof(Char)));
531     sf->_bit1.type = NSL_pi_bit;
532     sf->_bit1.value.body = buffer;
533 
534     if( dtype->XMLMode ){
535 	char sdds[24];
536 	char ecs[50];
537 	if (dtype->sdd!=sdd_unspecified) {
538 	    sprintf(sdds," standalone='%s'",sddNames[dtype->sdd]);
539 	}
540 	else {
541 	    sdds[0]=0;
542 	};
543 	if (GetFileEncoding(sf->file16)!=CE_unspecified_ascii_superset) {
544 	    sprintf(ecs," encoding='%s'",
545 		    CharacterEncodingName[GetFileEncoding(sf->file16)]);
546 	}
547 	else {
548 	    if(dtype->fallbackEncodingDeclaration != CE_unknown)
549 		sprintf(ecs," encoding='%s'",
550 			CharacterEncodingName[dtype->fallbackEncodingDeclaration]);
551 	    else
552 		ecs[0]=0;
553 	};
554 	Sprintf(buffer, InternalCharacterEncoding,
555 		"xml version='1.0'%s%s",
556 		ecs,
557 		sdds);
558     } else {
559 	Sprintf(buffer, InternalCharacterEncoding,
560 		"NSL DDB %s 0", dtype->ddbfile);
561     }
562 
563     return TRUE;
564 }
565 
566 #include "nsl-ibit.h"
567 
/* Turn sf->_bit1 into a doctype bit whose body is the doctype's stored
 * doctypeStatement.  The statement is referenced, not copied.
 */
static void SynthesizeDoctypeBit(NSL_File_I * sf) {
    NSL_Doctype_I * owner = sf->doctype;
    void *statement = (void *)owner->doctypeStatement;

    sf->_bit1.type = NSL_doctype_bit;
    sf->_bit1.value.body = statement;
}
574 
575 
576 /* Set up a file for writing, but don't actually write anything
577    yet.
578  */
579 
open_output(FILE16 * f16,NSL_Doctype dtype,NSL_FType type,CharacterEncoding encoding)580 static NSL_File_I *open_output(FILE16 *f16, NSL_Doctype dtype, NSL_FType type,
581 			       CharacterEncoding encoding)
582 {
583     NSL_File_I *sf = NewNullFile();
584     static NSL_ElementSummary_I rootElt = {0,dc_mixed,0,0};
585 
586     sf->type = type;
587     sf->file16 = f16;
588     sf->doctype = dtype;
589     if(encoding)
590 	SetFileEncoding(sf->file16, encoding);
591     else if(dtype)
592 	SetFileEncoding(sf->file16, dtype->defaultOutputEncoding);
593 
594     encoding = GetFileEncoding(sf->file16);
595     if(encoding == CE_UTF_16B || encoding == CE_UTF_16L)
596 	/* Byte-order mark */
597 	Fprintf(sf->file16, "%c", 0xfeff);
598 
599     /* Set up element-only content stack if we're going to pretty-print */
600 
601     if((type & NSL_write_style) == NSL_write_canonical ||
602        (type & NSL_write_style) == NSL_write_plain)
603     {
604 	sf->eltContent.base=0;
605     }
606     else
607     {
608 	if(dtype)
609 	{
610 	    ECNN(sf->eltContent.base =
611 		 sf->eltContent.current=NEWSTACK(StackSize));
612 	   /* N.B. now have two pointers to same storage in sf->elementContent,
613 	      let's hope we eventually free exactly one of them
614 	      */
615 	    sf->eltContent.end=sf->eltContent.base+StackSize;
616 	    PUSH(sf->eltContent,&rootElt);	/* fencepost */
617 	}
618 	else
619 	{
620 	    WARN(NWOFND,
621 		 "NSL Output file needs doctype for normal or pretty output\n"
622 	       "but none supplied or defaulted:  minimal output will ensue\n");
623 	    sf->type &= ~NSL_write_style;
624 	    sf->type |= NSL_write_plain;
625 	    sf->eltContent.base=0;
626 	}
627     }
628 
629     /* Print PI and doctype, if we want it */
630 
631     if(!(type & NSL_write_no_doctype) && dtype)
632     {
633 	static Char newline[] = {'\n',0};
634 	/* The call to SynthesizePIBit is made in order
635 	 * that all output pass through PrintBit.
636 	 */
637 	ECFN(SynthesizePIBit(sf));
638 	/* XXX -- use a global to ensure that PrintBit
639 	 * doesn't output the DOCTYPE statement. Ugly
640 	 * temporary patch
641 	 */
642 	PrintBit(sf,&(sf->_bit1));
643 	FreeBit(&sf->_bit1);
644 	PrintTextLiteral(sf, newline);
645 
646 	/* ... in the same way we synthesise and print
647 	 * a doctype bit if there is one...
648 	 */
649 	if(dtype->doctypeStatement) {
650 	    SynthesizeDoctypeBit(sf);
651 	    PrintBit(sf,&(sf->_bit1));
652 	}
653     }
654 
655     return sf;
656 }
657 
658 /* XML DTD call backs */
659 
/* DTD callback installed on the parser by open_source.  Comments and
 * processing instructions found in the DTD are logged via COMMENT1 and
 * then freed; any other bit type is flagged with SHOULDNT.
 * `callback_arg` (the owning NSL_File_I, as registered by open_source)
 * is currently unused.
 */
void NSL_Parser_declaration(XBit bit, void *callback_arg){
    switch(bit->type) {
    case XBIT_pi:
        COMMENT1("CB:XBIT_pi %s", bit->pi_name);
        COMMENT1(" %s\n",bit->pi_chars);
        FreeXBit(bit);
        break;

    case XBIT_comment:
        COMMENT1("CB:XBIT_comment %s\n", bit->comment_chars);
        FreeXBit(bit);
        break;

    default:
        SHOULDNT;
        break;
    }
}
681 
682 /* This should not be called directly now.  Use DefineElement[N]. */
DeclareElement(NSL_Doctype_I * doctype,const Char * name,int length,const char * data,ctVals modelType)683 RHTEntry *DeclareElement(NSL_Doctype_I *doctype, const Char *name,
684 			 int length, const char *data, ctVals modelType) {
685   RHTEntry *res;
686   if (!doctype) return 0; // RXP internally-read document, e.g. catalog
687   NSL_ElementSummary_I *value=(NSL_ElementSummary_I *)doctype->elementPtr;
688   if ((char*)(value+1)>doctype->elementLimit) {
689     ECNN(value=(NSL_ElementSummary_I *)NewEltTable(doctype,
690 						   doctype->elementBase));
691   };
692   res=xrinsert(doctype,name, length, (RHashTableHdr*)doctype->elements,
693 	      ((char*)value)-doctype->permanentBase);
694   COMMENT1("Inserting element %s into doctype\n",name);
695   value->numAttr=value->omitStart=value->omitEnd=0;
696   value->contentType=modelType;
697 
698   doctype->elementPtr=(char*)(value+1);
699   return res;
700 }
701 
702 /* This should not be called directly now.  Use DefineAttribute[N]. */
/* Record an attribute declaration for the element summary *eltptr.
 * Only called in XML mode.
 *
 * The attribute name is looked up in (or added to) the doctype's
 * attribute-name table, and an AttributeSummary is carved from the
 * summary table.  Name, default value and allowed values are stored as
 * offsets relative to the summary itself.
 *
 * If the new summary is contiguous with the element's existing summaries
 * the element's numAttr is simply incremented.  Otherwise the element is
 * switched to an "offboard" list: numAttr becomes -1-(index into
 * offAttrsBase) and the summary is linked in through an ASPtr.
 *
 * Returns a pointer to the stored attribute name, or a null pointer if
 * the name cannot be inserted or more table space cannot be allocated.
 */
const Char *DeclareAttr(NSL_Doctype_I *doctype, const Char *name,
                        int length,
                        NSL_Attr_Declared_Value declared_value,
                        const Char *allowed_values, int allowed_values_count,
                        NSL_ADefType default_type, const Char *default_value,
                        NSL_ElementSummary_I **eltptr,
                        const Char *eltName) {
  RHTEntry *attr;
  AttributeSummary *value;
  ASPtr *asp;
  OffboardAttrs *oa;
  NSL_ElementSummary_I *oPtr,*elt=*eltptr;
  if (!eltName) {
    SHOULDNT;
  }
  if (!(attr=rsearch(name,length,doctype->attrNames))) {
    /* xrinsert returns null when the name table is full */
    ECNN(attr=xrinsert(doctype,name,length,
                       (RHashTableHdr*)doctype->attrNames,1));
  }
  value=(AttributeSummary *)doctype->elementPtr;
  if ((char*)(value+1)>doctype->elementLimit) {
    ECNN(value=(AttributeSummary *)NewEltTable(doctype,doctype->elementBase));
  }
  /* Where the summary starts (after any table switch); used below to test
     contiguity with the element's existing summaries. */
  oPtr=(NSL_ElementSummary_I *)doctype->elementPtr;
  doctype->elementPtr=(char*)(value+1);
    /* what value? */
  value->namePtr=((Char*)doctype->attrNames+attr->keyptr)-(Char*)value;
  value->defaultPtr=default_value?default_value-(Char*)value:0;
  value->allowedValuesPtr=allowed_values?allowed_values-(Char*)value:0;
  value->numAV=allowed_values_count;
  value->declaredValue=(char)declared_value;
  value->defaultValueType=(char)default_type;
  if (elt->numAttr<0) {
    /* Some summaries are already offboard, add to them */
    oa=(OffboardAttrs*)doctype->offAttrsBase+(-1-elt->numAttr);
    asp=(ASPtr *)doctype->elementPtr;
    if ((char*)(asp+1)>doctype->elementLimit) {
      ECNN(asp=(ASPtr *)NewEltTable(doctype,doctype->elementBase));
    }
    doctype->elementPtr=(char*)(asp+1);
    asp->next=oa->asp;
    asp->as=value;
    oa->asp=asp;
  }
  else if ((char*)elt!=(char*)((AttributeSummary*)
                          (oPtr-1)-
                          elt->numAttr)) {
    /* Not contiguous with pre-existing ASs for this elt, if any,
       either because of intervening allocation or change in table */
    /* Shift to offboard attr sums */
    if (doctype->offAttrsIndex==doctype->offAttrsLimit) {
      /* No more room: reallocate and move offboard table.  The doctype is
         only updated once the reallocation has succeeded, so a failure
         leaves the old base pointer and limit in place. */
      int newLimit=doctype->offAttrsLimit+128;
      const OffboardAttrs *grown;
      ECNN(grown=(const OffboardAttrs *)srealloc(
                                          (void *)doctype->offAttrsBase,
                                          sizeof(OffboardAttrs)*newLimit));
      doctype->offAttrsBase=grown;
      doctype->offAttrsLimit=newLimit;
    }
    oa=(OffboardAttrs *)doctype->offAttrsBase+(doctype->offAttrsIndex++);
    asp=(ASPtr *)doctype->elementPtr;
    if ((char*)(asp+1)>doctype->elementLimit) {
      ECNN(asp=(ASPtr *)NewEltTable(doctype,doctype->elementBase));
    }
    doctype->elementPtr=(char*)(asp+1);
    asp->next=0;
    asp->as=value;
    oa->asp=asp;
    oa->oldNumAttr=elt->numAttr;
    elt->numAttr=-doctype->offAttrsIndex; /* Note this is, correctly,
                                           -1-(the index of the new entry) */
  }
  else {
    elt->numAttr+=1;
  }
  return (const Char*)doctype->attrNames+attr->keyptr;
}
778 
779 #if 0
780 void CheckinElement(NSL_Doctype_I *doctype, Char *element_name,
781 		    const char *element_data, ctVals modelType) {
782   size_t len;
783 
784   if (!rsearch(element_name, (len=Strlen(element_name)), doctype->elements)) {
785     DeclareElement(doctype,element_name,len,element_data,modelType);
786   }
787   return;
788 }
789 #endif
790 
791 extern NSL_Item *NNI(const NSL_ElementSummary_I *elt,
792 		     const NSL_Doctype_I *doctype,
793 		     const Char *name);
794 
795 #if 0
796 int CheckInEntity(NSL_Doctype_I *doctype, const char *entity_name, int length){
797   RHTEntry* entry;
798   RHVal value = 0;
799 
800   if( !length ){ length = strlen(entity_name); };
801   entry=rsearch(entity_name, length, doctype->entities);
802   if (entry) {
803     COMMENT1("Entity %s already in doctype\n",entity_name);
804   } else {
805     xrinsert(doctype,
806 	     entity_name, length, (RHashTableHdr*)doctype->entities,value);
807     COMMENT1("Inserting entity %s into doctype\n",entity_name);
808   }
809   return 0;
810 }
811 #endif
812 
813 /* ================================================= */
814 
815 /* XXX this function is broken, given the XML parser. */
816 
SFFreopen(NSL_File_I * file,FILE * filep)817 boolean SFFreopen( NSL_File_I *file, FILE *filep ) {
818 #if 1
819     LT_ERROR(NEUNSUP,
820 		"The function SFFreopen has been removed from the library\n");
821     return FALSE;
822 #else
823   ECEF(sfclose(file->filep));
824   file->filep=(FILE *)filep;
825   return TRUE;
826 #endif
827 }
828 
/* Close `f` and free the storage belonging to the file itself.
 *
 * Output files are flushed (with a forced newline first for the default
 * and fancy styles) and their streams closed.  Input files release any
 * peeked bit, the current data, the parser and, where the doctype does
 * not need it, the document entity.
 *
 * The doctype is never freed here, because another file may still be
 * using it; call FreeDoctype (or SFrelease) for that.
 *
 * Returns 0 on success.
 * NOTE(review): each ECEE/ECFE check returns early on failure, skipping
 * the rest of the cleanup - confirm that this is acceptable.
 */
int SFclose( NSL_File_I *f) {

  if (!(f->type & NSL_read)) {
      int style = f->type & NSL_write_style;

      if(style == NSL_write_default || style == NSL_write_fancy) {
          ForceNewline(f);
      }
      ECEE(ForceOutput(f)); /* will do FlushRe */
      ECEE(Fclose(f->file16));
      if(f->fileToClose) {
          ECEE(stdfclose(f->fileToClose)); /* Only close what we opened */
      }
  } else {
    Entity docent = 0;

    FreeBit(f->peekBit);
    FreeData(f->currentbase, f->doctype);

    /* Free the document entity only if it's not needed by the dtd.
       This will be true only when we got the doctype from another file.
       Don't free it until after FreeParser, because FreeParser looks
       at the entities. */
    if(f->pstate->document_entity != f->doctype->root_entity)
        docent = f->pstate->document_entity;

    FreeParser(f->pstate);

    if(docent) {
        /* Don't free strings passed in to OpenString */
        docent->text = 0;
        FreeEntity(docent);
    }
  }

  if (f->eltContent.base) {
    ECFE(sfree(f->eltContent.base));
  }
  ECFE(sfree(f));
  return 0;
}
865 
866 /* An unsafe (see documentation) version of SFclose which cleans up
867    all heap storage associated with the file.
868    The DOCTYPE is released iff releaseDoctype is TRUE */
869 
/* Close `f` via SFclose and, when `releaseDoctype` is true, free its
 * doctype as well.  Returns 0 on success; a failing SFclose ends the
 * function early through ECEE, before the doctype is touched.
 */
int SFrelease( NSL_File_I *f, boolean releaseDoctype) {
  /* Fetch the doctype first: SFclose frees f. */
  NSL_Doctype_I *saved = f->doctype;

  ECEE(SFclose(f));

  if (releaseDoctype)
    FreeDoctype(saved);

  return 0;
}
881 
882 #if 0
883 void SetFileDoctype( NSL_File_I *f, NSL_Doctype_I *d ) {
884     f->doctype=d;
885 }
886 #endif
887 
/* Given an NSL_File_I *, return the NSL_Doctype_I * associated with it. */
889 
DoctypeFromFile(NSL_File_I * file)890 NSL_Doctype_I *DoctypeFromFile( NSL_File_I *file) {
891   return (NSL_Doctype_I *)file->doctype;
892 }
893 
/* Open the three standard file streams (stdin, stdout, stderr) as NSL streams. */
895 
896 NSL_File_I *sgstdin, *sgstdout, *sgstderr;
897 
StdFiles(NSL_FType type)898 boolean StdFiles(NSL_FType type) {
899   ECNF(sgstdin=SFFopen(stdin, NULL, NSL_read, "stdin"));
900   ECNF(sgstdout=SFFopen(stdout, DoctypeFromFile(sgstdin), type, "stdout"));
901   ECNF(sgstderr=SFFopen(stderr, NULL, NSL_write_plain|NSL_write_no_doctype,
902 			"stderr"));
903   return TRUE;
904 }
905 
906 #if 0
907 void MakeFilesSameDoctype( NSL_File_I *ffrom, NSL_File_I *fto ) {
908     fto->doctype=ffrom->doctype;
909 }
910 #endif
911 
912 /* Return the byte offset in the file of the current read position */
913 /* XXX only works for root entity; need a better interface */
914 /* Why does this return size_t???  Should be long (like ftell) or
915   off_t (like lseek). */
916 
/* Report the current position of the parser's root source, as given by
 * SourceTell. */
size_t SFtell(NSL_File_I *sf) {
    Parser parser = sf->pstate;

    return SourceTell(ParserRootSource(parser));
}
920 
921 /* Move read position in file to pos bytes in */
922 /* XXX only works for root entity; need a better interface */
923 
/* Reposition reading of `sf` at `pos`.
 *
 * Any peeked bit is discarded, nested sources are popped until the
 * outermost one is current, the parser is put into body state with
 * placement errors ignored, and the seek is passed to SourceSeek.
 * Returns whatever SourceSeek returns.
 */
int SFseek(NSL_File_I *sf,size_t pos) {
    Parser parser = sf->pstate;

    /* A bit read ahead from the old position is no longer valid */
    if (sf->peekBit) {
        FreeBit(sf->peekBit);
        sf->peekBit=0;
    }

    /* Unwind to the source that has no parent */
    for(; parser->source->parent; )
        ParserPop(parser);

    parser->state = PS_body;            /* What else could we do? */
    ParserSetFlag(parser, IgnorePlacementErrors, 1);

    return SourceSeek(parser->source, pos);
}
940 
941 /* Called by NSL_Init          */
942 
ParseInit(void)943 boolean ParseInit( void ) {
944   init_parser();
945   return TRUE;
946 }
947 
948 /* Read and return the next NSL_Bit in the input file */
949 
/* Produce the next NSL_Bit for sf.
 *
 * A bit saved in sf->peekBit is handed back first (and the slot cleared).
 * Otherwise one XBit is read from sf->pstate with ReadXBit and converted
 * into sf->_bit1, whose address is returned; every call that reaches the
 * parser therefore reuses the same NSL_Bit storage.
 *
 * Side effects visible below: sf->currentBitOffset is set from the XBit,
 * sf->eltContent is pushed/popped for start/end tags, the parser state is
 * switched to PS_epilog when the element stack drops back to its base
 * entry, and a DTD bit updates doctype->seenDTD / doctypeStatement.
 *
 * Parser errors are reported through ParserPerror/LT_ERROR and yield a
 * bit whose type is NSL_bad.  Some pcdata is swallowed entirely, in which
 * case the function calls itself to fetch the following bit.
 */
NSL_Bit *NextBit(NSL_File_I *sf) {

  XBit bit;
  NSL_Bit *nslbit;
  NSL_BI_Type lastType;
  int n;
  NSL_Doctype_I *doctype;
  boolean XMLMode;
  char8 *sysid=0;
  Char  *dtdchars=0, *intsubset=0;
  Char *data;
  static Char empty_string[] = {0};

  /* A previously peeked bit takes priority over reading from the parser */
  if (sf->peekBit) {
    NSL_Bit *tmp=sf->peekBit;
    sf->peekBit=0;
    return tmp;
  }

  doctype=(NSL_Doctype_I *)sf->doctype;
  nslbit = &(sf->_bit1);
  bit = ReadXBit(sf->pstate);

  sf->currentBitOffset = bit->byte_offset;

  XMLMode=doctype?doctype->XMLMode:FALSE;
  /* Type left in the shared bit by the previous conversion; used by the
     non-XML newline handling in the pcdata case */
  lastType=nslbit->type;

  /* Namespace fields are only filled in by the start/empty and end cases */
  nslbit->nsc = 0;
  nslbit->nsowned = 0;

  switch(bit->type) {
  case XBIT_error:
    /* Report the parser's error and hand back a bad bit */
    ParserPerror(sf->pstate, bit);
    LT_ERROR(NEPARSE, "");
    nslbit->type = NSL_bad;
    FreeXBit(bit);
    break;
  case XBIT_start:
  case XBIT_empty:
    if( bit->type == XBIT_empty ){
      nslbit->type = NSL_empty_bit;
    } else {
      nslbit->type = NSL_start_bit;
    }

    /* Build the item for this element and mirror its label/prefix in
       the bit */
    nslbit->value.item=NNI(bit->element_definition->eltsum,
			   doctype,
			   bit->element_definition->name);
    nslbit->value.item->prefix=bit->element_definition->prefix;

    nslbit->label=nslbit->value.item->label;
    nslbit->prefix=nslbit->value.item->prefix;

    /* NOTE(review): the result of ParseAttributeString is not checked */
    ParseAttributeString(doctype,nslbit->value.item, bit->attributes,
			 sf->type & NSL_read_namespaces);

    if(sf->type & NSL_read_namespaces)
    {
	/* Local name and namespace URI come from the namespace-aware
	   definition when there is one; otherwise the plain label is
	   used with no URI */
	if(bit->ns_element_definition)
	{
	    nslbit->llabel = nslbit->value.item->llabel =
		bit->ns_element_definition->name;
	    nslbit->nsuri = nslbit->value.item->nsuri =
		bit->ns_element_definition->namespace->nsname;
	}
	else
	{
	    nslbit->llabel= nslbit->label;
	    nslbit->nsuri = 0;
	}

	nslbit->value.item->ns_dict = nslbit->ns_dict = bit->ns_dict;
	nslbit->value.item->nsc = nslbit->nsc = bit->nsc;

	/* Take ownership of the ns records */
	if(bit->type == XBIT_empty)
	{
	    nslbit->value.item->nsowned = 1;
	    bit->nsowned = 0;
	}
    }

    /* Decide whether the item is empty: in XML mode only an empty-element
       XBit makes it so; otherwise the definition's content type or an
       item already marked NSL_empty decides */
    if (XMLMode) {
	if (bit->type==XBIT_empty) {
	    nslbit->value.item->type=NSL_empty;
	}
    } else {
	if (nslbit->value.item->defn->contentType==dc_empty) {
	    nslbit->type=NSL_empty_bit;
	    nslbit->value.item->type=NSL_empty;
	} else if ( nslbit->value.item->type == NSL_empty ){
	    /* item type may have been set to NSL_empty if we */
	    /* came across an explicit CONREF attribute value */
	    nslbit->type = NSL_empty_bit;
	}
    }

    /* A real start tag opens a new level on the element stack; an empty
       element seen while only the base entry is on the stack ends the
       document body */
    if( nslbit->type == NSL_start_bit ){
	PUSH(sf->eltContent,
	     bit->element_definition->eltsum);
    }
    else if(sf->eltContent.current - 1 == sf->eltContent.base)
	/* See comment below under XBIT_end */
	sf->pstate->state = PS_epilog;

    break;
  case XBIT_pi:
    /* A processing instruction */
    COMMENT1("%s", xbit_type_name[bit->type]);
    COMMENT1(" %s",bit->pi_name);
    COMMENT1(" %s\n", bit->pi_chars);
    nslbit->type=NSL_pi_bit;
    /* Body is "name chars" (or just "name" when chars is empty); the
       buffer is sized for name + separator + chars + terminator */
    { Char *new;
    ECNN(new=salloc((Strlen(bit->pi_name)+1+Strlen(bit->pi_chars)+1)*sizeof(Char)));
    if (bit->pi_chars[0]) {
      Sprintf(new,InternalCharacterEncoding,"%S %S",bit->pi_name,bit->pi_chars);
    }
    else {
      Sprintf(new,InternalCharacterEncoding,"%S",bit->pi_name);
    };
    nslbit->value.body = new;
    }
#if 0
    /* NSL PIs are now handled in the low-level parser */
    if (HandlePi(bit,sf) < 0) {
      nslbit->type=NSL_bad;
    }
#endif
    /* Since we copied them, we should free the originals */
    FreeXBit(bit);
    break;
  case XBIT_pcdata:
    COMMENT1("%s", xbit_type_name[bit->type]);
    COMMENT1(" [%s]\n", bit->pcdata_chars);
    /* Unless every bit was asked for, text directly inside element-only
       content is dropped and the following bit returned instead */
    if (!(sf->type & NSL_read_all_bits) &&
	(TOP(sf->eltContent))->contentType==dc_element) {
      /* this assumes any PCData in the wrong place is white . . . */
      /* and that eltOnly is true outside the document element */
      FreeXBit(bit);
      return NextBit(sf);
    }
    data=bit->pcdata_chars;
    /* Text starting with '&' while NSL_read_no_expand is set is flagged
       NSL_text_isERef */
    if(data[0] == '&' && (sf->type & NSL_read_no_expand))
	nslbit->flags = NSL_text_isERef;
    else
	nslbit->flags = 0;
    if (!XMLMode) {
      /* Non-XML mode: strip one newline directly after a start tag or PI
	 and one directly before an end tag.  realdata keeps the start of
	 the allocation; incr records that data no longer points at it */
      int incr=0;
      Char *realdata=data;

      if ( data[0] == '\n' ) {
	switch (lastType) {
	case NSL_start_bit:
	case NSL_pi_bit:
	  /* one RE is ignored after start tag or pi */
	  if (*(++(data))=='\0') {
	    /* skip the whole thing */
	    sfree(realdata);
	    return NextBit(sf);
	  } else {
	    incr=1;
	  }
	  break;
	default:
	  break;
	}
      }

      /* After the test n is the index of the last character */
      n=Strlen(data);
      if (n--==0) {
	sfree(realdata);
	return NextBit(sf);
      }
      /* check last char if before end tag */
      /* what about PIs? */
      if (PeekXBit(sf->pstate)->type==XBIT_end) {
	if (n==0 && data[0]=='\n') {
	  /* empty, try again */
	  sfree(realdata);
	  return NextBit(sf);
	} else if (data[n]=='\n') {
	  data[n]='\000';
	}
      }
      if (incr) {
	/* o bother, hope this is rare because it's STUPID */
	/* data points one past the start of the allocation, so the text
	   is copied before the original buffer is released */
	data=Strdup(data);
	sfree(realdata);
      }
    }

    nslbit->type = NSL_text_bit;
    nslbit->value.body = (Char*)data;
    break;

  case XBIT_end:
    nslbit->type = NSL_end_bit;
    COMMENT1("%s", xbit_type_name[bit->type]);
    COMMENT1(" %s\n", bit->element_definition->name);
    /* The end tag must match the element on top of the stack; a mismatch
       is reported but processing still continues as an end bit */
    if ((NSL_ElementSummary_I *)TOP(sf->eltContent)!=
	bit->element_definition->eltsum) {
      char buf[100];
      bit->type=XBIT_error;
      Sprintf(buf,CE_ISO_8859_1,"unmatched end tag %.70S",
	      bit->element_definition->name);
      /* NOTE(review): buf is local to this block, so bit->error_message
	 is left pointing at it after the block exits */
      bit->error_message=buf;
      ParserPerror(sf->pstate, bit);
      LT_ERROR(NEPARSE,"")
    };
    POPNV(sf->eltContent);
    nslbit->label = bit->element_definition->name;
    nslbit->prefix = bit->element_definition->prefix;
    nslbit->value.item = NULL;
    if(sf->eltContent.current - 1 == sf->eltContent.base)
	/* Have to do this here, because the low-level parser doesn't keep
	   an element stack when used with NSL.  The stack is empty when
	   there is only one element on it (sigh). */
	sf->pstate->state = PS_epilog;

    if(sf->type & NSL_read_namespaces)
    {
	if(bit->ns_element_definition)
	{
	    nslbit->llabel = bit->ns_element_definition->name;
	    nslbit->nsuri = bit->ns_element_definition->namespace->nsname;
	}
	else
	{
	    nslbit->llabel= nslbit->label;
	    nslbit->nsuri = 0;
	}

	/* Take ownership of the ns records */
	nslbit->ns_dict = bit->ns_dict;
	nslbit->nsc = bit->nsc;
	nslbit->nsowned = 1;
	bit->nsowned = 0;
    }

    FreeXBit(bit);
    break;

  case XBIT_eof:
    /* The XBit is not freed in this case */
    nslbit->type = NSL_eof_bit;
    COMMENT1("%s\n", xbit_type_name[bit->type]);
    break;

  case XBIT_comment:
    COMMENT1("%s", xbit_type_name[bit->type]);
    COMMENT1(" %s\n", bit->comment_chars);
    /* The comment text is handed over as-is (no copy, XBit not freed) */
    nslbit->type = NSL_comment_bit;
    nslbit->value.body = bit->comment_chars;
    break;

  case XBIT_dtd:
    /* First DTD seen for an XML-mode doctype: parse the internal subset,
       and the external subset too unless the doctype's sdd is sdd_yes and
       validation was not requested.  The original XBit contents are kept
       in oldbit and restored afterwards because ParseDtd's result is
       assigned to bit */
    if (XMLMode && !doctype->seenDTD) {
      COMMENT1("%s", xbit_type_name[bit->type]);
      doctype->seenDTD = TRUE;
      {
	struct xbit oldbit = *bit;
	if (sf->pstate->dtd->internal_part) {
	    bit = ParseDtd(sf->pstate, sf->pstate->dtd->internal_part);
	    if (bit->type==XBIT_error) {
		ParserPerror(sf->pstate, bit);
		LT_ERROR(NEPARSE, "");
		nslbit->type = NSL_bad;
		FreeXBit(&oldbit);
		FreeXBit(bit);
		break;
	    }
	}
	if ((doctype->sdd!=sdd_yes || sf->type & NSL_read_validate) &&
	    sf->pstate->dtd->external_part) {
	    bit = ParseDtd(sf->pstate, sf->pstate->dtd->external_part);
	    if (bit->type == XBIT_error) {
		ParserPerror(sf->pstate, bit);
		LT_ERROR(NEPARSE, "");
		nslbit->type = NSL_bad;
		FreeXBit(&oldbit);
		FreeXBit(bit);
		break;
	    }
	    FreeXBit(bit);
	}
	*bit = oldbit;
      }
    } else {
      COMMENT1("%s", xbit_type_name[bit->type]);
    }
    /* Rebuild the text of the doctype statement from its parts.
       NOTE(review): none of the salloc/Strdup results in this case are
       checked before use */
    if (sf->pstate->dtd->external_part &&
	sf->pstate->dtd->external_part->systemid) {
	/* +4: two quotes, trailing space and terminator */
	sysid=salloc(strlen(sf->pstate->dtd->external_part->systemid)+4);
	sprintf(sysid,"\"%s\" ",sf->pstate->dtd->external_part->systemid);
    }
    if (sf->pstate->dtd->internal_part) {
      /* +3: the two brackets and the terminator */
      intsubset=salloc((Strlen(sf->pstate->dtd->internal_part->text)+3)*
		       sizeof(Char));
      Sprintf(intsubset, InternalCharacterEncoding,
	      "[%S]",sf->pstate->dtd->internal_part->text);
    }
    if (sf->pstate->dtd->external_part &&
	sf->pstate->dtd->external_part->publicid) {
      /* 9 covers the literal text between the name and the public id,
	 2 the closing quote and space after it, 1 the terminator */
      dtdchars=salloc((Strlen(sf->pstate->dtd->name)+
		      9+
		      strlen8(sf->pstate->dtd->external_part->publicid)+
		      2+
		      (sysid?strlen8(sysid):0)+
		      (intsubset?Strlen(intsubset):0)+
		      1) * sizeof(Char));
      Sprintf(dtdchars, InternalCharacterEncoding, "%S PUBLIC \"%s\" %s%S",
	      sf->pstate->dtd->name,
	      sf->pstate->dtd->external_part->publicid,
	      sysid?sysid:(char8 *)"",
	      intsubset?intsubset:empty_string);
    } else {
      /* 1 covers the space after the name, 7 the "SYSTEM " keyword (only
	 when there is a system id), the final 1 the terminator */
      dtdchars=salloc((Strlen(sf->pstate->dtd->name)+
		      1+
		      (sysid?7:0)+
		      (sysid?strlen8(sysid):0)+
		      (intsubset?Strlen(intsubset):0)+
		      1) * sizeof(Char));
      Sprintf(dtdchars, InternalCharacterEncoding, "%S %s%s%S",
	      sf->pstate->dtd->name,
	      sysid?"SYSTEM ":"",
	      sysid?sysid:(char8 *)"",
	      intsubset?intsubset:empty_string);
    }
    sfree(sysid);
    sfree(intsubset);
    FreeXBit(bit);

    /* The doctype keeps dtdchars itself; the bit gets its own copy.
       NOTE(review): doctype is dereferenced here without the null test
       applied when XMLMode was computed */
    doctype->doctypeStatement = dtdchars;

    nslbit->type = NSL_doctype_bit;
    nslbit->value.body = Strdup(dtdchars);
    break;

  case XBIT_cdsect:
    COMMENT1("%s\n", xbit_type_name[bit->type]);
    /* A CDATA section is delivered as a text bit; it is only marked as
       CDATA when NSL_read_all_bits is set */
    nslbit->type=NSL_text_bit;
    nslbit->value.body=bit->cdsect_chars;
    if(sf->type & NSL_read_all_bits)
	nslbit->flags = NSL_text_isCData;
    else
	nslbit->flags = 0;
    break;

  default:
    SHOULDNT;
  }
  return nslbit;
}
1303 
1304 /* NB, this should be tied much more closely in with Richard's code,
1305    since his attribute and our NSL_Attr are VERY similar */
1306 
1307 static const AttributeSummary ConstAtSum={0,0,0,0,0,0};
1308 
PAS1(NSL_Doctype_I * dct,NSL_Item * item,struct attribute * source,int do_ns,NSL_Attr ** prev)1309 static boolean PAS1(NSL_Doctype_I *dct, NSL_Item *item,
1310 		    struct attribute *source, int do_ns, NSL_Attr **prev) {
1311   NSL_Attr *refvar, *res;
1312   const AttributeSummary *atsum;
1313 
1314   /* If no attributes then we are finished */
1315   if (!source) {
1316     item->attr=NULL;
1317     return TRUE;
1318   }
1319 
1320   /* Recurse down list of attributes - in effect we are starting at
1321      the end and working backwards (So that order of attributes is
1322      reversed twice, and hence in the same order as in the source
1323      file) */
1324   if (source->next) {
1325     ECFF(PAS1(dct, item, source->next, do_ns, &res));
1326   }
1327 
1328   /* Now we process this attribut/value pair */
1329 
1330   if(dct->XMLMode)
1331       atsum = source->definition->attrsum;
1332   else
1333       /* In NSL mode the attribute summary itself is returned */
1334       atsum = (AttributeSummary *)source->definition;
1335 
1336   ECNF(refvar=AttrFromSpec(atsum, dct));
1337 
1338   /* We have an explicit #CONREF attribute, this means that
1339      The item is of type NSL_empty, despite what the DTD says */
1340   if( refvar->deft == NSL_defval_conref ){
1341     item->type = NSL_empty;
1342   }
1343 
1344   ECFF(SetAttrValue(refvar, source->value));
1345 
1346   if(do_ns)
1347   {
1348       if(source->ns_definition && !source->ns_definition->element)
1349       {
1350 	  refvar->lname = source->ns_definition->name;
1351 	  refvar->nsuri = source->ns_definition->namespace->nsname;
1352       }
1353       else
1354       {
1355 	  refvar->lname = source->definition->name;
1356 	  refvar->nsuri = 0;
1357       }
1358   }
1359 
1360   if( !(source->next) ){
1361     item->attr=refvar;
1362   } else {
1363     res->next=refvar;
1364   }
1365   if (prev) {
1366     *prev=refvar;
1367   }
1368 
1369   sfree(source);
1370   return TRUE;
1371 }
1372 
/* Attach the attributes in SOURCE to ITEM by delegating to PAS1, and
   return PAS1's result.  The last argument is 0 because the top-level
   caller has no use for the attribute PAS1 would report back.
   NOTE(review): the forward declaration near the top of the file gives
   the return type as boolean, whereas this definition says int. */
int ParseAttributeString(NSL_Doctype_I *dct, NSL_Item *item,
			 struct attribute *source, int do_ns)
{
    return PAS1(dct,
		item,
		source,
		do_ns,
		0);
}
1377 
DocumentIsNSGML(NSL_Doctype_I * dct)1378 boolean DocumentIsNSGML(NSL_Doctype_I *dct)
1379 {
1380     return !dct->XMLMode;
1381 }
1382 
1383 /* ======================================================================== */
1384 /* Moved from sgmldef.c */
1385 
DoctypeFromDdb(const char8 * filename)1386 NSL_Doctype_I *DoctypeFromDdb(const char8 *filename) {
1387 
1388   NSL_Doctype_I *doctype;
1389   DDBHeader* ddb;
1390   ECNN(ddb=readddb(filename));
1391 
1392   checkddb(filename,ddb,FALSE);
1393 
1394   ECNN(doctype=tsalloc(NSL_Doctype_I, 1));
1395 
1396   doctype->doctype=(char*)(ddb+1);
1397   COMMENT1("[!DOCTYPE %s ",doctype->doctype);
1398   COMMENT1("from DDB %s]\n",filename);
1399 
1400   doctype->ddb=ddb;
1401   doctype->ddbfile=filename;
1402 
1403   doctype->attrNames=(RHashTableHdr*)(((char*)ddb)+ddb->anameTableOffset);
1404   doctype->elements=(RHashTableHdr*)(((char*)ddb)+ddb->elementTableOffset);
1405   doctype->elementBase=((char*)doctype->elements)+doctype->elements->length;
1406   doctype->permanentBase=doctype->elementBase;
1407   doctype->entities=(RHashTableHdr*)(((char*)ddb)+ddb->entityTableOffset);
1408   doctype->entityBase=((char*)doctype->entities)+doctype->entities->length;
1409   doctype->doctypeStatement = NULL;
1410   doctype->XMLMode=FALSE;
1411   doctype->seenDTD = TRUE;
1412 
1413   ECFN(doctype_init_alloc(doctype, 100, 100, 100));
1414 
1415   doctype->defaultOutputEncoding = CE_UTF_8; /* XXX is this a good default? */
1416   doctype->fallbackEncodingDeclaration = CE_unknown;
1417   doctype->rxp_dtd = NewDtd();
1418   doctype->root_entity = 0;
1419 
1420   return doctype;
1421 }
1422 
doctype_init_alloc(NSL_Doctype_I * doctype,int nitems,int ndata,int nattr)1423 static boolean doctype_init_alloc(NSL_Doctype_I *doctype,
1424 				  int nitems, int ndata, int nattr)
1425 {
1426   if ((doctype->itemstack=Uinit(sizeof(NSL_Item), nitems, 30))!=NULL &&
1427       (doctype->datastack=Uinit(sizeof(NSL_Data), ndata, 30))!=NULL &&
1428       (doctype->attrstack=Uinit(sizeof(NSL_Attr), nattr, 30))!=NULL) {
1429     NameUmalloc(doctype->itemstack, "NSL_Item");
1430     NameUmalloc(doctype->datastack, "NSL_Data");
1431     NameUmalloc(doctype->attrstack, "NSL_Attr");
1432     return TRUE;
1433   } else {
1434     return FALSE;
1435   }
1436 }
1437 
LoadDoctype(const char8 * filename)1438 NSL_Doctype_I *LoadDoctype(const char8 *filename)
1439 {
1440     int len;
1441     NSL_File_I *sf;
1442     NSL_Doctype_I *dct;
1443 
1444     /* If it's a .ddb file, use DoctypeFromDdb */
1445 
1446     len = strlen(filename);
1447     if(len > 4 && strcmp(filename+len-4, ".ddb") == 0)
1448 	return DoctypeFromDdb(filename);
1449 
1450     /* Otherwise, open as an nSGML/XML file, and get its doctype */
1451 
1452     /* Open in no_consume_prolog mode, so we don't attempt to read the first
1453       element (we want it to work for files just containing <!DOCTYPE ... >) */
1454 
1455     ECNN(sf = SFopen(filename, 0, NSL_read|NSL_read_no_consume_prolog));
1456 
1457     /* If it's a nSGML file, we should now have read the doctype */
1458 
1459     dct = sf->doctype;
1460     if(!dct->XMLMode)
1461     {
1462 	SFclose(sf);
1463 	return dct;
1464     }
1465 
1466     /* Otherwise we must read until we get the doctype bit */
1467 
1468     while(1)
1469     {
1470 	NSL_Bit *bit = NextBit(sf);
1471 	switch(bit->type)
1472 	{
1473 	case NSL_bad:
1474 	    SFrelease(sf, 1);
1475 	    return 0;
1476 
1477 	case NSL_start_bit:
1478 	case NSL_empty_bit:
1479 	case NSL_eof_bit:
1480 	    /* We've reached the end of the prolog without seeing <!DOCTYPE>.
1481 	       Just return the empty doctype. */
1482 	case NSL_doctype_bit:
1483 	    /* We found it. */
1484 	    FreeBit(bit);
1485 	    SFclose(sf);
1486 	    return dct;
1487 
1488 	default:
1489 	    /* Ignore other bits (PIs, comments). */
1490 	    FreeBit(bit);
1491 	    break;
1492 	}
1493     }
1494 }
1495 
1496 /* ======================================================================== */
1497 /* Moved from sgmldef.c */
1498 
MakeSpec(const NSL_Doctype_I * doctype)1499 const char8 *MakeSpec(const NSL_Doctype_I *doctype) {
1500   char *spstr;
1501 
1502   ECNN(spstr=salloc(200));
1503   sprintf(spstr, "<?NSL DDB %s 0>\n",doctype->ddbfile);
1504   return spstr;
1505 }
1506 
1507 /* ======================================================================== */
1508 
/* Return sf->currentBitOffset.  A negative value is additionally reported
   through LT_ERROR (NEOFFST) before it is returned unchanged. */
extern int CurrentBitOffset(NSL_File_I *sf)
{
    if (sf->currentBitOffset >= 0)
	return sf->currentBitOffset;

    LT_ERROR(NEOFFST,"No bit offset yet\n");
    return sf->currentBitOffset;
}
1515 
1516 /* end of file */
1517 
1518