1 /*
2  *  Copyright 1993 Open Software Foundation, Inc., Cambridge, Massachusetts.
3  *  All rights reserved.
4  */
5 /*
6  * Copyright (c) 1994
7  * Open Software Foundation, Inc.
8  *
9  * Permission is hereby granted to use, copy, modify and freely distribute
10  * the software in this file and its documentation for any purpose without
11  * fee, provided that the above copyright notice appears in all copies and
12  * that both the copyright notice and this permission notice appear in
13  * supporting documentation.  Further, provided that the name of Open
14  * Software Foundation, Inc. ("OSF") not be used in advertising or
15  * publicity pertaining to distribution of the software without prior
16  * written permission from OSF.  OSF makes no representations about the
17  * suitability of this software for any purpose.  It is provided "as is"
18  * without express or implied warranty.
19  */
20 /*
21  * Copyright (c) 1996 X Consortium
22  * Copyright (c) 1995, 1996 Dalrymple Consulting
23  *
24  * Permission is hereby granted, free of charge, to any person obtaining a copy
25  * of this software and associated documentation files (the "Software"), to deal
26  * in the Software without restriction, including without limitation the rights
27  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
28  * copies of the Software, and to permit persons to whom the Software is
29  * furnished to do so, subject to the following conditions:
30  *
31  * The above copyright notice and this permission notice shall be included in
32  * all copies or substantial portions of the Software.
33  *
34  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
35  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
36  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
37  * X CONSORTIUM OR DALRYMPLE CONSULTING BE LIABLE FOR ANY CLAIM, DAMAGES OR
38  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
39  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
40  * OTHER DEALINGS IN THE SOFTWARE.
41  *
42  * Except as contained in this notice, the names of the X Consortium and
43  * Dalrymple Consulting shall not be used in advertising or otherwise to
44  * promote the sale, use or other dealings in this Software without prior
45  * written authorization.
46  */
47 /* ________________________________________________________________________
48  *
49  *  Program to manipulate SGML instances.
50  *
51  *  This module contains the initialization routines for translation module.
52  *  They mostly deal with reading data files (translation specs, SDATA
53  *  mappings, character mappings).
54  *
55  *  Entry points:
56  *	ReadTransSpec(transfile)	read/store translation spec from file
57  * ________________________________________________________________________
58  */
59 
60 #ifndef lint
61 static char *RCSid =
62   "$Header: /home/ncvs/src/usr.bin/sgmls/instant/traninit.c,v 1.1.1.1 1996/09/08 01:55:10 jfieber Exp $";
63 #endif
64 
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <ctype.h>
68 #include <string.h>
69 #include <memory.h>
70 #include <sys/types.h>
71 #include <errno.h>
72 #include <regex.h>
73 
74 #include "general.h"
75 #include "translate.h"
76 
77 #include "sgmls.h"
78 #include "config.h"
79 
80 #ifndef PREFIX
81 #define PREFIX "/usr/local"
82 #endif
83 
84 #ifndef TRANSPEC_DIR
85 #define TRANSPEC_DIR PREFIX "/share/sgml/transpec"
86 #endif
87 
88 #ifndef TRUE
89 #define TRUE	(1 == 1)
90 #endif
91 
92 #define MAX(a, b) ((a) > (b) ? (a) : (b))
93 #define MIN(a, b) ((a) < (b) ? (a) : (b))
94 
95 /* forward references */
96 void	RememberTransSpec(Trans_t *, int);
97 static void do_data(char *gi, struct sgmls_data *v, int n);
98 static void build_ts(char *gi, char* cp);
99 void	AddCharMap(const char *from, const char* to);
100 void	AddSDATA(const char *from, const char *to);
101 
102 /* ______________________________________________________________________ */
/*  Minimal compatibility wrapper giving the POSIX regex library the
 *  calling shape of the UNIX V8 regexp compiler (match-only use).
 *  Arguments:
 *	Pattern to compile (POSIX extended syntax, no sub-match reporting).
 *  Returns:
 *	Newly allocated compiled expression, or NULL if either the
 *	allocation or the compilation failed.
 */
static regex_t *v8_regcomp(const char *pattern)
{
	regex_t *compiled = malloc(sizeof(regex_t));

	if (compiled == NULL)
		return NULL;

	if (regcomp(compiled, pattern, REG_EXTENDED | REG_NOSUB) != 0) {
		/* compilation failed: hand nothing back to the caller */
		free(compiled);
		return NULL;
	}

	return compiled;
}
117 #define regcomp	v8_regcomp
118 
119 /* ______________________________________________________________________ */
120 /*  Read the translation specs from the input file, storing in memory.
121  *  Arguments:
122  *	Name of translation spec file.
123  */
124 
125 static Trans_t T;
126 
127 
/*  Error handler installed with sgmls_set_errhandler(); reports a
 *  problem found while reading the parser output.
 *  Arguments:
 *	Error number (not used).
 *	Text of the error message.
 *	Input line number at which the error was detected.
 */
static void
input_error(
    int num,
    char *str,
    unsigned long lineno
)
{
    (void)num;		/* only the message and line number are reported */
    fprintf(stderr, "Error at input line %lu: %s\n", lineno, str);
}
136 
137 void
ReadTransSpec(char * transfile)138 ReadTransSpec(
139     char *transfile
140 )
141 {
142     FILE *fp;
143     struct sgmls *sp;
144     struct sgmls_event e;
145     char gi[LINESIZE];
146     char buf[LINESIZE];
147     char buf2[LINESIZE];
148     char *command;
149     char *sgmls = "onsgmls -c " TRANSPEC_DIR "/catalog ";
150     char maptype = '\0';
151 
152     (void)sgmls_set_errhandler(input_error);
153     transfile = FilePath(transfile);
154     if (!transfile)
155     {
156     	fprintf(stderr, "Error: Could not locate specified transfile\n");
157     	exit(1);
158     }
159 
160     /* XXX this is a quick, gross hack.  Should write a parse() function. */
161     Malloc(strlen(sgmls) + strlen(transfile) + 2, command, char);
162     sprintf(command, "%s %s", sgmls, transfile);
163     fp = popen(command, "r");
164 
165     sp = sgmls_create(fp);
166     while (sgmls_next(sp, &e))
167     switch (e.type) {
168     	case SGMLS_EVENT_DATA:
169     	    do_data(gi, e.u.data.v, e.u.data.n);
170     	    break;
171     	case SGMLS_EVENT_ENTITY:
172     	    fprintf(stderr, "Hm... got an entity\n");
173     	    break;
174     	case SGMLS_EVENT_PI:
175     	    break;
176     	case SGMLS_EVENT_START:
177     	    if (strncmp("RULE", e.u.start.gi, 4) == 0) {
178     	    	/* A new transpec, so clear the data structure
179     	    	 * and look for an ID attribute.
180     	    	 */
181 	    	struct sgmls_attribute *attr = e.u.start.attributes;
182 	    	memset(&T, 0, sizeof T);
183     	    	while (attr) {
184     	    	    if (attr->type == SGMLS_ATTR_CDATA
185     	    	    	&& strncmp("ID", attr->name, 2) == 0) {
186     	    	    	strncpy(buf, attr->value.data.v->s,
187     	    	    	    MIN(attr->value.data.v->len, LINESIZE));
188     	    	    	buf[MIN(attr->value.data.v->len, LINESIZE - 1)] = '\0';
189     	    	    	T.my_id = atoi(buf);
190     	    	    }
191     	    	    attr = attr->next;
192     	    	}
193     	    }
194     	    else if (strncmp("CMAP", e.u.start.gi, 4) == 0)
195     	    	maptype = 'c';
196     	    else if (strncmp("SMAP", e.u.start.gi, 4) == 0)
197     	    	maptype = 's';
198     	    else if (strncmp("MAP", e.u.start.gi, 3) == 0) {
199     	    	struct sgmls_attribute *attr = e.u.start.attributes;
200     	    	char *from = 0;
201     	    	char *to = 0;
202 
203     	    	while (attr) {
204     	    	    if (attr->value.data.v && strncmp("FROM", attr->name, 4) == 0) {
205     	    	    	strncpy(buf, attr->value.data.v->s,
206     	    	    	    MIN(attr->value.data.v->len, LINESIZE - 1));
207     	    	    	buf[MIN(attr->value.data.v->len, LINESIZE - 1)] = '\0';
208     	    	    }
209    	    	    if (attr->value.data.v && strncmp("TO", attr->name, 2) == 0) {
210     	    	    	strncpy(buf2, attr->value.data.v->s,
211     	    	    	    MIN(attr->value.data.v->len, LINESIZE - 1));
212     	    	    	buf2[MIN(attr->value.data.v->len, LINESIZE - 1)] = '\0';
213     	    	    }
214     	    	    attr = attr->next;
215     	    	}
216     	    	if (maptype == 'c')
217     	    	    AddCharMap(buf, buf2);
218     	    	else if (maptype == 's')
219     	    	    AddSDATA(buf, buf2);
220     	    	else
221     	    	    fprintf(stderr, "Unknown map type!\n");
222     	    }
223     	    else {
224     	    	strncpy(gi, e.u.start.gi, 512);
225     	    	sgmls_free_attributes(e.u.start.attributes);
226     	    }
227     	    break;
228     	case SGMLS_EVENT_END:
229     	    if (strncmp("RULE", e.u.start.gi, 4) == 0)
230     	    	RememberTransSpec(&T, e.lineno);
231     	    break;
232     	case SGMLS_EVENT_SUBSTART:
233     	    break;
234     	case SGMLS_EVENT_SUBEND:
235     	    break;
236     	case SGMLS_EVENT_APPINFO:
237     	    break;
238     	case SGMLS_EVENT_CONFORMING:
239     	    break;
240     	default:
241     	    abort();
242     }
243     sgmls_free(sp);
244     pclose(fp);
245     free(command);
246 }
247 
248 
do_data(char * gi,struct sgmls_data * v,int n)249 static void do_data(char *gi, struct sgmls_data *v, int n)
250 {
251     int i;
252     char *cp;
253     static char *buf = 0;
254     static int buf_size = 0;
255     int buf_pos = 0;
256 
257 
258     /* figure out how much space this element will really
259        take, inculding expanded sdata entities. */
260 
261     if (!buf)
262     {
263     	buf_size = 1024;
264     	Malloc(buf_size, buf, char);
265     }
266 
267     for (i = 0; i < n; i++)
268     {
269     	char *s;
270     	int len;
271 
272     	/* Mark the current position.  If this is SDATA
273     	   we will have to return here. */
274     	int tmp_buf_pos = buf_pos;
275 
276     	/* Make sure the buffer is big enough. */
277     	if (buf_size - buf_pos <= v[i].len)
278     	{
279     	    buf_size += v[i].len * (n - i);
280     	    Realloc(buf_size, buf, char);
281     	}
282 
283     	s = v[i].s;
284     	len = v[i].len;
285     	for (; len > 0; len--, s++)
286      	{
287 	    if (*s != RSCHAR) {
288 		if (*s == RECHAR)
289 		    buf[buf_pos] = '\n';
290 		else
291 		    buf[buf_pos] = *s;
292 		buf_pos++;
293 	    }
294     	}
295     	if (v[i].is_sdata)
296     	{
297     	    char *p;
298     	    buf[buf_pos] = '\0';
299     	    p = LookupSDATA(buf + tmp_buf_pos);
300     	    if (p)
301     	    {
302     	    	if (buf_size - tmp_buf_pos <= strlen(p))
303     	    	{
304     		    buf_size += strlen(p) * (n - i);
305     		    Realloc(buf_size, buf, char);
306     	    	}
307     	    	strcpy(buf + tmp_buf_pos, p);
308     	    	buf_pos = tmp_buf_pos + strlen(p);
309     	    }
310     	}
311     }
312 
313     /* Clean up the trailing end of the data. */
314     buf[buf_pos] = '\0';
315     buf_pos--;
316     while (buf_pos > 0  && isspace(buf[buf_pos]) && buf[buf_pos] != '\n')
317     	buf_pos--;
318     if (buf[buf_pos] == '\n')
319     	buf[buf_pos] = '\0';
320 
321     /* Skip over whitespace at the beginning of the data. */
322     cp = buf;
323     while (*cp && isspace(*cp))
324     	cp++;
325     build_ts(gi, cp);
326 }
327 
328 /* ______________________________________________________________________ */
329 /*  Set a transpec parameter
330  *  Arguments:
331  *	gi - the parameter to set
332  *	cp - the value of the parameter
333  */
build_ts(char * gi,char * cp)334 static void build_ts(char *gi, char* cp)
335 {
336     if (strcmp("GI", gi) == 0)
337     {
338     	char *cp2;
339 	/* if we are folding the case of GIs, make all upper (unless
340 	   it's an internal pseudo-GI name, which starts with '_') */
341 	if (fold_case && cp[0] != '_' && cp[0] != '#')
342 	{
343 	    for (cp2=cp; *cp2; cp2++)
344 		if (islower(*cp2)) *cp2 = toupper(*cp2);
345 	}
346 	T.gi = AddElemName(cp);
347     }
348     else if (strcmp("START", gi) == 0)
349     	T.starttext = strdup(cp);
350     else if (strcmp("END", gi) == 0)
351     	T.endtext = strdup(cp);
352     else if (strcmp("RELATION", gi) == 0)
353     {
354 	if (!T.relations)
355 	    T.relations = NewMap(IMS_relations);
356 	SetMapping(T.relations, cp);
357     }
358     else if (strcmp("REPLACE", gi) == 0)
359     	T.replace = strdup(cp);
360     else if (strcmp("ATTVAL", gi) == 0)
361     {
362 	if (!T.nattpairs)
363 	{
364 	    Malloc(1, T.attpair, AttPair_t);
365 	}
366 	else
367 	    Realloc((T.nattpairs+1), T.attpair, AttPair_t);
368 	/* we'll split name/value pairs later */
369 	T.attpair[T.nattpairs].name = strdup(cp);
370 	T.nattpairs++;
371     }
372     else if (strcmp("CONTEXT", gi) == 0)
373     	T.context = strdup(cp);
374     else if (strcmp("MESSAGE", gi) == 0)
375     	T.message = strdup(cp);
376     else if (strcmp("DO", gi) == 0)
377     	T.use_id = atoi(cp);
378     else if (strcmp("CONTENT", gi) == 0)
379     	T.content = strdup(cp);
380     else if (strcmp("PATTSET", gi) == 0)
381     	T.pattrset = strdup(cp);
382     else if (strcmp("VERBATIM", gi) == 0)
383     	T.verbatim = TRUE;
384     else if (strcmp("IGNORE", gi) == 0)
385     {
386 	if (!strcmp(cp, "all"))
387 	    T.ignore = IGN_ALL;
388 	else if (!strcmp(cp, "data"))
389 	    T.ignore = IGN_DATA;
390 	else if (!strcmp(cp, "children"))
391 	    T.ignore = IGN_CHILDREN;
392 	else
393 	    fprintf(stderr, "Bad 'Ignore:' arg in transpec %s: %s\n",
394 		    gi, cp);
395     }
396     else if (strcmp("VARVAL", gi) == 0)
397     {
398 	char **tok;
399 	int i = 2;
400 	tok = Split(cp, &i, S_STRDUP);
401 	T.var_name	= tok[0];
402 	T.var_value	= tok[1];
403     }
404     else if (strcmp("VARREVAL", gi) == 0)
405     {
406     	char buf[1000];
407     	char **tok;
408 	int i = 2;
409 	tok = Split(cp, &i, S_STRDUP);
410 	T.var_RE_name = tok[0];
411 	ExpandVariables(tok[1], buf, 0);
412 	if (!(T.var_RE_value=regcomp(buf)))	{
413 	    fprintf(stderr, "Regex error in VarREValue Content: %s\n",
414 				    tok[1]);
415 	}
416     }
417     else if (strcmp("SET", gi) == 0)
418     {
419 	if (!T.set_var)
420 	    T.set_var = NewMap(IMS_setvar);
421 	SetMapping(T.set_var, cp);
422     }
423     else if (strcmp("INCR", gi) == 0)
424     {
425 	if (!T.incr_var)
426 	    T.incr_var = NewMap(IMS_incvar);
427 	SetMapping(T.incr_var, cp);
428     }
429     else if (strcmp("NTHCHILD", gi) == 0)
430     	T.nth_child = atoi(cp);
431     else if (strcmp("VAR", gi) == 0)
432     	SetMapping(Variables, cp);
433     else if (strcmp("QUIT", gi) == 0)
434     	T.quit = strdup(cp);
435     else
436 	fprintf(stderr, "Unknown translation spec (skipping it): %s\n",	gi);
437 
438 }
439 
440 
441 /* ______________________________________________________________________ */
442 /*  Store translation spec 't' in memory.
443  *  Arguments:
444  *	Pointer to translation spec to remember.
445  *	Line number where translation spec ends.
446  */
447 void
RememberTransSpec(Trans_t * t,int lineno)448 RememberTransSpec(
449     Trans_t	*t,
450     int		lineno
451 )
452 {
453     char	*cp;
454     int		i, do_regex;
455     static Trans_t *last_t;
456     char buf[1000];
457 
458     /* If context testing, check some details and set things up for later. */
459     if (t->context) {
460 	/* See if the context specified is a regular expression.
461 	 * If so, compile the reg expr.  It is assumed to be a regex if
462 	 * it contains a character other than what's allowed for GIs in the
463 	 * OSF sgml declaration (alphas, nums, '-', and '.').
464 	 */
465 	for (do_regex=0,cp=t->context; *cp; cp++) {
466 	    if (!isalnum(*cp) && *cp != '-' && *cp != '.' && *cp != ' ') {
467 		do_regex = 1;
468 		break;
469 	    }
470 	}
471 
472 	if (do_regex) {
473 	    t->depth = MAX_DEPTH;
474 	    if (!(t->context_re=regcomp(t->context))) {
475 		fprintf(stderr, "Regex error in Context: %s\n", t->context);
476 	    }
477 	}
478 	else {
479 	    /* If there's only one item in context, it's the parent.  Treat
480 	     * it specially, since it's faster to just check parent gi.
481 	     */
482 	    cp = t->context;
483 	    if (!strchr(cp, ' ')) {
484 		t->parent  = t->context;
485 		t->context = NULL;
486 	    }
487 	    else {
488 		/* Figure out depth of context string */
489 		t->depth = 0;
490 		while (*cp) {
491 		    if (*cp) t->depth++;
492 		    while (*cp && !IsWhite(*cp)) cp++;	/* find end of gi */
493 		    while (*cp && IsWhite(*cp)) cp++;	/* skip space */
494 		}
495 	    }
496 	}
497     }
498 
499     /* Compile regular expressions for each attribute */
500     for (i=0; i<t->nattpairs; i++) {
501 	/* Initially, name points to "name value".  Split them... */
502 	cp = t->attpair[i].name;
503 	while (*cp && !IsWhite(*cp)) cp++;	/* point past end of name */
504 	if (*cp) {	/* value found */
505 	    *cp++ = EOS;			/* terminate name */
506 	    while (*cp && IsWhite(*cp)) cp++;	/* point to value */
507 	    ExpandVariables(cp, buf, 0);	/* expand any variables */
508 	    t->attpair[i].val = strdup(buf);
509 	}
510 	else {		/* value not found */
511 	    t->attpair[i].val = ".";
512 	}
513 	if (!(t->attpair[i].rex=regcomp(t->attpair[i].val))) {
514 	    fprintf(stderr, "Regex error in AttValue: %s %s\n",
515 		    t->attpair[i].name, t->attpair[i].val);
516 	}
517     }
518 
519     /* Compile regular expression for content */
520     t->content_re = 0;
521     if (t->content) {
522 	ExpandVariables(t->content, buf, 0);
523 	if (!(t->content_re=regcomp(buf)))
524 	    fprintf(stderr, "Regex error in Content: %s\n",
525 		    t->content);
526     }
527 
528     /* If multiple GIs, break up into a vector, then remember it.  We either
529      * sture the individual, or the list - not both. */
530     if (t->gi && strchr(t->gi, ' ')) {
531 	t->gilist = Split(t->gi, 0, S_ALVEC);
532 	t->gi = NULL;
533     }
534 
535     /* Now, store structure in linked list. */
536     if (!TrSpecs) {
537 	Malloc(1, TrSpecs, Trans_t);
538 	last_t = TrSpecs;
539     }
540     else {
541 	Malloc(1, last_t->next, Trans_t);
542 	last_t = last_t->next;
543     }
544     *last_t = *t;
545 }
546 
547 
548 /* ______________________________________________________________________ */
/*  Add an entry to the character mapping table, allocating or
 *  expanding the table if necessary.
 *  Arguments:
 *	Character to map
 *      String to map the character to
 */
556 
557 void
AddCharMap(const char * from,const char * to)558 AddCharMap(
559     const char *from,
560     const char* to
561 )
562 {
563     static int n_alloc = 0;
564 
565     if (from && to) {
566 	if (nCharMap >= n_alloc) {
567 	    n_alloc += 32;
568     	    if (!CharMap) {
569     		Malloc(n_alloc, CharMap, Mapping_t);
570     	    }
571     	    else {
572 		Realloc(n_alloc, CharMap, Mapping_t);
573 	    }
574 	}
575     	CharMap[nCharMap].name = strdup(from);
576     	CharMap[nCharMap].sval = strdup(to);
577         nCharMap++;
578     }
579 }
580 
581 /* ______________________________________________________________________ */
582 /*  Add an entry to the SDATA mapping table.
583  *  Arguments:
584  *	String to map
585  *      String to map to
586  */
587 
588 void
AddSDATA(const char * from,const char * to)589 AddSDATA(
590     const char *from,
591     const char *to
592 )
593 {
594     if (from && to) {
595         if (!SDATAmap)
596             SDATAmap = NewMap(IMS_sdata);
597     	SetMappingNV(SDATAmap, from, to);
598     }
599 }
600 
601 /* ______________________________________________________________________ */
602