1 /*
2 * Copyright 1993 Open Software Foundation, Inc., Cambridge, Massachusetts.
3 * All rights reserved.
4 */
5 /*
6 * Copyright (c) 1994
7 * Open Software Foundation, Inc.
8 *
9 * Permission is hereby granted to use, copy, modify and freely distribute
10 * the software in this file and its documentation for any purpose without
11 * fee, provided that the above copyright notice appears in all copies and
12 * that both the copyright notice and this permission notice appear in
13 * supporting documentation. Further, provided that the name of Open
14 * Software Foundation, Inc. ("OSF") not be used in advertising or
15 * publicity pertaining to distribution of the software without prior
16 * written permission from OSF. OSF makes no representations about the
17 * suitability of this software for any purpose. It is provided "as is"
18 * without express or implied warranty.
19 */
20 /*
21 * Copyright (c) 1996 X Consortium
22 * Copyright (c) 1995, 1996 Dalrymple Consulting
23 *
24 * Permission is hereby granted, free of charge, to any person obtaining a copy
25 * of this software and associated documentation files (the "Software"), to deal
26 * in the Software without restriction, including without limitation the rights
27 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
28 * copies of the Software, and to permit persons to whom the Software is
29 * furnished to do so, subject to the following conditions:
30 *
31 * The above copyright notice and this permission notice shall be included in
32 * all copies or substantial portions of the Software.
33 *
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
35 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
36 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
37 * X CONSORTIUM OR DALRYMPLE CONSULTING BE LIABLE FOR ANY CLAIM, DAMAGES OR
38 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
39 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
40 * OTHER DEALINGS IN THE SOFTWARE.
41 *
42 * Except as contained in this notice, the names of the X Consortium and
43 * Dalrymple Consulting shall not be used in advertising or otherwise to
44 * promote the sale, use or other dealings in this Software without prior
45 * written authorization.
46 */
47 /* ________________________________________________________________________
48 *
49 * Program to manipulate SGML instances.
50 *
51 * This module contains the initialization routines for translation module.
52 * They mostly deal with reading data files (translation specs, SDATA
53 * mappings, character mappings).
54 *
55 * Entry points:
56 * ReadTransSpec(transfile) read/store translation spec from file
57 * ________________________________________________________________________
58 */
59
/* Revision-control identification string embedded in the object file.
 * It is compiled out when `lint' is defined.  Declared as a const array
 * because it is a read-only string literal that is never modified.
 */
#ifndef lint
static const char RCSid[] =
    "$Header: /home/ncvs/src/usr.bin/sgmls/instant/traninit.c,v 1.1.1.1 1996/09/08 01:55:10 jfieber Exp $";
#endif
64
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <ctype.h>
68 #include <string.h>
69 #include <memory.h>
70 #include <sys/types.h>
71 #include <errno.h>
72 #include <regex.h>
73
74 #include "general.h"
75 #include "translate.h"
76
77 #include "sgmls.h"
78 #include "config.h"
79
/* Installation prefix; this default is used only when PREFIX has not
 * already been defined.
 */
#if !defined(PREFIX)
#define PREFIX "/usr/local"
#endif

/* Directory whose "catalog" file ReadTransSpec() passes to the SGML
 * parser; this default is used only when TRANSPEC_DIR is not predefined.
 */
#if !defined(TRANSPEC_DIR)
#define TRANSPEC_DIR PREFIX "/share/sgml/transpec"
#endif

/* Boolean true, unless something else has already defined it. */
#if !defined(TRUE)
#define TRUE (1 == 1)
#endif

/* Larger / smaller of two values.  Each argument may be evaluated twice,
 * so do not pass expressions with side effects.
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
94
95 /* forward references */
96 void RememberTransSpec(Trans_t *, int);
97 static void do_data(char *gi, struct sgmls_data *v, int n);
98 static void build_ts(char *gi, char* cp);
99 void AddCharMap(const char *from, const char* to);
100 void AddSDATA(const char *from, const char *to);
101
102 /* ______________________________________________________________________ */
/* Minimal compatibility wrapper giving POSIX regcomp() a UNIX V8 regexp
 * style interface (compile only; the result is used for matching, with no
 * sub-expression reporting).
 * Arguments:
 *	Pattern to compile (as an extended regular expression).
 * Returns:
 *	A heap-allocated compiled expression, or NULL if memory could not be
 *	allocated or the pattern does not compile.
 */

static regex_t *v8_regcomp(const char *pattern)
{
    regex_t *compiled = malloc(sizeof(regex_t));

    if (compiled == NULL)
        return NULL;

    if (regcomp(compiled, pattern, REG_EXTENDED | REG_NOSUB) != 0) {
        free(compiled);
        return NULL;
    }
    return compiled;
}
/* From this point on, every regcomp(pattern) call in this file is the
 * one-argument wrapper above, not the POSIX function.
 */
#define regcomp v8_regcomp
118
119 /* ______________________________________________________________________ */
120 /* Read the translation specs from the input file, storing in memory.
121 * Arguments:
122 * Name of translation spec file.
123 */
124
125 static Trans_t T;
126
127
/* Error callback installed with sgmls_set_errhandler() by ReadTransSpec().
 * Reports the problem on stderr and returns.
 * Arguments:
 *	num    - error number supplied by the caller (not used here)
 *	str    - text of the error message
 *	lineno - input line number the error refers to
 */
static void
input_error(int num, char *str, unsigned long lineno)
{
    (void)num;		/* only the message and line number are reported */
    fprintf(stderr, "Error at input line %lu: %s\n", lineno, str);
}
136
137 void
ReadTransSpec(char * transfile)138 ReadTransSpec(
139 char *transfile
140 )
141 {
142 FILE *fp;
143 struct sgmls *sp;
144 struct sgmls_event e;
145 char gi[LINESIZE];
146 char buf[LINESIZE];
147 char buf2[LINESIZE];
148 char *command;
149 char *sgmls = "onsgmls -c " TRANSPEC_DIR "/catalog ";
150 char maptype = '\0';
151
152 (void)sgmls_set_errhandler(input_error);
153 transfile = FilePath(transfile);
154 if (!transfile)
155 {
156 fprintf(stderr, "Error: Could not locate specified transfile\n");
157 exit(1);
158 }
159
160 /* XXX this is a quick, gross hack. Should write a parse() function. */
161 Malloc(strlen(sgmls) + strlen(transfile) + 2, command, char);
162 sprintf(command, "%s %s", sgmls, transfile);
163 fp = popen(command, "r");
164
165 sp = sgmls_create(fp);
166 while (sgmls_next(sp, &e))
167 switch (e.type) {
168 case SGMLS_EVENT_DATA:
169 do_data(gi, e.u.data.v, e.u.data.n);
170 break;
171 case SGMLS_EVENT_ENTITY:
172 fprintf(stderr, "Hm... got an entity\n");
173 break;
174 case SGMLS_EVENT_PI:
175 break;
176 case SGMLS_EVENT_START:
177 if (strncmp("RULE", e.u.start.gi, 4) == 0) {
178 /* A new transpec, so clear the data structure
179 * and look for an ID attribute.
180 */
181 struct sgmls_attribute *attr = e.u.start.attributes;
182 memset(&T, 0, sizeof T);
183 while (attr) {
184 if (attr->type == SGMLS_ATTR_CDATA
185 && strncmp("ID", attr->name, 2) == 0) {
186 strncpy(buf, attr->value.data.v->s,
187 MIN(attr->value.data.v->len, LINESIZE));
188 buf[MIN(attr->value.data.v->len, LINESIZE - 1)] = '\0';
189 T.my_id = atoi(buf);
190 }
191 attr = attr->next;
192 }
193 }
194 else if (strncmp("CMAP", e.u.start.gi, 4) == 0)
195 maptype = 'c';
196 else if (strncmp("SMAP", e.u.start.gi, 4) == 0)
197 maptype = 's';
198 else if (strncmp("MAP", e.u.start.gi, 3) == 0) {
199 struct sgmls_attribute *attr = e.u.start.attributes;
200 char *from = 0;
201 char *to = 0;
202
203 while (attr) {
204 if (attr->value.data.v && strncmp("FROM", attr->name, 4) == 0) {
205 strncpy(buf, attr->value.data.v->s,
206 MIN(attr->value.data.v->len, LINESIZE - 1));
207 buf[MIN(attr->value.data.v->len, LINESIZE - 1)] = '\0';
208 }
209 if (attr->value.data.v && strncmp("TO", attr->name, 2) == 0) {
210 strncpy(buf2, attr->value.data.v->s,
211 MIN(attr->value.data.v->len, LINESIZE - 1));
212 buf2[MIN(attr->value.data.v->len, LINESIZE - 1)] = '\0';
213 }
214 attr = attr->next;
215 }
216 if (maptype == 'c')
217 AddCharMap(buf, buf2);
218 else if (maptype == 's')
219 AddSDATA(buf, buf2);
220 else
221 fprintf(stderr, "Unknown map type!\n");
222 }
223 else {
224 strncpy(gi, e.u.start.gi, 512);
225 sgmls_free_attributes(e.u.start.attributes);
226 }
227 break;
228 case SGMLS_EVENT_END:
229 if (strncmp("RULE", e.u.start.gi, 4) == 0)
230 RememberTransSpec(&T, e.lineno);
231 break;
232 case SGMLS_EVENT_SUBSTART:
233 break;
234 case SGMLS_EVENT_SUBEND:
235 break;
236 case SGMLS_EVENT_APPINFO:
237 break;
238 case SGMLS_EVENT_CONFORMING:
239 break;
240 default:
241 abort();
242 }
243 sgmls_free(sp);
244 pclose(fp);
245 free(command);
246 }
247
248
do_data(char * gi,struct sgmls_data * v,int n)249 static void do_data(char *gi, struct sgmls_data *v, int n)
250 {
251 int i;
252 char *cp;
253 static char *buf = 0;
254 static int buf_size = 0;
255 int buf_pos = 0;
256
257
258 /* figure out how much space this element will really
259 take, inculding expanded sdata entities. */
260
261 if (!buf)
262 {
263 buf_size = 1024;
264 Malloc(buf_size, buf, char);
265 }
266
267 for (i = 0; i < n; i++)
268 {
269 char *s;
270 int len;
271
272 /* Mark the current position. If this is SDATA
273 we will have to return here. */
274 int tmp_buf_pos = buf_pos;
275
276 /* Make sure the buffer is big enough. */
277 if (buf_size - buf_pos <= v[i].len)
278 {
279 buf_size += v[i].len * (n - i);
280 Realloc(buf_size, buf, char);
281 }
282
283 s = v[i].s;
284 len = v[i].len;
285 for (; len > 0; len--, s++)
286 {
287 if (*s != RSCHAR) {
288 if (*s == RECHAR)
289 buf[buf_pos] = '\n';
290 else
291 buf[buf_pos] = *s;
292 buf_pos++;
293 }
294 }
295 if (v[i].is_sdata)
296 {
297 char *p;
298 buf[buf_pos] = '\0';
299 p = LookupSDATA(buf + tmp_buf_pos);
300 if (p)
301 {
302 if (buf_size - tmp_buf_pos <= strlen(p))
303 {
304 buf_size += strlen(p) * (n - i);
305 Realloc(buf_size, buf, char);
306 }
307 strcpy(buf + tmp_buf_pos, p);
308 buf_pos = tmp_buf_pos + strlen(p);
309 }
310 }
311 }
312
313 /* Clean up the trailing end of the data. */
314 buf[buf_pos] = '\0';
315 buf_pos--;
316 while (buf_pos > 0 && isspace(buf[buf_pos]) && buf[buf_pos] != '\n')
317 buf_pos--;
318 if (buf[buf_pos] == '\n')
319 buf[buf_pos] = '\0';
320
321 /* Skip over whitespace at the beginning of the data. */
322 cp = buf;
323 while (*cp && isspace(*cp))
324 cp++;
325 build_ts(gi, cp);
326 }
327
328 /* ______________________________________________________________________ */
329 /* Set a transpec parameter
330 * Arguments:
331 * gi - the parameter to set
332 * cp - the value of the parameter
333 */
build_ts(char * gi,char * cp)334 static void build_ts(char *gi, char* cp)
335 {
336 if (strcmp("GI", gi) == 0)
337 {
338 char *cp2;
339 /* if we are folding the case of GIs, make all upper (unless
340 it's an internal pseudo-GI name, which starts with '_') */
341 if (fold_case && cp[0] != '_' && cp[0] != '#')
342 {
343 for (cp2=cp; *cp2; cp2++)
344 if (islower(*cp2)) *cp2 = toupper(*cp2);
345 }
346 T.gi = AddElemName(cp);
347 }
348 else if (strcmp("START", gi) == 0)
349 T.starttext = strdup(cp);
350 else if (strcmp("END", gi) == 0)
351 T.endtext = strdup(cp);
352 else if (strcmp("RELATION", gi) == 0)
353 {
354 if (!T.relations)
355 T.relations = NewMap(IMS_relations);
356 SetMapping(T.relations, cp);
357 }
358 else if (strcmp("REPLACE", gi) == 0)
359 T.replace = strdup(cp);
360 else if (strcmp("ATTVAL", gi) == 0)
361 {
362 if (!T.nattpairs)
363 {
364 Malloc(1, T.attpair, AttPair_t);
365 }
366 else
367 Realloc((T.nattpairs+1), T.attpair, AttPair_t);
368 /* we'll split name/value pairs later */
369 T.attpair[T.nattpairs].name = strdup(cp);
370 T.nattpairs++;
371 }
372 else if (strcmp("CONTEXT", gi) == 0)
373 T.context = strdup(cp);
374 else if (strcmp("MESSAGE", gi) == 0)
375 T.message = strdup(cp);
376 else if (strcmp("DO", gi) == 0)
377 T.use_id = atoi(cp);
378 else if (strcmp("CONTENT", gi) == 0)
379 T.content = strdup(cp);
380 else if (strcmp("PATTSET", gi) == 0)
381 T.pattrset = strdup(cp);
382 else if (strcmp("VERBATIM", gi) == 0)
383 T.verbatim = TRUE;
384 else if (strcmp("IGNORE", gi) == 0)
385 {
386 if (!strcmp(cp, "all"))
387 T.ignore = IGN_ALL;
388 else if (!strcmp(cp, "data"))
389 T.ignore = IGN_DATA;
390 else if (!strcmp(cp, "children"))
391 T.ignore = IGN_CHILDREN;
392 else
393 fprintf(stderr, "Bad 'Ignore:' arg in transpec %s: %s\n",
394 gi, cp);
395 }
396 else if (strcmp("VARVAL", gi) == 0)
397 {
398 char **tok;
399 int i = 2;
400 tok = Split(cp, &i, S_STRDUP);
401 T.var_name = tok[0];
402 T.var_value = tok[1];
403 }
404 else if (strcmp("VARREVAL", gi) == 0)
405 {
406 char buf[1000];
407 char **tok;
408 int i = 2;
409 tok = Split(cp, &i, S_STRDUP);
410 T.var_RE_name = tok[0];
411 ExpandVariables(tok[1], buf, 0);
412 if (!(T.var_RE_value=regcomp(buf))) {
413 fprintf(stderr, "Regex error in VarREValue Content: %s\n",
414 tok[1]);
415 }
416 }
417 else if (strcmp("SET", gi) == 0)
418 {
419 if (!T.set_var)
420 T.set_var = NewMap(IMS_setvar);
421 SetMapping(T.set_var, cp);
422 }
423 else if (strcmp("INCR", gi) == 0)
424 {
425 if (!T.incr_var)
426 T.incr_var = NewMap(IMS_incvar);
427 SetMapping(T.incr_var, cp);
428 }
429 else if (strcmp("NTHCHILD", gi) == 0)
430 T.nth_child = atoi(cp);
431 else if (strcmp("VAR", gi) == 0)
432 SetMapping(Variables, cp);
433 else if (strcmp("QUIT", gi) == 0)
434 T.quit = strdup(cp);
435 else
436 fprintf(stderr, "Unknown translation spec (skipping it): %s\n", gi);
437
438 }
439
440
441 /* ______________________________________________________________________ */
442 /* Store translation spec 't' in memory.
443 * Arguments:
444 * Pointer to translation spec to remember.
445 * Line number where translation spec ends.
446 */
447 void
RememberTransSpec(Trans_t * t,int lineno)448 RememberTransSpec(
449 Trans_t *t,
450 int lineno
451 )
452 {
453 char *cp;
454 int i, do_regex;
455 static Trans_t *last_t;
456 char buf[1000];
457
458 /* If context testing, check some details and set things up for later. */
459 if (t->context) {
460 /* See if the context specified is a regular expression.
461 * If so, compile the reg expr. It is assumed to be a regex if
462 * it contains a character other than what's allowed for GIs in the
463 * OSF sgml declaration (alphas, nums, '-', and '.').
464 */
465 for (do_regex=0,cp=t->context; *cp; cp++) {
466 if (!isalnum(*cp) && *cp != '-' && *cp != '.' && *cp != ' ') {
467 do_regex = 1;
468 break;
469 }
470 }
471
472 if (do_regex) {
473 t->depth = MAX_DEPTH;
474 if (!(t->context_re=regcomp(t->context))) {
475 fprintf(stderr, "Regex error in Context: %s\n", t->context);
476 }
477 }
478 else {
479 /* If there's only one item in context, it's the parent. Treat
480 * it specially, since it's faster to just check parent gi.
481 */
482 cp = t->context;
483 if (!strchr(cp, ' ')) {
484 t->parent = t->context;
485 t->context = NULL;
486 }
487 else {
488 /* Figure out depth of context string */
489 t->depth = 0;
490 while (*cp) {
491 if (*cp) t->depth++;
492 while (*cp && !IsWhite(*cp)) cp++; /* find end of gi */
493 while (*cp && IsWhite(*cp)) cp++; /* skip space */
494 }
495 }
496 }
497 }
498
499 /* Compile regular expressions for each attribute */
500 for (i=0; i<t->nattpairs; i++) {
501 /* Initially, name points to "name value". Split them... */
502 cp = t->attpair[i].name;
503 while (*cp && !IsWhite(*cp)) cp++; /* point past end of name */
504 if (*cp) { /* value found */
505 *cp++ = EOS; /* terminate name */
506 while (*cp && IsWhite(*cp)) cp++; /* point to value */
507 ExpandVariables(cp, buf, 0); /* expand any variables */
508 t->attpair[i].val = strdup(buf);
509 }
510 else { /* value not found */
511 t->attpair[i].val = ".";
512 }
513 if (!(t->attpair[i].rex=regcomp(t->attpair[i].val))) {
514 fprintf(stderr, "Regex error in AttValue: %s %s\n",
515 t->attpair[i].name, t->attpair[i].val);
516 }
517 }
518
519 /* Compile regular expression for content */
520 t->content_re = 0;
521 if (t->content) {
522 ExpandVariables(t->content, buf, 0);
523 if (!(t->content_re=regcomp(buf)))
524 fprintf(stderr, "Regex error in Content: %s\n",
525 t->content);
526 }
527
528 /* If multiple GIs, break up into a vector, then remember it. We either
529 * sture the individual, or the list - not both. */
530 if (t->gi && strchr(t->gi, ' ')) {
531 t->gilist = Split(t->gi, 0, S_ALVEC);
532 t->gi = NULL;
533 }
534
535 /* Now, store structure in linked list. */
536 if (!TrSpecs) {
537 Malloc(1, TrSpecs, Trans_t);
538 last_t = TrSpecs;
539 }
540 else {
541 Malloc(1, last_t->next, Trans_t);
542 last_t = last_t->next;
543 }
544 *last_t = *t;
545 }
546
547
548 /* ______________________________________________________________________ */
549 /* Add an entry to the character mapping table, allocating or
550 * expanding the table if necessary.
551 * Arguments:
552 * Character to map
553 * String to map the character to
554 * A 'c' or an 's' for character or sdata map
555 */
556
557 void
AddCharMap(const char * from,const char * to)558 AddCharMap(
559 const char *from,
560 const char* to
561 )
562 {
563 static int n_alloc = 0;
564
565 if (from && to) {
566 if (nCharMap >= n_alloc) {
567 n_alloc += 32;
568 if (!CharMap) {
569 Malloc(n_alloc, CharMap, Mapping_t);
570 }
571 else {
572 Realloc(n_alloc, CharMap, Mapping_t);
573 }
574 }
575 CharMap[nCharMap].name = strdup(from);
576 CharMap[nCharMap].sval = strdup(to);
577 nCharMap++;
578 }
579 }
580
581 /* ______________________________________________________________________ */
582 /* Add an entry to the SDATA mapping table.
583 * Arguments:
584 * String to map
585 * String to map to
586 */
587
588 void
AddSDATA(const char * from,const char * to)589 AddSDATA(
590 const char *from,
591 const char *to
592 )
593 {
594 if (from && to) {
595 if (!SDATAmap)
596 SDATAmap = NewMap(IMS_sdata);
597 SetMappingNV(SDATAmap, from, to);
598 }
599 }
600
601 /* ______________________________________________________________________ */
602