1 /*-
2  * Copyright (c) 2002 Poul-Henning Kamp
3  * Copyright (c) 2002 Networks Associates Technology, Inc.
4  * All rights reserved.
5  *
6  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7  * and NAI Labs, the Security Research Division of Network Associates, Inc.
8  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9  * DARPA CHATS research program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The names of the authors may not be used to endorse or promote
20  *    products derived from this software without specific prior written
21  *    permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $FreeBSD$
36  */
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <fcntl.h>
43 #include <ctype.h>
44 #include <sys/stat.h>
45 #include <sys/mman.h>
46 #include <sys/queue.h>
47 #include <sys/sbuf.h>
48 #include <err.h>
49 #include <bsdxml.h>
50 
51 FILE *fsubs;
52 
53 struct node {
54 	LIST_HEAD(, node)	children;
55 	LIST_ENTRY(node)	siblings;
56 	struct node		*parent;
57 	const char		*name;
58 	struct sbuf		*cont;
59 	struct sbuf		*key;
60 	char			*id;
61 	char			*ref;
62 };
63 
64 struct mytree {
65 	struct node		*top;
66 	struct node		*cur;
67 	int			indent;
68 	int			ignore;
69 };
70 
71 struct ref {
72 	LIST_ENTRY(ref)		next;
73 	char 			*k1;
74 	char			*k2;
75 };
76 
77 LIST_HEAD(, ref)		refs = LIST_HEAD_INITIALIZER(&refs);
78 
79 static struct node *
80 new_node(void)
81 {
82 	struct node *np;
83 
84 	np = calloc(1, sizeof *np);
85 	np->cont = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
86 	sbuf_clear(np->cont);
87 	np->key = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
88 	sbuf_clear(np->key);
89 	LIST_INIT(&np->children);
90 	return (np);
91 }
92 
93 static void
94 indent(int n)
95 {
96 
97 	printf("%*.*s", n, n, "");
98 }
99 
100 static void
101 StartElement(void *userData, const char *name, const char **attr)
102 {
103 	struct mytree *mt;
104 	struct node *np;
105 	int i;
106 
107 	mt = userData;
108 	if (!strcmp(name, "FreeBSD")) {
109 		mt->ignore = 1;
110 		return;
111 	}
112 	mt->ignore = 0;
113 	mt->indent += 2;
114 	np = new_node();
115 	for (i = 0; attr[i]; i += 2) {
116 		if (!strcmp(attr[i], "id"))
117 			np->id = strdup(attr[i+1]);
118 		else if (!strcmp(attr[i], "ref"))
119 			np->ref = strdup(attr[i+1]);
120 	}
121 	np->name = strdup(name);
122 	sbuf_cat(np->key, name);
123 	sbuf_cat(np->key, "::");
124 	np->parent = mt->cur;
125 	LIST_INSERT_HEAD(&mt->cur->children, np, siblings);
126 	mt->cur = np;
127 }
128 
129 static void
130 EndElement(void *userData, const char *name __unused)
131 {
132 	struct mytree *mt;
133 	struct node *np;
134 
135 	mt = userData;
136 	if (mt->ignore)
137 		return;
138 
139 	mt->indent -= 2;
140 	sbuf_finish(mt->cur->cont);
141 	LIST_FOREACH(np, &mt->cur->children, siblings) {
142 		if (strcmp(np->name, "name"))
143 			continue;
144 		sbuf_cat(mt->cur->key, sbuf_data(np->cont));
145 		break;
146 	}
147 	sbuf_finish(mt->cur->key);
148 	mt->cur = mt->cur->parent;
149 }
150 
151 static void
152 CharData(void *userData , const XML_Char *s , int len)
153 {
154 	struct mytree *mt;
155 	const char *b, *e;
156 
157 	mt = userData;
158 	if (mt->ignore)
159 		return;
160 	b = s;
161 	e = s + len - 1;
162 	while (isspace(*b) && b < e)
163 		b++;
164 	while (isspace(*e) && e > b)
165 		e--;
166 	if (e != b || *b)
167 		sbuf_bcat(mt->cur->cont, b, e - b + 1);
168 }
169 
170 static struct mytree *
171 dofile(char *filename)
172 {
173 	XML_Parser parser;
174 	struct mytree *mt;
175 	struct stat st;
176 	int fd;
177 	char *p;
178 	int i;
179 
180 	parser = XML_ParserCreate(NULL);
181 	mt = calloc(1, sizeof *mt);
182 	mt->top = new_node();
183 	mt->top->name = "(top)";
184 	mt->top->parent = mt->top;
185 	mt->cur = mt->top;
186 	sbuf_finish(mt->top->key);
187 	sbuf_finish(mt->top->cont);
188 	XML_SetUserData(parser, mt);
189 	XML_SetElementHandler(parser, StartElement, EndElement);
190 	XML_SetCharacterDataHandler(parser, CharData);
191 	fd = open(filename, O_RDONLY);
192 	if (fd < 0)
193 		err(1, filename);
194 	fstat(fd, &st);
195 	p = mmap(NULL, st.st_size, PROT_READ, MAP_NOCORE|MAP_PRIVATE, fd, 0);
196 	i = XML_Parse(parser, p, st.st_size, 1);
197 	if (i != 1)
198 		errx(1, "XML_Parse complained -> %d", i);
199 	munmap(p, st.st_size);
200 	close(fd);
201 	XML_ParserFree(parser);
202 	sbuf_finish(mt->top->cont);
203 	if (i)
204 		return (mt);
205 	else
206 		return (NULL);
207 }
208 
209 static void
210 print_node(struct node *np)
211 {
212 	printf("\"%s\" -- \"%s\" -- \"%s\"", np->name, sbuf_data(np->cont), sbuf_data(np->key));
213 	if (np->id)
214 		printf(" id=\"%s\"", np->id);
215 	if (np->ref)
216 		printf(" ref=\"%s\"", np->ref);
217 	printf("\n");
218 }
219 
220 static void
221 print_tree(struct node *np, int n)
222 {
223 	struct node *np1;
224 
225 	indent(n); printf("%s id=%s ref=%s\n", np->name, np->id, np->ref);
226 	LIST_FOREACH(np1, &np->children, siblings)
227 		print_tree(np1, n + 2);
228 }
229 
230 static void
231 sort_node(struct node *np)
232 {
233 	struct node *np1, *np2;
234 	int n;
235 
236 	LIST_FOREACH(np1, &np->children, siblings)
237 		sort_node(np1);
238 	do {
239 		np1 = LIST_FIRST(&np->children);
240 		n = 0;
241 		for (;;) {
242 			if (np1 == NULL)
243 				return;
244 			np2 = LIST_NEXT(np1, siblings);
245 			if (np2 == NULL)
246 				return;
247 			if (strcmp(sbuf_data(np1->key), sbuf_data(np2->key)) > 0) {
248 				LIST_REMOVE(np2, siblings);
249 				LIST_INSERT_BEFORE(np1, np2, siblings);
250 				n++;
251 				break;
252 			}
253 			np1 = np2;
254 		}
255 	} while (n);
256 }
257 
258 static int
259 refcmp(char *r1, char *r2)
260 {
261 	struct ref *r;
262 
263 	LIST_FOREACH(r, &refs, next) {
264 		if (!strcmp(r1, r->k1))
265 			return (strcmp(r2, r->k2));
266 	}
267 	r = calloc(1, sizeof(*r));
268 	r->k1 = strdup(r1);
269 	r->k2 = strdup(r2);
270 	LIST_INSERT_HEAD(&refs, r, next);
271 	if (fsubs != NULL) {
272 		fprintf(fsubs, "s/%s/%s/g\n", r1, r2);
273 		fflush(fsubs);
274 	}
275 	return (0);
276 }
277 
278 static int compare_node2(struct node *n1, struct node *n2, int in);
279 
280 static int
281 compare_node(struct node *n1, struct node *n2, int in)
282 {
283 	int i;
284 	struct node *n1a, *n2a;
285 
286 	i = strcmp(n1->name, n2->name);
287 	if (i)
288 		return (i);
289 	if (n1->id && n2->id)
290 		i = refcmp(n1->id, n2->id);
291 	else if (n1->id || n2->id)
292 		i = -1;
293 	if (i)
294 		return (i);
295 	if (n1->ref && n2->ref)
296 		i = refcmp(n1->ref, n2->ref);
297 	else if (n1->ref || n2->ref)
298 		i = -1;
299 	if (i)
300 		return (i);
301 	if (!strcmp(n1->name, "ref"))
302 		i = refcmp(sbuf_data(n1->cont), sbuf_data(n2->cont));
303 	else
304 		i = strcmp(sbuf_data(n1->cont), sbuf_data(n2->cont));
305 	if (i)
306 		return (1);
307 	n1a = LIST_FIRST(&n1->children);
308 	n2a = LIST_FIRST(&n2->children);
309 	for (;;) {
310 		if (n1a == NULL && n2a == NULL)
311 			return (0);
312 		if (n1a != NULL && n2a == NULL) {
313 			printf("1>");
314 			indent(in);
315 			print_node(n1a);
316 			printf("2>\n");
317 			return (1);
318 		}
319 		if (n1a == NULL && n2a != NULL) {
320 			printf("1>\n");
321 			printf("2>");
322 			indent(in);
323 			print_node(n2a);
324 			return (1);
325 		}
326 		i = compare_node2(n1a, n2a, in + 2);
327 		if (i)
328 			return (1);
329 		n1a = LIST_NEXT(n1a, siblings);
330 		n2a = LIST_NEXT(n2a, siblings);
331 	}
332 	return (0);
333 }
334 
335 static int
336 compare_node2(struct node *n1, struct node *n2, int in)
337 {
338 	int i;
339 
340 	i = compare_node(n1, n2, in);
341 	if (i) {
342 		printf("1>");
343 		indent(in);
344 		print_node(n1);
345 		printf("2>");
346 		indent(in);
347 		print_node(n2);
348 	}
349 	return (i);
350 }
351 
352 
353 
354 int
355 main(int argc, char **argv)
356 {
357 	struct mytree *t1, *t2;
358 	int i;
359 
360 	fsubs = fopen("_.subs", "w");
361 	setbuf(stdout, NULL);
362 	setbuf(stderr, NULL);
363 	if (argc != 3)
364 		errx(1, "usage: %s file1 file2", argv[0]);
365 
366 	t1 = dofile(argv[1]);
367 	if (t1 == NULL)
368 		errx(2, "XML parser error on file %s", argv[1]);
369 	sort_node(t1->top);
370 	t2 = dofile(argv[2]);
371 	if (t2 == NULL)
372 		errx(2, "XML parser error on file %s", argv[2]);
373 	sort_node(t2->top);
374 	i = compare_node(t1->top, t2->top, 0);
375 	return (i);
376 }
377 
378