1 /*-
2  * Copyright (c) 2002 Poul-Henning Kamp
3  * Copyright (c) 2002 Networks Associates Technology, Inc.
4  * All rights reserved.
5  *
6  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7  * and NAI Labs, the Security Research Division of Network Associates, Inc.
8  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9  * DARPA CHATS research program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The names of the authors may not be used to endorse or promote
20  *    products derived from this software without specific prior written
21  *    permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <fcntl.h>
41 #include <ctype.h>
42 #include <sys/stat.h>
43 #include <sys/mman.h>
44 #include <sys/queue.h>
45 #include <sys/sbuf.h>
46 #include <err.h>
47 #include <bsdxml.h>
48 
49 FILE *fsubs;
50 
51 struct node {
52 	LIST_HEAD(, node)	children;
53 	LIST_ENTRY(node)	siblings;
54 	struct node		*parent;
55 	const char		*name;
56 	struct sbuf		*cont;
57 	struct sbuf		*key;
58 	char			*id;
59 	char			*ref;
60 };
61 
62 struct mytree {
63 	struct node		*top;
64 	struct node		*cur;
65 	int			indent;
66 	int			ignore;
67 };
68 
69 struct ref {
70 	LIST_ENTRY(ref)		next;
71 	char 			*k1;
72 	char			*k2;
73 };
74 
75 LIST_HEAD(, ref)		refs = LIST_HEAD_INITIALIZER(refs);
76 
77 static struct node *
78 new_node(void)
79 {
80 	struct node *np;
81 
82 	np = calloc(1, sizeof *np);
83 	np->cont = sbuf_new_auto();
84 	sbuf_clear(np->cont);
85 	np->key = sbuf_new_auto();
86 	sbuf_clear(np->key);
87 	LIST_INIT(&np->children);
88 	return (np);
89 }
90 
91 static void
92 indent(int n)
93 {
94 
95 	printf("%*.*s", n, n, "");
96 }
97 
98 static void
99 StartElement(void *userData, const char *name, const char **attr)
100 {
101 	struct mytree *mt;
102 	struct node *np;
103 	int i;
104 
105 	mt = userData;
106 	if (!strcmp(name, "FreeBSD")) {
107 		mt->ignore = 1;
108 		return;
109 	}
110 	mt->ignore = 0;
111 	mt->indent += 2;
112 	np = new_node();
113 	for (i = 0; attr[i]; i += 2) {
114 		if (!strcmp(attr[i], "id"))
115 			np->id = strdup(attr[i+1]);
116 		else if (!strcmp(attr[i], "ref"))
117 			np->ref = strdup(attr[i+1]);
118 	}
119 	np->name = strdup(name);
120 	sbuf_cat(np->key, name);
121 	sbuf_cat(np->key, "::");
122 	np->parent = mt->cur;
123 	LIST_INSERT_HEAD(&mt->cur->children, np, siblings);
124 	mt->cur = np;
125 }
126 
127 static void
128 EndElement(void *userData, const char *name __unused)
129 {
130 	struct mytree *mt;
131 	struct node *np;
132 
133 	mt = userData;
134 	if (mt->ignore)
135 		return;
136 
137 	mt->indent -= 2;
138 	sbuf_finish(mt->cur->cont);
139 	LIST_FOREACH(np, &mt->cur->children, siblings) {
140 		if (strcmp(np->name, "name"))
141 			continue;
142 		sbuf_cat(mt->cur->key, sbuf_data(np->cont));
143 		break;
144 	}
145 	sbuf_finish(mt->cur->key);
146 	mt->cur = mt->cur->parent;
147 }
148 
149 static void
150 CharData(void *userData , const XML_Char *s , int len)
151 {
152 	struct mytree *mt;
153 	const char *b, *e;
154 
155 	mt = userData;
156 	if (mt->ignore)
157 		return;
158 	b = s;
159 	e = s + len - 1;
160 	while (isspace(*b) && b < e)
161 		b++;
162 	while (isspace(*e) && e > b)
163 		e--;
164 	if (e != b || *b)
165 		sbuf_bcat(mt->cur->cont, b, e - b + 1);
166 }
167 
168 static struct mytree *
169 dofile(char *filename)
170 {
171 	XML_Parser parser;
172 	struct mytree *mt;
173 	struct stat st;
174 	int fd;
175 	char *p;
176 	int i;
177 
178 	parser = XML_ParserCreate(NULL);
179 	mt = calloc(1, sizeof *mt);
180 	mt->top = new_node();
181 	mt->top->name = "(top)";
182 	mt->top->parent = mt->top;
183 	mt->cur = mt->top;
184 	sbuf_finish(mt->top->key);
185 	sbuf_finish(mt->top->cont);
186 	XML_SetUserData(parser, mt);
187 	XML_SetElementHandler(parser, StartElement, EndElement);
188 	XML_SetCharacterDataHandler(parser, CharData);
189 	fd = open(filename, O_RDONLY);
190 	if (fd < 0)
191 		err(1, filename);
192 	fstat(fd, &st);
193 	p = mmap(NULL, st.st_size, PROT_READ, MAP_NOCORE|MAP_PRIVATE, fd, 0);
194 	i = XML_Parse(parser, p, st.st_size, 1);
195 	if (i != 1)
196 		errx(1, "XML_Parse complained -> %d", i);
197 	munmap(p, st.st_size);
198 	close(fd);
199 	XML_ParserFree(parser);
200 	sbuf_finish(mt->top->cont);
201 	if (i)
202 		return (mt);
203 	else
204 		return (NULL);
205 }
206 
207 static void
208 print_node(struct node *np)
209 {
210 	printf("\"%s\" -- \"%s\" -- \"%s\"", np->name, sbuf_data(np->cont), sbuf_data(np->key));
211 	if (np->id)
212 		printf(" id=\"%s\"", np->id);
213 	if (np->ref)
214 		printf(" ref=\"%s\"", np->ref);
215 	printf("\n");
216 }
217 
218 static void
219 print_tree(struct node *np, int n)
220 {
221 	struct node *np1;
222 
223 	indent(n); printf("%s id=%s ref=%s\n", np->name, np->id, np->ref);
224 	LIST_FOREACH(np1, &np->children, siblings)
225 		print_tree(np1, n + 2);
226 }
227 
228 static void
229 sort_node(struct node *np)
230 {
231 	struct node *np1, *np2;
232 	int n;
233 
234 	LIST_FOREACH(np1, &np->children, siblings)
235 		sort_node(np1);
236 	do {
237 		np1 = LIST_FIRST(&np->children);
238 		n = 0;
239 		for (;;) {
240 			if (np1 == NULL)
241 				return;
242 			np2 = LIST_NEXT(np1, siblings);
243 			if (np2 == NULL)
244 				return;
245 			if (strcmp(sbuf_data(np1->key), sbuf_data(np2->key)) > 0) {
246 				LIST_REMOVE(np2, siblings);
247 				LIST_INSERT_BEFORE(np1, np2, siblings);
248 				n++;
249 				break;
250 			}
251 			np1 = np2;
252 		}
253 	} while (n);
254 }
255 
256 static int
257 refcmp(char *r1, char *r2)
258 {
259 	struct ref *r;
260 
261 	LIST_FOREACH(r, &refs, next) {
262 		if (!strcmp(r1, r->k1))
263 			return (strcmp(r2, r->k2));
264 	}
265 	r = calloc(1, sizeof(*r));
266 	r->k1 = strdup(r1);
267 	r->k2 = strdup(r2);
268 	LIST_INSERT_HEAD(&refs, r, next);
269 	if (fsubs != NULL) {
270 		fprintf(fsubs, "s/%s/%s/g\n", r1, r2);
271 		fflush(fsubs);
272 	}
273 	return (0);
274 }
275 
276 static int compare_node2(struct node *n1, struct node *n2, int in);
277 
278 static int
279 compare_node(struct node *n1, struct node *n2, int in)
280 {
281 	int i;
282 	struct node *n1a, *n2a;
283 
284 	i = strcmp(n1->name, n2->name);
285 	if (i)
286 		return (i);
287 	if (n1->id && n2->id)
288 		i = refcmp(n1->id, n2->id);
289 	else if (n1->id || n2->id)
290 		i = -1;
291 	if (i)
292 		return (i);
293 	if (n1->ref && n2->ref)
294 		i = refcmp(n1->ref, n2->ref);
295 	else if (n1->ref || n2->ref)
296 		i = -1;
297 	if (i)
298 		return (i);
299 	if (!strcmp(n1->name, "ref"))
300 		i = refcmp(sbuf_data(n1->cont), sbuf_data(n2->cont));
301 	else
302 		i = strcmp(sbuf_data(n1->cont), sbuf_data(n2->cont));
303 	if (i)
304 		return (1);
305 	n1a = LIST_FIRST(&n1->children);
306 	n2a = LIST_FIRST(&n2->children);
307 	for (;;) {
308 		if (n1a == NULL && n2a == NULL)
309 			return (0);
310 		if (n1a != NULL && n2a == NULL) {
311 			printf("1>");
312 			indent(in);
313 			print_node(n1a);
314 			printf("2>\n");
315 			return (1);
316 		}
317 		if (n1a == NULL && n2a != NULL) {
318 			printf("1>\n");
319 			printf("2>");
320 			indent(in);
321 			print_node(n2a);
322 			return (1);
323 		}
324 		i = compare_node2(n1a, n2a, in + 2);
325 		if (i)
326 			return (1);
327 		n1a = LIST_NEXT(n1a, siblings);
328 		n2a = LIST_NEXT(n2a, siblings);
329 	}
330 	return (0);
331 }
332 
333 static int
334 compare_node2(struct node *n1, struct node *n2, int in)
335 {
336 	int i;
337 
338 	i = compare_node(n1, n2, in);
339 	if (i) {
340 		printf("1>");
341 		indent(in);
342 		print_node(n1);
343 		printf("2>");
344 		indent(in);
345 		print_node(n2);
346 	}
347 	return (i);
348 }
349 
350 
351 
352 int
353 main(int argc, char **argv)
354 {
355 	struct mytree *t1, *t2;
356 	int i;
357 
358 	fsubs = fopen("_.subs", "w");
359 	setbuf(stdout, NULL);
360 	setbuf(stderr, NULL);
361 	if (argc != 3)
362 		errx(1, "usage: %s file1 file2", argv[0]);
363 
364 	t1 = dofile(argv[1]);
365 	if (t1 == NULL)
366 		errx(2, "XML parser error on file %s", argv[1]);
367 	sort_node(t1->top);
368 	t2 = dofile(argv[2]);
369 	if (t2 == NULL)
370 		errx(2, "XML parser error on file %s", argv[2]);
371 	sort_node(t2->top);
372 	i = compare_node(t1->top, t2->top, 0);
373 	return (i);
374 }
375 
376