xref: /original-bsd/usr.bin/tsort/tsort.c (revision 784dc2a7)
1b4f9b773Sbostic /*
2bb52f206Sbostic  * Copyright (c) 1989, 1993, 1994
34dd38c1dSbostic  *	The Regents of the University of California.  All rights reserved.
4b4f9b773Sbostic  *
5b4f9b773Sbostic  * This code is derived from software contributed to Berkeley by
6b4f9b773Sbostic  * Michael Rendell of Memorial University of Newfoundland.
7b4f9b773Sbostic  *
8f5eef415Sbostic  * %sccs.include.redist.c%
911871153Sbill  */
10b4f9b773Sbostic 
11b4f9b773Sbostic #ifndef lint
124dd38c1dSbostic static char copyright[] =
13bb52f206Sbostic "@(#) Copyright (c) 1989, 1993, 1994\n\
144dd38c1dSbostic 	The Regents of the University of California.  All rights reserved.\n";
15b4f9b773Sbostic #endif /* not lint */
16b4f9b773Sbostic 
17b4f9b773Sbostic #ifndef lint
18*784dc2a7Sbostic static char sccsid[] = "@(#)tsort.c	8.3 (Berkeley) 05/04/95";
19b4f9b773Sbostic #endif /* not lint */
20b4f9b773Sbostic 
21b4f9b773Sbostic #include <sys/types.h>
22bb52f206Sbostic 
23bb52f206Sbostic #include <ctype.h>
24bb52f206Sbostic #include <db.h>
25bb52f206Sbostic #include <err.h>
26b4f9b773Sbostic #include <errno.h>
27be1888fcSbostic #include <fcntl.h>
28625352d0Smckusick #include <stdio.h>
29ca9382c3Sbostic #include <stdlib.h>
302f15e4cdSbostic #include <string.h>
31*784dc2a7Sbostic #include <unistd.h>
3211871153Sbill 
33b4f9b773Sbostic /*
34481152d7Sbostic  *  Topological sort.  Input is a list of pairs of strings separated by
35b4f9b773Sbostic  *  white space (spaces, tabs, and/or newlines); strings are written to
36b4f9b773Sbostic  *  standard output in sorted order, one per line.
37b4f9b773Sbostic  *
38b4f9b773Sbostic  *  usage:
39bb52f206Sbostic  *     tsort [-l] [inputfile]
40b4f9b773Sbostic  *  If no input file is specified, standard input is read.
41b4f9b773Sbostic  *
42b4f9b773Sbostic  *  Should be compatable with AT&T tsort HOWEVER the output is not identical
43b4f9b773Sbostic  *  (i.e. for most graphs there is more than one sorted order, and this tsort
44b4f9b773Sbostic  *  usually generates a different one then the AT&T tsort).  Also, cycle
45b4f9b773Sbostic  *  reporting seems to be more accurate in this version (the AT&T tsort
46b4f9b773Sbostic  *  sometimes says a node is in a cycle when it isn't).
47b4f9b773Sbostic  *
48b4f9b773Sbostic  *  Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90
4911871153Sbill  */
50b4f9b773Sbostic #define	HASHSIZE	53		/* doesn't need to be big */
51b4f9b773Sbostic #define	NF_MARK		0x1		/* marker for cycle detection */
52b4f9b773Sbostic #define	NF_ACYCLIC	0x2		/* this node is cycle free */
53bb52f206Sbostic #define	NF_NODEST	0x4		/* Unreachable */
54bb52f206Sbostic 
55b4f9b773Sbostic 
56b4f9b773Sbostic typedef struct node_str NODE;
57b4f9b773Sbostic 
58b4f9b773Sbostic struct node_str {
59b4f9b773Sbostic 	NODE **n_prevp;			/* pointer to previous node's n_next */
60b4f9b773Sbostic 	NODE *n_next;			/* next node in graph */
61be1888fcSbostic 	NODE **n_arcs;			/* array of arcs to other nodes */
62b4f9b773Sbostic 	int n_narcs;			/* number of arcs in n_arcs[] */
63b4f9b773Sbostic 	int n_arcsize;			/* size of n_arcs[] array */
64b4f9b773Sbostic 	int n_refcnt;			/* # of arcs pointing to this node */
65b4f9b773Sbostic 	int n_flags;			/* NF_* */
66be1888fcSbostic 	char n_name[1];			/* name of this node */
6711871153Sbill };
6811871153Sbill 
69b4f9b773Sbostic typedef struct _buf {
70b4f9b773Sbostic 	char *b_buf;
71b4f9b773Sbostic 	int b_bsize;
72b4f9b773Sbostic } BUF;
7311871153Sbill 
74be1888fcSbostic DB *db;
75bb52f206Sbostic NODE *graph, **cycle_buf, **longest_cycle;
76bb52f206Sbostic int debug, longest;
77b4f9b773Sbostic 
78ca9382c3Sbostic void	 add_arc __P((char *, char *));
79ca9382c3Sbostic int	 find_cycle __P((NODE *, NODE *, int, int));
80be1888fcSbostic NODE	*get_node __P((char *));
81ca9382c3Sbostic void	*grow_buf __P((void *, int));
82ca9382c3Sbostic void	 remove_node __P((NODE *));
83ca9382c3Sbostic void	 tsort __P((void));
84ca9382c3Sbostic void	 usage __P((void));
85ca9382c3Sbostic 
86ca9382c3Sbostic int
main(argc,argv)8711871153Sbill main(argc, argv)
88b4f9b773Sbostic 	int argc;
89ca9382c3Sbostic 	char *argv[];
9011871153Sbill {
91b4f9b773Sbostic 	register BUF *b;
92b4f9b773Sbostic 	register int c, n;
93b4f9b773Sbostic 	FILE *fp;
94ca9382c3Sbostic 	int bsize, ch, nused;
95b4f9b773Sbostic 	BUF bufs[2];
9611871153Sbill 
97bb52f206Sbostic 	while ((ch = getopt(argc, argv, "dl")) != EOF)
98ca9382c3Sbostic 		switch (ch) {
99bb52f206Sbostic 		case 'd':
100bb52f206Sbostic 			debug = 1;
101bb52f206Sbostic 			break;
102bb52f206Sbostic 		case 'l':
103bb52f206Sbostic 			longest = 1;
104bb52f206Sbostic 			break;
105ca9382c3Sbostic 		case '?':
106ca9382c3Sbostic 		default:
107ca9382c3Sbostic 			usage();
108ca9382c3Sbostic 		}
109ca9382c3Sbostic 	argc -= optind;
110ca9382c3Sbostic 	argv += optind;
111ca9382c3Sbostic 
112ca9382c3Sbostic 	switch (argc) {
113ca9382c3Sbostic 	case 0:
114b4f9b773Sbostic 		fp = stdin;
115ca9382c3Sbostic 		break;
116ca9382c3Sbostic 	case 1:
117ca9382c3Sbostic 		if ((fp = fopen(*argv, "r")) == NULL)
118bb52f206Sbostic 			err(1, "%s", *argv);
119ca9382c3Sbostic 		break;
120ca9382c3Sbostic 	default:
121ca9382c3Sbostic 		usage();
12211871153Sbill 	}
12311871153Sbill 
124b4f9b773Sbostic 	for (b = bufs, n = 2; --n >= 0; b++)
125ca9382c3Sbostic 		b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
12611871153Sbill 
127b4f9b773Sbostic 	/* parse input and build the graph */
128b4f9b773Sbostic 	for (n = 0, c = getc(fp);;) {
129b4f9b773Sbostic 		while (c != EOF && isspace(c))
130b4f9b773Sbostic 			c = getc(fp);
131b4f9b773Sbostic 		if (c == EOF)
13211871153Sbill 			break;
133b4f9b773Sbostic 
134b4f9b773Sbostic 		nused = 0;
135b4f9b773Sbostic 		b = &bufs[n];
136b4f9b773Sbostic 		bsize = b->b_bsize;
137b4f9b773Sbostic 		do {
138b4f9b773Sbostic 			b->b_buf[nused++] = c;
139ca9382c3Sbostic 			if (nused == bsize)
140ca9382c3Sbostic 				b->b_buf = grow_buf(b->b_buf, bsize *= 2);
141b4f9b773Sbostic 			c = getc(fp);
142b4f9b773Sbostic 		} while (c != EOF && !isspace(c));
143b4f9b773Sbostic 
144b4f9b773Sbostic 		b->b_buf[nused] = '\0';
145b4f9b773Sbostic 		b->b_bsize = bsize;
146b4f9b773Sbostic 		if (n)
147b4f9b773Sbostic 			add_arc(bufs[0].b_buf, bufs[1].b_buf);
148b4f9b773Sbostic 		n = !n;
149b4f9b773Sbostic 	}
150b4f9b773Sbostic 	(void)fclose(fp);
151ca9382c3Sbostic 	if (n)
152bb52f206Sbostic 		errx(1, "odd data count");
15311871153Sbill 
154b4f9b773Sbostic 	/* do the sort */
155b4f9b773Sbostic 	tsort();
156b4f9b773Sbostic 	exit(0);
157b4f9b773Sbostic }
158b4f9b773Sbostic 
159b4f9b773Sbostic /* double the size of oldbuf and return a pointer to the new buffer. */
160ca9382c3Sbostic void *
grow_buf(bp,size)161b4f9b773Sbostic grow_buf(bp, size)
162ca9382c3Sbostic 	void *bp;
163b4f9b773Sbostic 	int size;
16411871153Sbill {
165ca9382c3Sbostic 	if ((bp = realloc(bp, (u_int)size)) == NULL)
166bb52f206Sbostic 		err(1, NULL);
167b4f9b773Sbostic 	return (bp);
168b4f9b773Sbostic }
169b4f9b773Sbostic 
170b4f9b773Sbostic /*
171b4f9b773Sbostic  * add an arc from node s1 to node s2 in the graph.  If s1 or s2 are not in
172b4f9b773Sbostic  * the graph, then add them.
173b4f9b773Sbostic  */
174b4f9b773Sbostic void
add_arc(s1,s2)175b4f9b773Sbostic add_arc(s1, s2)
176b4f9b773Sbostic 	char *s1, *s2;
177b4f9b773Sbostic {
178b4f9b773Sbostic 	register NODE *n1;
179b4f9b773Sbostic 	NODE *n2;
180c5957fa4Smarc 	int bsize, i;
181b4f9b773Sbostic 
182be1888fcSbostic 	n1 = get_node(s1);
183b4f9b773Sbostic 
184b4f9b773Sbostic 	if (!strcmp(s1, s2))
185b4f9b773Sbostic 		return;
186b4f9b773Sbostic 
187be1888fcSbostic 	n2 = get_node(s2);
188b4f9b773Sbostic 
189b4f9b773Sbostic 	/*
190c5957fa4Smarc 	 * Check if this arc is already here.
191c5957fa4Smarc 	 */
192c5957fa4Smarc 	for (i = 0; i < n1->n_narcs; i++)
193c5957fa4Smarc 		if (n1->n_arcs[i] == n2)
194c5957fa4Smarc 			return;
195c5957fa4Smarc 	/*
196c5957fa4Smarc 	 * Add it.
197b4f9b773Sbostic 	 */
198b4f9b773Sbostic 	if (n1->n_narcs == n1->n_arcsize) {
199b4f9b773Sbostic 		if (!n1->n_arcsize)
200b4f9b773Sbostic 			n1->n_arcsize = 10;
201b4f9b773Sbostic 		bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
202ca9382c3Sbostic 		n1->n_arcs = grow_buf(n1->n_arcs, bsize);
203b4f9b773Sbostic 		n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
204b4f9b773Sbostic 	}
205b4f9b773Sbostic 	n1->n_arcs[n1->n_narcs++] = n2;
206b4f9b773Sbostic 	++n2->n_refcnt;
207b4f9b773Sbostic }
208b4f9b773Sbostic 
209be1888fcSbostic /* Find a node in the graph (insert if not found) and return a pointer to it. */
210b4f9b773Sbostic NODE *
get_node(name)211be1888fcSbostic get_node(name)
212b4f9b773Sbostic 	char *name;
213b4f9b773Sbostic {
214be1888fcSbostic 	DBT data, key;
215be1888fcSbostic 	NODE *n;
216b4f9b773Sbostic 
217be1888fcSbostic 	if (db == NULL &&
218be1888fcSbostic 	    (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)
219bb52f206Sbostic 		err(1, "db: %s", name);
220be1888fcSbostic 
221be1888fcSbostic 	key.data = name;
222be1888fcSbostic 	key.size = strlen(name) + 1;
223be1888fcSbostic 
224be1888fcSbostic 	switch ((*db->get)(db, &key, &data, 0)) {
225be1888fcSbostic 	case 0:
226a5cd5de9Sbostic 		bcopy(data.data, &n, sizeof(n));
227a5cd5de9Sbostic 		return (n);
228be1888fcSbostic 	case 1:
229be1888fcSbostic 		break;
230be1888fcSbostic 	default:
231be1888fcSbostic 	case -1:
232bb52f206Sbostic 		err(1, "db: %s", name);
233b4f9b773Sbostic 	}
234b4f9b773Sbostic 
235be1888fcSbostic 	if ((n = malloc(sizeof(NODE) + key.size)) == NULL)
236bb52f206Sbostic 		err(1, NULL);
237b4f9b773Sbostic 
238b4f9b773Sbostic 	n->n_narcs = 0;
239b4f9b773Sbostic 	n->n_arcsize = 0;
240ca9382c3Sbostic 	n->n_arcs = NULL;
241b4f9b773Sbostic 	n->n_refcnt = 0;
242b4f9b773Sbostic 	n->n_flags = 0;
243be1888fcSbostic 	bcopy(name, n->n_name, key.size);
244b4f9b773Sbostic 
245be1888fcSbostic 	/* Add to linked list. */
246bb52f206Sbostic 	if ((n->n_next = graph) != NULL)
247b4f9b773Sbostic 		graph->n_prevp = &n->n_next;
248b4f9b773Sbostic 	n->n_prevp = &graph;
249b4f9b773Sbostic 	graph = n;
250b4f9b773Sbostic 
251be1888fcSbostic 	/* Add to hash table. */
252be1888fcSbostic 	data.data = &n;
253be1888fcSbostic 	data.size = sizeof(n);
254be1888fcSbostic 	if ((*db->put)(db, &key, &data, 0))
255bb52f206Sbostic 		err(1, "db: %s", name);
256b4f9b773Sbostic 	return (n);
257b4f9b773Sbostic }
258b4f9b773Sbostic 
259bb52f206Sbostic 
260bb52f206Sbostic /*
261bb52f206Sbostic  * Clear the NODEST flag from all nodes.
262bb52f206Sbostic  */
263bb52f206Sbostic void
clear_cycle()264bb52f206Sbostic clear_cycle()
265bb52f206Sbostic {
266bb52f206Sbostic 	NODE *n;
267bb52f206Sbostic 
268bb52f206Sbostic 	for (n = graph; n != NULL; n = n->n_next)
269bb52f206Sbostic 		n->n_flags &= ~NF_NODEST;
270bb52f206Sbostic }
271bb52f206Sbostic 
272b4f9b773Sbostic /* do topological sort on graph */
273b4f9b773Sbostic void
tsort()274b4f9b773Sbostic tsort()
275b4f9b773Sbostic {
276b4f9b773Sbostic 	register NODE *n, *next;
277bb52f206Sbostic 	register int cnt, i;
278b4f9b773Sbostic 
279bb52f206Sbostic 	while (graph != NULL) {
280b4f9b773Sbostic 		/*
281481152d7Sbostic 		 * Keep getting rid of simple cases until there are none left,
282b4f9b773Sbostic 		 * if there are any nodes still in the graph, then there is
283b4f9b773Sbostic 		 * a cycle in it.
284b4f9b773Sbostic 		 */
285b4f9b773Sbostic 		do {
286bb52f206Sbostic 			for (cnt = 0, n = graph; n != NULL; n = next) {
287b4f9b773Sbostic 				next = n->n_next;
288b4f9b773Sbostic 				if (n->n_refcnt == 0) {
289b4f9b773Sbostic 					remove_node(n);
290b4f9b773Sbostic 					++cnt;
29111871153Sbill 				}
29211871153Sbill 			}
293bb52f206Sbostic 		} while (graph != NULL && cnt);
294b4f9b773Sbostic 
295bb52f206Sbostic 		if (graph == NULL)
296b4f9b773Sbostic 			break;
297b4f9b773Sbostic 
298b4f9b773Sbostic 		if (!cycle_buf) {
299b4f9b773Sbostic 			/*
300c5957fa4Smarc 			 * Allocate space for two cycle logs - one to be used
301b4f9b773Sbostic 			 * as scratch space, the other to save the longest
302b4f9b773Sbostic 			 * cycle.
303b4f9b773Sbostic 			 */
304bb52f206Sbostic 			for (cnt = 0, n = graph; n != NULL; n = n->n_next)
305b4f9b773Sbostic 				++cnt;
306481152d7Sbostic 			cycle_buf = malloc((u_int)sizeof(NODE *) * cnt);
307481152d7Sbostic 			longest_cycle = malloc((u_int)sizeof(NODE *) * cnt);
308481152d7Sbostic 			if (cycle_buf == NULL || longest_cycle == NULL)
309bb52f206Sbostic 				err(1, NULL);
310b4f9b773Sbostic 		}
311bb52f206Sbostic 		for (n = graph; n != NULL; n = n->n_next)
312bb52f206Sbostic 			if (!(n->n_flags & NF_ACYCLIC))
313b4f9b773Sbostic 				if (cnt = find_cycle(n, n, 0, 0)) {
314bb52f206Sbostic 					warnx("cycle in data");
315b4f9b773Sbostic 					for (i = 0; i < cnt; i++)
316bb52f206Sbostic 						warnx("%s",
317bb52f206Sbostic 						    longest_cycle[i]->n_name);
318b4f9b773Sbostic 					remove_node(n);
319bb52f206Sbostic 					clear_cycle();
320b4f9b773Sbostic 					break;
321bb52f206Sbostic 				} else {
322b4f9b773Sbostic 					/* to avoid further checks */
323bb52f206Sbostic 					n->n_flags  |= NF_ACYCLIC;
324bb52f206Sbostic 					clear_cycle();
325b4f9b773Sbostic 				}
326b4f9b773Sbostic 
327bb52f206Sbostic 		if (n == NULL)
328bb52f206Sbostic 			errx(1, "internal error -- could not find cycle");
329b4f9b773Sbostic 	}
330b4f9b773Sbostic }
331b4f9b773Sbostic 
332b4f9b773Sbostic /* print node and remove from graph (does not actually free node) */
333b4f9b773Sbostic void
remove_node(n)334b4f9b773Sbostic remove_node(n)
335b4f9b773Sbostic 	register NODE *n;
336b4f9b773Sbostic {
337b4f9b773Sbostic 	register NODE **np;
338b4f9b773Sbostic 	register int i;
339b4f9b773Sbostic 
340b4f9b773Sbostic 	(void)printf("%s\n", n->n_name);
341b4f9b773Sbostic 	for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++)
342b4f9b773Sbostic 		--(*np)->n_refcnt;
343b4f9b773Sbostic 	n->n_narcs = 0;
344b4f9b773Sbostic 	*n->n_prevp = n->n_next;
345b4f9b773Sbostic 	if (n->n_next)
346b4f9b773Sbostic 		n->n_next->n_prevp = n->n_prevp;
347b4f9b773Sbostic }
348b4f9b773Sbostic 
349bb52f206Sbostic 
350bb52f206Sbostic /* look for the longest? cycle from node from to node to. */
351ca9382c3Sbostic int
find_cycle(from,to,longest_len,depth)352b4f9b773Sbostic find_cycle(from, to, longest_len, depth)
353b4f9b773Sbostic 	NODE *from, *to;
354b4f9b773Sbostic 	int depth, longest_len;
355b4f9b773Sbostic {
356b4f9b773Sbostic 	register NODE **np;
357b4f9b773Sbostic 	register int i, len;
358b4f9b773Sbostic 
359b4f9b773Sbostic 	/*
360b4f9b773Sbostic 	 * avoid infinite loops and ignore portions of the graph known
361b4f9b773Sbostic 	 * to be acyclic
362b4f9b773Sbostic 	 */
363bb52f206Sbostic 	if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
364b4f9b773Sbostic 		return (0);
365bb52f206Sbostic 	from->n_flags |= NF_MARK;
366b4f9b773Sbostic 
367b4f9b773Sbostic 	for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
368b4f9b773Sbostic 		cycle_buf[depth] = *np;
369b4f9b773Sbostic 		if (*np == to) {
370b4f9b773Sbostic 			if (depth + 1 > longest_len) {
371b4f9b773Sbostic 				longest_len = depth + 1;
372b4f9b773Sbostic 				(void)memcpy((char *)longest_cycle,
373b4f9b773Sbostic 				    (char *)cycle_buf,
374b4f9b773Sbostic 				    longest_len * sizeof(NODE *));
375b4f9b773Sbostic 			}
376b4f9b773Sbostic 		} else {
377bb52f206Sbostic 			if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
378bb52f206Sbostic 				continue;
379b4f9b773Sbostic 			len = find_cycle(*np, to, longest_len, depth + 1);
380bb52f206Sbostic 
381bb52f206Sbostic 			if (debug)
382bb52f206Sbostic 				(void)printf("%*s %s->%s %d\n", depth, "",
383bb52f206Sbostic 				    from->n_name, to->n_name, len);
384bb52f206Sbostic 
385bb52f206Sbostic 			if (len == 0)
386bb52f206Sbostic 				(*np)->n_flags |= NF_NODEST;
387bb52f206Sbostic 
388b4f9b773Sbostic 			if (len > longest_len)
389b4f9b773Sbostic 				longest_len = len;
390bb52f206Sbostic 
391bb52f206Sbostic 			if (len > 0 && !longest)
392bb52f206Sbostic 				break;
393b4f9b773Sbostic 		}
394b4f9b773Sbostic 	}
395b4f9b773Sbostic 	from->n_flags &= ~NF_MARK;
396b4f9b773Sbostic 	return (longest_len);
397b4f9b773Sbostic }
398b4f9b773Sbostic 
399b4f9b773Sbostic void
usage()400ca9382c3Sbostic usage()
401b4f9b773Sbostic {
402bb52f206Sbostic 	(void)fprintf(stderr, "usage: tsort [-l] [file]\n");
403b4f9b773Sbostic 	exit(1);
40411871153Sbill }
405