xref: /original-bsd/usr.bin/tsort/tsort.c (revision e59fb703)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Michael Rendell of Memorial University of Newfoundland.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 char copyright[] =
13 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\
14  All rights reserved.\n";
15 #endif /* not lint */
16 
17 #ifndef lint
18 static char sccsid[] = "@(#)tsort.c	5.3 (Berkeley) 06/01/90";
19 #endif /* not lint */
20 
21 #include <sys/types.h>
22 #include <errno.h>
23 #include <stdio.h>
24 #include <ctype.h>
25 #include <string.h>
26 
27 /*
28  *  Topological sort.  Input is a list of pairs of strings seperated by
29  *  white space (spaces, tabs, and/or newlines); strings are written to
30  *  standard output in sorted order, one per line.
31  *
32  *  usage:
33  *     tsort [inputfile]
34  *  If no input file is specified, standard input is read.
35  *
36  *  Should be compatable with AT&T tsort HOWEVER the output is not identical
37  *  (i.e. for most graphs there is more than one sorted order, and this tsort
38  *  usually generates a different one then the AT&T tsort).  Also, cycle
39  *  reporting seems to be more accurate in this version (the AT&T tsort
40  *  sometimes says a node is in a cycle when it isn't).
41  *
42  *  Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90
43  */
44 #define	HASHSIZE	53		/* doesn't need to be big */
45 #define	NF_MARK		0x1		/* marker for cycle detection */
46 #define	NF_ACYCLIC	0x2		/* this node is cycle free */
47 
48 typedef struct node_str NODE;
49 
50 struct node_str {
51 	char *n_name;			/* name of this node */
52 	NODE **n_prevp;			/* pointer to previous node's n_next */
53 	NODE *n_next;			/* next node in graph */
54 	NODE *n_hash;			/* next node in hash table */
55 	int n_narcs;			/* number of arcs in n_arcs[] */
56 	int n_arcsize;			/* size of n_arcs[] array */
57 	NODE **n_arcs;			/* array of arcs to other nodes */
58 	int n_refcnt;			/* # of arcs pointing to this node */
59 	int n_flags;			/* NF_* */
60 };
61 
62 typedef struct _buf {
63 	char *b_buf;
64 	int b_bsize;
65 } BUF;
66 
67 NODE *add_node(), *find_node();
68 void add_arc(), no_memory(), remove_node(), tsort();
69 char *grow_buf(), *malloc();
70 
71 extern int errno;
72 NODE *graph;
73 NODE *hashtable[HASHSIZE];
74 NODE **cycle_buf;
75 NODE **longest_cycle;
76 
77 main(argc, argv)
78 	int argc;
79 	char **argv;
80 {
81 	register BUF *b;
82 	register int c, n;
83 	FILE *fp;
84 	int bsize, nused;
85 	BUF bufs[2];
86 
87 	if (argc < 2)
88 		fp = stdin;
89 	else if (argc == 2) {
90 		(void)fprintf(stderr, "usage: tsort [ inputfile ]\n");
91 		exit(1);
92 	} else if (!(fp = fopen(argv[1], "r"))) {
93 		(void)fprintf(stderr, "tsort: %s.\n", strerror(errno));
94 		exit(1);
95 	}
96 
97 	for (b = bufs, n = 2; --n >= 0; b++)
98 		b->b_buf = grow_buf((char *)NULL, b->b_bsize = 1024);
99 
100 	/* parse input and build the graph */
101 	for (n = 0, c = getc(fp);;) {
102 		while (c != EOF && isspace(c))
103 			c = getc(fp);
104 		if (c == EOF)
105 			break;
106 
107 		nused = 0;
108 		b = &bufs[n];
109 		bsize = b->b_bsize;
110 		do {
111 			b->b_buf[nused++] = c;
112 			if (nused == bsize) {
113 				bsize *= 2;
114 				b->b_buf = grow_buf(b->b_buf, bsize);
115 			}
116 			c = getc(fp);
117 		} while (c != EOF && !isspace(c));
118 
119 		b->b_buf[nused] = '\0';
120 		b->b_bsize = bsize;
121 		if (n)
122 			add_arc(bufs[0].b_buf, bufs[1].b_buf);
123 		n = !n;
124 	}
125 	(void)fclose(fp);
126 	if (n) {
127 		(void)fprintf(stderr, "tsort: odd data count.\n");
128 		exit(1);
129 	}
130 
131 	/* do the sort */
132 	tsort();
133 	exit(0);
134 }
135 
136 /* double the size of oldbuf and return a pointer to the new buffer. */
137 char *
138 grow_buf(bp, size)
139 	char *bp;
140 	int size;
141 {
142 	char *realloc();
143 
144 	if (!(bp = realloc(bp, (u_int)size)))
145 		no_memory();
146 	return(bp);
147 }
148 
149 /*
150  * add an arc from node s1 to node s2 in the graph.  If s1 or s2 are not in
151  * the graph, then add them.
152  */
153 void
154 add_arc(s1, s2)
155 	char *s1, *s2;
156 {
157 	register NODE *n1;
158 	NODE *n2;
159 	int bsize;
160 
161 	n1 = find_node(s1);
162 	if (!n1)
163 		n1 = add_node(s1);
164 
165 	if (!strcmp(s1, s2))
166 		return;
167 
168 	n2 = find_node(s2);
169 	if (!n2)
170 		n2 = add_node(s2);
171 
172 	/*
173 	 * could check to see if this arc is here already, but it isn't
174 	 * worth the bother -- there usually isn't and it doesn't hurt if
175 	 * there is (I think :-).
176 	 */
177 	if (n1->n_narcs == n1->n_arcsize) {
178 		if (!n1->n_arcsize)
179 			n1->n_arcsize = 10;
180 		bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
181 		n1->n_arcs = (NODE **)grow_buf((char *)n1->n_arcs, bsize);
182 		n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
183 	}
184 	n1->n_arcs[n1->n_narcs++] = n2;
185 	++n2->n_refcnt;
186 }
187 
188 hash_string(s)
189 	char *s;
190 {
191 	register int hash, i;
192 
193 	for (hash = 0, i = 1; *s; s++, i++)
194 		hash += *s * i;
195 	return(hash % HASHSIZE);
196 }
197 
198 /*
199  * find a node in the graph and return a pointer to it - returns null if not
200  * found.
201  */
202 NODE *
203 find_node(name)
204 	char *name;
205 {
206 	register NODE *n;
207 
208 	for (n = hashtable[hash_string(name)]; n; n = n->n_hash)
209 		if (!strcmp(n->n_name, name))
210 			return(n);
211 	return((NODE *)NULL);
212 }
213 
214 /* Add a node to the graph and return a pointer to it. */
215 NODE *
216 add_node(name)
217 	char *name;
218 {
219 	register NODE *n;
220 	int hash;
221 
222 	if (!(n = (NODE *)malloc(sizeof(NODE))) || !(n->n_name = strdup(name)))
223 		no_memory();
224 
225 	n->n_narcs = 0;
226 	n->n_arcsize = 0;
227 	n->n_arcs = (NODE **)NULL;
228 	n->n_refcnt = 0;
229 	n->n_flags = 0;
230 
231 	/* add to linked list */
232 	if (n->n_next = graph)
233 		graph->n_prevp = &n->n_next;
234 	n->n_prevp = &graph;
235 	graph = n;
236 
237 	/* add to hash table */
238 	hash = hash_string(name);
239 	n->n_hash = hashtable[hash];
240 	hashtable[hash] = n;
241 	return(n);
242 }
243 
244 /* do topological sort on graph */
245 void
246 tsort()
247 {
248 	register NODE *n, *next;
249 	register int cnt;
250 
251 	while (graph) {
252 		/*
253 		 * keep getting rid of simple cases until there are none left,
254 		 * if there are any nodes still in the graph, then there is
255 		 * a cycle in it.
256 		 */
257 		do {
258 			for (cnt = 0, n = graph; n; n = next) {
259 				next = n->n_next;
260 				if (n->n_refcnt == 0) {
261 					remove_node(n);
262 					++cnt;
263 				}
264 			}
265 		} while (graph && cnt);
266 
267 		if (!graph)
268 			break;
269 
270 		if (!cycle_buf) {
271 			/*
272 			 * allocate space for two cycle logs - one to be used
273 			 * as scratch space, the other to save the longest
274 			 * cycle.
275 			 */
276 			for (cnt = 0, n = graph; n; n = n->n_next)
277 				++cnt;
278 			cycle_buf =
279 			    (NODE **)malloc((u_int)sizeof(NODE *) * cnt);
280 			longest_cycle =
281 			    (NODE **)malloc((u_int)sizeof(NODE *) * cnt);
282 			if (!cycle_buf || !longest_cycle)
283 				no_memory();
284 		}
285 		for (n = graph; n; n = n->n_next)
286 			if (!(n->n_flags & NF_ACYCLIC)) {
287 				if (cnt = find_cycle(n, n, 0, 0)) {
288 					register int i;
289 
290 					(void)fprintf(stderr,
291 					    "tsort: cycle in data.\n");
292 					for (i = 0; i < cnt; i++)
293 						(void)fprintf(stderr,
294 				"tsort: %s.\n", longest_cycle[i]->n_name);
295 					remove_node(n);
296 					break;
297 				} else
298 					/* to avoid further checks */
299 					n->n_flags  = NF_ACYCLIC;
300 			}
301 
302 		if (!n) {
303 			(void)fprintf(stderr,
304 			    "tsort: internal error -- could not find cycle.\n");
305 			exit(1);
306 		}
307 	}
308 }
309 
310 /* print node and remove from graph (does not actually free node) */
311 void
312 remove_node(n)
313 	register NODE *n;
314 {
315 	register NODE **np;
316 	register int i;
317 
318 	(void)printf("%s\n", n->n_name);
319 	for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++)
320 		--(*np)->n_refcnt;
321 	n->n_narcs = 0;
322 	*n->n_prevp = n->n_next;
323 	if (n->n_next)
324 		n->n_next->n_prevp = n->n_prevp;
325 }
326 
327 /* look for the longest cycle from node from to node to. */
328 find_cycle(from, to, longest_len, depth)
329 	NODE *from, *to;
330 	int depth, longest_len;
331 {
332 	register NODE **np;
333 	register int i, len;
334 
335 	/*
336 	 * avoid infinite loops and ignore portions of the graph known
337 	 * to be acyclic
338 	 */
339 	if (from->n_flags & (NF_MARK|NF_ACYCLIC))
340 		return(0);
341 	from->n_flags = NF_MARK;
342 
343 	for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
344 		cycle_buf[depth] = *np;
345 		if (*np == to) {
346 			if (depth + 1 > longest_len) {
347 				longest_len = depth + 1;
348 				(void)memcpy((char *)longest_cycle,
349 				    (char *)cycle_buf,
350 				    longest_len * sizeof(NODE *));
351 			}
352 		} else {
353 			len = find_cycle(*np, to, longest_len, depth + 1);
354 			if (len > longest_len)
355 				longest_len = len;
356 		}
357 	}
358 	from->n_flags &= ~NF_MARK;
359 	return(longest_len);
360 }
361 
362 void
363 no_memory()
364 {
365 	(void)fprintf(stderr, "tsort: %s.\n", strerror(ENOMEM));
366 	exit(1);
367 }
368