1b4f9b773Sbostic /*
2bb52f206Sbostic * Copyright (c) 1989, 1993, 1994
34dd38c1dSbostic * The Regents of the University of California. All rights reserved.
4b4f9b773Sbostic *
5b4f9b773Sbostic * This code is derived from software contributed to Berkeley by
6b4f9b773Sbostic * Michael Rendell of Memorial University of Newfoundland.
7b4f9b773Sbostic *
8f5eef415Sbostic * %sccs.include.redist.c%
911871153Sbill */
10b4f9b773Sbostic
11b4f9b773Sbostic #ifndef lint
124dd38c1dSbostic static char copyright[] =
13bb52f206Sbostic "@(#) Copyright (c) 1989, 1993, 1994\n\
144dd38c1dSbostic The Regents of the University of California. All rights reserved.\n";
15b4f9b773Sbostic #endif /* not lint */
16b4f9b773Sbostic
17b4f9b773Sbostic #ifndef lint
18*784dc2a7Sbostic static char sccsid[] = "@(#)tsort.c 8.3 (Berkeley) 05/04/95";
19b4f9b773Sbostic #endif /* not lint */
20b4f9b773Sbostic
21b4f9b773Sbostic #include <sys/types.h>
22bb52f206Sbostic
23bb52f206Sbostic #include <ctype.h>
24bb52f206Sbostic #include <db.h>
25bb52f206Sbostic #include <err.h>
26b4f9b773Sbostic #include <errno.h>
27be1888fcSbostic #include <fcntl.h>
28625352d0Smckusick #include <stdio.h>
29ca9382c3Sbostic #include <stdlib.h>
302f15e4cdSbostic #include <string.h>
31*784dc2a7Sbostic #include <unistd.h>
3211871153Sbill
33b4f9b773Sbostic /*
34481152d7Sbostic * Topological sort. Input is a list of pairs of strings separated by
35b4f9b773Sbostic * white space (spaces, tabs, and/or newlines); strings are written to
36b4f9b773Sbostic * standard output in sorted order, one per line.
37b4f9b773Sbostic *
38b4f9b773Sbostic * usage:
39bb52f206Sbostic * tsort [-l] [inputfile]
40b4f9b773Sbostic * If no input file is specified, standard input is read.
41b4f9b773Sbostic *
42b4f9b773Sbostic * Should be compatable with AT&T tsort HOWEVER the output is not identical
43b4f9b773Sbostic * (i.e. for most graphs there is more than one sorted order, and this tsort
44b4f9b773Sbostic * usually generates a different one then the AT&T tsort). Also, cycle
45b4f9b773Sbostic * reporting seems to be more accurate in this version (the AT&T tsort
46b4f9b773Sbostic * sometimes says a node is in a cycle when it isn't).
47b4f9b773Sbostic *
48b4f9b773Sbostic * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90
4911871153Sbill */
50b4f9b773Sbostic #define HASHSIZE 53 /* doesn't need to be big */
51b4f9b773Sbostic #define NF_MARK 0x1 /* marker for cycle detection */
52b4f9b773Sbostic #define NF_ACYCLIC 0x2 /* this node is cycle free */
53bb52f206Sbostic #define NF_NODEST 0x4 /* Unreachable */
54bb52f206Sbostic
55b4f9b773Sbostic
56b4f9b773Sbostic typedef struct node_str NODE;
57b4f9b773Sbostic
58b4f9b773Sbostic struct node_str {
59b4f9b773Sbostic NODE **n_prevp; /* pointer to previous node's n_next */
60b4f9b773Sbostic NODE *n_next; /* next node in graph */
61be1888fcSbostic NODE **n_arcs; /* array of arcs to other nodes */
62b4f9b773Sbostic int n_narcs; /* number of arcs in n_arcs[] */
63b4f9b773Sbostic int n_arcsize; /* size of n_arcs[] array */
64b4f9b773Sbostic int n_refcnt; /* # of arcs pointing to this node */
65b4f9b773Sbostic int n_flags; /* NF_* */
66be1888fcSbostic char n_name[1]; /* name of this node */
6711871153Sbill };
6811871153Sbill
69b4f9b773Sbostic typedef struct _buf {
70b4f9b773Sbostic char *b_buf;
71b4f9b773Sbostic int b_bsize;
72b4f9b773Sbostic } BUF;
7311871153Sbill
74be1888fcSbostic DB *db;
75bb52f206Sbostic NODE *graph, **cycle_buf, **longest_cycle;
76bb52f206Sbostic int debug, longest;
77b4f9b773Sbostic
78ca9382c3Sbostic void add_arc __P((char *, char *));
79ca9382c3Sbostic int find_cycle __P((NODE *, NODE *, int, int));
80be1888fcSbostic NODE *get_node __P((char *));
81ca9382c3Sbostic void *grow_buf __P((void *, int));
82ca9382c3Sbostic void remove_node __P((NODE *));
83ca9382c3Sbostic void tsort __P((void));
84ca9382c3Sbostic void usage __P((void));
85ca9382c3Sbostic
86ca9382c3Sbostic int
main(argc,argv)8711871153Sbill main(argc, argv)
88b4f9b773Sbostic int argc;
89ca9382c3Sbostic char *argv[];
9011871153Sbill {
91b4f9b773Sbostic register BUF *b;
92b4f9b773Sbostic register int c, n;
93b4f9b773Sbostic FILE *fp;
94ca9382c3Sbostic int bsize, ch, nused;
95b4f9b773Sbostic BUF bufs[2];
9611871153Sbill
97bb52f206Sbostic while ((ch = getopt(argc, argv, "dl")) != EOF)
98ca9382c3Sbostic switch (ch) {
99bb52f206Sbostic case 'd':
100bb52f206Sbostic debug = 1;
101bb52f206Sbostic break;
102bb52f206Sbostic case 'l':
103bb52f206Sbostic longest = 1;
104bb52f206Sbostic break;
105ca9382c3Sbostic case '?':
106ca9382c3Sbostic default:
107ca9382c3Sbostic usage();
108ca9382c3Sbostic }
109ca9382c3Sbostic argc -= optind;
110ca9382c3Sbostic argv += optind;
111ca9382c3Sbostic
112ca9382c3Sbostic switch (argc) {
113ca9382c3Sbostic case 0:
114b4f9b773Sbostic fp = stdin;
115ca9382c3Sbostic break;
116ca9382c3Sbostic case 1:
117ca9382c3Sbostic if ((fp = fopen(*argv, "r")) == NULL)
118bb52f206Sbostic err(1, "%s", *argv);
119ca9382c3Sbostic break;
120ca9382c3Sbostic default:
121ca9382c3Sbostic usage();
12211871153Sbill }
12311871153Sbill
124b4f9b773Sbostic for (b = bufs, n = 2; --n >= 0; b++)
125ca9382c3Sbostic b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
12611871153Sbill
127b4f9b773Sbostic /* parse input and build the graph */
128b4f9b773Sbostic for (n = 0, c = getc(fp);;) {
129b4f9b773Sbostic while (c != EOF && isspace(c))
130b4f9b773Sbostic c = getc(fp);
131b4f9b773Sbostic if (c == EOF)
13211871153Sbill break;
133b4f9b773Sbostic
134b4f9b773Sbostic nused = 0;
135b4f9b773Sbostic b = &bufs[n];
136b4f9b773Sbostic bsize = b->b_bsize;
137b4f9b773Sbostic do {
138b4f9b773Sbostic b->b_buf[nused++] = c;
139ca9382c3Sbostic if (nused == bsize)
140ca9382c3Sbostic b->b_buf = grow_buf(b->b_buf, bsize *= 2);
141b4f9b773Sbostic c = getc(fp);
142b4f9b773Sbostic } while (c != EOF && !isspace(c));
143b4f9b773Sbostic
144b4f9b773Sbostic b->b_buf[nused] = '\0';
145b4f9b773Sbostic b->b_bsize = bsize;
146b4f9b773Sbostic if (n)
147b4f9b773Sbostic add_arc(bufs[0].b_buf, bufs[1].b_buf);
148b4f9b773Sbostic n = !n;
149b4f9b773Sbostic }
150b4f9b773Sbostic (void)fclose(fp);
151ca9382c3Sbostic if (n)
152bb52f206Sbostic errx(1, "odd data count");
15311871153Sbill
154b4f9b773Sbostic /* do the sort */
155b4f9b773Sbostic tsort();
156b4f9b773Sbostic exit(0);
157b4f9b773Sbostic }
158b4f9b773Sbostic
159b4f9b773Sbostic /* double the size of oldbuf and return a pointer to the new buffer. */
160ca9382c3Sbostic void *
grow_buf(bp,size)161b4f9b773Sbostic grow_buf(bp, size)
162ca9382c3Sbostic void *bp;
163b4f9b773Sbostic int size;
16411871153Sbill {
165ca9382c3Sbostic if ((bp = realloc(bp, (u_int)size)) == NULL)
166bb52f206Sbostic err(1, NULL);
167b4f9b773Sbostic return (bp);
168b4f9b773Sbostic }
169b4f9b773Sbostic
170b4f9b773Sbostic /*
171b4f9b773Sbostic * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in
172b4f9b773Sbostic * the graph, then add them.
173b4f9b773Sbostic */
174b4f9b773Sbostic void
add_arc(s1,s2)175b4f9b773Sbostic add_arc(s1, s2)
176b4f9b773Sbostic char *s1, *s2;
177b4f9b773Sbostic {
178b4f9b773Sbostic register NODE *n1;
179b4f9b773Sbostic NODE *n2;
180c5957fa4Smarc int bsize, i;
181b4f9b773Sbostic
182be1888fcSbostic n1 = get_node(s1);
183b4f9b773Sbostic
184b4f9b773Sbostic if (!strcmp(s1, s2))
185b4f9b773Sbostic return;
186b4f9b773Sbostic
187be1888fcSbostic n2 = get_node(s2);
188b4f9b773Sbostic
189b4f9b773Sbostic /*
190c5957fa4Smarc * Check if this arc is already here.
191c5957fa4Smarc */
192c5957fa4Smarc for (i = 0; i < n1->n_narcs; i++)
193c5957fa4Smarc if (n1->n_arcs[i] == n2)
194c5957fa4Smarc return;
195c5957fa4Smarc /*
196c5957fa4Smarc * Add it.
197b4f9b773Sbostic */
198b4f9b773Sbostic if (n1->n_narcs == n1->n_arcsize) {
199b4f9b773Sbostic if (!n1->n_arcsize)
200b4f9b773Sbostic n1->n_arcsize = 10;
201b4f9b773Sbostic bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
202ca9382c3Sbostic n1->n_arcs = grow_buf(n1->n_arcs, bsize);
203b4f9b773Sbostic n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
204b4f9b773Sbostic }
205b4f9b773Sbostic n1->n_arcs[n1->n_narcs++] = n2;
206b4f9b773Sbostic ++n2->n_refcnt;
207b4f9b773Sbostic }
208b4f9b773Sbostic
209be1888fcSbostic /* Find a node in the graph (insert if not found) and return a pointer to it. */
210b4f9b773Sbostic NODE *
get_node(name)211be1888fcSbostic get_node(name)
212b4f9b773Sbostic char *name;
213b4f9b773Sbostic {
214be1888fcSbostic DBT data, key;
215be1888fcSbostic NODE *n;
216b4f9b773Sbostic
217be1888fcSbostic if (db == NULL &&
218be1888fcSbostic (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)
219bb52f206Sbostic err(1, "db: %s", name);
220be1888fcSbostic
221be1888fcSbostic key.data = name;
222be1888fcSbostic key.size = strlen(name) + 1;
223be1888fcSbostic
224be1888fcSbostic switch ((*db->get)(db, &key, &data, 0)) {
225be1888fcSbostic case 0:
226a5cd5de9Sbostic bcopy(data.data, &n, sizeof(n));
227a5cd5de9Sbostic return (n);
228be1888fcSbostic case 1:
229be1888fcSbostic break;
230be1888fcSbostic default:
231be1888fcSbostic case -1:
232bb52f206Sbostic err(1, "db: %s", name);
233b4f9b773Sbostic }
234b4f9b773Sbostic
235be1888fcSbostic if ((n = malloc(sizeof(NODE) + key.size)) == NULL)
236bb52f206Sbostic err(1, NULL);
237b4f9b773Sbostic
238b4f9b773Sbostic n->n_narcs = 0;
239b4f9b773Sbostic n->n_arcsize = 0;
240ca9382c3Sbostic n->n_arcs = NULL;
241b4f9b773Sbostic n->n_refcnt = 0;
242b4f9b773Sbostic n->n_flags = 0;
243be1888fcSbostic bcopy(name, n->n_name, key.size);
244b4f9b773Sbostic
245be1888fcSbostic /* Add to linked list. */
246bb52f206Sbostic if ((n->n_next = graph) != NULL)
247b4f9b773Sbostic graph->n_prevp = &n->n_next;
248b4f9b773Sbostic n->n_prevp = &graph;
249b4f9b773Sbostic graph = n;
250b4f9b773Sbostic
251be1888fcSbostic /* Add to hash table. */
252be1888fcSbostic data.data = &n;
253be1888fcSbostic data.size = sizeof(n);
254be1888fcSbostic if ((*db->put)(db, &key, &data, 0))
255bb52f206Sbostic err(1, "db: %s", name);
256b4f9b773Sbostic return (n);
257b4f9b773Sbostic }
258b4f9b773Sbostic
259bb52f206Sbostic
260bb52f206Sbostic /*
261bb52f206Sbostic * Clear the NODEST flag from all nodes.
262bb52f206Sbostic */
263bb52f206Sbostic void
clear_cycle()264bb52f206Sbostic clear_cycle()
265bb52f206Sbostic {
266bb52f206Sbostic NODE *n;
267bb52f206Sbostic
268bb52f206Sbostic for (n = graph; n != NULL; n = n->n_next)
269bb52f206Sbostic n->n_flags &= ~NF_NODEST;
270bb52f206Sbostic }
271bb52f206Sbostic
272b4f9b773Sbostic /* do topological sort on graph */
273b4f9b773Sbostic void
tsort()274b4f9b773Sbostic tsort()
275b4f9b773Sbostic {
276b4f9b773Sbostic register NODE *n, *next;
277bb52f206Sbostic register int cnt, i;
278b4f9b773Sbostic
279bb52f206Sbostic while (graph != NULL) {
280b4f9b773Sbostic /*
281481152d7Sbostic * Keep getting rid of simple cases until there are none left,
282b4f9b773Sbostic * if there are any nodes still in the graph, then there is
283b4f9b773Sbostic * a cycle in it.
284b4f9b773Sbostic */
285b4f9b773Sbostic do {
286bb52f206Sbostic for (cnt = 0, n = graph; n != NULL; n = next) {
287b4f9b773Sbostic next = n->n_next;
288b4f9b773Sbostic if (n->n_refcnt == 0) {
289b4f9b773Sbostic remove_node(n);
290b4f9b773Sbostic ++cnt;
29111871153Sbill }
29211871153Sbill }
293bb52f206Sbostic } while (graph != NULL && cnt);
294b4f9b773Sbostic
295bb52f206Sbostic if (graph == NULL)
296b4f9b773Sbostic break;
297b4f9b773Sbostic
298b4f9b773Sbostic if (!cycle_buf) {
299b4f9b773Sbostic /*
300c5957fa4Smarc * Allocate space for two cycle logs - one to be used
301b4f9b773Sbostic * as scratch space, the other to save the longest
302b4f9b773Sbostic * cycle.
303b4f9b773Sbostic */
304bb52f206Sbostic for (cnt = 0, n = graph; n != NULL; n = n->n_next)
305b4f9b773Sbostic ++cnt;
306481152d7Sbostic cycle_buf = malloc((u_int)sizeof(NODE *) * cnt);
307481152d7Sbostic longest_cycle = malloc((u_int)sizeof(NODE *) * cnt);
308481152d7Sbostic if (cycle_buf == NULL || longest_cycle == NULL)
309bb52f206Sbostic err(1, NULL);
310b4f9b773Sbostic }
311bb52f206Sbostic for (n = graph; n != NULL; n = n->n_next)
312bb52f206Sbostic if (!(n->n_flags & NF_ACYCLIC))
313b4f9b773Sbostic if (cnt = find_cycle(n, n, 0, 0)) {
314bb52f206Sbostic warnx("cycle in data");
315b4f9b773Sbostic for (i = 0; i < cnt; i++)
316bb52f206Sbostic warnx("%s",
317bb52f206Sbostic longest_cycle[i]->n_name);
318b4f9b773Sbostic remove_node(n);
319bb52f206Sbostic clear_cycle();
320b4f9b773Sbostic break;
321bb52f206Sbostic } else {
322b4f9b773Sbostic /* to avoid further checks */
323bb52f206Sbostic n->n_flags |= NF_ACYCLIC;
324bb52f206Sbostic clear_cycle();
325b4f9b773Sbostic }
326b4f9b773Sbostic
327bb52f206Sbostic if (n == NULL)
328bb52f206Sbostic errx(1, "internal error -- could not find cycle");
329b4f9b773Sbostic }
330b4f9b773Sbostic }
331b4f9b773Sbostic
332b4f9b773Sbostic /* print node and remove from graph (does not actually free node) */
333b4f9b773Sbostic void
remove_node(n)334b4f9b773Sbostic remove_node(n)
335b4f9b773Sbostic register NODE *n;
336b4f9b773Sbostic {
337b4f9b773Sbostic register NODE **np;
338b4f9b773Sbostic register int i;
339b4f9b773Sbostic
340b4f9b773Sbostic (void)printf("%s\n", n->n_name);
341b4f9b773Sbostic for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++)
342b4f9b773Sbostic --(*np)->n_refcnt;
343b4f9b773Sbostic n->n_narcs = 0;
344b4f9b773Sbostic *n->n_prevp = n->n_next;
345b4f9b773Sbostic if (n->n_next)
346b4f9b773Sbostic n->n_next->n_prevp = n->n_prevp;
347b4f9b773Sbostic }
348b4f9b773Sbostic
349bb52f206Sbostic
350bb52f206Sbostic /* look for the longest? cycle from node from to node to. */
351ca9382c3Sbostic int
find_cycle(from,to,longest_len,depth)352b4f9b773Sbostic find_cycle(from, to, longest_len, depth)
353b4f9b773Sbostic NODE *from, *to;
354b4f9b773Sbostic int depth, longest_len;
355b4f9b773Sbostic {
356b4f9b773Sbostic register NODE **np;
357b4f9b773Sbostic register int i, len;
358b4f9b773Sbostic
359b4f9b773Sbostic /*
360b4f9b773Sbostic * avoid infinite loops and ignore portions of the graph known
361b4f9b773Sbostic * to be acyclic
362b4f9b773Sbostic */
363bb52f206Sbostic if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
364b4f9b773Sbostic return (0);
365bb52f206Sbostic from->n_flags |= NF_MARK;
366b4f9b773Sbostic
367b4f9b773Sbostic for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
368b4f9b773Sbostic cycle_buf[depth] = *np;
369b4f9b773Sbostic if (*np == to) {
370b4f9b773Sbostic if (depth + 1 > longest_len) {
371b4f9b773Sbostic longest_len = depth + 1;
372b4f9b773Sbostic (void)memcpy((char *)longest_cycle,
373b4f9b773Sbostic (char *)cycle_buf,
374b4f9b773Sbostic longest_len * sizeof(NODE *));
375b4f9b773Sbostic }
376b4f9b773Sbostic } else {
377bb52f206Sbostic if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
378bb52f206Sbostic continue;
379b4f9b773Sbostic len = find_cycle(*np, to, longest_len, depth + 1);
380bb52f206Sbostic
381bb52f206Sbostic if (debug)
382bb52f206Sbostic (void)printf("%*s %s->%s %d\n", depth, "",
383bb52f206Sbostic from->n_name, to->n_name, len);
384bb52f206Sbostic
385bb52f206Sbostic if (len == 0)
386bb52f206Sbostic (*np)->n_flags |= NF_NODEST;
387bb52f206Sbostic
388b4f9b773Sbostic if (len > longest_len)
389b4f9b773Sbostic longest_len = len;
390bb52f206Sbostic
391bb52f206Sbostic if (len > 0 && !longest)
392bb52f206Sbostic break;
393b4f9b773Sbostic }
394b4f9b773Sbostic }
395b4f9b773Sbostic from->n_flags &= ~NF_MARK;
396b4f9b773Sbostic return (longest_len);
397b4f9b773Sbostic }
398b4f9b773Sbostic
399b4f9b773Sbostic void
usage()400ca9382c3Sbostic usage()
401b4f9b773Sbostic {
402bb52f206Sbostic (void)fprintf(stderr, "usage: tsort [-l] [file]\n");
403b4f9b773Sbostic exit(1);
40411871153Sbill }
405