1 /* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of Memorial University of Newfoundland. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char copyright[] = 13 "@(#) Copyright (c) 1989, 1993, 1994\n\ 14 The Regents of the University of California. All rights reserved.\n"; 15 #endif /* not lint */ 16 17 #ifndef lint 18 static char sccsid[] = "@(#)tsort.c 8.3 (Berkeley) 05/04/95"; 19 #endif /* not lint */ 20 21 #include <sys/types.h> 22 23 #include <ctype.h> 24 #include <db.h> 25 #include <err.h> 26 #include <errno.h> 27 #include <fcntl.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <unistd.h> 32 33 /* 34 * Topological sort. Input is a list of pairs of strings separated by 35 * white space (spaces, tabs, and/or newlines); strings are written to 36 * standard output in sorted order, one per line. 37 * 38 * usage: 39 * tsort [-l] [inputfile] 40 * If no input file is specified, standard input is read. 41 * 42 * Should be compatable with AT&T tsort HOWEVER the output is not identical 43 * (i.e. for most graphs there is more than one sorted order, and this tsort 44 * usually generates a different one then the AT&T tsort). Also, cycle 45 * reporting seems to be more accurate in this version (the AT&T tsort 46 * sometimes says a node is in a cycle when it isn't). 47 * 48 * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 49 */ 50 #define HASHSIZE 53 /* doesn't need to be big */ 51 #define NF_MARK 0x1 /* marker for cycle detection */ 52 #define NF_ACYCLIC 0x2 /* this node is cycle free */ 53 #define NF_NODEST 0x4 /* Unreachable */ 54 55 56 typedef struct node_str NODE; 57 58 struct node_str { 59 NODE **n_prevp; /* pointer to previous node's n_next */ 60 NODE *n_next; /* next node in graph */ 61 NODE **n_arcs; /* array of arcs to other nodes */ 62 int n_narcs; /* number of arcs in n_arcs[] */ 63 int n_arcsize; /* size of n_arcs[] array */ 64 int n_refcnt; /* # of arcs pointing to this node */ 65 int n_flags; /* NF_* */ 66 char n_name[1]; /* name of this node */ 67 }; 68 69 typedef struct _buf { 70 char *b_buf; 71 int b_bsize; 72 } BUF; 73 74 DB *db; 75 NODE *graph, **cycle_buf, **longest_cycle; 76 int debug, longest; 77 78 void add_arc __P((char *, char *)); 79 int find_cycle __P((NODE *, NODE *, int, int)); 80 NODE *get_node __P((char *)); 81 void *grow_buf __P((void *, int)); 82 void remove_node __P((NODE *)); 83 void tsort __P((void)); 84 void usage __P((void)); 85 86 int 87 main(argc, argv) 88 int argc; 89 char *argv[]; 90 { 91 register BUF *b; 92 register int c, n; 93 FILE *fp; 94 int bsize, ch, nused; 95 BUF bufs[2]; 96 97 while ((ch = getopt(argc, argv, "dl")) != EOF) 98 switch (ch) { 99 case 'd': 100 debug = 1; 101 break; 102 case 'l': 103 longest = 1; 104 break; 105 case '?': 106 default: 107 usage(); 108 } 109 argc -= optind; 110 argv += optind; 111 112 switch (argc) { 113 case 0: 114 fp = stdin; 115 break; 116 case 1: 117 if ((fp = fopen(*argv, "r")) == NULL) 118 err(1, "%s", *argv); 119 break; 120 default: 121 usage(); 122 } 123 124 for (b = bufs, n = 2; --n >= 0; b++) 125 b->b_buf = grow_buf(NULL, b->b_bsize = 1024); 126 127 /* parse input and build the graph */ 128 for (n = 0, c = getc(fp);;) { 129 while (c != EOF && isspace(c)) 130 c = getc(fp); 131 if (c == EOF) 132 break; 133 134 nused = 0; 135 b = &bufs[n]; 136 bsize = b->b_bsize; 137 do { 138 b->b_buf[nused++] = c; 139 if (nused == bsize) 140 b->b_buf = grow_buf(b->b_buf, bsize *= 2); 141 c = getc(fp); 142 } while (c != EOF && !isspace(c)); 143 144 b->b_buf[nused] = '\0'; 145 b->b_bsize = bsize; 146 if (n) 147 add_arc(bufs[0].b_buf, bufs[1].b_buf); 148 n = !n; 149 } 150 (void)fclose(fp); 151 if (n) 152 errx(1, "odd data count"); 153 154 /* do the sort */ 155 tsort(); 156 exit(0); 157 } 158 159 /* double the size of oldbuf and return a pointer to the new buffer. */ 160 void * 161 grow_buf(bp, size) 162 void *bp; 163 int size; 164 { 165 if ((bp = realloc(bp, (u_int)size)) == NULL) 166 err(1, NULL); 167 return (bp); 168 } 169 170 /* 171 * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 172 * the graph, then add them. 173 */ 174 void 175 add_arc(s1, s2) 176 char *s1, *s2; 177 { 178 register NODE *n1; 179 NODE *n2; 180 int bsize, i; 181 182 n1 = get_node(s1); 183 184 if (!strcmp(s1, s2)) 185 return; 186 187 n2 = get_node(s2); 188 189 /* 190 * Check if this arc is already here. 191 */ 192 for (i = 0; i < n1->n_narcs; i++) 193 if (n1->n_arcs[i] == n2) 194 return; 195 /* 196 * Add it. 197 */ 198 if (n1->n_narcs == n1->n_arcsize) { 199 if (!n1->n_arcsize) 200 n1->n_arcsize = 10; 201 bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 202 n1->n_arcs = grow_buf(n1->n_arcs, bsize); 203 n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 204 } 205 n1->n_arcs[n1->n_narcs++] = n2; 206 ++n2->n_refcnt; 207 } 208 209 /* Find a node in the graph (insert if not found) and return a pointer to it. */ 210 NODE * 211 get_node(name) 212 char *name; 213 { 214 DBT data, key; 215 NODE *n; 216 217 if (db == NULL && 218 (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) 219 err(1, "db: %s", name); 220 221 key.data = name; 222 key.size = strlen(name) + 1; 223 224 switch ((*db->get)(db, &key, &data, 0)) { 225 case 0: 226 bcopy(data.data, &n, sizeof(n)); 227 return (n); 228 case 1: 229 break; 230 default: 231 case -1: 232 err(1, "db: %s", name); 233 } 234 235 if ((n = malloc(sizeof(NODE) + key.size)) == NULL) 236 err(1, NULL); 237 238 n->n_narcs = 0; 239 n->n_arcsize = 0; 240 n->n_arcs = NULL; 241 n->n_refcnt = 0; 242 n->n_flags = 0; 243 bcopy(name, n->n_name, key.size); 244 245 /* Add to linked list. */ 246 if ((n->n_next = graph) != NULL) 247 graph->n_prevp = &n->n_next; 248 n->n_prevp = &graph; 249 graph = n; 250 251 /* Add to hash table. */ 252 data.data = &n; 253 data.size = sizeof(n); 254 if ((*db->put)(db, &key, &data, 0)) 255 err(1, "db: %s", name); 256 return (n); 257 } 258 259 260 /* 261 * Clear the NODEST flag from all nodes. 262 */ 263 void 264 clear_cycle() 265 { 266 NODE *n; 267 268 for (n = graph; n != NULL; n = n->n_next) 269 n->n_flags &= ~NF_NODEST; 270 } 271 272 /* do topological sort on graph */ 273 void 274 tsort() 275 { 276 register NODE *n, *next; 277 register int cnt, i; 278 279 while (graph != NULL) { 280 /* 281 * Keep getting rid of simple cases until there are none left, 282 * if there are any nodes still in the graph, then there is 283 * a cycle in it. 284 */ 285 do { 286 for (cnt = 0, n = graph; n != NULL; n = next) { 287 next = n->n_next; 288 if (n->n_refcnt == 0) { 289 remove_node(n); 290 ++cnt; 291 } 292 } 293 } while (graph != NULL && cnt); 294 295 if (graph == NULL) 296 break; 297 298 if (!cycle_buf) { 299 /* 300 * Allocate space for two cycle logs - one to be used 301 * as scratch space, the other to save the longest 302 * cycle. 303 */ 304 for (cnt = 0, n = graph; n != NULL; n = n->n_next) 305 ++cnt; 306 cycle_buf = malloc((u_int)sizeof(NODE *) * cnt); 307 longest_cycle = malloc((u_int)sizeof(NODE *) * cnt); 308 if (cycle_buf == NULL || longest_cycle == NULL) 309 err(1, NULL); 310 } 311 for (n = graph; n != NULL; n = n->n_next) 312 if (!(n->n_flags & NF_ACYCLIC)) 313 if (cnt = find_cycle(n, n, 0, 0)) { 314 warnx("cycle in data"); 315 for (i = 0; i < cnt; i++) 316 warnx("%s", 317 longest_cycle[i]->n_name); 318 remove_node(n); 319 clear_cycle(); 320 break; 321 } else { 322 /* to avoid further checks */ 323 n->n_flags |= NF_ACYCLIC; 324 clear_cycle(); 325 } 326 327 if (n == NULL) 328 errx(1, "internal error -- could not find cycle"); 329 } 330 } 331 332 /* print node and remove from graph (does not actually free node) */ 333 void 334 remove_node(n) 335 register NODE *n; 336 { 337 register NODE **np; 338 register int i; 339 340 (void)printf("%s\n", n->n_name); 341 for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) 342 --(*np)->n_refcnt; 343 n->n_narcs = 0; 344 *n->n_prevp = n->n_next; 345 if (n->n_next) 346 n->n_next->n_prevp = n->n_prevp; 347 } 348 349 350 /* look for the longest? cycle from node from to node to. */ 351 int 352 find_cycle(from, to, longest_len, depth) 353 NODE *from, *to; 354 int depth, longest_len; 355 { 356 register NODE **np; 357 register int i, len; 358 359 /* 360 * avoid infinite loops and ignore portions of the graph known 361 * to be acyclic 362 */ 363 if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC)) 364 return (0); 365 from->n_flags |= NF_MARK; 366 367 for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { 368 cycle_buf[depth] = *np; 369 if (*np == to) { 370 if (depth + 1 > longest_len) { 371 longest_len = depth + 1; 372 (void)memcpy((char *)longest_cycle, 373 (char *)cycle_buf, 374 longest_len * sizeof(NODE *)); 375 } 376 } else { 377 if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST)) 378 continue; 379 len = find_cycle(*np, to, longest_len, depth + 1); 380 381 if (debug) 382 (void)printf("%*s %s->%s %d\n", depth, "", 383 from->n_name, to->n_name, len); 384 385 if (len == 0) 386 (*np)->n_flags |= NF_NODEST; 387 388 if (len > longest_len) 389 longest_len = len; 390 391 if (len > 0 && !longest) 392 break; 393 } 394 } 395 from->n_flags &= ~NF_MARK; 396 return (longest_len); 397 } 398 399 void 400 usage() 401 { 402 (void)fprintf(stderr, "usage: tsort [-l] [file]\n"); 403 exit(1); 404 } 405