1 /* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of Memorial University of Newfoundland. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char copyright[] = 13 "@(#) Copyright (c) 1989, 1993, 1994\n\ 14 The Regents of the University of California. All rights reserved.\n"; 15 #endif /* not lint */ 16 17 #ifndef lint 18 static char sccsid[] = "@(#)tsort.c 8.2 (Berkeley) 03/30/94"; 19 #endif /* not lint */ 20 21 #include <sys/types.h> 22 23 #include <ctype.h> 24 #include <db.h> 25 #include <err.h> 26 #include <errno.h> 27 #include <fcntl.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <string.h> 31 32 /* 33 * Topological sort. Input is a list of pairs of strings separated by 34 * white space (spaces, tabs, and/or newlines); strings are written to 35 * standard output in sorted order, one per line. 36 * 37 * usage: 38 * tsort [-l] [inputfile] 39 * If no input file is specified, standard input is read. 40 * 41 * Should be compatable with AT&T tsort HOWEVER the output is not identical 42 * (i.e. for most graphs there is more than one sorted order, and this tsort 43 * usually generates a different one then the AT&T tsort). Also, cycle 44 * reporting seems to be more accurate in this version (the AT&T tsort 45 * sometimes says a node is in a cycle when it isn't). 46 * 47 * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 48 */ 49 #define HASHSIZE 53 /* doesn't need to be big */ 50 #define NF_MARK 0x1 /* marker for cycle detection */ 51 #define NF_ACYCLIC 0x2 /* this node is cycle free */ 52 #define NF_NODEST 0x4 /* Unreachable */ 53 54 55 typedef struct node_str NODE; 56 57 struct node_str { 58 NODE **n_prevp; /* pointer to previous node's n_next */ 59 NODE *n_next; /* next node in graph */ 60 NODE **n_arcs; /* array of arcs to other nodes */ 61 int n_narcs; /* number of arcs in n_arcs[] */ 62 int n_arcsize; /* size of n_arcs[] array */ 63 int n_refcnt; /* # of arcs pointing to this node */ 64 int n_flags; /* NF_* */ 65 char n_name[1]; /* name of this node */ 66 }; 67 68 typedef struct _buf { 69 char *b_buf; 70 int b_bsize; 71 } BUF; 72 73 DB *db; 74 NODE *graph, **cycle_buf, **longest_cycle; 75 int debug, longest; 76 77 void add_arc __P((char *, char *)); 78 int find_cycle __P((NODE *, NODE *, int, int)); 79 NODE *get_node __P((char *)); 80 void *grow_buf __P((void *, int)); 81 void remove_node __P((NODE *)); 82 void tsort __P((void)); 83 void usage __P((void)); 84 85 int 86 main(argc, argv) 87 int argc; 88 char *argv[]; 89 { 90 register BUF *b; 91 register int c, n; 92 FILE *fp; 93 int bsize, ch, nused; 94 BUF bufs[2]; 95 96 while ((ch = getopt(argc, argv, "dl")) != EOF) 97 switch (ch) { 98 case 'd': 99 debug = 1; 100 break; 101 case 'l': 102 longest = 1; 103 break; 104 case '?': 105 default: 106 usage(); 107 } 108 argc -= optind; 109 argv += optind; 110 111 switch (argc) { 112 case 0: 113 fp = stdin; 114 break; 115 case 1: 116 if ((fp = fopen(*argv, "r")) == NULL) 117 err(1, "%s", *argv); 118 break; 119 default: 120 usage(); 121 } 122 123 for (b = bufs, n = 2; --n >= 0; b++) 124 b->b_buf = grow_buf(NULL, b->b_bsize = 1024); 125 126 /* parse input and build the graph */ 127 for (n = 0, c = getc(fp);;) { 128 while (c != EOF && isspace(c)) 129 c = getc(fp); 130 if (c == EOF) 131 break; 132 133 nused = 0; 134 b = &bufs[n]; 135 bsize = b->b_bsize; 136 do { 137 b->b_buf[nused++] = c; 138 if (nused == bsize) 139 b->b_buf = grow_buf(b->b_buf, bsize *= 2); 140 c = getc(fp); 141 } while (c != EOF && !isspace(c)); 142 143 b->b_buf[nused] = '\0'; 144 b->b_bsize = bsize; 145 if (n) 146 add_arc(bufs[0].b_buf, bufs[1].b_buf); 147 n = !n; 148 } 149 (void)fclose(fp); 150 if (n) 151 errx(1, "odd data count"); 152 153 /* do the sort */ 154 tsort(); 155 exit(0); 156 } 157 158 /* double the size of oldbuf and return a pointer to the new buffer. */ 159 void * 160 grow_buf(bp, size) 161 void *bp; 162 int size; 163 { 164 if ((bp = realloc(bp, (u_int)size)) == NULL) 165 err(1, NULL); 166 return (bp); 167 } 168 169 /* 170 * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 171 * the graph, then add them. 172 */ 173 void 174 add_arc(s1, s2) 175 char *s1, *s2; 176 { 177 register NODE *n1; 178 NODE *n2; 179 int bsize, i; 180 181 n1 = get_node(s1); 182 183 if (!strcmp(s1, s2)) 184 return; 185 186 n2 = get_node(s2); 187 188 /* 189 * Check if this arc is already here. 190 */ 191 for (i = 0; i < n1->n_narcs; i++) 192 if (n1->n_arcs[i] == n2) 193 return; 194 /* 195 * Add it. 196 */ 197 if (n1->n_narcs == n1->n_arcsize) { 198 if (!n1->n_arcsize) 199 n1->n_arcsize = 10; 200 bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 201 n1->n_arcs = grow_buf(n1->n_arcs, bsize); 202 n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 203 } 204 n1->n_arcs[n1->n_narcs++] = n2; 205 ++n2->n_refcnt; 206 } 207 208 /* Find a node in the graph (insert if not found) and return a pointer to it. */ 209 NODE * 210 get_node(name) 211 char *name; 212 { 213 DBT data, key; 214 NODE *n; 215 216 if (db == NULL && 217 (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) 218 err(1, "db: %s", name); 219 220 key.data = name; 221 key.size = strlen(name) + 1; 222 223 switch ((*db->get)(db, &key, &data, 0)) { 224 case 0: 225 bcopy(data.data, &n, sizeof(n)); 226 return (n); 227 case 1: 228 break; 229 default: 230 case -1: 231 err(1, "db: %s", name); 232 } 233 234 if ((n = malloc(sizeof(NODE) + key.size)) == NULL) 235 err(1, NULL); 236 237 n->n_narcs = 0; 238 n->n_arcsize = 0; 239 n->n_arcs = NULL; 240 n->n_refcnt = 0; 241 n->n_flags = 0; 242 bcopy(name, n->n_name, key.size); 243 244 /* Add to linked list. */ 245 if ((n->n_next = graph) != NULL) 246 graph->n_prevp = &n->n_next; 247 n->n_prevp = &graph; 248 graph = n; 249 250 /* Add to hash table. */ 251 data.data = &n; 252 data.size = sizeof(n); 253 if ((*db->put)(db, &key, &data, 0)) 254 err(1, "db: %s", name); 255 return (n); 256 } 257 258 259 /* 260 * Clear the NODEST flag from all nodes. 261 */ 262 void 263 clear_cycle() 264 { 265 NODE *n; 266 267 for (n = graph; n != NULL; n = n->n_next) 268 n->n_flags &= ~NF_NODEST; 269 } 270 271 /* do topological sort on graph */ 272 void 273 tsort() 274 { 275 register NODE *n, *next; 276 register int cnt, i; 277 278 while (graph != NULL) { 279 /* 280 * Keep getting rid of simple cases until there are none left, 281 * if there are any nodes still in the graph, then there is 282 * a cycle in it. 283 */ 284 do { 285 for (cnt = 0, n = graph; n != NULL; n = next) { 286 next = n->n_next; 287 if (n->n_refcnt == 0) { 288 remove_node(n); 289 ++cnt; 290 } 291 } 292 } while (graph != NULL && cnt); 293 294 if (graph == NULL) 295 break; 296 297 if (!cycle_buf) { 298 /* 299 * Allocate space for two cycle logs - one to be used 300 * as scratch space, the other to save the longest 301 * cycle. 302 */ 303 for (cnt = 0, n = graph; n != NULL; n = n->n_next) 304 ++cnt; 305 cycle_buf = malloc((u_int)sizeof(NODE *) * cnt); 306 longest_cycle = malloc((u_int)sizeof(NODE *) * cnt); 307 if (cycle_buf == NULL || longest_cycle == NULL) 308 err(1, NULL); 309 } 310 for (n = graph; n != NULL; n = n->n_next) 311 if (!(n->n_flags & NF_ACYCLIC)) 312 if (cnt = find_cycle(n, n, 0, 0)) { 313 warnx("cycle in data"); 314 for (i = 0; i < cnt; i++) 315 warnx("%s", 316 longest_cycle[i]->n_name); 317 remove_node(n); 318 clear_cycle(); 319 break; 320 } else { 321 /* to avoid further checks */ 322 n->n_flags |= NF_ACYCLIC; 323 clear_cycle(); 324 } 325 326 if (n == NULL) 327 errx(1, "internal error -- could not find cycle"); 328 } 329 } 330 331 /* print node and remove from graph (does not actually free node) */ 332 void 333 remove_node(n) 334 register NODE *n; 335 { 336 register NODE **np; 337 register int i; 338 339 (void)printf("%s\n", n->n_name); 340 for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) 341 --(*np)->n_refcnt; 342 n->n_narcs = 0; 343 *n->n_prevp = n->n_next; 344 if (n->n_next) 345 n->n_next->n_prevp = n->n_prevp; 346 } 347 348 349 /* look for the longest? cycle from node from to node to. */ 350 int 351 find_cycle(from, to, longest_len, depth) 352 NODE *from, *to; 353 int depth, longest_len; 354 { 355 register NODE **np; 356 register int i, len; 357 358 /* 359 * avoid infinite loops and ignore portions of the graph known 360 * to be acyclic 361 */ 362 if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC)) 363 return (0); 364 from->n_flags |= NF_MARK; 365 366 for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { 367 cycle_buf[depth] = *np; 368 if (*np == to) { 369 if (depth + 1 > longest_len) { 370 longest_len = depth + 1; 371 (void)memcpy((char *)longest_cycle, 372 (char *)cycle_buf, 373 longest_len * sizeof(NODE *)); 374 } 375 } else { 376 if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST)) 377 continue; 378 len = find_cycle(*np, to, longest_len, depth + 1); 379 380 if (debug) 381 (void)printf("%*s %s->%s %d\n", depth, "", 382 from->n_name, to->n_name, len); 383 384 if (len == 0) 385 (*np)->n_flags |= NF_NODEST; 386 387 if (len > longest_len) 388 longest_len = len; 389 390 if (len > 0 && !longest) 391 break; 392 } 393 } 394 from->n_flags &= ~NF_MARK; 395 return (longest_len); 396 } 397 398 void 399 usage() 400 { 401 (void)fprintf(stderr, "usage: tsort [-l] [file]\n"); 402 exit(1); 403 } 404