1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of Memorial University of Newfoundland. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char copyright[] = 13 "@(#) Copyright (c) 1989, 1993\n\ 14 The Regents of the University of California. All rights reserved.\n"; 15 #endif /* not lint */ 16 17 #ifndef lint 18 static char sccsid[] = "@(#)tsort.c 8.1 (Berkeley) 06/09/93"; 19 #endif /* not lint */ 20 21 #include <sys/types.h> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <db.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <ctype.h> 28 #include <string.h> 29 30 /* 31 * Topological sort. Input is a list of pairs of strings separated by 32 * white space (spaces, tabs, and/or newlines); strings are written to 33 * standard output in sorted order, one per line. 34 * 35 * usage: 36 * tsort [inputfile] 37 * If no input file is specified, standard input is read. 38 * 39 * Should be compatable with AT&T tsort HOWEVER the output is not identical 40 * (i.e. for most graphs there is more than one sorted order, and this tsort 41 * usually generates a different one then the AT&T tsort). Also, cycle 42 * reporting seems to be more accurate in this version (the AT&T tsort 43 * sometimes says a node is in a cycle when it isn't). 44 * 45 * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 46 */ 47 #define HASHSIZE 53 /* doesn't need to be big */ 48 #define NF_MARK 0x1 /* marker for cycle detection */ 49 #define NF_ACYCLIC 0x2 /* this node is cycle free */ 50 51 typedef struct node_str NODE; 52 53 struct node_str { 54 NODE **n_prevp; /* pointer to previous node's n_next */ 55 NODE *n_next; /* next node in graph */ 56 NODE **n_arcs; /* array of arcs to other nodes */ 57 int n_narcs; /* number of arcs in n_arcs[] */ 58 int n_arcsize; /* size of n_arcs[] array */ 59 int n_refcnt; /* # of arcs pointing to this node */ 60 int n_flags; /* NF_* */ 61 char n_name[1]; /* name of this node */ 62 }; 63 64 typedef struct _buf { 65 char *b_buf; 66 int b_bsize; 67 } BUF; 68 69 DB *db; 70 NODE *graph; 71 NODE **cycle_buf; 72 NODE **longest_cycle; 73 74 void add_arc __P((char *, char *)); 75 void err __P((const char *, ...)); 76 int find_cycle __P((NODE *, NODE *, int, int)); 77 NODE *get_node __P((char *)); 78 void *grow_buf __P((void *, int)); 79 void remove_node __P((NODE *)); 80 void tsort __P((void)); 81 void usage __P((void)); 82 83 int 84 main(argc, argv) 85 int argc; 86 char *argv[]; 87 { 88 register BUF *b; 89 register int c, n; 90 FILE *fp; 91 int bsize, ch, nused; 92 BUF bufs[2]; 93 94 while ((ch = getopt(argc, argv, "")) != EOF) 95 switch(ch) { 96 case '?': 97 default: 98 usage(); 99 } 100 argc -= optind; 101 argv += optind; 102 103 switch(argc) { 104 case 0: 105 fp = stdin; 106 break; 107 case 1: 108 if ((fp = fopen(*argv, "r")) == NULL) 109 err("%s: %s", *argv, strerror(errno)); 110 break; 111 default: 112 usage(); 113 } 114 115 for (b = bufs, n = 2; --n >= 0; b++) 116 b->b_buf = grow_buf(NULL, b->b_bsize = 1024); 117 118 /* parse input and build the graph */ 119 for (n = 0, c = getc(fp);;) { 120 while (c != EOF && isspace(c)) 121 c = getc(fp); 122 if (c == EOF) 123 break; 124 125 nused = 0; 126 b = &bufs[n]; 127 bsize = b->b_bsize; 128 do { 129 b->b_buf[nused++] = c; 130 if (nused == bsize) 131 b->b_buf = grow_buf(b->b_buf, bsize *= 2); 132 c = getc(fp); 133 } while (c != EOF && !isspace(c)); 134 135 b->b_buf[nused] = '\0'; 136 b->b_bsize = bsize; 137 if (n) 138 add_arc(bufs[0].b_buf, bufs[1].b_buf); 139 n = !n; 140 } 141 (void)fclose(fp); 142 if (n) 143 err("odd data count"); 144 145 /* do the sort */ 146 tsort(); 147 exit(0); 148 } 149 150 /* double the size of oldbuf and return a pointer to the new buffer. */ 151 void * 152 grow_buf(bp, size) 153 void *bp; 154 int size; 155 { 156 if ((bp = realloc(bp, (u_int)size)) == NULL) 157 err("%s", strerror(errno)); 158 return (bp); 159 } 160 161 /* 162 * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 163 * the graph, then add them. 164 */ 165 void 166 add_arc(s1, s2) 167 char *s1, *s2; 168 { 169 register NODE *n1; 170 NODE *n2; 171 int bsize, i; 172 173 n1 = get_node(s1); 174 175 if (!strcmp(s1, s2)) 176 return; 177 178 n2 = get_node(s2); 179 180 /* 181 * Check if this arc is already here. 182 */ 183 for (i = 0; i < n1->n_narcs; i++) 184 if (n1->n_arcs[i] == n2) 185 return; 186 /* 187 * Add it. 188 */ 189 if (n1->n_narcs == n1->n_arcsize) { 190 if (!n1->n_arcsize) 191 n1->n_arcsize = 10; 192 bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 193 n1->n_arcs = grow_buf(n1->n_arcs, bsize); 194 n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 195 } 196 n1->n_arcs[n1->n_narcs++] = n2; 197 ++n2->n_refcnt; 198 } 199 200 /* Find a node in the graph (insert if not found) and return a pointer to it. */ 201 NODE * 202 get_node(name) 203 char *name; 204 { 205 DBT data, key; 206 NODE *n; 207 208 if (db == NULL && 209 (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) 210 err("db: open: %s", name, strerror(errno)); 211 212 key.data = name; 213 key.size = strlen(name) + 1; 214 215 switch((*db->get)(db, &key, &data, 0)) { 216 case 0: 217 bcopy(data.data, &n, sizeof(n)); 218 return (n); 219 case 1: 220 break; 221 default: 222 case -1: 223 err("db: get %s: %s", name, strerror(errno)); 224 } 225 226 if ((n = malloc(sizeof(NODE) + key.size)) == NULL) 227 err("%s", strerror(errno)); 228 229 n->n_narcs = 0; 230 n->n_arcsize = 0; 231 n->n_arcs = NULL; 232 n->n_refcnt = 0; 233 n->n_flags = 0; 234 bcopy(name, n->n_name, key.size); 235 236 /* Add to linked list. */ 237 if (n->n_next = graph) 238 graph->n_prevp = &n->n_next; 239 n->n_prevp = &graph; 240 graph = n; 241 242 /* Add to hash table. */ 243 data.data = &n; 244 data.size = sizeof(n); 245 if ((*db->put)(db, &key, &data, 0)) 246 err("db: put %s: %s", name, strerror(errno)); 247 return (n); 248 } 249 250 /* do topological sort on graph */ 251 void 252 tsort() 253 { 254 register NODE *n, *next; 255 register int cnt; 256 257 while (graph) { 258 /* 259 * Keep getting rid of simple cases until there are none left, 260 * if there are any nodes still in the graph, then there is 261 * a cycle in it. 262 */ 263 do { 264 for (cnt = 0, n = graph; n; n = next) { 265 next = n->n_next; 266 if (n->n_refcnt == 0) { 267 remove_node(n); 268 ++cnt; 269 } 270 } 271 } while (graph && cnt); 272 273 if (!graph) 274 break; 275 276 if (!cycle_buf) { 277 /* 278 * Allocate space for two cycle logs - one to be used 279 * as scratch space, the other to save the longest 280 * cycle. 281 */ 282 for (cnt = 0, n = graph; n; n = n->n_next) 283 ++cnt; 284 cycle_buf = malloc((u_int)sizeof(NODE *) * cnt); 285 longest_cycle = malloc((u_int)sizeof(NODE *) * cnt); 286 if (cycle_buf == NULL || longest_cycle == NULL) 287 err("%s", strerror(errno)); 288 } 289 for (n = graph; n; n = n->n_next) 290 if (!(n->n_flags & NF_ACYCLIC)) { 291 if (cnt = find_cycle(n, n, 0, 0)) { 292 register int i; 293 294 (void)fprintf(stderr, 295 "tsort: cycle in data\n"); 296 for (i = 0; i < cnt; i++) 297 (void)fprintf(stderr, 298 "tsort: %s\n", longest_cycle[i]->n_name); 299 remove_node(n); 300 break; 301 } else 302 /* to avoid further checks */ 303 n->n_flags = NF_ACYCLIC; 304 } 305 306 if (!n) 307 err("internal error -- could not find cycle"); 308 } 309 } 310 311 /* print node and remove from graph (does not actually free node) */ 312 void 313 remove_node(n) 314 register NODE *n; 315 { 316 register NODE **np; 317 register int i; 318 319 (void)printf("%s\n", n->n_name); 320 for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) 321 --(*np)->n_refcnt; 322 n->n_narcs = 0; 323 *n->n_prevp = n->n_next; 324 if (n->n_next) 325 n->n_next->n_prevp = n->n_prevp; 326 } 327 328 /* look for the longest cycle from node from to node to. */ 329 int 330 find_cycle(from, to, longest_len, depth) 331 NODE *from, *to; 332 int depth, longest_len; 333 { 334 register NODE **np; 335 register int i, len; 336 337 /* 338 * avoid infinite loops and ignore portions of the graph known 339 * to be acyclic 340 */ 341 if (from->n_flags & (NF_MARK|NF_ACYCLIC)) 342 return (0); 343 from->n_flags = NF_MARK; 344 345 for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { 346 cycle_buf[depth] = *np; 347 if (*np == to) { 348 if (depth + 1 > longest_len) { 349 longest_len = depth + 1; 350 (void)memcpy((char *)longest_cycle, 351 (char *)cycle_buf, 352 longest_len * sizeof(NODE *)); 353 } 354 } else { 355 len = find_cycle(*np, to, longest_len, depth + 1); 356 if (len > longest_len) 357 longest_len = len; 358 } 359 } 360 from->n_flags &= ~NF_MARK; 361 return (longest_len); 362 } 363 364 void 365 usage() 366 { 367 (void)fprintf(stderr, "usage: tsort [file]\n"); 368 exit(1); 369 } 370 371 #if __STDC__ 372 #include <stdarg.h> 373 #else 374 #include <varargs.h> 375 #endif 376 377 void 378 #if __STDC__ 379 err(const char *fmt, ...) 380 #else 381 err(fmt, va_alist) 382 char *fmt; 383 va_dcl 384 #endif 385 { 386 va_list ap; 387 #if __STDC__ 388 va_start(ap, fmt); 389 #else 390 va_start(ap); 391 #endif 392 (void)fprintf(stderr, "tsort: "); 393 (void)vfprintf(stderr, fmt, ap); 394 va_end(ap); 395 (void)fprintf(stderr, "\n"); 396 exit(1); 397 /* NOTREACHED */ 398 } 399