1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of Memorial University of Newfoundland. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 char copyright[] = 13 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ 14 All rights reserved.\n"; 15 #endif /* not lint */ 16 17 #ifndef lint 18 static char sccsid[] = "@(#)tsort.c 5.7 (Berkeley) 06/23/92"; 19 #endif /* not lint */ 20 21 #include <sys/types.h> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <db.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <ctype.h> 28 #include <string.h> 29 30 /* 31 * Topological sort. Input is a list of pairs of strings separated by 32 * white space (spaces, tabs, and/or newlines); strings are written to 33 * standard output in sorted order, one per line. 34 * 35 * usage: 36 * tsort [inputfile] 37 * If no input file is specified, standard input is read. 38 * 39 * Should be compatable with AT&T tsort HOWEVER the output is not identical 40 * (i.e. for most graphs there is more than one sorted order, and this tsort 41 * usually generates a different one then the AT&T tsort). Also, cycle 42 * reporting seems to be more accurate in this version (the AT&T tsort 43 * sometimes says a node is in a cycle when it isn't). 44 * 45 * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 46 */ 47 #define HASHSIZE 53 /* doesn't need to be big */ 48 #define NF_MARK 0x1 /* marker for cycle detection */ 49 #define NF_ACYCLIC 0x2 /* this node is cycle free */ 50 51 typedef struct node_str NODE; 52 53 struct node_str { 54 NODE **n_prevp; /* pointer to previous node's n_next */ 55 NODE *n_next; /* next node in graph */ 56 NODE **n_arcs; /* array of arcs to other nodes */ 57 int n_narcs; /* number of arcs in n_arcs[] */ 58 int n_arcsize; /* size of n_arcs[] array */ 59 int n_refcnt; /* # of arcs pointing to this node */ 60 int n_flags; /* NF_* */ 61 char n_name[1]; /* name of this node */ 62 }; 63 64 typedef struct _buf { 65 char *b_buf; 66 int b_bsize; 67 } BUF; 68 69 DB *db; 70 NODE *graph; 71 NODE **cycle_buf; 72 NODE **longest_cycle; 73 74 void add_arc __P((char *, char *)); 75 void err __P((const char *, ...)); 76 int find_cycle __P((NODE *, NODE *, int, int)); 77 NODE *get_node __P((char *)); 78 void *grow_buf __P((void *, int)); 79 void remove_node __P((NODE *)); 80 void tsort __P((void)); 81 void usage __P((void)); 82 83 int 84 main(argc, argv) 85 int argc; 86 char *argv[]; 87 { 88 register BUF *b; 89 register int c, n; 90 FILE *fp; 91 int bsize, ch, nused; 92 BUF bufs[2]; 93 94 while ((ch = getopt(argc, argv, "")) != EOF) 95 switch(ch) { 96 case '?': 97 default: 98 usage(); 99 } 100 argc -= optind; 101 argv += optind; 102 103 switch(argc) { 104 case 0: 105 fp = stdin; 106 break; 107 case 1: 108 if ((fp = fopen(*argv, "r")) == NULL) 109 err("%s: %s", *argv, strerror(errno)); 110 break; 111 default: 112 usage(); 113 } 114 115 for (b = bufs, n = 2; --n >= 0; b++) 116 b->b_buf = grow_buf(NULL, b->b_bsize = 1024); 117 118 /* parse input and build the graph */ 119 for (n = 0, c = getc(fp);;) { 120 while (c != EOF && isspace(c)) 121 c = getc(fp); 122 if (c == EOF) 123 break; 124 125 nused = 0; 126 b = &bufs[n]; 127 bsize = b->b_bsize; 128 do { 129 b->b_buf[nused++] = c; 130 if (nused == bsize) 131 b->b_buf = grow_buf(b->b_buf, bsize *= 2); 132 c = getc(fp); 133 } while (c != EOF && !isspace(c)); 134 135 b->b_buf[nused] = '\0'; 136 b->b_bsize = bsize; 137 if (n) 138 add_arc(bufs[0].b_buf, bufs[1].b_buf); 139 n = !n; 140 } 141 (void)fclose(fp); 142 if (n) 143 err("odd data count"); 144 145 /* do the sort */ 146 tsort(); 147 exit(0); 148 } 149 150 /* double the size of oldbuf and return a pointer to the new buffer. */ 151 void * 152 grow_buf(bp, size) 153 void *bp; 154 int size; 155 { 156 if ((bp = realloc(bp, (u_int)size)) == NULL) 157 err("%s", strerror(errno)); 158 return (bp); 159 } 160 161 /* 162 * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 163 * the graph, then add them. 164 */ 165 void 166 add_arc(s1, s2) 167 char *s1, *s2; 168 { 169 register NODE *n1; 170 NODE *n2; 171 int bsize, i; 172 173 n1 = get_node(s1); 174 175 if (!strcmp(s1, s2)) 176 return; 177 178 n2 = get_node(s2); 179 180 /* 181 * Check if this arc is already here. 182 */ 183 for (i = 0; i < n1->n_narcs; i++) 184 if (n1->n_arcs[i] == n2) 185 return; 186 /* 187 * Add it. 188 */ 189 if (n1->n_narcs == n1->n_arcsize) { 190 if (!n1->n_arcsize) 191 n1->n_arcsize = 10; 192 bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 193 n1->n_arcs = grow_buf(n1->n_arcs, bsize); 194 n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 195 } 196 n1->n_arcs[n1->n_narcs++] = n2; 197 ++n2->n_refcnt; 198 } 199 200 /* Find a node in the graph (insert if not found) and return a pointer to it. */ 201 NODE * 202 get_node(name) 203 char *name; 204 { 205 DBT data, key; 206 NODE *n; 207 208 if (db == NULL && 209 (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) 210 err("db: open: %s", name, strerror(errno)); 211 212 key.data = name; 213 key.size = strlen(name) + 1; 214 215 switch((*db->get)(db, &key, &data, 0)) { 216 case 0: 217 return (*(NODE **)data.data); 218 case 1: 219 break; 220 default: 221 case -1: 222 err("db: get %s: %s", name, strerror(errno)); 223 } 224 225 if ((n = malloc(sizeof(NODE) + key.size)) == NULL) 226 err("%s", strerror(errno)); 227 228 n->n_narcs = 0; 229 n->n_arcsize = 0; 230 n->n_arcs = NULL; 231 n->n_refcnt = 0; 232 n->n_flags = 0; 233 bcopy(name, n->n_name, key.size); 234 235 /* Add to linked list. */ 236 if (n->n_next = graph) 237 graph->n_prevp = &n->n_next; 238 n->n_prevp = &graph; 239 graph = n; 240 241 /* Add to hash table. */ 242 data.data = &n; 243 data.size = sizeof(n); 244 if ((*db->put)(db, &key, &data, 0)) 245 err("db: put %s: %s", name, strerror(errno)); 246 return (n); 247 } 248 249 /* do topological sort on graph */ 250 void 251 tsort() 252 { 253 register NODE *n, *next; 254 register int cnt; 255 256 while (graph) { 257 /* 258 * Keep getting rid of simple cases until there are none left, 259 * if there are any nodes still in the graph, then there is 260 * a cycle in it. 261 */ 262 do { 263 for (cnt = 0, n = graph; n; n = next) { 264 next = n->n_next; 265 if (n->n_refcnt == 0) { 266 remove_node(n); 267 ++cnt; 268 } 269 } 270 } while (graph && cnt); 271 272 if (!graph) 273 break; 274 275 if (!cycle_buf) { 276 /* 277 * Allocate space for two cycle logs - one to be used 278 * as scratch space, the other to save the longest 279 * cycle. 280 */ 281 for (cnt = 0, n = graph; n; n = n->n_next) 282 ++cnt; 283 cycle_buf = malloc((u_int)sizeof(NODE *) * cnt); 284 longest_cycle = malloc((u_int)sizeof(NODE *) * cnt); 285 if (cycle_buf == NULL || longest_cycle == NULL) 286 err("%s", strerror(errno)); 287 } 288 for (n = graph; n; n = n->n_next) 289 if (!(n->n_flags & NF_ACYCLIC)) { 290 if (cnt = find_cycle(n, n, 0, 0)) { 291 register int i; 292 293 (void)fprintf(stderr, 294 "tsort: cycle in data\n"); 295 for (i = 0; i < cnt; i++) 296 (void)fprintf(stderr, 297 "tsort: %s\n", longest_cycle[i]->n_name); 298 remove_node(n); 299 break; 300 } else 301 /* to avoid further checks */ 302 n->n_flags = NF_ACYCLIC; 303 } 304 305 if (!n) 306 err("internal error -- could not find cycle"); 307 } 308 } 309 310 /* print node and remove from graph (does not actually free node) */ 311 void 312 remove_node(n) 313 register NODE *n; 314 { 315 register NODE **np; 316 register int i; 317 318 (void)printf("%s\n", n->n_name); 319 for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) 320 --(*np)->n_refcnt; 321 n->n_narcs = 0; 322 *n->n_prevp = n->n_next; 323 if (n->n_next) 324 n->n_next->n_prevp = n->n_prevp; 325 } 326 327 /* look for the longest cycle from node from to node to. */ 328 int 329 find_cycle(from, to, longest_len, depth) 330 NODE *from, *to; 331 int depth, longest_len; 332 { 333 register NODE **np; 334 register int i, len; 335 336 /* 337 * avoid infinite loops and ignore portions of the graph known 338 * to be acyclic 339 */ 340 if (from->n_flags & (NF_MARK|NF_ACYCLIC)) 341 return (0); 342 from->n_flags = NF_MARK; 343 344 for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { 345 cycle_buf[depth] = *np; 346 if (*np == to) { 347 if (depth + 1 > longest_len) { 348 longest_len = depth + 1; 349 (void)memcpy((char *)longest_cycle, 350 (char *)cycle_buf, 351 longest_len * sizeof(NODE *)); 352 } 353 } else { 354 len = find_cycle(*np, to, longest_len, depth + 1); 355 if (len > longest_len) 356 longest_len = len; 357 } 358 } 359 from->n_flags &= ~NF_MARK; 360 return (longest_len); 361 } 362 363 void 364 usage() 365 { 366 (void)fprintf(stderr, "usage: tsort [file]\n"); 367 exit(1); 368 } 369 370 #if __STDC__ 371 #include <stdarg.h> 372 #else 373 #include <varargs.h> 374 #endif 375 376 void 377 #if __STDC__ 378 err(const char *fmt, ...) 379 #else 380 err(fmt, va_alist) 381 char *fmt; 382 va_dcl 383 #endif 384 { 385 va_list ap; 386 #if __STDC__ 387 va_start(ap, fmt); 388 #else 389 va_start(ap); 390 #endif 391 (void)fprintf(stderr, "tsort: "); 392 (void)vfprintf(stderr, fmt, ap); 393 va_end(ap); 394 (void)fprintf(stderr, "\n"); 395 exit(1); 396 /* NOTREACHED */ 397 } 398