1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of Memorial University of Newfoundland. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 char copyright[] = 13 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ 14 All rights reserved.\n"; 15 #endif /* not lint */ 16 17 #ifndef lint 18 static char sccsid[] = "@(#)tsort.c 5.5 (Berkeley) 02/03/92"; 19 #endif /* not lint */ 20 21 #include <sys/types.h> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <db.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <ctype.h> 28 #include <string.h> 29 30 /* 31 * Topological sort. Input is a list of pairs of strings seperated by 32 * white space (spaces, tabs, and/or newlines); strings are written to 33 * standard output in sorted order, one per line. 34 * 35 * usage: 36 * tsort [inputfile] 37 * If no input file is specified, standard input is read. 38 * 39 * Should be compatable with AT&T tsort HOWEVER the output is not identical 40 * (i.e. for most graphs there is more than one sorted order, and this tsort 41 * usually generates a different one then the AT&T tsort). Also, cycle 42 * reporting seems to be more accurate in this version (the AT&T tsort 43 * sometimes says a node is in a cycle when it isn't). 44 * 45 * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 46 */ 47 #define HASHSIZE 53 /* doesn't need to be big */ 48 #define NF_MARK 0x1 /* marker for cycle detection */ 49 #define NF_ACYCLIC 0x2 /* this node is cycle free */ 50 51 typedef struct node_str NODE; 52 53 struct node_str { 54 NODE **n_prevp; /* pointer to previous node's n_next */ 55 NODE *n_next; /* next node in graph */ 56 NODE **n_arcs; /* array of arcs to other nodes */ 57 int n_narcs; /* number of arcs in n_arcs[] */ 58 int n_arcsize; /* size of n_arcs[] array */ 59 int n_refcnt; /* # of arcs pointing to this node */ 60 int n_flags; /* NF_* */ 61 char n_name[1]; /* name of this node */ 62 }; 63 64 typedef struct _buf { 65 char *b_buf; 66 int b_bsize; 67 } BUF; 68 69 DB *db; 70 NODE *graph; 71 NODE **cycle_buf; 72 NODE **longest_cycle; 73 74 void add_arc __P((char *, char *)); 75 void err __P((const char *, ...)); 76 int find_cycle __P((NODE *, NODE *, int, int)); 77 NODE *get_node __P((char *)); 78 void *grow_buf __P((void *, int)); 79 void remove_node __P((NODE *)); 80 void tsort __P((void)); 81 void usage __P((void)); 82 83 int 84 main(argc, argv) 85 int argc; 86 char *argv[]; 87 { 88 register BUF *b; 89 register int c, n; 90 FILE *fp; 91 int bsize, ch, nused; 92 BUF bufs[2]; 93 94 while ((ch = getopt(argc, argv, "")) != EOF) 95 switch(ch) { 96 case '?': 97 default: 98 usage(); 99 } 100 argc -= optind; 101 argv += optind; 102 103 switch(argc) { 104 case 0: 105 fp = stdin; 106 break; 107 case 1: 108 if ((fp = fopen(*argv, "r")) == NULL) 109 err("%s: %s", *argv, strerror(errno)); 110 break; 111 default: 112 usage(); 113 } 114 115 for (b = bufs, n = 2; --n >= 0; b++) 116 b->b_buf = grow_buf(NULL, b->b_bsize = 1024); 117 118 /* parse input and build the graph */ 119 for (n = 0, c = getc(fp);;) { 120 while (c != EOF && isspace(c)) 121 c = getc(fp); 122 if (c == EOF) 123 break; 124 125 nused = 0; 126 b = &bufs[n]; 127 bsize = b->b_bsize; 128 do { 129 b->b_buf[nused++] = c; 130 if (nused == bsize) 131 b->b_buf = grow_buf(b->b_buf, bsize *= 2); 132 c = getc(fp); 133 } while (c != EOF && !isspace(c)); 134 135 b->b_buf[nused] = '\0'; 136 b->b_bsize = bsize; 137 if (n) 138 add_arc(bufs[0].b_buf, bufs[1].b_buf); 139 n = !n; 140 } 141 (void)fclose(fp); 142 if (n) 143 err("odd data count"); 144 145 /* do the sort */ 146 tsort(); 147 exit(0); 148 } 149 150 /* double the size of oldbuf and return a pointer to the new buffer. */ 151 void * 152 grow_buf(bp, size) 153 void *bp; 154 int size; 155 { 156 if ((bp = realloc(bp, (u_int)size)) == NULL) 157 err("%s", strerror(errno)); 158 return (bp); 159 } 160 161 /* 162 * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 163 * the graph, then add them. 164 */ 165 void 166 add_arc(s1, s2) 167 char *s1, *s2; 168 { 169 register NODE *n1; 170 NODE *n2; 171 int bsize; 172 173 n1 = get_node(s1); 174 175 if (!strcmp(s1, s2)) 176 return; 177 178 n2 = get_node(s2); 179 180 /* 181 * could check to see if this arc is here already, but it isn't 182 * worth the bother -- there usually isn't and it doesn't hurt if 183 * there is (I think :-). 184 */ 185 if (n1->n_narcs == n1->n_arcsize) { 186 if (!n1->n_arcsize) 187 n1->n_arcsize = 10; 188 bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 189 n1->n_arcs = grow_buf(n1->n_arcs, bsize); 190 n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 191 } 192 n1->n_arcs[n1->n_narcs++] = n2; 193 ++n2->n_refcnt; 194 } 195 196 /* Find a node in the graph (insert if not found) and return a pointer to it. */ 197 NODE * 198 get_node(name) 199 char *name; 200 { 201 DBT data, key; 202 NODE *n; 203 204 if (db == NULL && 205 (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) 206 err("db: open: %s", name, strerror(errno)); 207 208 key.data = name; 209 key.size = strlen(name) + 1; 210 211 switch((*db->get)(db, &key, &data, 0)) { 212 case 0: 213 return (*(NODE **)data.data); 214 case 1: 215 break; 216 default: 217 case -1: 218 err("db: get %s: %s", name, strerror(errno)); 219 } 220 221 if ((n = malloc(sizeof(NODE) + key.size)) == NULL) 222 err("%s", strerror(errno)); 223 224 n->n_narcs = 0; 225 n->n_arcsize = 0; 226 n->n_arcs = NULL; 227 n->n_refcnt = 0; 228 n->n_flags = 0; 229 bcopy(name, n->n_name, key.size); 230 231 /* Add to linked list. */ 232 if (n->n_next = graph) 233 graph->n_prevp = &n->n_next; 234 n->n_prevp = &graph; 235 graph = n; 236 237 /* Add to hash table. */ 238 data.data = &n; 239 data.size = sizeof(n); 240 if ((*db->put)(db, &key, &data, 0)) 241 err("db: put %s: %s", name, strerror(errno)); 242 return (n); 243 } 244 245 /* do topological sort on graph */ 246 void 247 tsort() 248 { 249 register NODE *n, *next; 250 register int cnt; 251 252 while (graph) { 253 /* 254 * keep getting rid of simple cases until there are none left, 255 * if there are any nodes still in the graph, then there is 256 * a cycle in it. 257 */ 258 do { 259 for (cnt = 0, n = graph; n; n = next) { 260 next = n->n_next; 261 if (n->n_refcnt == 0) { 262 remove_node(n); 263 ++cnt; 264 } 265 } 266 } while (graph && cnt); 267 268 if (!graph) 269 break; 270 271 if (!cycle_buf) { 272 /* 273 * allocate space for two cycle logs - one to be used 274 * as scratch space, the other to save the longest 275 * cycle. 276 */ 277 for (cnt = 0, n = graph; n; n = n->n_next) 278 ++cnt; 279 cycle_buf = 280 (NODE **)malloc((u_int)sizeof(NODE *) * cnt); 281 longest_cycle = 282 (NODE **)malloc((u_int)sizeof(NODE *) * cnt); 283 if (!cycle_buf || !longest_cycle) 284 err("%s", strerror(errno)); 285 } 286 for (n = graph; n; n = n->n_next) 287 if (!(n->n_flags & NF_ACYCLIC)) { 288 if (cnt = find_cycle(n, n, 0, 0)) { 289 register int i; 290 291 (void)fprintf(stderr, 292 "tsort: cycle in data\n"); 293 for (i = 0; i < cnt; i++) 294 (void)fprintf(stderr, 295 "tsort: %s\n", longest_cycle[i]->n_name); 296 remove_node(n); 297 break; 298 } else 299 /* to avoid further checks */ 300 n->n_flags = NF_ACYCLIC; 301 } 302 303 if (!n) 304 err("internal error -- could not find cycle"); 305 } 306 } 307 308 /* print node and remove from graph (does not actually free node) */ 309 void 310 remove_node(n) 311 register NODE *n; 312 { 313 register NODE **np; 314 register int i; 315 316 (void)printf("%s\n", n->n_name); 317 for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) 318 --(*np)->n_refcnt; 319 n->n_narcs = 0; 320 *n->n_prevp = n->n_next; 321 if (n->n_next) 322 n->n_next->n_prevp = n->n_prevp; 323 } 324 325 /* look for the longest cycle from node from to node to. */ 326 int 327 find_cycle(from, to, longest_len, depth) 328 NODE *from, *to; 329 int depth, longest_len; 330 { 331 register NODE **np; 332 register int i, len; 333 334 /* 335 * avoid infinite loops and ignore portions of the graph known 336 * to be acyclic 337 */ 338 if (from->n_flags & (NF_MARK|NF_ACYCLIC)) 339 return (0); 340 from->n_flags = NF_MARK; 341 342 for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { 343 cycle_buf[depth] = *np; 344 if (*np == to) { 345 if (depth + 1 > longest_len) { 346 longest_len = depth + 1; 347 (void)memcpy((char *)longest_cycle, 348 (char *)cycle_buf, 349 longest_len * sizeof(NODE *)); 350 } 351 } else { 352 len = find_cycle(*np, to, longest_len, depth + 1); 353 if (len > longest_len) 354 longest_len = len; 355 } 356 } 357 from->n_flags &= ~NF_MARK; 358 return (longest_len); 359 } 360 361 void 362 usage() 363 { 364 (void)fprintf(stderr, "usage: tsort [file]\n"); 365 exit(1); 366 } 367 368 #if __STDC__ 369 #include <stdarg.h> 370 #else 371 #include <varargs.h> 372 #endif 373 374 void 375 #if __STDC__ 376 err(const char *fmt, ...) 377 #else 378 err(fmt, va_alist) 379 char *fmt; 380 va_dcl 381 #endif 382 { 383 va_list ap; 384 #if __STDC__ 385 va_start(ap, fmt); 386 #else 387 va_start(ap); 388 #endif 389 (void)fprintf(stderr, "tsort: "); 390 (void)vfprintf(stderr, fmt, ap); 391 va_end(ap); 392 (void)fprintf(stderr, "\n"); 393 exit(1); 394 /* NOTREACHED */ 395 } 396