1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of Memorial University of Newfoundland. 7 * 8 * Redistribution and use in source and binary forms are permitted 9 * provided that the above copyright notice and this paragraph are 10 * duplicated in all such forms and that any documentation, 11 * advertising materials, and other materials related to such 12 * distribution and use acknowledge that the software was developed 13 * by the University of California, Berkeley. The name of the 14 * University may not be used to endorse or promote products derived 15 * from this software without specific prior written permission. 16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 19 */ 20 21 #ifndef lint 22 char copyright[] = 23 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ 24 All rights reserved.\n"; 25 #endif /* not lint */ 26 27 #ifndef lint 28 static char sccsid[] = "@(#)tsort.c 5.1 (Berkeley) 03/06/90"; 29 #endif /* not lint */ 30 31 #include <sys/types.h> 32 #include <errno.h> 33 #include <stdio.h> 34 #include <ctype.h> 35 #include <strings.h> 36 37 /* 38 * Topological sort. Input is a list of pairs of strings seperated by 39 * white space (spaces, tabs, and/or newlines); strings are written to 40 * standard output in sorted order, one per line. 41 * 42 * usage: 43 * tsort [inputfile] 44 * If no input file is specified, standard input is read. 45 * 46 * Should be compatable with AT&T tsort HOWEVER the output is not identical 47 * (i.e. for most graphs there is more than one sorted order, and this tsort 48 * usually generates a different one then the AT&T tsort). Also, cycle 49 * reporting seems to be more accurate in this version (the AT&T tsort 50 * sometimes says a node is in a cycle when it isn't). 51 * 52 * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 53 */ 54 #define HASHSIZE 53 /* doesn't need to be big */ 55 #define NF_MARK 0x1 /* marker for cycle detection */ 56 #define NF_ACYCLIC 0x2 /* this node is cycle free */ 57 58 typedef struct node_str NODE; 59 60 struct node_str { 61 char *n_name; /* name of this node */ 62 NODE **n_prevp; /* pointer to previous node's n_next */ 63 NODE *n_next; /* next node in graph */ 64 NODE *n_hash; /* next node in hash table */ 65 int n_narcs; /* number of arcs in n_arcs[] */ 66 int n_arcsize; /* size of n_arcs[] array */ 67 NODE **n_arcs; /* array of arcs to other nodes */ 68 int n_refcnt; /* # of arcs pointing to this node */ 69 int n_flags; /* NF_* */ 70 }; 71 72 typedef struct _buf { 73 char *b_buf; 74 int b_bsize; 75 } BUF; 76 77 NODE *add_node(), *find_node(); 78 void add_arc(), no_memory(), remove_node(), tsort(); 79 char *grow_buf(), *malloc(); 80 81 extern int errno; 82 NODE *graph; 83 NODE *hashtable[HASHSIZE]; 84 NODE **cycle_buf; 85 NODE **longest_cycle; 86 87 main(argc, argv) 88 int argc; 89 char **argv; 90 { 91 register BUF *b; 92 register int c, n; 93 FILE *fp; 94 int bsize, nused; 95 BUF bufs[2]; 96 97 if (argc < 2) 98 fp = stdin; 99 else if (argc == 2) { 100 (void)fprintf(stderr, "usage: tsort [ inputfile ]\n"); 101 exit(1); 102 } else if (!(fp = fopen(argv[1], "r"))) { 103 (void)fprintf(stderr, "tsort: %s.\n", strerror(errno)); 104 exit(1); 105 } 106 107 for (b = bufs, n = 2; --n >= 0; b++) 108 b->b_buf = grow_buf((char *)NULL, b->b_bsize = 1024); 109 110 /* parse input and build the graph */ 111 for (n = 0, c = getc(fp);;) { 112 while (c != EOF && isspace(c)) 113 c = getc(fp); 114 if (c == EOF) 115 break; 116 117 nused = 0; 118 b = &bufs[n]; 119 bsize = b->b_bsize; 120 do { 121 b->b_buf[nused++] = c; 122 if (nused == bsize) { 123 bsize *= 2; 124 b->b_buf = grow_buf(b->b_buf, bsize); 125 } 126 c = getc(fp); 127 } while (c != EOF && !isspace(c)); 128 129 b->b_buf[nused] = '\0'; 130 b->b_bsize = bsize; 131 if (n) 132 add_arc(bufs[0].b_buf, bufs[1].b_buf); 133 n = !n; 134 } 135 (void)fclose(fp); 136 if (n) { 137 (void)fprintf(stderr, "tsort: odd data count.\n"); 138 exit(1); 139 } 140 141 /* do the sort */ 142 tsort(); 143 exit(0); 144 } 145 146 /* double the size of oldbuf and return a pointer to the new buffer. */ 147 char * 148 grow_buf(bp, size) 149 char *bp; 150 int size; 151 { 152 char *realloc(); 153 154 if (!(bp = realloc(bp, (u_int)size))) 155 no_memory(); 156 return(bp); 157 } 158 159 /* 160 * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 161 * the graph, then add them. 162 */ 163 void 164 add_arc(s1, s2) 165 char *s1, *s2; 166 { 167 register NODE *n1; 168 NODE *n2; 169 int bsize; 170 171 n1 = find_node(s1); 172 if (!n1) 173 n1 = add_node(s1); 174 175 if (!strcmp(s1, s2)) 176 return; 177 178 n2 = find_node(s2); 179 if (!n2) 180 n2 = add_node(s2); 181 182 /* 183 * could check to see if this arc is here already, but it isn't 184 * worth the bother -- there usually isn't and it doesn't hurt if 185 * there is (I think :-). 186 */ 187 if (n1->n_narcs == n1->n_arcsize) { 188 if (!n1->n_arcsize) 189 n1->n_arcsize = 10; 190 bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 191 n1->n_arcs = (NODE **)grow_buf((char *)n1->n_arcs, bsize); 192 n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 193 } 194 n1->n_arcs[n1->n_narcs++] = n2; 195 ++n2->n_refcnt; 196 } 197 198 hash_string(s) 199 char *s; 200 { 201 register int hash, i; 202 203 for (hash = 0, i = 1; *s; s++, i++) 204 hash += *s * i; 205 return(hash % HASHSIZE); 206 } 207 208 /* 209 * find a node in the graph and return a pointer to it - returns null if not 210 * found. 211 */ 212 NODE * 213 find_node(name) 214 char *name; 215 { 216 register NODE *n; 217 218 for (n = hashtable[hash_string(name)]; n; n = n->n_hash) 219 if (!strcmp(n->n_name, name)) 220 return(n); 221 return((NODE *)NULL); 222 } 223 224 /* Add a node to the graph and return a pointer to it. */ 225 NODE * 226 add_node(name) 227 char *name; 228 { 229 register NODE *n; 230 int hash; 231 232 if (!(n = (NODE *)malloc(sizeof(NODE))) || !(n->n_name = strdup(name))) 233 no_memory(); 234 235 n->n_narcs = 0; 236 n->n_arcsize = 0; 237 n->n_arcs = (NODE **)NULL; 238 n->n_refcnt = 0; 239 n->n_flags = 0; 240 241 /* add to linked list */ 242 if (n->n_next = graph) 243 graph->n_prevp = &n->n_next; 244 n->n_prevp = &graph; 245 graph = n; 246 247 /* add to hash table */ 248 hash = hash_string(name); 249 n->n_hash = hashtable[hash]; 250 hashtable[hash] = n; 251 return(n); 252 } 253 254 /* do topological sort on graph */ 255 void 256 tsort() 257 { 258 register NODE *n, *next; 259 register int cnt; 260 261 while (graph) { 262 /* 263 * keep getting rid of simple cases until there are none left, 264 * if there are any nodes still in the graph, then there is 265 * a cycle in it. 266 */ 267 do { 268 for (cnt = 0, n = graph; n; n = next) { 269 next = n->n_next; 270 if (n->n_refcnt == 0) { 271 remove_node(n); 272 ++cnt; 273 } 274 } 275 } while (graph && cnt); 276 277 if (!graph) 278 break; 279 280 if (!cycle_buf) { 281 /* 282 * allocate space for two cycle logs - one to be used 283 * as scratch space, the other to save the longest 284 * cycle. 285 */ 286 for (cnt = 0, n = graph; n; n = n->n_next) 287 ++cnt; 288 cycle_buf = 289 (NODE **)malloc((u_int)sizeof(NODE *) * cnt); 290 longest_cycle = 291 (NODE **)malloc((u_int)sizeof(NODE *) * cnt); 292 if (!cycle_buf || !longest_cycle) 293 no_memory(); 294 } 295 for (n = graph; n; n = n->n_next) 296 if (!(n->n_flags & NF_ACYCLIC)) { 297 if (cnt = find_cycle(n, n, 0, 0)) { 298 register int i; 299 300 (void)fprintf(stderr, 301 "tsort: cycle in data.\n"); 302 for (i = 0; i < cnt; i++) 303 (void)fprintf(stderr, 304 "tsort: %s.\n", longest_cycle[i]->n_name); 305 remove_node(n); 306 break; 307 } else 308 /* to avoid further checks */ 309 n->n_flags = NF_ACYCLIC; 310 } 311 312 if (!n) { 313 (void)fprintf(stderr, 314 "tsort: internal error -- could not find cycle.\n"); 315 exit(1); 316 } 317 } 318 } 319 320 /* print node and remove from graph (does not actually free node) */ 321 void 322 remove_node(n) 323 register NODE *n; 324 { 325 register NODE **np; 326 register int i; 327 328 (void)printf("%s\n", n->n_name); 329 for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) 330 --(*np)->n_refcnt; 331 n->n_narcs = 0; 332 *n->n_prevp = n->n_next; 333 if (n->n_next) 334 n->n_next->n_prevp = n->n_prevp; 335 } 336 337 /* look for the longest cycle from node from to node to. */ 338 find_cycle(from, to, longest_len, depth) 339 NODE *from, *to; 340 int depth, longest_len; 341 { 342 register NODE **np; 343 register int i, len; 344 345 /* 346 * avoid infinite loops and ignore portions of the graph known 347 * to be acyclic 348 */ 349 if (from->n_flags & (NF_MARK|NF_ACYCLIC)) 350 return(0); 351 from->n_flags = NF_MARK; 352 353 for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { 354 cycle_buf[depth] = *np; 355 if (*np == to) { 356 if (depth + 1 > longest_len) { 357 longest_len = depth + 1; 358 (void)memcpy((char *)longest_cycle, 359 (char *)cycle_buf, 360 longest_len * sizeof(NODE *)); 361 } 362 } else { 363 len = find_cycle(*np, to, longest_len, depth + 1); 364 if (len > longest_len) 365 longest_len = len; 366 } 367 } 368 from->n_flags &= ~NF_MARK; 369 return(longest_len); 370 } 371 372 void 373 no_memory() 374 { 375 (void)fprintf(stderr, "tsort: %s.\n", strerror(ENOMEM)); 376 exit(1); 377 } 378