xref: /openbsd/usr.bin/tsort/tsort.c (revision 09467b48)
1 /* $OpenBSD: tsort.c,v 1.37 2019/07/11 17:28:32 mestre Exp $ */
2 /* ex:ts=8 sw=4:
3  *
4  * Copyright (c) 1999-2004 Marc Espie <espie@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <limits.h>
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 #include <ohash.h>
30 
31 /* The complexity of topological sorting is O(e), where e is the
32  * size of input.  While reading input, vertices have to be identified,
33  * thus add the complexity of e keys retrieval among v keys using
34  * an appropriate data structure.  This program uses open double hashing
35  * for that purpose.  See Knuth for the expected complexity of double
36  * hashing (Brent variation should probably be used if v << e, as a user
37  * option).
38  *
39  * The algorithm used for longest cycle reporting is accurate, but somewhat
40  * expensive.  It may need to build all free paths of the graph (a free
41  * path is a path that never goes twice through the same node), whose
42  * number can be as high as O(2^e).  Usually, the number of free paths is
43  * much smaller though.  This program's author does not believe that a
44  * significantly better worst-case complexity algorithm exists.
45  *
46  * In case of a hints file, the set of minimal nodes is maintained as a
47  * heap.  The resulting complexity is O(e+v log v) for the worst case.
48  * The average should actually be near O(e).
49  *
50  * If the hints file is incomplete, there is some extra complexity incurred
51  * by make_transparent, which does propagate order values to unmarked
52  * nodes. In the worst case, make_transparent is  O(e u),
53  * where u is the number of originally unmarked nodes.
54  * In practice, it is much faster.
55  *
56  * The simple topological sort algorithm detects cycles.  This program
57  * goes further, breaking cycles through the use of simple heuristics.
58  * Each cycle break checks the whole set of nodes, hence if c cycles break
59  * are needed, this is an extra cost of O(c v).
60  *
61  * Possible heuristics are as follows:
62  * - break cycle at node with lowest number of predecessors (default case),
63  * - break longest cycle at node with lowest number of predecessors,
64  * - break cycle at next node from the hints file.
65  *
66  * Except for the hints file case, which sets an explicit constraint on
67  * which cycle to break, those heuristics locally result in the smallest
68  * number of broken edges.
69  *
70  * Those are admittedly greedy strategies, as is the selection of the next
71  * node from the hints file amongst equivalent candidates that is used for
72  * `stable' topological sorting.
73  */
74 
75 #ifdef __GNUC__
76 #define UNUSED	__attribute__((unused))
77 #else
78 #define UNUSED
79 #endif
80 
81 struct node;
82 
83 /* The set of arcs from a given node is stored as a linked list.  */
84 struct link {
85 	struct link *next;
86 	struct node *node;
87 };
88 
89 #define NO_ORDER	UINT_MAX
90 
91 struct node {
92 	unsigned int refs;	/* Number of arcs left, coming into this node.
93 				 * Note that nodes with a null count can't
94 				 * be part of cycles.  */
95 	unsigned int order; 	/* Order of nodes according to a hint file.  */
96 
97 	struct link  *arcs;	/* List of forward arcs.  */
98 
99 	/* Cycle detection algorithms build a free path of nodes.  */
100 	struct node  *from; 	/* Previous node in the current path.  */
101 	struct link  *traverse;	/* Next link to traverse when backtracking.  */
102 	unsigned int mark;	/* Mark processed nodes in cycle discovery.  */
103 
104 	char         k[1];	/* Name of this node.  */
105 };
106 
107 #define HASH_START 9
108 
109 struct array {
110 	unsigned int entries;
111 	struct node  **t;
112 };
113 
114 static void nodes_init(struct ohash *);
115 static struct node *node_lookup(struct ohash *, const char *, const char *);
116 static __dead void usage(void);
117 static struct node *new_node(const char *, const char *);
118 
119 static unsigned int read_pairs(FILE *, struct ohash *, int,
120     const char *, unsigned int, int);
121 static void split_nodes(struct ohash *, struct array *, struct array *);
122 static void make_transparent(struct ohash *);
123 static void insert_arc(struct node *, struct node *);
124 
125 #ifdef DEBUG
126 static void dump_node(struct node *);
127 static void dump_array(struct array *);
128 static void dump_hash(struct ohash *);
129 #endif
130 static unsigned int read_hints(FILE *, struct ohash *, int,
131     const char *, unsigned int);
132 static struct node *find_smallest_node(struct array *);
133 static struct node *find_good_cycle_break(struct array *);
134 static void print_cycle(struct array *);
135 static struct node *find_cycle_from(struct node *, struct array *);
136 static struct node *find_predecessor(struct array *, struct node *);
137 static unsigned int traverse_node(struct node *, unsigned int, struct array *);
138 static struct node *find_longest_cycle(struct array *, struct array *);
139 static struct node *find_normal_cycle(struct array *, struct array *);
140 
141 static void heap_down(struct array *, unsigned int);
142 static void heapify(struct array *, int);
143 static struct node *dequeue(struct array *);
144 static void enqueue(struct array *, struct node *);
145 
146 
147 
148 static void *hash_calloc(size_t, size_t, void *);
149 static void hash_free(void *, void *);
150 static void* entry_alloc(size_t, void *);
151 static void *ereallocarray(void *, size_t, size_t);
152 static void *emem(void *);
153 #define DEBUG_TRAVERSE 0
154 static struct ohash_info node_info = {
155 	offsetof(struct node, k), NULL, hash_calloc, hash_free, entry_alloc };
156 static void parse_args(int, char *[], struct ohash *);
157 static int tsort(struct ohash *);
158 
159 static int 		quiet_flag, long_flag,
160 			warn_flag, hints_flag, verbose_flag;
161 
162 
163 int main(int, char *[]);
164 
165 /***
166  *** Memory handling.
167  ***/
168 
169 static void *
170 emem(void *p)
171 {
172 	if (p)
173 		return p;
174 	else
175 		errx(1, "Memory exhausted");
176 }
177 
178 static void *
179 hash_calloc(size_t n, size_t s, void *u UNUSED)
180 {
181 	return emem(calloc(n, s));
182 }
183 
184 static void
185 hash_free(void *p, void *u UNUSED)
186 {
187 	free(p);
188 }
189 
190 static void *
191 entry_alloc(size_t s, void *u UNUSED)
192 {
193 	return ereallocarray(NULL, 1, s);
194 }
195 
196 static void *
197 ereallocarray(void *p, size_t n, size_t s)
198 {
199 	return emem(reallocarray(p, n, s));
200 }
201 
202 
203 /***
204  *** Hash table.
205  ***/
206 
207 /* Inserting and finding nodes in the hash structure.
208  * We handle interval strings for efficiency wrt fgetln.  */
209 static struct node *
210 new_node(const char *start, const char *end)
211 {
212 	struct node 	*n;
213 
214 	n = ohash_create_entry(&node_info, start, &end);
215 	n->from = NULL;
216 	n->arcs = NULL;
217 	n->refs = 0;
218 	n->mark = 0;
219 	n->order = NO_ORDER;
220 	n->traverse = NULL;
221 	return n;
222 }
223 
224 
225 static void
226 nodes_init(struct ohash *h)
227 {
228 	ohash_init(h, HASH_START, &node_info);
229 }
230 
231 static struct node *
232 node_lookup(struct ohash *h, const char *start, const char *end)
233 {
234 	unsigned int	i;
235 	struct node *	n;
236 
237 	i = ohash_qlookupi(h, start, &end);
238 
239 	n = ohash_find(h, i);
240 	if (n == NULL)
241 		n = ohash_insert(h, i, new_node(start, end));
242 	return n;
243 }
244 
245 #ifdef DEBUG
246 static void
247 dump_node(struct node *n)
248 {
249 	struct link 	*l;
250 
251 	if (n->refs == 0)
252 		return;
253 	printf("%s (%u/%u): ", n->k, n->order, n->refs);
254 	for (l = n->arcs; l != NULL; l = l->next)
255 		if (n->refs != 0)
256 		printf("%s(%u/%u) ", l->node->k, l->node->order, l->node->refs);
257     	putchar('\n');
258 }
259 
260 static void
261 dump_array(struct array *a)
262 {
263 	unsigned int 	i;
264 
265 	for (i = 0; i < a->entries; i++)
266 		dump_node(a->t[i]);
267 }
268 
269 static void
270 dump_hash(struct ohash *h)
271 {
272 	unsigned int 	i;
273 	struct node 	*n;
274 
275 	for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i))
276 		dump_node(n);
277 }
278 #endif
279 
280 /***
281  *** Reading data.
282  ***/
283 
284 static void
285 insert_arc(struct node *a, struct node *b)
286 {
287 	struct link 	*l;
288 
289 	/* Check that this arc is not already present.  */
290 	for (l = a->arcs; l != NULL; l = l->next) {
291 		if (l->node == b)
292 			return;
293 	}
294 	b->refs++;
295 	l = ereallocarray(NULL, 1, sizeof(struct link));
296 	l->node = b;
297 	l->next = a->arcs;
298 	a->arcs = l;
299 }
300 
301 static unsigned int
302 read_pairs(FILE *f, struct ohash *h, int reverse, const char *name,
303     unsigned int order, int hint)
304 {
305 	int 		toggle;
306 	struct node 	*a;
307 	size_t 		size;
308 	char 		*str;
309 
310 	toggle = 1;
311 	a = NULL;
312 
313 	while ((str = fgetln(f, &size)) != NULL) {
314 		char *sentinel;
315 
316 		sentinel = str + size;
317 		for (;;) {
318 			char *e;
319 
320 			while (str < sentinel &&
321 			    isspace((unsigned char)*str))
322 				str++;
323 			if (str == sentinel)
324 				break;
325 			for (e = str;
326 			    e < sentinel && !isspace((unsigned char)*e); e++)
327 				continue;
328 			if (toggle) {
329 				a = node_lookup(h, str, e);
330 				if (a->order == NO_ORDER && hint)
331 					a->order = order++;
332 			} else {
333 				struct node *b;
334 
335 				b = node_lookup(h, str, e);
336 				assert(a != NULL);
337 				if (b != a) {
338 					if (reverse)
339 						insert_arc(b, a);
340 					else
341 						insert_arc(a, b);
342 				}
343 			}
344 			toggle = !toggle;
345 			str = e;
346 		}
347 	}
348 	if (toggle == 0)
349 		errx(1, "odd number of node names in %s", name);
350     	if (!feof(f))
351 		err(1, "error reading %s", name);
352 	return order;
353 }
354 
355 static unsigned int
356 read_hints(FILE *f, struct ohash *h, int quiet, const char *name,
357     unsigned int order)
358 {
359 	char 		*str;
360 	size_t 		size;
361 
362 	while ((str = fgetln(f, &size)) != NULL) {
363 		char *sentinel;
364 
365 		sentinel = str + size;
366 		for (;;) {
367 			char *e;
368 			struct node *a;
369 
370 			while (str < sentinel && isspace((unsigned char)*str))
371 				str++;
372 			if (str == sentinel)
373 				break;
374 			for (e = str;
375 			    e < sentinel && !isspace((unsigned char)*e); e++)
376 				continue;
377 			a = node_lookup(h, str, e);
378 			if (a->order != NO_ORDER) {
379 				if (!quiet)
380 				    warnx(
381 					"duplicate node %s in hints file %s",
382 					a->k, name);
383 			} else
384 				a->order = order++;
385 			str = e;
386 		}
387 	}
388     	if (!feof(f))
389 		err(1, "error reading %s", name);
390 	return order;
391 }
392 
393 
394 /***
395  *** Standard heap handling routines.
396  ***/
397 
398 static void
399 heap_down(struct array *h, unsigned int i)
400 {
401 	unsigned int 	j;
402 	struct node 	*swap;
403 
404 	for (; (j=2*i+1) < h->entries; i = j) {
405 		if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order)
406 		    	j++;
407 		if (h->t[i]->order <= h->t[j]->order)
408 			break;
409 		swap = h->t[i];
410 		h->t[i] = h->t[j];
411 		h->t[j] = swap;
412 	}
413 }
414 
415 static void
416 heapify(struct array *h, int verbose)
417 {
418 	unsigned int 	i;
419 
420 	for (i = h->entries; i != 0;) {
421 		if (h->t[--i]->order == NO_ORDER && verbose)
422 			warnx("node %s absent from hints file", h->t[i]->k);
423 		heap_down(h, i);
424 	}
425 }
426 
427 #define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] )
428 
429 static struct node *
430 dequeue(struct array *h)
431 {
432 	struct node 	*n;
433 
434 	if (h->entries == 0)
435 		n = NULL;
436 	else {
437 		n = h->t[0];
438 		if (--h->entries != 0) {
439 		    h->t[0] = h->t[h->entries];
440 		    heap_down(h, 0);
441 		}
442 	}
443 	return n;
444 }
445 
446 #define ENQUEUE(h, n) do {			\
447 	if (hints_flag)				\
448 		enqueue((h), (n));		\
449 	else					\
450 		(h)->t[(h)->entries++] = (n);	\
451 	} while(0);
452 
453 static void
454 enqueue(struct array *h, struct node *n)
455 {
456 	unsigned int 	i, j;
457 	struct node 	*swap;
458 
459 	h->t[h->entries++] = n;
460 	for (i = h->entries-1; i > 0; i = j) {
461 		j = (i-1)/2;
462 		if (h->t[j]->order < h->t[i]->order)
463 			break;
464 		swap = h->t[j];
465 		h->t[j] = h->t[i];
466 		h->t[i] = swap;
467 	}
468 }
469 
470 /* Nodes without order should not hinder direct dependencies.
471  * Iterate until no nodes are left.
472  */
473 static void
474 make_transparent(struct ohash *hash)
475 {
476 	struct node 	*n;
477 	unsigned int 	i;
478 	struct link 	*l;
479 	int		adjusted;
480 	int		bad;
481 	unsigned int	min;
482 
483 	/* first try to solve complete nodes */
484 	do {
485 		adjusted = 0;
486 		bad = 0;
487 		for (n = ohash_first(hash, &i); n != NULL;
488 		    n = ohash_next(hash, &i)) {
489 			if (n->order == NO_ORDER) {
490 				min = NO_ORDER;
491 
492 				for (l = n->arcs; l != NULL; l = l->next) {
493 					/* unsolved node -> delay resolution */
494 					if (l->node->order == NO_ORDER) {
495 						bad = 1;
496 						break;
497 					} else if (l->node->order < min)
498 						min = l->node->order;
499 				}
500 				if (min < NO_ORDER && l == NULL) {
501 					n->order = min;
502 					adjusted = 1;
503 				}
504 			}
505 		}
506 
507 	} while (adjusted);
508 
509 	/* then, if incomplete nodes are left, do them */
510 	if (bad) do {
511 		adjusted = 0;
512 		for (n = ohash_first(hash, &i); n != NULL;
513 		    n = ohash_next(hash, &i))
514 			if (n->order == NO_ORDER)
515 				for (l = n->arcs; l != NULL; l = l->next)
516 					if (l->node->order < n->order) {
517 						n->order = l->node->order;
518 						adjusted = 1;
519 					}
520 	} while (adjusted);
521 }
522 
523 
524 /***
525  *** Search through hash array for nodes.
526  ***/
527 
528 /* Split nodes into unrefed nodes/live nodes.  */
529 static void
530 split_nodes(struct ohash *hash, struct array *heap, struct array *remaining)
531 {
532 
533 	struct node *n;
534 	unsigned int i;
535 
536 	heap->t = ereallocarray(NULL, ohash_entries(hash),
537 	    sizeof(struct node *));
538 	remaining->t = ereallocarray(NULL, ohash_entries(hash),
539 	    sizeof(struct node *));
540 	heap->entries = 0;
541 	remaining->entries = 0;
542 
543 	for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) {
544 		if (n->refs == 0)
545 			heap->t[heap->entries++] = n;
546 		else
547 			remaining->t[remaining->entries++] = n;
548 	}
549 }
550 
551 /* Good point to break a cycle: live node with as few refs as possible. */
552 static struct node *
553 find_good_cycle_break(struct array *h)
554 {
555 	unsigned int 	i;
556 	unsigned int 	best;
557 	struct node 	*u;
558 
559 	best = UINT_MAX;
560 	u = NULL;
561 
562 	assert(h->entries != 0);
563 	for (i = 0; i < h->entries; i++) {
564 		struct node *n = h->t[i];
565 		/* No need to look further. */
566 		if (n->refs == 1)
567 			return n;
568 		if (n->refs != 0 && n->refs < best) {
569 			best = n->refs;
570 			u = n;
571 		}
572 	}
573 	assert(u != NULL);
574 	return u;
575 }
576 
577 /*  Retrieve the node with the smallest order.  */
578 static struct node *
579 find_smallest_node(struct array *h)
580 {
581 	unsigned int 	i;
582 	unsigned int 	best;
583 	struct node 	*u;
584 
585 	best = UINT_MAX;
586 	u = NULL;
587 
588 	assert(h->entries != 0);
589 	for (i = 0; i < h->entries; i++) {
590 		struct node *n = h->t[i];
591 		if (n->refs != 0 && n->order < best) {
592 			best = n->order;
593 			u = n;
594 		}
595 	}
596 	assert(u != NULL);
597 	return u;
598 }
599 
600 
601 /***
602  *** Graph algorithms.
603  ***/
604 
605 /* Explore the nodes reachable from i to find a cycle, store it in c.
606  * This may fail.  */
607 static struct node *
608 find_cycle_from(struct node *i, struct array *c)
609 {
610 	struct node 	*n;
611 
612 	n = i;
613 	/* XXX Previous cycle findings may have left this pointer non-null.  */
614 	i->from = NULL;
615 
616 	for (;;) {
617 		/* Note that all marks are reversed before this code exits.  */
618 		n->mark = 1;
619 		if (n->traverse)
620 			n->traverse = n->traverse->next;
621 		else
622 			n->traverse = n->arcs;
623 		/* Skip over dead nodes.  */
624 		while (n->traverse && n->traverse->node->refs == 0)
625 			n->traverse = n->traverse->next;
626 		if (n->traverse) {
627 			struct node *go = n->traverse->node;
628 
629 			if (go->mark) {
630 				c->entries = 0;
631 				for (; n != NULL && n != go; n = n->from) {
632 					c->t[c->entries++] = n;
633 					n->mark = 0;
634 				}
635 				for (; n != NULL; n = n->from)
636 					n->mark = 0;
637 				c->t[c->entries++] = go;
638 				return go;
639 			} else {
640 			    go->from = n;
641 			    n = go;
642 			}
643 		} else {
644 			n->mark = 0;
645 			n = n->from;
646 			if (n == NULL)
647 				return NULL;
648 		}
649 	}
650 }
651 
652 /* Find a live predecessor of node n.  This is a slow routine, as it needs
653  * to go through the whole array, but it is not needed often.
654  */
655 static struct node *
656 find_predecessor(struct array *a, struct node *n)
657 {
658 	unsigned int i;
659 
660 	for (i = 0; i < a->entries; i++) {
661 		struct node *m;
662 
663 		m = a->t[i];
664 		if (m->refs != 0) {
665 			struct link *l;
666 
667 			for (l = m->arcs; l != NULL; l = l->next)
668 				if (l->node == n)
669 					return m;
670 		}
671 	}
672 	assert(1 == 0);
673 	return NULL;
674 }
675 
676 /* Traverse all strongly connected components reachable from node n.
677    Start numbering them at o. Return the maximum order reached.
678    Update the largest cycle found so far.
679  */
680 static unsigned int
681 traverse_node(struct node *n, unsigned int o, struct array *c)
682 {
683 	unsigned int 	min, max;
684 
685 	n->from = NULL;
686 	min = o;
687 	max = ++o;
688 
689 	for (;;) {
690 		n->mark = o;
691 		if (DEBUG_TRAVERSE)
692 			printf("%s(%u) ", n->k, n->mark);
693 		/* Find next arc to explore.  */
694 		if (n->traverse)
695 			n->traverse = n->traverse->next;
696 		else
697 			n->traverse = n->arcs;
698 		/* Skip over dead nodes.  */
699 		while (n->traverse && n->traverse->node->refs == 0)
700 			n->traverse = n->traverse->next;
701 		/* If arc left.  */
702 		if (n->traverse) {
703 			struct node 	*go;
704 
705 			go = n->traverse->node;
706 			/* Optimisation: if go->mark < min, we already
707 			 * visited this strongly-connected component in
708 			 * a previous pass.  Hence, this can yield no new
709 			 * cycle.  */
710 
711 			/* Not part of the current path: go for it.  */
712 			if (go->mark == 0 || go->mark == min) {
713 				go->from = n;
714 				n = go;
715 				o++;
716 				if (o > max)
717 					max = o;
718 			/* Part of the current path: check cycle length.  */
719 			} else if (go->mark > min) {
720 				if (DEBUG_TRAVERSE)
721 					printf("%d\n", o - go->mark + 1);
722 				if (o - go->mark + 1 > c->entries) {
723 					struct node *t;
724 					unsigned int i;
725 
726 					c->entries = o - go->mark + 1;
727 					i = 0;
728 					c->t[i++] = go;
729 					for (t = n; t != go; t = t->from)
730 						c->t[i++] = t;
731 				}
732 			}
733 
734 		/* No arc left: backtrack.  */
735 		} else {
736 			n->mark = min;
737 			n = n->from;
738 			if (!n)
739 				return max;
740 			o--;
741 		}
742 	}
743 }
744 
745 static void
746 print_cycle(struct array *c)
747 {
748 	unsigned int 	i;
749 
750 	/* Printing in reverse order, since cycle discoveries finds reverse
751 	 * edges.  */
752 	for (i = c->entries; i != 0;) {
753 		i--;
754 		warnx("%s", c->t[i]->k);
755 	}
756 }
757 
758 static struct node *
759 find_longest_cycle(struct array *h, struct array *c)
760 {
761 	unsigned int 	i;
762 	unsigned int 	o;
763 	unsigned int 	best;
764 	struct node 	*n;
765 	static int 	notfirst = 0;
766 
767 	assert(h->entries != 0);
768 
769 	/* No cycle found yet.  */
770 	c->entries = 0;
771 
772 	/* Reset the set of marks, except the first time around.  */
773 	if (notfirst) {
774 		for (i = 0; i < h->entries; i++)
775 			h->t[i]->mark = 0;
776 	} else
777 		notfirst = 1;
778 
779 	o = 0;
780 
781 	/* Traverse the array.  Each unmarked, live node heralds a
782 	 * new set of strongly connected components.  */
783 	for (i = 0; i < h->entries; i++) {
784 		n = h->t[i];
785 		if (n->refs != 0 && n->mark == 0) {
786 			/* Each call to traverse_node uses a separate
787 			 * interval of numbers to mark nodes.  */
788 			o++;
789 			o = traverse_node(n, o, c);
790 		}
791 	}
792 
793 	assert(c->entries != 0);
794 	n = c->t[0];
795 	best = n->refs;
796 	for (i = 0; i < c->entries; i++) {
797 		if (c->t[i]->refs < best) {
798 			n = c->t[i];
799 			best = n->refs;
800 		}
801 	}
802 	return n;
803 }
804 
805 static struct node *
806 find_normal_cycle(struct array *h, struct array *c)
807 {
808 	struct node *b, *n;
809 
810 	if (hints_flag)
811 		n = find_smallest_node(h);
812 	else
813 		n = find_good_cycle_break(h);
814 	while ((b = find_cycle_from(n, c)) == NULL)
815 		n = find_predecessor(h, n);
816 	return b;
817 }
818 
819 
820 #define plural(n) ((n) > 1 ? "s" : "")
821 
822 static void
823 parse_args(int argc, char *argv[], struct ohash *pairs)
824 {
825 	int c;
826 	unsigned int	order;
827 	int reverse_flag;
828 	const char **files;
829 	int i, j;
830 
831 	i = 0;
832 
833 	reverse_flag = quiet_flag = long_flag =
834 		warn_flag = hints_flag = verbose_flag = 0;
835 	/* argc is good enough, as we start at argv[1] */
836 	files = ereallocarray(NULL, argc, sizeof (char *));
837 	while ((c = getopt(argc, argv, "h:flqrvw")) != -1) {
838 		switch(c) {
839 		case 'h':
840 			files[i++] = optarg;
841 			hints_flag = 1;
842 			break;
843 			/*FALLTHRU*/
844 		case 'f':
845 			hints_flag = 2;
846 			break;
847 		case 'l':
848 			long_flag = 1;
849 			break;
850 		case 'q':
851 			quiet_flag = 1;
852 			break;
853 		case 'r':
854 			reverse_flag = 1;
855 			break;
856 		case 'v':
857 			verbose_flag = 1;
858 			break;
859 		case 'w':
860 			warn_flag = 1;
861 			break;
862 		default:
863 			usage();
864 		}
865 	}
866 
867 	argc -= optind;
868 	argv += optind;
869 
870 	switch(argc) {
871 	case 1:
872 		files[i++] = argv[0];
873 		break;
874 	case 0:
875 		break;
876 	default:
877 		usage();
878 	}
879 
880 	files[i] = NULL;
881 
882 	nodes_init(pairs);
883 	order = 0;
884 
885 	for (j = 0; j != i-argc; j++) {
886 		FILE *f;
887 
888 		f = fopen(files[j], "r");
889 		if (f == NULL)
890 			err(1, "Can't open hint file %s", files[i]);
891 		order = read_hints(f, pairs, quiet_flag, files[i], order);
892 		fclose(f);
893     	}
894 	free(files);
895 
896 	if (argc == 1) {
897 		FILE *f;
898 
899 		f = fopen(argv[0], "r");
900 		if (f == NULL)
901 			err(1, "Can't open file %s", argv[0]);
902 		order = read_pairs(f, pairs, reverse_flag, argv[0], order,
903 		    hints_flag == 2);
904 		fclose(f);
905 	} else {
906 		order = read_pairs(stdin, pairs, reverse_flag, "stdin",
907 		    order, hints_flag == 2);
908 	}
909 }
910 
911 static int
912 tsort(struct ohash *pairs)
913 {
914 	    struct array 	aux;	/* Unrefed nodes/cycle reporting.  */
915 	    struct array	remaining;
916 	    unsigned int	broken_arcs, broken_cycles;
917 	    unsigned int	left;
918 
919 	    broken_arcs = 0;
920 	    broken_cycles = 0;
921 
922 	    if (hints_flag)
923 		    make_transparent(pairs);
924 	    split_nodes(pairs, &aux, &remaining);
925 	    ohash_delete(pairs);
926 
927 	    if (hints_flag)
928 		    heapify(&aux, verbose_flag);
929 
930 	    left = remaining.entries + aux.entries;
931 	    while (left != 0) {
932 
933 		    /* Standard topological sort.  */
934 		    while (aux.entries) {
935 			    struct link *l;
936 			    struct node *n;
937 
938 			    n = DEQUEUE(&aux);
939 			    printf("%s\n", n->k);
940 			    left--;
941 			    /* We can't free nodes, as we don't know which
942 			     * entry we can remove in the hash table.  We
943 			     * rely on refs == 0 to recognize live nodes.
944 			     * Decrease ref count of live nodes, enter new
945 			     * candidates into the unrefed list.  */
946 			    for (l = n->arcs; l != NULL; l = l->next)
947 				    if (l->node->refs != 0 &&
948 					--l->node->refs == 0) {
949 					    ENQUEUE(&aux, l->node);
950 				    }
951 		    }
952 		    /* There are still cycles to break.  */
953 		    if (left != 0) {
954 			    struct node *n;
955 
956 			    broken_cycles++;
957 			    /* XXX Simple cycle detection and long cycle
958 			     * detection are mutually exclusive.  */
959 
960 			    if (long_flag)
961 				    n = find_longest_cycle(&remaining, &aux);
962 			    else
963 				    n = find_normal_cycle(&remaining, &aux);
964 
965 			    if (!quiet_flag) {
966 				    warnx("cycle in data");
967 				    print_cycle(&aux);
968 			    }
969 
970 			    if (verbose_flag)
971 				    warnx("%u edge%s broken", n->refs,
972 					plural(n->refs));
973 			    broken_arcs += n->refs;
974 			    n->refs = 0;
975 			    /* Reinitialization, cycle reporting uses aux.  */
976 			    aux.t[0] = n;
977 			    aux.entries = 1;
978 		    }
979 	    }
980 	    if (verbose_flag && broken_cycles != 0)
981 		    warnx("%u cycle%s broken, for a total of %u edge%s",
982 			broken_cycles, plural(broken_cycles),
983 			broken_arcs, plural(broken_arcs));
984 	    if (warn_flag)
985 		    return (broken_cycles < 256 ? broken_cycles : 255);
986 	    else
987 		    return (0);
988 }
989 
990 int
991 main(int argc, char *argv[])
992 {
993 	struct ohash 	pairs;
994 
995 	if (pledge("stdio rpath", NULL) == -1)
996 		err(1, "pledge");
997 
998 	parse_args(argc, argv, &pairs);
999 
1000 	if (pledge("stdio", NULL) == -1)
1001 		err(1, "pledge");
1002 
1003 	return tsort(&pairs);
1004 }
1005 
1006 
1007 extern char *__progname;
1008 
1009 static void
1010 usage(void)
1011 {
1012 	fprintf(stderr, "Usage: %s [-flqrvw] [-h file] [file]\n", __progname);
1013 	exit(1);
1014 }
1015