1 /*
2  * Useful to find the minimum 'maxlen' we can pass to mph's -m option.
3  * Used by mphminm (which see).
4  *
5  * This program is only meant for small keysets - it is quite
6  * time and space inefficient. Besides, minimizing 'maxlen' is
7  * only useful for small keysets anyway (see mph.doc).
8  */
9 
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <unistd.h>
14 #include "misc.h"
15 #include "arena.h"
16 #include "keys.h"
17 
18 int m=0;
19 
20 struct keymax {
21 	int	index;		/* input word index */
22 	char	**eclass;	/* equivalence classes */
23 	int	*slot;		/* next empty slot in equivalence class */
24 };
25 
26 #define uchar unsigned char
27 
28 int
charcomp(const void * a,const void * b)29 charcomp(const void *a, const void *b)
30 {
31 	uchar aa = *(uchar *)a, bb = *(uchar *)b;
32 
33 	/*
34  	 * NULLs compare larger than every other character.
35  	 */
36 	if (aa == bb)
37 		return 0;
38 	else if (!aa)
39 		return 1;
40 	else if (!bb)
41 		return -1;
42 	else
43 		return aa-bb;
44 }
45 
46 /*
47  * Compare first equivalence class of each word.
48  */
49 int
keycomp(const void * a,const void * b)50 keycomp(const void *a, const void *b)
51 {
52 	struct keymax *aa = (struct keymax *)a, *bb = (struct keymax *)b;
53 	return strcmp(aa->eclass[0], bb->eclass[0]);
54 }
55 
56 void
usage(void)57 usage(void)
58 {
59 	fprintf(stderr, "usage: mphm -m maxlen\n");
60 	exit(2);
61 }
62 
63 void
getargs(int argc,char ** argv)64 getargs(int argc, char **argv)
65 {
66 	int opt;
67 	extern char *optarg;
68 	extern int optind;
69 
70 	while ((opt = getopt(argc, argv, "m:")) != -1)
71 		switch (opt) {
72 		case 'm':
73 			m = atoi(optarg);
74 			break;
75 		case '?':
76 			usage();
77 			break;
78 		}
79 
80 	if (m <= 0 || optind != argc)
81 		usage();
82 }
83 
84 int
main(int argc,char ** argv)85 main(int argc, char **argv)
86 {
87 	Keys *kp;
88 	struct keymax *kmp;
89 	char **key, *cp;
90 	int i, j, k, n, mm;
91 
92 	getargs(argc, argv);
93 	kp = keynew(stdin);
94 	n = keynum(kp);
95 	mm = 1 + (kp->maxlen + kp->maxlen - 1)/m;
96 	kmp = xmalloc(sizeof *kmp * n);
97 
98 	for (i=0, key=kp->keys; i<n; i++, key++) {
99 
100 		/*
101 		 * the callocs() are necessary - leave them alone
102 		 */
103 		kmp[i].index = i;
104 		kmp[i].slot = xcalloc(1, sizeof *kmp[i].slot * m);
105 		kmp[i].eclass = xmalloc(sizeof *kmp[i].eclass * m);
106 		for (j=0; j<m; j++)
107 			kmp[i].eclass[j] =
108 				xcalloc(1, sizeof *kmp[i].eclass[j] * mm);
109 
110 		/*
111 		 * create character position equivalence classes
112 		 */
113 		for (j=0, cp=*key; *cp; cp++) {
114 			kmp[i].eclass[j][kmp[i].slot[j]++] = *cp;
115 			if (++j == m)
116 				j = 0;
117 		}
118 
119 		/*
120 		 * sort characters in each equivalence class - this
121 		 * allows us to use strcmp() to test for equivalence
122 		 * class equality
123 		 */
124 		for (j=0; j<m; j++)
125 			qsort(kmp[i].eclass[j], kmp[i].slot[j],
126 				sizeof *kmp[i].eclass[j], charcomp);
127 	}
128 
129 	/*
130 	 * optimization for comparison loop below
131 	 */
132 	qsort(kmp, n, sizeof *kmp, keycomp);
133 
134 	/*
135 	 * potentially O(n^2), but above qsort(kmp, ..) helps reduce this
136 	 */
137 	for (i=0; i<n; i++)
138 
139 		for (j=i+1; j<n; j++) {
140 
141 			/*
142 			 * compare corresponding equivalence classes - break
143 			 * on first non-match
144 			 */
145 			for (k=0; k<m; k++)
146 				if (strcmp(kmp[i].eclass[k], kmp[j].eclass[k]))
147 					break;
148 
149 			/*
150 			 * optimization courtesy of qsort(kmp, ..)
151 			 */
152 			if (k == 0)
153 				break;
154 
155 			/*
156 			 * all equivalence classes match -> conflict
157 			 */
158 			if (k == m) {
159 				fprintf(stderr, "mphm: %d %d conflict\n",
160 					kmp[i].index+1, kmp[j].index+1);
161 				return 1;
162 			}
163 		}
164 
165 	return 0;
166 }
167