1 /*
2 * Useful to find the minimum 'maxlen' we can pass to mph's -m option.
3 * Used by mphminm (which see).
4 *
5 * This program is only meant for small keysets - it is quite
6 * time and space inefficient. Besides, minimizing 'maxlen' is
7 * only useful for small keysets anyway (see mph.doc).
8 */
9
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <unistd.h>
14 #include "misc.h"
15 #include "arena.h"
16 #include "keys.h"
17
18 int m=0;
19
20 struct keymax {
21 int index; /* input word index */
22 char **eclass; /* equivalence classes */
23 int *slot; /* next empty slot in equivalence class */
24 };
25
26 #define uchar unsigned char
27
28 int
charcomp(const void * a,const void * b)29 charcomp(const void *a, const void *b)
30 {
31 uchar aa = *(uchar *)a, bb = *(uchar *)b;
32
33 /*
34 * NULLs compare larger than every other character.
35 */
36 if (aa == bb)
37 return 0;
38 else if (!aa)
39 return 1;
40 else if (!bb)
41 return -1;
42 else
43 return aa-bb;
44 }
45
46 /*
47 * Compare first equivalence class of each word.
48 */
49 int
keycomp(const void * a,const void * b)50 keycomp(const void *a, const void *b)
51 {
52 struct keymax *aa = (struct keymax *)a, *bb = (struct keymax *)b;
53 return strcmp(aa->eclass[0], bb->eclass[0]);
54 }
55
56 void
usage(void)57 usage(void)
58 {
59 fprintf(stderr, "usage: mphm -m maxlen\n");
60 exit(2);
61 }
62
63 void
getargs(int argc,char ** argv)64 getargs(int argc, char **argv)
65 {
66 int opt;
67 extern char *optarg;
68 extern int optind;
69
70 while ((opt = getopt(argc, argv, "m:")) != -1)
71 switch (opt) {
72 case 'm':
73 m = atoi(optarg);
74 break;
75 case '?':
76 usage();
77 break;
78 }
79
80 if (m <= 0 || optind != argc)
81 usage();
82 }
83
84 int
main(int argc,char ** argv)85 main(int argc, char **argv)
86 {
87 Keys *kp;
88 struct keymax *kmp;
89 char **key, *cp;
90 int i, j, k, n, mm;
91
92 getargs(argc, argv);
93 kp = keynew(stdin);
94 n = keynum(kp);
95 mm = 1 + (kp->maxlen + kp->maxlen - 1)/m;
96 kmp = xmalloc(sizeof *kmp * n);
97
98 for (i=0, key=kp->keys; i<n; i++, key++) {
99
100 /*
101 * the callocs() are necessary - leave them alone
102 */
103 kmp[i].index = i;
104 kmp[i].slot = xcalloc(1, sizeof *kmp[i].slot * m);
105 kmp[i].eclass = xmalloc(sizeof *kmp[i].eclass * m);
106 for (j=0; j<m; j++)
107 kmp[i].eclass[j] =
108 xcalloc(1, sizeof *kmp[i].eclass[j] * mm);
109
110 /*
111 * create character position equivalence classes
112 */
113 for (j=0, cp=*key; *cp; cp++) {
114 kmp[i].eclass[j][kmp[i].slot[j]++] = *cp;
115 if (++j == m)
116 j = 0;
117 }
118
119 /*
120 * sort characters in each equivalence class - this
121 * allows us to use strcmp() to test for equivalence
122 * class equality
123 */
124 for (j=0; j<m; j++)
125 qsort(kmp[i].eclass[j], kmp[i].slot[j],
126 sizeof *kmp[i].eclass[j], charcomp);
127 }
128
129 /*
130 * optimization for comparison loop below
131 */
132 qsort(kmp, n, sizeof *kmp, keycomp);
133
134 /*
135 * potentially O(n^2), but above qsort(kmp, ..) helps reduce this
136 */
137 for (i=0; i<n; i++)
138
139 for (j=i+1; j<n; j++) {
140
141 /*
142 * compare corresponding equivalence classes - break
143 * on first non-match
144 */
145 for (k=0; k<m; k++)
146 if (strcmp(kmp[i].eclass[k], kmp[j].eclass[k]))
147 break;
148
149 /*
150 * optimization courtesy of qsort(kmp, ..)
151 */
152 if (k == 0)
153 break;
154
155 /*
156 * all equivalence classes match -> conflict
157 */
158 if (k == m) {
159 fprintf(stderr, "mphm: %d %d conflict\n",
160 kmp[i].index+1, kmp[j].index+1);
161 return 1;
162 }
163 }
164
165 return 0;
166 }
167