1 /* Copyright 1998,2001 Mark Pulford
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. Read the file COPYING found in this archive for details, or
4 * visit http://www.gnu.org/copyleft/gpl.html
5 */
6
7 /* The upper bound for letter combinations at the moment is ~7000.
8 * Usually the number of checks required will be considerably less.
9 *
10 * A P166 can do 250000 failed lookups (the worst case) within a second on
11 * a 40k dictionary. Speed shouldn't be a problem.
12 */
13
14 #include <config.h>
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include <errno.h>
23 #include <string.h>
24
25 #include "dict.h"
26
/* Module state. All of it is private to this file. */
static char **windex;		/* Index into dict: one pointer per word */
static char *dict;		/* The dictionary text; NULL when nothing is loaded */
static int words = 0;		/* Number of words in the dictionary */
static char dict_name[9];	/* Name passed to dict_load(), at most 8 chars + NUL */

/* Forward declarations of the private helpers. Real prototypes are used
 * (rather than empty parameter lists) so the compiler checks every call.
 */
static int scompare(const void *a, const void *b);
static int sorted(void);
static void dict_sort(void);
35
36 /* Dictionary entries are separated by \n. It is faster if they are sorted
37 * in strcmp order. Extended characters may be ok depending on your
38 * charset & curses.
39 */
40
41 /* Returns 0 on error (err points to error message), 1 on success
42 * If the dictionary is already sorted it will decrease the loading time.
43 */
dict_load(const char * dname,const char ** err)44 int dict_load(const char *dname, const char **err)
45 {
46 static char errbuf[81];
47 int fd;
48 struct stat info;
49 int c;
50 char fn[256];
51 int rsize;
52
53 if(dict)
54 return 1;
55
56 snprintf(dict_name, 9, "%s", dname);
57
58 snprintf(fn, 256, "%s/dict.%s", DICTDIR, dict_name);
59 fd = open(fn, O_RDONLY);
60 if(fd < 0) {
61 if(err) {
62 snprintf(errbuf, 81, "%s", strerror(errno));
63 *err = errbuf;
64 }
65 return 0;
66 }
67
68 if(fstat(fd, &info) < 0) {
69 close(fd);
70 if(err) {
71 snprintf(errbuf, 81, "%s", strerror(errno));
72 *err = errbuf;
73 }
74 return 0;
75 }
76 dict = malloc(info.st_size + 1); /* Inc null terminator */
77 if(!dict) {
78 if(err) {
79 strcpy(errbuf, _("Out of memory"));
80 *err = errbuf;
81 }
82 return 0;
83 }
84
85 rsize = read(fd, dict, info.st_size);
86 if(rsize != info.st_size) {
87 if(rsize < 0)
88 snprintf(errbuf, 81, "%s", strerror(errno));
89 else
90 strcpy(errbuf, _("Partial read"));
91 *err = errbuf;
92 close(fd);
93 free(dict);
94 dict = NULL;
95 return 0;
96 }
97 close(fd);
98
99 dict[info.st_size] = 0;
100
101 for(c=0; c<info.st_size; c++)
102 if('\n' == dict[c])
103 words++;
104
105 windex = malloc(sizeof(*windex) * (words+1)); /* Inc strtok NULL */
106 if(!windex) {
107 free(dict);
108 dict=NULL;
109 if(err) {
110 strcpy(errbuf, _("Out of memory"));
111 *err = errbuf;
112 }
113 return 0;
114 }
115
116 /* Convert into word arrary */
117 words = 0;
118 windex[words] = strtok(dict, "\n");
119 while((windex[++words] = strtok(NULL, "\n")));
120
121 if(!sorted())
122 dict_sort();
123
124 return 1;
125 }
126
dict_free()127 void dict_free()
128 {
129 if(dict) {
130 free(dict);
131 dict = NULL;
132 }
133 }
134
sorted()135 static int sorted()
136 {
137 int c;
138
139 for(c=0; c<(words-1); c++)
140 if(strcmp(windex[c],windex[c+1]) >= 0)
141 return 0;
142
143 return 1;
144 }
145
146 /* Dictionaries must be sorted for this binary search to work
147 * returns: 0 not found (or dictionary not loaded), 1 found
148 */
dict_check(const char * word)149 int dict_check(const char *word)
150 {
151 int min,max;
152 int d,i;
153
154 if(!dict)
155 return 0;
156
157 min = 0;
158 max = words - 1; /* div 2 rounds down, so words-1 is last */
159
160 do {
161 i = (min + max) / 2;
162 d = strcmp(word, windex[i]);
163 if(d < 0) {
164 max = i-1;
165 } else if(0 == d) {
166 return 1;
167 } else {
168 min = i+1;
169 }
170 } while(min <= max);
171
172 return 0;
173 }
174
175 /* Dump the dictionary in the "correct" format */
dict_dump()176 int dict_dump()
177 {
178 int i;
179
180 if(!dict)
181 return 0;
182
183 for(i=0; i<words; i++)
184 printf("%s\n", windex[i]);
185
186 return 1;
187 }
188
189 /* Fills freq[256] with frequency info from the loaded dictionary
190 * Returns: 0 dictionary not loaded
191 * 1 success */
dict_get_freq(int * freq)192 int dict_get_freq(int *freq)
193 {
194 int w, i;
195
196 if(!dict)
197 return 0;
198
199 memset(freq, 0, 256*sizeof(*freq));
200
201 for(w=0; w<words; w++)
202 for(i=0; windex[w][i]; i++)
203 freq[windex[w][i]&255]++;
204
205 return 1;
206 }
207
/* Comparison callback for qsort()/bsearch() over an array of strings.
 * a and b each point at a string pointer (an element of the array); the
 * strings they refer to are compared with strcmp, so the result is
 * negative, zero or positive accordingly.
 */
static int scompare(const void *a, const void *b)
{
	/* Keep the const qualifier on the array elements: nothing is
	 * written through either pointer.
	 */
	const char *const *s1 = a;
	const char *const *s2 = b;

	return strcmp(*s1, *s2);
}
216
dict_sort()217 static void dict_sort()
218 {
219 qsort(windex, words, sizeof(*windex), scompare);
220 }
221
dict_get()222 char *dict_get()
223 {
224 return dict_name;
225 }
226