1 /* Copyright 1998,2001  Mark Pulford
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License. Read the file COPYING found in this archive for details, or
4  * visit http://www.gnu.org/copyleft/gpl.html
5  */
6 
7 /* The upper bound for letter combinations at the moment is ~7000.
8  * Usually the number of checks required will be considerably less.
9  *
10  * A P166 can do 250000 failed lookups (the worst case) within a second on
11  * a 40k dictionary. Speed shouldn't be a problem.
12  */
13 
14 #include <config.h>
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include <errno.h>
23 #include <string.h>
24 
25 #include "dict.h"
26 
/* Index of the loaded dictionary: one pointer per word, each pointing into
 * the dict buffer. */
static char **windex;
/* The dictionary text as read from the file; NULL while nothing is loaded */
static char *dict;
/* The number of words referenced by windex */
static int words = 0;
/* Name of the dictionary: up to 8 characters plus the terminator */
static char dict_name[9];

/* Forward declarations use real prototypes so that calls are type checked */
static int scompare(const void *a, const void *b);
static int sorted(void);
static void dict_sort(void);
35 
36 /* Dictionary entries are separated by \n. It is faster if they are sorted
37  * in strcmp order. Extended characters may be ok depending on your
38  * charset & curses.
39  */
40 
41 /* Returns 0 on error (err points to error message), 1 on success
42  * If the dictionary is already sorted it will decrease the loading time.
43  */
dict_load(const char * dname,const char ** err)44 int dict_load(const char *dname, const char **err)
45 {
46 	static char errbuf[81];
47 	int fd;
48 	struct stat info;
49 	int c;
50 	char fn[256];
51 	int rsize;
52 
53 	if(dict)
54 		return 1;
55 
56 	snprintf(dict_name, 9, "%s", dname);
57 
58 	snprintf(fn, 256, "%s/dict.%s", DICTDIR, dict_name);
59 	fd = open(fn, O_RDONLY);
60 	if(fd < 0) {
61 		if(err) {
62 			snprintf(errbuf, 81, "%s", strerror(errno));
63 			*err = errbuf;
64 		}
65 		return 0;
66 	}
67 
68 	if(fstat(fd, &info) < 0) {
69 		close(fd);
70 		if(err) {
71 			snprintf(errbuf, 81, "%s", strerror(errno));
72 			*err = errbuf;
73 		}
74 		return 0;
75 	}
76 	dict = malloc(info.st_size + 1);	/* Inc null terminator */
77 	if(!dict) {
78 		if(err) {
79 			strcpy(errbuf, _("Out of memory"));
80 			*err = errbuf;
81 		}
82 		return 0;
83 	}
84 
85 	rsize = read(fd, dict, info.st_size);
86 	if(rsize != info.st_size) {
87 		if(rsize < 0)
88 			snprintf(errbuf, 81, "%s", strerror(errno));
89 		else
90 			strcpy(errbuf, _("Partial read"));
91 		*err = errbuf;
92 		close(fd);
93 		free(dict);
94 		dict = NULL;
95 		return 0;
96 	}
97 	close(fd);
98 
99 	dict[info.st_size] = 0;
100 
101 	for(c=0; c<info.st_size; c++)
102 		if('\n' == dict[c])
103 			words++;
104 
105 	windex = malloc(sizeof(*windex) * (words+1));	/* Inc strtok NULL */
106 	if(!windex) {
107 		free(dict);
108 		dict=NULL;
109                 if(err) {
110 			strcpy(errbuf, _("Out of memory"));
111 			*err = errbuf;
112 		}
113 		return 0;
114 	}
115 
116 	/* Convert into word arrary */
117 	words = 0;
118 	windex[words] = strtok(dict, "\n");
119 	while((windex[++words] = strtok(NULL, "\n")));
120 
121 	if(!sorted())
122 		dict_sort();
123 
124 	return 1;
125 }
126 
dict_free()127 void dict_free()
128 {
129 	if(dict) {
130 		free(dict);
131 		dict = NULL;
132 	}
133 }
134 
sorted()135 static int sorted()
136 {
137 	int c;
138 
139 	for(c=0; c<(words-1); c++)
140 		if(strcmp(windex[c],windex[c+1]) >= 0)
141 			return 0;
142 
143 	return 1;
144 }
145 
146 /* Dictionaries must be sorted for this binary search to work
147  * returns: 0 not found (or dictionary not loaded), 1 found
148  */
dict_check(const char * word)149 int dict_check(const char *word)
150 {
151 	int min,max;
152 	int d,i;
153 
154 	if(!dict)
155 		return 0;
156 
157 	min = 0;
158 	max = words - 1;	/* div 2 rounds down, so words-1 is last */
159 
160 	do {
161 		i = (min + max) / 2;
162 		d = strcmp(word, windex[i]);
163 		if(d < 0) {
164 			max = i-1;
165 		} else if(0 == d) {
166 			return 1;
167 		} else {
168 			min = i+1;
169 		}
170 	} while(min <= max);
171 
172 	return 0;
173 }
174 
175 /* Dump the dictionary in the "correct" format */
dict_dump()176 int dict_dump()
177 {
178 	int i;
179 
180 	if(!dict)
181 		return 0;
182 
183 	for(i=0; i<words; i++)
184 		printf("%s\n", windex[i]);
185 
186 	return 1;
187 }
188 
189 /* Fills freq[256] with frequency info from the loaded dictionary
190  * Returns: 0	dictionary not loaded
191  * 	    1	success */
dict_get_freq(int * freq)192 int dict_get_freq(int *freq)
193 {
194 	int w, i;
195 
196 	if(!dict)
197 		return 0;
198 
199 	memset(freq, 0, 256*sizeof(*freq));
200 
201 	for(w=0; w<words; w++)
202 		for(i=0; windex[w][i]; i++)
203 			freq[windex[w][i]&255]++;
204 
205 	return 1;
206 }
207 
/* qsort() comparison callback for arrays of string pointers.
 * a and b each point at an array element (a char pointer); the strings
 * they refer to are compared with strcmp().
 * Returns <0, 0 or >0 as strcmp() does.
 */
static int scompare(const void *a, const void *b)
{
	/* Keep the elements const-qualified: no cast that drops const */
	const char *const *s1 = a;
	const char *const *s2 = b;

	return strcmp(*s1, *s2);
}
216 
dict_sort()217 static void dict_sort()
218 {
219 	qsort(windex, words, sizeof(*windex), scompare);
220 }
221 
dict_get()222 char *dict_get()
223 {
224 	return dict_name;
225 }
226