1 /* $OpenBSD: word.c,v 1.8 2016/01/10 13:18:07 mestre Exp $ */
2 /* $NetBSD: word.c,v 1.2 1995/03/21 12:14:45 cgd Exp $ */
3
4 /*-
5 * Copyright (c) 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Barry Brachman.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/stat.h>
37
38 #include <err.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42
43 #include "bog.h"
44 #include "extern.h"
45
/* In-memory dictionary: start of the loaded data and one past its last byte. */
static char *dictspace;
static char *dictend;

/* Current read position of nextword() inside the in-memory dictionary. */
static char *sp;

/* Nonzero until nextword() has read the first prefix byte from a stream. */
static int first = 1;

/* Byte that ended the previous word read from a stream (EOF at end of file). */
static int lastch = 0;
50
51 /*
52 * Return the next word in the compressed dictionary in 'buffer' or
53 * NULL on end-of-file
54 */
55 char *
nextword(FILE * fp)56 nextword(FILE *fp)
57 {
58 int ch, pcount;
59 char *p;
60 static char buf[MAXWORDLEN + 1];
61
62 if (fp == NULL) {
63 if (sp == dictend)
64 return (NULL);
65
66 p = buf + (int) *sp++;
67
68 /*
69 * The dictionary ends with a null byte
70 */
71 while (*sp >= 'a')
72 if ((*p++ = *sp++) == 'q')
73 *p++ = 'u';
74 } else {
75 if (first) {
76 if ((pcount = getc(fp)) == EOF)
77 return (NULL);
78 first = 0;
79 } else if ((pcount = lastch) == EOF)
80 return (NULL);
81
82 p = buf + pcount;
83
84 while ((ch = getc(fp)) != EOF && ch >= 'a')
85 if ((*p++ = ch) == 'q')
86 *p++ = 'u';
87 lastch = ch;
88 }
89 wordlen = (int) (p - buf);
90 *p = '\0';
91 return (buf);
92 }
93
94 /*
95 * Reset the state of nextword() and do the fseek()
96 */
97 long
dictseek(FILE * fp,long offset,int ptrname)98 dictseek(FILE *fp, long offset, int ptrname)
99 {
100 if (fp == NULL) {
101 if ((sp = dictspace + offset) >= dictend)
102 return (-1);
103 return (0);
104 }
105
106 first = 1;
107 return (fseek(fp, offset, ptrname));
108 }
109
/*
 * Open the dictionary file 'dict' for reading.
 * Returns the stream, or NULL if the file cannot be opened.
 */
FILE *
opendict(const char *dict)
{
	return (fopen(dict, "r"));
}
119
120 /*
121 * Load the given dictionary and initialize the pointers
122 */
123 int
loaddict(FILE * fp)124 loaddict(FILE *fp)
125 {
126 struct stat statb;
127 long n;
128 int st;
129 char *p;
130
131 if (fstat(fileno(fp), &statb) < 0) {
132 fclose(fp);
133 return (-1);
134 }
135
136 /*
137 * An extra character (a sentinel) is allocated and set to null
138 * to improve the expansion loop in nextword().
139 */
140 if ((dictspace = malloc(statb.st_size + 1)) == NULL) {
141 fclose(fp);
142 return (-1);
143 }
144 n = (long)statb.st_size;
145 sp = dictspace;
146 dictend = dictspace + n;
147
148 p = dictspace;
149 st = -1;
150 while (n > 0 && (st = fread(p, 1, BUFSIZ, fp)) > 0) {
151 p += st;
152 n -= st;
153 }
154 if (st < 0) {
155 fclose(fp);
156 warnx("Error reading dictionary");
157 return (-1);
158 }
159 *p = '\0';
160 return (0);
161 }
162
163 /*
164 * Dependent on the exact format of the index file:
165 * Starting offset field begins in column 1 and length field in column 9
166 * Taking the easy way out, the input buffer is made "large" and a check
167 * is made for lines that are too long
168 */
169 int
loadindex(const char * indexfile)170 loadindex(const char *indexfile)
171 {
172 int i, j;
173 char buf[BUFSIZ];
174 FILE *fp;
175
176 if ((fp = fopen(indexfile, "r")) == NULL) {
177 warnx("Can't open '%s'", indexfile);
178 return (-1);
179 }
180 i = 0;
181 while (fgets(buf, sizeof(buf), fp) != NULL) {
182 if (strchr(buf, '\n') == NULL) {
183 warnx("A line in the index file is too long");
184 fclose(fp);
185 return(-1);
186 }
187 j = *buf - 'a';
188 if (i != j) {
189 warnx("Bad index order");
190 fclose(fp);
191 return(-1);
192 }
193 dictindex[j].start = atol(buf + 1);
194 dictindex[j].length = atol(buf + 9) - dictindex[j].start;
195 i++;
196 }
197 if (i != 26) {
198 warnx("Bad index length");
199 fclose(fp);
200 return(-1);
201 }
202 fclose(fp);
203 return(0);
204 }
205