1 /*
2 Input
3 */
4
5 #include "input.h"
6
7 #include "config.h"
8 #include "head.h"
9 #include "mem.h"
10 #include "message.h"
11 #include "os.h"
12 #include "page.h"
13 #include "print.h"
14 #include "shared.h"
15 #include "status.h"
16 #include "var.h"
17
18 static word num_vocab_words;
19 static word vocab_entry_size;
20 static word strt_vocab_table;
21
22 #define QUICK_BITS 8
23
24 /*
25 We know that, since the value we are searching for
26 was encoded (using encode), and the two possible dictionary
27 lengths are two and three words (6/9 letters), the top
28 bit will not be set on the first word.
29 We thus only need to store the quick lookup values for bits
30 14 and down of the first coded word.
31 */
32
33 #define QUICK_KEY(w) (((w)>>(15-QUICK_BITS)) & ((1<<QUICK_BITS)-1))
34 #define QUICK_HOLD (1<<QUICK_BITS)
35
36 static word quick[QUICK_HOLD + 1];
37
init_quick(void)38 static void init_quick(void)
39 {
40 word i = 0;
41 word j = 0;
42 word p = strt_vocab_table;
43 for(j = 0; j < QUICK_HOLD; ++j)
44 {
45 while(i < num_vocab_words && QUICK_KEY(rd_word_addr(p)) < j)
46 {
47 i += 1;
48 p += vocab_entry_size;
49 }
50 quick[j] = i;
51 }
52 quick[QUICK_HOLD] = i;
53 }
54
/*
Make the dictionary at vocab_strt the current one.

The table header is read as: a byte n at vocab_strt, n bytes that are
skipped here, the entry size (byte) at vocab_strt + n + 1, a signed
entry count (word) at vocab_strt + n + 2, and the entries themselves
from vocab_strt + n + 4.

The address of the last dictionary set up is remembered, so asking for
the same one again does no work.
*/
static void init_vocab(long_word vocab_strt)
{
    static long_word cached_start = 0xFFFFFFFFL;
    word skip;
    signed_word count;

    if(vocab_strt == cached_start)
        return;

    skip = rd_byte_addr(vocab_strt);
    count = rd_word_addr(vocab_strt + skip + 2);

    vocab_entry_size = rd_byte_addr(vocab_strt + skip + 1);
    strt_vocab_table = (word) (vocab_strt + skip + 4);

    /*
    A count that is not positive is stored as its magnitude.
    NOTE(review): some story formats may use a negative count to mark
    an unsorted table, which the sorted search would not handle -
    confirm whether that can occur here.
    */
    num_vocab_words = count > 0 ? count : -count;

    init_quick();
    cached_start = vocab_strt;
}
79
/*
Find an encoded word in the current dictionary.

coded         the encoded word to search for
encoded_size  how many words of coded[] take part in the comparison

Returns the address of the matching entry, or 0 if there is none.
Only the entries in the quick-lookup range selected by coded[0] are
examined.
*/
static word look_up(word coded[], word encoded_size)
{
    word bucket = QUICK_KEY(coded[0]);
    word entry = quick[bucket];
    word limit = quick[bucket + 1];
    /* addr is the address of word k of table entry 'entry' */
    word addr = strt_vocab_table + entry * vocab_entry_size;
    word k = 0;

    /* Walk forward, one coded word (column) at a time */
    while(k < encoded_size)
    {
        for(; entry < limit && rd_word_addr(addr) < coded[k]; ++entry)
            addr += vocab_entry_size;

        if(entry == limit || rd_word_addr(addr) > coded[k])
            return 0;

        addr += 2;
        ++k;
    }

    /*
    Moving down a later column may have carried us onto an entry whose
    earlier columns differ, so verify every column while stepping addr
    back to the start of the entry.
    */
    while(k != 0)
    {
        --k;
        addr -= 2;
        if(rd_word_addr(addr) != coded[k])
            return 0;
    }
    return addr;
}
105
/*
Copy text out of game memory into a zero-terminated local buffer.

start   address of the first byte to copy
length  number of bytes available at start
buffer  destination; must have room for size + 1 bytes
size    maximum number of bytes to copy

At most min(size, length) bytes are copied, followed by a 0 byte.
*/
static void buffer_copy(long_word start, word length, byte *buffer, word size)
{
    word limit = min(size, length);
    word n = 0;

    while(n < limit)
    {
        buffer[n] = rd_byte_addr(start + n);
        ++n;
    }
    buffer[limit] = 0;
}
114
/*
Append one entry to the word table at 'words'.

Byte 0 of the table is its capacity and byte 1 the number of entries
held; each entry takes four bytes from offset 2 onwards: the code
(word), then len (byte), then start (byte).

Returns 1 if the entry was stored, 0 if the table was already full.
*/
static int store_word(long_word words, word code, int len, int start)
{
    byte capacity = rd_byte_addr(words);
    byte used = rd_byte_addr(words + 1);
    long_word slot;

    if(used >= capacity)
        return 0;

    slot = words + 2 + (long_word) (4 * (word) used);
    wr_byte_addr(words + 1, used + 1);
    wr_word_addr(slot, code);
    wr_byte_addr(slot + 2, len);
    wr_byte_addr(slot + 3, start);
    return 1;
}
133
/*
Tell the player that the word table is full, then write a 0 byte just
past the end of the typed text (the text starts at chars + 2 and its
length is held in the byte at chars + 1).
*/
static void too_many_words(long_word chars)
{
    long_word text_end;

    display((byte *) "Too many words typed. Flushing.\n");

    text_end = chars + 2 + rd_byte_addr(chars + 1);
    wr_byte_addr(text_end, 0);
}
139
140 typedef enum { punctuation, white_space, letter } char_class;
141
classify_char(byte c)142 static char_class classify_char(byte c)
143 {
144 word vocab = hd_vocab();
145 int puncs = rd_byte_addr(vocab);
146 while(puncs--)
147 if(rd_byte_addr(++vocab) == c)
148 return punctuation;
149 return os_strpos(" \t\r.,?", c) >= 0 ? white_space : letter;
150 }
151
/*
Split the typed text at 'chars' into words and record them at 'words'.

chars  text buffer: length in the byte at chars + 1, text from chars + 2
words  word table: capacity in byte 0, count in byte 1 (reset to 0
       here), then one four-byte entry per word holding the dictionary
       address from look_up() (0 if not found), the word's length and
       its offset within the text buffer

A punctuation character forms a one-character word of its own; any
other word is a maximal run of letters.  If the table fills up,
too_many_words() is called and parsing stops.
*/
static void general_parse_buffer(long_word chars, long_word words)
{
    int x = 2;
    int chars_len = 2 + rd_byte_addr(chars + 1);
    int plus = hd_plus();
    /* Encoded size and significant characters do not change per word */
    int esz = plus ? PLUS_ENCODED_SIZE : STD_ENCODED_SIZE;
    int cpw = plus ? PLUS_CHARS_PER_WORD : STD_CHARS_PER_WORD;

    wr_byte_addr(words + 1, 0);

    while(x < chars_len)
    {
        int len = 0;
        word coded[max(STD_ENCODED_SIZE, PLUS_ENCODED_SIZE)];
        /*
        buffer_copy() may write cpw characters and a terminator, so
        the buffer has to be sized from the larger characters-per-word
        limit (not from an encoded size).
        */
        byte the_word[max(STD_CHARS_PER_WORD, PLUS_CHARS_PER_WORD) + 1];

        /* Skip the white space in front of the next word */
        while(x < chars_len
              && classify_char(rd_byte_addr(chars + x)) == white_space)
            ++x;
        if(x >= chars_len)
            break;

        if(classify_char(rd_byte_addr(chars + x)) == punctuation)
        {
            len = 1;
        }
        else
        {
            while(x + len < chars_len
                  && classify_char(rd_byte_addr(chars + x + len)) == letter)
                ++len;
        }

        buffer_copy(chars + x, len, the_word, cpw);
        encode(the_word, coded);
        if(!store_word(words, look_up(coded, esz), len, x))
        {
            too_many_words(chars);
            break;
        }
        x += len;
    }
}
191
/*
Tokenise the text at 'chars' against the dictionary at vocab_strt and
record the result in the word table at 'words'.

chars          text buffer: length in the byte at chars + 1, text from
               chars + 2
words          word table: capacity in byte 0, count in byte 1 (reset
               to 0 here), four-byte entries from offset 2
vocab_strt     dictionary to search; it becomes the current dictionary
ignore_offset  when non-zero, the address field of an entry is left
               untouched for words that are not in the dictionary

Each entry receives the dictionary address (or 0), the word's length
and its offset from 'chars'.  If the table is full when another word
is found, too_many_words() is called and parsing stops.
*/
static void advanced_parse_buffer(long_word chars, long_word words,
                                  long_word vocab_strt, word ignore_offset)
{
    long_word text_end = chars + 2 + rd_byte_addr(chars + 1);
    long_word cursor = chars + 2;
    word coded[PLUS_ENCODED_SIZE];

    init_vocab(vocab_strt);
    wr_byte_addr(words + 1, 0);

    while(cursor != text_end)
    {
        word remaining = (byte) (text_end - cursor);
        /* get_code() moves cursor on to the start of the word it found */
        word count = get_code(&cursor, remaining, coded);
        byte held;
        long_word slot;
        word offset;

        if(count == 0)
            break;

        held = rd_byte_addr(words + 1);
        if(rd_byte_addr(words) == held)
        {
            too_many_words(chars);
            break;
        }

        slot = words + 2 + (long_word) 4 * held;
        offset = look_up(coded, PLUS_ENCODED_SIZE);
        if(!(offset == 0 && ignore_offset != 0))
            wr_word_addr(slot, offset);
        wr_byte_addr(slot + 2, count);
        wr_byte_addr(slot + 3, (byte) (cursor - chars));
        wr_byte_addr(words + 1, held + 1);
        cursor += count;
    }
}
228
/*
Read a line from the player into the text buffer at 'buffer'.

The byte at 'buffer' is handed to getline() as the size limit.  The
characters are stored from buffer + 2 after passing through
os_lower(), and the count is written to the byte at buffer + 1.
*/
static void read_line(long_word buffer)
{
    static char line[256];
    int count = getline((byte *)"", line, rd_byte_addr(buffer));
    int pos = 0;

    while(pos < count)
    {
        wr_byte_addr(buffer + 2 + pos, os_lower(line[pos]));
        ++pos;
    }
    wr_byte_addr(buffer + 1, count);
}
238
/*
Find the next word in a stretch of game memory.

start   in: address to begin scanning at
        out: address of the first character that is not white space
        (the end of the stretch if there is none)
length  number of bytes in the stretch

Returns the length of the word starting at *start: 1 for a punctuation
character, the length of the run of letters otherwise, or 0 if only
white space was left.
*/
static word scan_buffer(long_word *start, word length)
{
    long_word limit = *start + length;
    long_word cursor;

    for(cursor = *start; cursor != limit; ++cursor)
        if(classify_char(rd_byte_addr(cursor)) != white_space)
            break;
    *start = cursor;

    if(cursor == limit)
        return 0;

    if(classify_char(rd_byte_addr(cursor)) == punctuation)
        return 1;

    while(cursor != limit && classify_char(rd_byte_addr(cursor)) == letter)
        ++cursor;
    return (word) (cursor - *start);
}
267
268 /* Globals */
269
init_input(void)270 void init_input(void)
271 {
272 init_vocab(hd_vocab());
273 }
274
input(void)275 void input(void)
276 {
277 extern word param_stack[];
278
279 int num_params = param_stack[0];
280 long_word in_buf_strt = param_stack[1];
281 long_word word_buff_strt = num_params < 2 ? 0 : param_stack[2];
282
283 /* Empty the Print Buffer */
284
285 if(!hd_plus())
286 status();
287 flush_prt_buff();
288
289 #if 0 /* These parameters can't be used */
290 if(num_params < 3) param_stack[3] = 0xFFFF;
291 if(num_params < 4) param_stack[4] = 0;
292 param_stack[0] = 4;
293 #endif
294
295 read_line(in_buf_strt);
296
297 if(!hd_five())
298 {
299 general_parse_buffer(in_buf_strt, word_buff_strt);
300 }
301 else
302 {
303 if(param_stack[2] != 0)
304 advanced_parse_buffer(in_buf_strt, word_buff_strt, hd_vocab(), 0);
305 store('\n');
306 }
307 }
308
/*
Locate and encode the next word in a stretch of game memory.

start   in: address to begin scanning at; out: address of the word
        found (see scan_buffer())
length  number of bytes in the stretch
coded   receives the encoded word when one is found

Returns the length of the word in characters, or 0 if there was none,
in which case coded[] is left untouched.  Only the first
PLUS_CHARS_PER_WORD characters take part in the encoding.
*/
word get_code(long_word *start, word length, word coded[])
{
    byte text[PLUS_CHARS_PER_WORD + 1];
    word found = scan_buffer(start, length);

    if(found == 0)
        return 0;

    buffer_copy(*start, found, text, PLUS_CHARS_PER_WORD);
    encode(text, coded);
    return found;
}
321
parse(void)322 void parse(void)
323 {
324 extern word param_stack[];
325
326 int num_params = param_stack[0];
327 long_word in_buf_strt = param_stack[1];
328 long_word word_buff_strt = param_stack[2];
329 long_word vocab_strt = num_params < 3 ? hd_vocab() : param_stack[3];
330 word ignore_offset = num_params < 4 ? 0 : param_stack[4];
331
332 advanced_parse_buffer(in_buf_strt, word_buff_strt,
333 vocab_strt, ignore_offset);
334 }
335