1 /*
2   Input
3 */
4 
5 #include "input.h"
6 
7 #include "config.h"
8 #include "head.h"
9 #include "mem.h"
10 #include "message.h"
11 #include "os.h"
12 #include "page.h"
13 #include "print.h"
14 #include "shared.h"
15 #include "status.h"
16 #include "var.h"
17 
/*
  State describing the vocabulary table most recently passed to
  init_vocab(); all three values are (re)computed there.
*/
static word num_vocab_words;   /* number of entries in the table        */
static word vocab_entry_size;  /* address step from one entry to next   */
static word strt_vocab_table;  /* address of the first table entry      */
21 
/* Number of bits of the first coded word used as the quick lookup key */
#define QUICK_BITS 8

/*
  The value we search for was produced by encode, and the two
  possible dictionary word lengths are two and three coded words
  (6/9 letters), so the top bit is never set on the first coded word.
  We thus only need to store the quick lookup values for bits
  14 and down of the first coded word.
*/

/* Key of a first coded word: the QUICK_BITS bits just below bit 15 */
#define QUICK_KEY(w) (((w)>>(15-QUICK_BITS)) & ((1<<QUICK_BITS)-1))
/* Number of distinct key values */
#define QUICK_HOLD   (1<<QUICK_BITS)

/*
  Index built by init_quick: look_up searches only the table entries
  numbered from quick[k] up to (not including) quick[k+1] for key k,
  hence the one extra slot.
*/
static word quick[QUICK_HOLD + 1];
37 
init_quick(void)38 static void init_quick(void)
39 {
40   word i = 0;
41   word j = 0;
42   word p = strt_vocab_table;
43   for(j = 0; j < QUICK_HOLD; ++j)
44   {
45     while(i < num_vocab_words && QUICK_KEY(rd_word_addr(p)) < j)
46     {
47       i += 1;
48       p += vocab_entry_size;
49     }
50     quick[j] = i;
51   }
52   quick[QUICK_HOLD] = i;
53 }
54 
/*
  Select the vocabulary whose header starts at vocab_strt.

  Header layout as read here: one byte giving a count n, n bytes that
  are skipped, one byte giving the entry size, one word giving the
  (signed) number of entries, then the table itself.

  The work is skipped when vocab_strt is the address used on the
  previous call; otherwise the file-level table description is
  refreshed and the quick index rebuilt.
*/
static void init_vocab(long_word vocab_strt)
{
  static long_word last_vocab_start = 0xFFFFFFFFL;
  word skip;
  signed_word count;

  if(vocab_strt == last_vocab_start)
    return;

  skip  = rd_byte_addr(vocab_strt);
  count = rd_word_addr(vocab_strt + skip + 2);

  vocab_entry_size = rd_byte_addr(vocab_strt + skip + 1);
  strt_vocab_table = (word) (vocab_strt + skip + 4);

  /*
    A non-positive count is stored as its magnitude.
    NOTE(review): the Z-Machine Standard appears to use a negative
    count to mark an unsorted dictionary, whereas init_quick relies
    on ascending keys - confirm whether such tables can reach here.
  */
  num_vocab_words = count > 0 ? count : -count;

  init_quick();
  last_vocab_start = vocab_strt;
}
79 
/*
  Find the table entry whose first encoded_size words equal coded[].

  Only the entries in the quick[] bucket selected by the key of
  coded[0] are examined.  Returns the address of the matching entry,
  or 0 when there is none.
*/
static word look_up(word coded[], word encoded_size)
{
  word key   = QUICK_KEY(coded[0]);
  word entry = quick[key];
  word limit = quick[key + 1];
  /* addr is the address of word k of table entry 'entry' */
  word addr  = strt_vocab_table + entry * vocab_entry_size;
  word k;

  /* Forward pass: settle each coded word in turn, only ever moving
     down the table */
  for(k = 0; k < encoded_size; ++k)
  {
    while(entry < limit && rd_word_addr(addr) < coded[k])
    {
      addr  += vocab_entry_size;
      entry += 1;
    }
    if(entry == limit || rd_word_addr(addr) > coded[k])
      return 0;
    addr += 2;
  }

  /* Backward pass: moving to a later entry for word k may have changed
     the earlier words, so confirm every word of the final entry */
  while(k > 0)
  {
    k    -= 1;
    addr -= 2;
    if(coded[k] != rd_word_addr(addr))
      return 0;
  }
  return addr;
}
105 
/*
  Copy at most size bytes (and no more than length) from address
  start into buffer and terminate the copy with a zero byte.
  buffer must therefore have room for size + 1 bytes.
*/
static void buffer_copy(long_word start, word length, byte *buffer, word size)
{
  word count = min(size, length);
  word k     = 0;

  while(k < count)
  {
    buffer[k] = rd_byte_addr(start + k);
    ++k;
  }
  buffer[count] = 0;
}
114 
/*
  Append one 4-byte record (code word, len byte, start byte) to the
  word buffer at address words.

  Byte 0 of the buffer holds its capacity in records and byte 1 the
  number already stored; records follow from byte 2.  Returns 1 when
  the record was stored, 0 when the buffer was already full.
*/
static int store_word(long_word words, word code, int len, int start)
{
  byte capacity = rd_byte_addr(words);
  byte used     = rd_byte_addr(words + 1);
  long_word slot;

  if(used >= capacity)
    return 0;

  slot = words + 2 + (long_word) (4 * (word) used);
  wr_byte_addr(words + 1, used + 1);
  wr_word_addr(slot, code);
  wr_byte_addr(slot + 2, len);
  wr_byte_addr(slot + 3, start);
  return 1;
}
133 
/*
  Report that the word buffer overflowed and write a zero byte just
  past the typed text (the text starts at chars + 2 and its length
  is held in the byte at chars + 1).
*/
static void too_many_words(long_word chars)
{
  long_word text_end;

  display((byte *) "Too many words typed. Flushing.\n");
  text_end = chars + 2 + rd_byte_addr(chars + 1);
  wr_byte_addr(text_end, 0);
}
139 
140 typedef enum { punctuation, white_space, letter } char_class;
141 
classify_char(byte c)142 static char_class classify_char(byte c)
143 {
144   word vocab = hd_vocab();
145   int puncs  = rd_byte_addr(vocab);
146   while(puncs--)
147     if(rd_byte_addr(++vocab) == c)
148       return punctuation;
149   return os_strpos(" \t\r.,?", c) >= 0 ? white_space : letter;
150 }
151 
/*
  Split the typed text held at chars into words and record each one
  in the word buffer at words.

  The text starts at chars + 2 and its length is the byte at
  chars + 1.  Runs of white space are skipped; a punctuation
  character forms a word by itself; otherwise a word is a maximal run
  of letters.  Each word is truncated, encoded, looked up and stored
  together with its typed length and its offset from chars.  Parsing
  stops early, with a message, when the word buffer is full.
*/
static void general_parse_buffer(long_word chars, long_word words)
{
  int x         = 2;
  int chars_len = 2 + rd_byte_addr(chars + 1);
  int plus      = hd_plus();
  /* Both sizes depend only on the header, so compute them once */
  int esz       = plus ? PLUS_ENCODED_SIZE : STD_ENCODED_SIZE;
  int cpw       = plus ? PLUS_CHARS_PER_WORD : STD_CHARS_PER_WORD;

  wr_byte_addr(words + 1, 0);

  while(x < chars_len)
  {
    int len = 0;
    word coded[max(STD_ENCODED_SIZE, PLUS_ENCODED_SIZE)];
    /*
      buffer_copy stores up to cpw characters plus a terminating zero,
      so the buffer has to be sized from the larger CHARS_PER_WORD
      value (not from an encoded size).
    */
    byte the_word[max(STD_CHARS_PER_WORD, PLUS_CHARS_PER_WORD) + 1];

    while(x < chars_len
       && classify_char(rd_byte_addr(chars + x)) == white_space) ++x;
    if(x >= chars_len)
      break;

    if(classify_char(rd_byte_addr(chars + x)) == punctuation)
    {
      len = 1;
    }
    else
    {
      while(x + len < chars_len
         && classify_char(rd_byte_addr(chars + x + len)) == letter) ++len;
    }

    buffer_copy(chars + x, len, the_word, cpw);
    encode(the_word, coded);
    if(!store_word(words, look_up(coded, esz), len, x))
    {
      too_many_words(chars);
      break;
    }
    x += len;
  }
}
191 
/*
  Tokenise the typed text at chars against the vocabulary at
  vocab_strt, filling the word buffer at words.

  For every word found by get_code a 4-byte record is written: the
  address returned by look_up, the word length, and the word's offset
  from chars.  When ignore_offset is non-zero, the address field of a
  word that is not in the vocabulary is left untouched instead of
  being set to 0.  Parsing stops, with a message, once the word
  buffer holds as many records as its first byte allows.
*/
static void advanced_parse_buffer(long_word chars, long_word words,
  long_word vocab_strt, word ignore_offset)
{
  long_word cursor = chars + 2;
  long_word limit  = chars + 2 + rd_byte_addr(chars + 1);

  word coded[PLUS_ENCODED_SIZE];

  init_vocab(vocab_strt);
  wr_byte_addr(words + 1, 0);

  while(cursor != limit)
  {
    word remaining = (byte) (limit - cursor);
    /* get_code moves cursor to the first character of the word */
    word length    = get_code(&cursor, remaining, coded);
    byte used;
    long_word slot;
    word entry;

    if(length == 0)
      break;

    if(rd_byte_addr(words) == rd_byte_addr(words + 1))
    {
      too_many_words(chars);
      break;
    }

    used  = rd_byte_addr(words + 1);
    slot  = words + 2 + (long_word) 4 * used;
    entry = look_up(coded, PLUS_ENCODED_SIZE);

    if(entry != 0 || ignore_offset == 0)
      wr_word_addr(slot, entry);
    wr_byte_addr(slot + 2, length);
    wr_byte_addr(slot + 3, (byte) (cursor - chars));
    wr_byte_addr(words + 1, used + 1);

    cursor += length;
  }
}
228 
/*
  Read one line from the user into the text buffer at address buffer.

  The byte at buffer is passed to getline as the limit.  The
  characters are stored, passed through os_lower, from buffer + 2
  onwards and their count is stored at buffer + 1.
*/
static void read_line(long_word buffer)
{
  static char line[256];
  int count = getline((byte *)"", line, rd_byte_addr(buffer));
  int k     = 0;

  while(k < count)
  {
    wr_byte_addr(buffer + 2 + k,  os_lower(line[k]));
    ++k;
  }
  wr_byte_addr(buffer + 1, count);
}
238 
/*
  Locate the next word in the length bytes starting at *start.

  Leading white space is skipped and *start is moved to the first
  character after it.  Returns 0 when nothing but white space was
  left, 1 when that character is punctuation, and otherwise the
  number of consecutive letters found from there.
*/
static word scan_buffer(long_word *start, word length)
{
  long_word cursor = *start;
  long_word limit  = *start + length;

  while(cursor != limit
     && classify_char(rd_byte_addr(cursor)) == white_space)
    cursor += 1;
  *start = cursor;

  if(cursor == limit)
    return 0;

  if(classify_char(rd_byte_addr(cursor)) == punctuation)
    return 1;

  for(; cursor != limit; ++cursor)
  {
    if(classify_char(rd_byte_addr(cursor)) != letter)
      break;
  }
  return (word) (cursor - *start);
}
267 
268 /* Globals */
269 
/*
  Prepare the input module: describe the vocabulary found at
  hd_vocab() and build its quick lookup index (see init_vocab).
*/
void init_input(void)
{
  init_vocab(hd_vocab());
}
274 
input(void)275 void input(void)
276 {
277   extern word param_stack[];
278 
279   int num_params           = param_stack[0];
280   long_word in_buf_strt    = param_stack[1];
281   long_word word_buff_strt = num_params < 2 ? 0 : param_stack[2];
282 
283   /* Empty the Print Buffer */
284 
285   if(!hd_plus())
286     status();
287   flush_prt_buff();
288 
289 #if 0 /* These parameters can't be used */
290   if(num_params < 3) param_stack[3] = 0xFFFF;
291   if(num_params < 4) param_stack[4] = 0;
292   param_stack[0] = 4;
293 #endif
294 
295   read_line(in_buf_strt);
296 
297   if(!hd_five())
298   {
299     general_parse_buffer(in_buf_strt, word_buff_strt);
300   }
301   else
302   {
303     if(param_stack[2] != 0)
304       advanced_parse_buffer(in_buf_strt, word_buff_strt, hd_vocab(), 0);
305     store('\n');
306   }
307 }
308 
/*
  Find and encode the next word in the length bytes at *start.

  *start is advanced past leading white space (see scan_buffer).
  When a word is found, at most PLUS_CHARS_PER_WORD of its characters
  are encoded into coded[].  Returns the full length of the word as
  typed, or 0 (leaving coded[] untouched) when there is no word.
*/
word get_code(long_word *start, word length, word coded[])
{
  byte text[PLUS_CHARS_PER_WORD + 1];
  word found = scan_buffer(start, length);

  if(found == 0)
    return 0;

  buffer_copy(*start, found, text, PLUS_CHARS_PER_WORD);
  encode(text, coded);
  return found;
}
321 
parse(void)322 void parse(void)
323 {
324   extern word param_stack[];
325 
326   int num_params            = param_stack[0];
327   long_word in_buf_strt     = param_stack[1];
328   long_word word_buff_strt  = param_stack[2];
329   long_word vocab_strt      = num_params < 3 ? hd_vocab() : param_stack[3];
330   word ignore_offset        = num_params < 4 ? 0 : param_stack[4];
331 
332   advanced_parse_buffer(in_buf_strt, word_buff_strt,
333     vocab_strt, ignore_offset);
334 }
335