1 /*
2  * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 using System;
26 using System.Collections.Generic;
27 using System.IO;
28 using System.Reflection;
29 using System.Text;
30 
31 /*
32  * This is the main compiler class.
33  */
34 
35 public class T0Comp {
36 
37 	/*
38 	 * Command-line entry point.
39 	 */
Main(string[] args)40 	public static void Main(string[] args)
41 	{
42 		try {
43 			List<string> r = new List<string>();
44 			string outBase = null;
45 			List<string> entryPoints = new List<string>();
46 			string coreRun = null;
47 			bool flow = true;
48 			int dsLim = 32;
49 			int rsLim = 32;
50 			for (int i = 0; i < args.Length; i ++) {
51 				string a = args[i];
52 				if (!a.StartsWith("-")) {
53 					r.Add(a);
54 					continue;
55 				}
56 				if (a == "--") {
57 					for (;;) {
58 						if (++ i >= args.Length) {
59 							break;
60 						}
61 						r.Add(args[i]);
62 					}
63 					break;
64 				}
65 				while (a.StartsWith("-")) {
66 					a = a.Substring(1);
67 				}
68 				int j = a.IndexOf('=');
69 				string pname;
70 				string pval, pval2;
71 				if (j < 0) {
72 					pname = a.ToLowerInvariant();
73 					pval = null;
74 					pval2 = (i + 1) < args.Length
75 						? args[i + 1] : null;
76 				} else {
77 					pname = a.Substring(0, j).Trim()
78 						.ToLowerInvariant();
79 					pval = a.Substring(j + 1);
80 					pval2 = null;
81 				}
82 				switch (pname) {
83 				case "o":
84 				case "out":
85 					if (pval == null) {
86 						if (pval2 == null) {
87 							Usage();
88 						}
89 						i ++;
90 						pval = pval2;
91 					}
92 					if (outBase != null) {
93 						Usage();
94 					}
95 					outBase = pval;
96 					break;
97 				case "r":
98 				case "run":
99 					if (pval == null) {
100 						if (pval2 == null) {
101 							Usage();
102 						}
103 						i ++;
104 						pval = pval2;
105 					}
106 					coreRun = pval;
107 					break;
108 				case "m":
109 				case "main":
110 					if (pval == null) {
111 						if (pval2 == null) {
112 							Usage();
113 						}
114 						i ++;
115 						pval = pval2;
116 					}
117 					foreach (string ep in pval.Split(',')) {
118 						string epz = ep.Trim();
119 						if (epz.Length > 0) {
120 							entryPoints.Add(epz);
121 						}
122 					}
123 					break;
124 				case "nf":
125 				case "noflow":
126 					flow = false;
127 					break;
128 				default:
129 					Usage();
130 					break;
131 				}
132 			}
133 			if (r.Count == 0) {
134 				Usage();
135 			}
136 			if (outBase == null) {
137 				outBase = "t0out";
138 			}
139 			if (entryPoints.Count == 0) {
140 				entryPoints.Add("main");
141 			}
142 			if (coreRun == null) {
143 				coreRun = outBase;
144 			}
145 			T0Comp tc = new T0Comp();
146 			tc.enableFlowAnalysis = flow;
147 			tc.dsLimit = dsLim;
148 			tc.rsLimit = rsLim;
149 			using (TextReader tr = new StreamReader(
150 				Assembly.GetExecutingAssembly()
151 				.GetManifestResourceStream("t0-kernel")))
152 			{
153 				tc.ProcessInput(tr);
154 			}
155 			foreach (string a in r) {
156 				Console.WriteLine("[{0}]", a);
157 				using (TextReader tr = File.OpenText(a)) {
158 					tc.ProcessInput(tr);
159 				}
160 			}
161 			tc.Generate(outBase, coreRun, entryPoints.ToArray());
162 		} catch (Exception e) {
163 			Console.WriteLine(e.ToString());
164 			Environment.Exit(1);
165 		}
166 	}
167 
Usage()168 	static void Usage()
169 	{
170 		Console.WriteLine(
171 "usage: T0Comp.exe [ options... ] file...");
172 		Console.WriteLine(
173 "options:");
174 		Console.WriteLine(
175 "   -o file    use 'file' as base for output file name (default: 't0out')");
176 		Console.WriteLine(
177 "   -r name    use 'name' as base for run function (default: same as output)");
178 		Console.WriteLine(
179 "   -m name[,name...]");
180 		Console.WriteLine(
181 "              define entry point(s)");
182 		Console.WriteLine(
183 "   -nf        disable flow analysis");
184 		Environment.Exit(1);
185 	}
186 
187 	/*
188 	 * If 'delayedChar' is Int32.MinValue then there is no delayed
189 	 * character.
190 	 * If 'delayedChar' equals x >= 0 then there is one delayed
191 	 * character of value x.
192 	 * If 'delayedChar' equals y < 0 then there are two delayed
193 	 * characters, a newline (U+000A) followed by character of
194 	 * value -(y+1).
195 	 */
196 	TextReader currentInput;
197 	int delayedChar;
198 
199 	/*
200 	 * Common StringBuilder used to parse tokens; it is reused for
201 	 * each new token.
202 	 */
203 	StringBuilder tokenBuilder;
204 
205 	/*
206 	 * There may be a delayed token in some cases.
207 	 */
208 	String delayedToken;
209 
210 	/*
211 	 * Defined words are referenced by name in this map. Names are
212 	 * string-sensitive; for better reproducibility, the map is sorted
213 	 * (ordinal order).
214 	 */
215 	IDictionary<string, Word> words;
216 
217 	/*
218 	 * Last defined word is also referenced in 'lastWord'. This is
219 	 * used by 'immediate'.
220 	 */
221 	Word lastWord;
222 
223 	/*
224 	 * When compiling, this builder is used. A stack saves other
225 	 * builders in case of nested definition.
226 	 */
227 	WordBuilder wordBuilder;
228 	Stack<WordBuilder> savedWordBuilders;
229 
230 	/*
231 	 * C code defined for words is kept in this map, by word name.
232 	 */
233 	IDictionary<string, string> allCCode;
234 
235 	/*
236 	 * 'compiling' is true when compiling tokens to a word, false
237 	 * when interpreting them.
238 	 */
239 	bool compiling;
240 
241 	/*
242 	 * 'quitRunLoop' is set to true to trigger exit of the
243 	 * interpretation loop when the end of the current input file
244 	 * is reached.
245 	 */
246 	bool quitRunLoop;
247 
248 	/*
249 	 * 'extraCode' is for C code that is to be added as preamble to
250 	 * the C output.
251 	 */
252 	List<string> extraCode;
253 
254 	/*
255 	 * 'extraCodeDefer' is for C code that is to be added in the C
256 	 * output _after_ the data and code blocks.
257 	 */
258 	List<string> extraCodeDefer;
259 
260 	/*
261 	 * 'dataBlock' is the data block in which constant data bytes
262 	 * are accumulated.
263 	 */
264 	ConstData dataBlock;
265 
266 	/*
267 	 * Counter for blocks of constant data.
268 	 */
269 	long currentBlobID;
270 
271 	/*
272 	 * Flow analysis enable flag.
273 	 */
274 	bool enableFlowAnalysis;
275 
276 	/*
277 	 * Data stack size limit.
278 	 */
279 	int dsLimit;
280 
281 	/*
282 	 * Return stack size limit.
283 	 */
284 	int rsLimit;
285 
T0Comp()286 	T0Comp()
287 	{
288 		tokenBuilder = new StringBuilder();
289 		words = new SortedDictionary<string, Word>(
290 			StringComparer.Ordinal);
291 		savedWordBuilders = new Stack<WordBuilder>();
292 		allCCode = new SortedDictionary<string, string>(
293 			StringComparer.Ordinal);
294 		compiling = false;
295 		extraCode = new List<string>();
296 		extraCodeDefer = new List<string>();
297 		enableFlowAnalysis = true;
298 
299 		/*
300 		 * Native words are predefined and implemented only with
301 		 * native code. Some may be part of the generated output,
302 		 * if C code is set for them.
303 		 */
304 
305 		/*
306 		 * add-cc:
307 		 * Parses next token as a word name, then a C code snippet.
308 		 * Sets the C code for that word.
309 		 */
310 		AddNative("add-cc:", false, SType.BLANK, cpu => {
311 			string tt = Next();
312 			if (tt == null) {
313 				throw new Exception(
314 					"EOF reached (missing name)");
315 			}
316 			if (allCCode.ContainsKey(tt)) {
317 				throw new Exception(
318 					"C code already set for: " + tt);
319 			}
320 			allCCode[tt] = ParseCCode();
321 		});
322 
323 		/*
324 		 * cc:
325 		 * Parses next token as a word name, then a C code snippet.
326 		 * A new word is defined, that throws an exception when
327 		 * invoked during compilation. The C code is set for that
328 		 * new word.
329 		 */
330 		AddNative("cc:", false, SType.BLANK, cpu => {
331 			string tt = Next();
332 			if (tt == null) {
333 				throw new Exception(
334 					"EOF reached (missing name)");
335 			}
336 			Word w = AddNative(tt, false, cpu2 => {
337 				throw new Exception(
338 					"C-only word: " + tt);
339 			});
340 			if (allCCode.ContainsKey(tt)) {
341 				throw new Exception(
342 					"C code already set for: " + tt);
343 			}
344 			SType stackEffect;
345 			allCCode[tt] = ParseCCode(out stackEffect);
346 			w.StackEffect = stackEffect;
347 		});
348 
349 		/*
350 		 * preamble
351 		 * Parses a C code snippet, then adds it to the generated
352 		 * output preamble.
353 		 */
354 		AddNative("preamble", false, SType.BLANK, cpu => {
355 			extraCode.Add(ParseCCode());
356 		});
357 
358 		/*
359 		 * postamble
360 		 * Parses a C code snippet, then adds it to the generated
361 		 * output after the data and code blocks.
362 		 */
363 		AddNative("postamble", false, SType.BLANK, cpu => {
364 			extraCodeDefer.Add(ParseCCode());
365 		});
366 
367 		/*
368 		 * make-CX
369 		 * Expects two integers and a string, and makes a
370 		 * constant that stands for the string as a C constant
371 		 * expression. The two integers are the expected range
372 		 * (min-max, inclusive).
373 		 */
374 		AddNative("make-CX", false, new SType(3, 1), cpu => {
375 			TValue c = cpu.Pop();
376 			if (!(c.ptr is TPointerBlob)) {
377 				throw new Exception(string.Format(
378 					"'{0}' is not a string", c));
379 			}
380 			int max = cpu.Pop();
381 			int min = cpu.Pop();
382 			TValue tv = new TValue(0, new TPointerExpr(
383 				c.ToString(), min, max));
384 			cpu.Push(tv);
385 		});
386 
387 		/*
388 		 * CX  (immediate)
389 		 * Parses two integer constants, then a C code snippet.
390 		 * It then pushes on the stack, or compiles to the
391 		 * current word, a value consisting of the given C
392 		 * expression; the two integers indicate the expected
393 		 * range (min-max, inclusive) of the C expression when
394 		 * evaluated.
395 		 */
396 		AddNative("CX", true, cpu => {
397 			string tt = Next();
398 			if (tt == null) {
399 				throw new Exception(
400 					"EOF reached (missing min value)");
401 			}
402 			int min = ParseInteger(tt);
403 			tt = Next();
404 			if (tt == null) {
405 				throw new Exception(
406 					"EOF reached (missing max value)");
407 			}
408 			int max = ParseInteger(tt);
409 			if (max < min) {
410 				throw new Exception("min/max in wrong order");
411 			}
412 			TValue tv = new TValue(0, new TPointerExpr(
413 				ParseCCode().Trim(), min, max));
414 			if (compiling) {
415 				wordBuilder.Literal(tv);
416 			} else {
417 				cpu.Push(tv);
418 			}
419 		});
420 
421 		/*
422 		 * co
423 		 * Interrupt the current execution. This implements
424 		 * coroutines. It cannot be invoked during compilation.
425 		 */
426 		AddNative("co", false, SType.BLANK, cpu => {
427 			throw new Exception("No coroutine in compile mode");
428 		});
429 
430 		/*
431 		 * :
432 		 * Parses next token as word name. It begins definition
433 		 * of that word, setting it as current target for
434 		 * word building. Any previously opened word is saved
435 		 * and will become available again as a target when that
436 		 * new word is finished building.
437 		 */
438 		AddNative(":", false, cpu => {
439 			string tt = Next();
440 			if (tt == null) {
441 				throw new Exception(
442 					"EOF reached (missing name)");
443 			}
444 			if (compiling) {
445 				savedWordBuilders.Push(wordBuilder);
446 			} else {
447 				compiling = true;
448 			}
449 			wordBuilder = new WordBuilder(this, tt);
450 			tt = Next();
451 			if (tt == null) {
452 				throw new Exception(
453 					"EOF reached (while compiling)");
454 			}
455 			if (tt == "(") {
456 				SType stackEffect = ParseStackEffectNF();
457 				if (!stackEffect.IsKnown) {
458 					throw new Exception(
459 						"Invalid stack effect syntax");
460 				}
461 				wordBuilder.StackEffect = stackEffect;
462 			} else {
463 				delayedToken = tt;
464 			}
465 		});
466 
467 		/*
468 		 * Pops a string as word name, and two integers as stack
469 		 * effect. It begins definition of that word, setting it
470 		 * as current target for word building. Any previously
471 		 * opened word is saved and will become available again as
472 		 * a target when that new word is finished building.
473 		 *
474 		 * Stack effect is the pair 'din dout'. If din is negative,
475 		 * then the stack effect is "unknown". If din is nonnegative
476 		 * but dout is negative, then the word is reputed never to
477 		 * return.
478 		 */
479 		AddNative("define-word", false, cpu => {
480 			int dout = cpu.Pop();
481 			int din = cpu.Pop();
482 			TValue s = cpu.Pop();
483 			if (!(s.ptr is TPointerBlob)) {
484 				throw new Exception(string.Format(
485 					"Not a string: '{0}'", s));
486 			}
487 			string tt = s.ToString();
488 			if (compiling) {
489 				savedWordBuilders.Push(wordBuilder);
490 			} else {
491 				compiling = true;
492 			}
493 			wordBuilder = new WordBuilder(this, tt);
494 			wordBuilder.StackEffect = new SType(din, dout);
495 		});
496 
497 		/*
498 		 * ;  (immediate)
499 		 * Ends current word. The current word is registered under
500 		 * its name, and the previously opened word (if any) becomes
501 		 * again the building target.
502 		 */
503 		AddNative(";", true, cpu => {
504 			if (!compiling) {
505 				throw new Exception("Not compiling");
506 			}
507 			Word w = wordBuilder.Build();
508 			string name = w.Name;
509 			if (words.ContainsKey(name)) {
510 				throw new Exception(
511 					"Word already defined: " + name);
512 			}
513 			words[name] = w;
514 			lastWord = w;
515 			if (savedWordBuilders.Count > 0) {
516 				wordBuilder = savedWordBuilders.Pop();
517 			} else {
518 				wordBuilder = null;
519 				compiling = false;
520 			}
521 		});
522 
523 		/*
524 		 * immediate
525 		 * Sets the last defined word as immediate.
526 		 */
527 		AddNative("immediate", false, cpu => {
528 			if (lastWord == null) {
529 				throw new Exception("No word defined yet");
530 			}
531 			lastWord.Immediate = true;
532 		});
533 
534 		/*
535 		 * literal  (immediate)
536 		 * Pops the current TOS value, and add in the current word
537 		 * the action of pushing that value. This cannot be used
538 		 * when no word is being built.
539 		 */
540 		WordNative wliteral = AddNative("literal", true, cpu => {
541 			CheckCompiling();
542 			wordBuilder.Literal(cpu.Pop());
543 		});
544 
545 		/*
546 		 * compile
547 		 * Pops the current TOS value, which must be an XT (pointer
548 		 * to a word); the action of calling that word is compiled
549 		 * in the current word.
550 		 */
551 		WordNative wcompile = AddNative("compile", false, cpu => {
552 			CheckCompiling();
553 			wordBuilder.Call(cpu.Pop().ToXT());
554 		});
555 
556 		/*
557 		 * postpone  (immediate)
558 		 * Parses the next token as a word name, and add to the
559 		 * current word the action of calling that word. This
560 		 * basically removes immediatety from the next word.
561 		 */
562 		AddNative("postpone", true, cpu => {
563 			CheckCompiling();
564 			string tt = Next();
565 			if (tt == null) {
566 				throw new Exception(
567 					"EOF reached (missing name)");
568 			}
569 			TValue v;
570 			bool isVal = TryParseLiteral(tt, out v);
571 			Word w = LookupNF(tt);
572 			if (isVal && w != null) {
573 				throw new Exception(String.Format(
574 					"Ambiguous: both defined word and"
575 					+ " literal: {0}", tt));
576 			}
577 			if (isVal) {
578 				wordBuilder.Literal(v);
579 				wordBuilder.CallExt(wliteral);
580 			} else if (w != null) {
581 				if (w.Immediate) {
582 					wordBuilder.CallExt(w);
583 				} else {
584 					wordBuilder.Literal(new TValue(0,
585 						new TPointerXT(w)));
586 					wordBuilder.CallExt(wcompile);
587 				}
588 			} else {
589 				wordBuilder.Literal(new TValue(0,
590 					new TPointerXT(tt)));
591 				wordBuilder.CallExt(wcompile);
592 			}
593 		});
594 
595 		/*
596 		 * Interrupt compilation with an error.
597 		 */
598 		AddNative("exitvm", false, cpu => {
599 			throw new Exception();
600 		});
601 
602 		/*
603 		 * Open a new data block. Its symbolic address is pushed
604 		 * on the stack.
605 		 */
606 		AddNative("new-data-block", false, cpu => {
607 			dataBlock = new ConstData(this);
608 			cpu.Push(new TValue(0, new TPointerBlob(dataBlock)));
609 		});
610 
611 		/*
612 		 * Define a new data word. The data address and name are
613 		 * popped from the stack.
614 		 */
615 		AddNative("define-data-word", false, cpu => {
616 			string name = cpu.Pop().ToString();
617 			TValue va = cpu.Pop();
618 			TPointerBlob tb = va.ptr as TPointerBlob;
619 			if (tb == null) {
620 				throw new Exception(
621 					"Address is not a data area");
622 			}
623 			Word w = new WordData(this, name, tb.Blob, va.x);
624 			if (words.ContainsKey(name)) {
625 				throw new Exception(
626 					"Word already defined: " + name);
627 			}
628 			words[name] = w;
629 			lastWord = w;
630 		});
631 
632 		/*
633 		 * Get an address pointing at the end of the current
634 		 * data block. This is the address of the next byte that
635 		 * will be added.
636 		 */
637 		AddNative("current-data", false, cpu => {
638 			if (dataBlock == null) {
639 				throw new Exception(
640 					"No current data block");
641 			}
642 			cpu.Push(new TValue(dataBlock.Length,
643 				new TPointerBlob(dataBlock)));
644 		});
645 
646 		/*
647 		 * Add a byte value to the data block.
648 		 */
649 		AddNative("data-add8", false, cpu => {
650 			if (dataBlock == null) {
651 				throw new Exception(
652 					"No current data block");
653 			}
654 			int v = cpu.Pop();
655 			if (v < 0 || v > 0xFF) {
656 				throw new Exception(
657 					"Byte value out of range: " + v);
658 			}
659 			dataBlock.Add8((byte)v);
660 		});
661 
662 		/*
663 		 * Set a byte value in the data block.
664 		 */
665 		AddNative("data-set8", false, cpu => {
666 			TValue va = cpu.Pop();
667 			TPointerBlob tb = va.ptr as TPointerBlob;
668 			if (tb == null) {
669 				throw new Exception(
670 					"Address is not a data area");
671 			}
672 			int v = cpu.Pop();
673 			if (v < 0 || v > 0xFF) {
674 				throw new Exception(
675 					"Byte value out of range: " + v);
676 			}
677 			tb.Blob.Set8(va.x, (byte)v);
678 		});
679 
680 		/*
681 		 * Get a byte value from a data block.
682 		 */
683 		AddNative("data-get8", false, new SType(1, 1), cpu => {
684 			TValue va = cpu.Pop();
685 			TPointerBlob tb = va.ptr as TPointerBlob;
686 			if (tb == null) {
687 				throw new Exception(
688 					"Address is not a data area");
689 			}
690 			int v = tb.Blob.Read8(va.x);
691 			cpu.Push(v);
692 		});
693 
694 		/*
695 		 *
696 		 */
697 		AddNative("compile-local-read", false, cpu => {
698 			CheckCompiling();
699 			wordBuilder.GetLocal(cpu.Pop().ToString());
700 		});
701 		AddNative("compile-local-write", false, cpu => {
702 			CheckCompiling();
703 			wordBuilder.PutLocal(cpu.Pop().ToString());
704 		});
705 
706 		AddNative("ahead", true, cpu => {
707 			CheckCompiling();
708 			wordBuilder.Ahead();
709 		});
710 		AddNative("begin", true, cpu => {
711 			CheckCompiling();
712 			wordBuilder.Begin();
713 		});
714 		AddNative("again", true, cpu => {
715 			CheckCompiling();
716 			wordBuilder.Again();
717 		});
718 		AddNative("until", true, cpu => {
719 			CheckCompiling();
720 			wordBuilder.AgainIfNot();
721 		});
722 		AddNative("untilnot", true, cpu => {
723 			CheckCompiling();
724 			wordBuilder.AgainIf();
725 		});
726 		AddNative("if", true, cpu => {
727 			CheckCompiling();
728 			wordBuilder.AheadIfNot();
729 		});
730 		AddNative("ifnot", true, cpu => {
731 			CheckCompiling();
732 			wordBuilder.AheadIf();
733 		});
734 		AddNative("then", true, cpu => {
735 			CheckCompiling();
736 			wordBuilder.Then();
737 		});
738 		AddNative("cs-pick", false, cpu => {
739 			CheckCompiling();
740 			wordBuilder.CSPick(cpu.Pop());
741 		});
742 		AddNative("cs-roll", false, cpu => {
743 			CheckCompiling();
744 			wordBuilder.CSRoll(cpu.Pop());
745 		});
746 		AddNative("next-word", false, cpu => {
747 			string s = Next();
748 			if (s == null) {
749 				throw new Exception("No next word (EOF)");
750 			}
751 			cpu.Push(StringToBlob(s));
752 		});
753 		AddNative("parse", false, cpu => {
754 			int d = cpu.Pop();
755 			string s = ReadTerm(d);
756 			cpu.Push(StringToBlob(s));
757 		});
758 		AddNative("char", false, cpu => {
759 			int c = NextChar();
760 			if (c < 0) {
761 				throw new Exception("No next character (EOF)");
762 			}
763 			cpu.Push(c);
764 		});
765 		AddNative("'", false, cpu => {
766 			string name = Next();
767 			cpu.Push(new TValue(0, new TPointerXT(name)));
768 		});
769 
770 		/*
771 		 * The "execute" word is valid in generated C code, but
772 		 * since it jumps to a runtime pointer, its actual stack
773 		 * effect cannot be computed in advance.
774 		 */
775 		AddNative("execute", false, cpu => {
776 			cpu.Pop().Execute(this, cpu);
777 		});
778 
779 		AddNative("[", true, cpu => {
780 			CheckCompiling();
781 			compiling = false;
782 		});
783 		AddNative("]", false, cpu => {
784 			compiling = true;
785 		});
786 		AddNative("(local)", false, cpu => {
787 			CheckCompiling();
788 			wordBuilder.DefLocal(cpu.Pop().ToString());
789 		});
790 		AddNative("ret", true, cpu => {
791 			CheckCompiling();
792 			wordBuilder.Ret();
793 		});
794 
795 		AddNative("drop", false, new SType(1, 0), cpu => {
796 			cpu.Pop();
797 		});
798 		AddNative("dup", false, new SType(1, 2), cpu => {
799 			cpu.Push(cpu.Peek(0));
800 		});
801 		AddNative("swap", false, new SType(2, 2), cpu => {
802 			cpu.Rot(1);
803 		});
804 		AddNative("over", false, new SType(2, 3), cpu => {
805 			cpu.Push(cpu.Peek(1));
806 		});
807 		AddNative("rot", false, new SType(3, 3), cpu => {
808 			cpu.Rot(2);
809 		});
810 		AddNative("-rot", false, new SType(3, 3), cpu => {
811 			cpu.NRot(2);
812 		});
813 
814 		/*
815 		 * "roll" and "pick" are special in that the stack slot
816 		 * they inspect might be known only at runtime, so an
817 		 * absolute stack effect cannot be attributed. Instead,
818 		 * we simply hope that the caller knows what it is doing,
819 		 * and we use a simple stack effect for just the count
820 		 * value and picked value.
821 		 */
822 		AddNative("roll", false, new SType(1, 0), cpu => {
823 			cpu.Rot(cpu.Pop());
824 		});
825 		AddNative("pick", false, new SType(1, 1), cpu => {
826 			cpu.Push(cpu.Peek(cpu.Pop()));
827 		});
828 
829 		AddNative("+", false, new SType(2, 1), cpu => {
830 			TValue b = cpu.Pop();
831 			TValue a = cpu.Pop();
832 			if (b.ptr == null) {
833 				a.x += (int)b;
834 				cpu.Push(a);
835 			} else if (a.ptr is TPointerBlob
836 				&& b.ptr is TPointerBlob)
837 			{
838 				cpu.Push(StringToBlob(
839 					a.ToString() + b.ToString()));
840 			} else {
841 				throw new Exception(string.Format(
842 					"Cannot add '{0}' to '{1}'", b, a));
843 			}
844 		});
845 		AddNative("-", false, new SType(2, 1), cpu => {
846 			/*
847 			 * We can subtract two pointers, provided that
848 			 * they point to the same blob. Otherwise,
849 			 * the subtraction second operand must be an
850 			 * integer.
851 			 */
852 			TValue b = cpu.Pop();
853 			TValue a = cpu.Pop();
854 			TPointerBlob ap = a.ptr as TPointerBlob;
855 			TPointerBlob bp = b.ptr as TPointerBlob;
856 			if (ap != null && bp != null && ap.Blob == bp.Blob) {
857 				cpu.Push(new TValue(a.x - b.x));
858 				return;
859 			}
860 			int bx = b;
861 			a.x -= bx;
862 			cpu.Push(a);
863 		});
864 		AddNative("neg", false, new SType(1, 1), cpu => {
865 			int ax = cpu.Pop();
866 			cpu.Push(-ax);
867 		});
868 		AddNative("*", false, new SType(2, 1), cpu => {
869 			int bx = cpu.Pop();
870 			int ax = cpu.Pop();
871 			cpu.Push(ax * bx);
872 		});
873 		AddNative("/", false, new SType(2, 1), cpu => {
874 			int bx = cpu.Pop();
875 			int ax = cpu.Pop();
876 			cpu.Push(ax / bx);
877 		});
878 		AddNative("u/", false, new SType(2, 1), cpu => {
879 			uint bx = cpu.Pop();
880 			uint ax = cpu.Pop();
881 			cpu.Push(ax / bx);
882 		});
883 		AddNative("%", false, new SType(2, 1), cpu => {
884 			int bx = cpu.Pop();
885 			int ax = cpu.Pop();
886 			cpu.Push(ax % bx);
887 		});
888 		AddNative("u%", false, new SType(2, 1), cpu => {
889 			uint bx = cpu.Pop();
890 			uint ax = cpu.Pop();
891 			cpu.Push(ax % bx);
892 		});
893 		AddNative("<", false, new SType(2, 1), cpu => {
894 			int bx = cpu.Pop();
895 			int ax = cpu.Pop();
896 			cpu.Push(ax < bx);
897 		});
898 		AddNative("<=", false, new SType(2, 1), cpu => {
899 			int bx = cpu.Pop();
900 			int ax = cpu.Pop();
901 			cpu.Push(ax <= bx);
902 		});
903 		AddNative(">", false, new SType(2, 1), cpu => {
904 			int bx = cpu.Pop();
905 			int ax = cpu.Pop();
906 			cpu.Push(ax > bx);
907 		});
908 		AddNative(">=", false, new SType(2, 1), cpu => {
909 			int bx = cpu.Pop();
910 			int ax = cpu.Pop();
911 			cpu.Push(ax >= bx);
912 		});
913 		AddNative("=", false, new SType(2, 1), cpu => {
914 			TValue b = cpu.Pop();
915 			TValue a = cpu.Pop();
916 			cpu.Push(a.Equals(b));
917 		});
918 		AddNative("<>", false, new SType(2, 1), cpu => {
919 			TValue b = cpu.Pop();
920 			TValue a = cpu.Pop();
921 			cpu.Push(!a.Equals(b));
922 		});
923 		AddNative("u<", false, new SType(2, 1), cpu => {
924 			uint bx = cpu.Pop().UInt;
925 			uint ax = cpu.Pop().UInt;
926 			cpu.Push(new TValue(ax < bx));
927 		});
928 		AddNative("u<=", false, new SType(2, 1), cpu => {
929 			uint bx = cpu.Pop().UInt;
930 			uint ax = cpu.Pop().UInt;
931 			cpu.Push(new TValue(ax <= bx));
932 		});
933 		AddNative("u>", false, new SType(2, 1), cpu => {
934 			uint bx = cpu.Pop().UInt;
935 			uint ax = cpu.Pop().UInt;
936 			cpu.Push(new TValue(ax > bx));
937 		});
938 		AddNative("u>=", false, new SType(2, 1), cpu => {
939 			uint bx = cpu.Pop();
940 			uint ax = cpu.Pop();
941 			cpu.Push(ax >= bx);
942 		});
943 		AddNative("and", false, new SType(2, 1), cpu => {
944 			uint bx = cpu.Pop();
945 			uint ax = cpu.Pop();
946 			cpu.Push(ax & bx);
947 		});
948 		AddNative("or", false, new SType(2, 1), cpu => {
949 			uint bx = cpu.Pop();
950 			uint ax = cpu.Pop();
951 			cpu.Push(ax | bx);
952 		});
953 		AddNative("xor", false, new SType(2, 1), cpu => {
954 			uint bx = cpu.Pop();
955 			uint ax = cpu.Pop();
956 			cpu.Push(ax ^ bx);
957 		});
958 		AddNative("not", false, new SType(1, 1), cpu => {
959 			uint ax = cpu.Pop();
960 			cpu.Push(~ax);
961 		});
962 		AddNative("<<", false, new SType(2, 1), cpu => {
963 			int count = cpu.Pop();
964 			if (count < 0 || count > 31) {
965 				throw new Exception("Invalid shift count");
966 			}
967 			uint ax = cpu.Pop();
968 			cpu.Push(ax << count);
969 		});
970 		AddNative(">>", false, new SType(2, 1), cpu => {
971 			int count = cpu.Pop();
972 			if (count < 0 || count > 31) {
973 				throw new Exception("Invalid shift count");
974 			}
975 			int ax = cpu.Pop();
976 			cpu.Push(ax >> count);
977 		});
978 		AddNative("u>>", false, new SType(2, 1), cpu => {
979 			int count = cpu.Pop();
980 			if (count < 0 || count > 31) {
981 				throw new Exception("Invalid shift count");
982 			}
983 			uint ax = cpu.Pop();
984 			cpu.Push(ax >> count);
985 		});
986 
987 		AddNative(".", false, new SType(1, 0), cpu => {
988 			Console.Write(" {0}", cpu.Pop().ToString());
989 		});
990 		AddNative(".s", false, SType.BLANK, cpu => {
991 			int n = cpu.Depth;
992 			for (int i = n - 1; i >= 0; i --) {
993 				Console.Write(" {0}", cpu.Peek(i).ToString());
994 			}
995 		});
996 		AddNative("putc", false, new SType(1, 0), cpu => {
997 			Console.Write((char)cpu.Pop());
998 		});
999 		AddNative("puts", false, new SType(1, 0), cpu => {
1000 			Console.Write("{0}", cpu.Pop().ToString());
1001 		});
1002 		AddNative("cr", false, SType.BLANK, cpu => {
1003 			Console.WriteLine();
1004 		});
1005 		AddNative("eqstr", false, new SType(2, 1), cpu => {
1006 			string s2 = cpu.Pop().ToString();
1007 			string s1 = cpu.Pop().ToString();
1008 			cpu.Push(s1 == s2);
1009 		});
1010 	}
1011 
AddNative(string name, bool immediate, WordNative.NativeRun code)1012 	WordNative AddNative(string name, bool immediate,
1013 		WordNative.NativeRun code)
1014 	{
1015 		return AddNative(name, immediate, SType.UNKNOWN, code);
1016 	}
1017 
AddNative(string name, bool immediate, SType stackEffect, WordNative.NativeRun code)1018 	WordNative AddNative(string name, bool immediate, SType stackEffect,
1019 		WordNative.NativeRun code)
1020 	{
1021 		if (words.ContainsKey(name)) {
1022 			throw new Exception(
1023 				"Word already defined: " + name);
1024 		}
1025 		WordNative w = new WordNative(this, name, code);
1026 		w.Immediate = immediate;
1027 		w.StackEffect = stackEffect;
1028 		words[name] = w;
1029 		return w;
1030 	}
1031 
NextBlobID()1032 	internal long NextBlobID()
1033 	{
1034 		return currentBlobID ++;
1035 	}
1036 
NextChar()1037 	int NextChar()
1038 	{
1039 		int c = delayedChar;
1040 		if (c >= 0) {
1041 			delayedChar = Int32.MinValue;
1042 		} else if (c > Int32.MinValue) {
1043 			delayedChar = -(c + 1);
1044 			c = '\n';
1045 		} else {
1046 			c = currentInput.Read();
1047 		}
1048 		if (c == '\r') {
1049 			if (delayedChar >= 0) {
1050 				c = delayedChar;
1051 				delayedChar = Int32.MinValue;
1052 			} else {
1053 				c = currentInput.Read();
1054 			}
1055 			if (c != '\n') {
1056 				delayedChar = c;
1057 				c = '\n';
1058 			}
1059 		}
1060 		return c;
1061 	}
1062 
1063 	/*
1064 	 * Un-read the character value 'c'. That value MUST be the one
1065 	 * that was obtained from NextChar().
1066 	 */
Unread(int c)1067 	void Unread(int c)
1068 	{
1069 		if (c < 0) {
1070 			return;
1071 		}
1072 		if (delayedChar < 0) {
1073 			if (delayedChar != Int32.MinValue) {
1074 				throw new Exception(
1075 					"Already two delayed characters");
1076 			}
1077 			delayedChar = c;
1078 		} else if (c != '\n') {
1079 			throw new Exception("Cannot delay two characters");
1080 		} else {
1081 			delayedChar = -(delayedChar + 1);
1082 		}
1083 	}
1084 
Next()1085 	string Next()
1086 	{
1087 		string r = delayedToken;
1088 		if (r != null) {
1089 			delayedToken = null;
1090 			return r;
1091 		}
1092 		tokenBuilder.Length = 0;
1093 		int c;
1094 		for (;;) {
1095 			c = NextChar();
1096 			if (c < 0) {
1097 				return null;
1098 			}
1099 			if (!IsWS(c)) {
1100 				break;
1101 			}
1102 		}
1103 		if (c == '"') {
1104 			return ParseString();
1105 		}
1106 		for (;;) {
1107 			tokenBuilder.Append((char)c);
1108 			c = NextChar();
1109 			if (c < 0 || IsWS(c)) {
1110 				Unread(c);
1111 				return tokenBuilder.ToString();
1112 			}
1113 		}
1114 	}
1115 
ParseCCode()1116 	string ParseCCode()
1117 	{
1118 		SType stackEffect;
1119 		string r = ParseCCode(out stackEffect);
1120 		if (stackEffect.IsKnown) {
1121 			throw new Exception(
1122 				"Stack effect forbidden in this declaration");
1123 		}
1124 		return r;
1125 	}
1126 
ParseCCode(out SType stackEffect)1127 	string ParseCCode(out SType stackEffect)
1128 	{
1129 		string s = ParseCCodeNF(out stackEffect);
1130 		if (s == null) {
1131 			throw new Exception("Error while parsing C code");
1132 		}
1133 		return s;
1134 	}
1135 
ParseCCodeNF(out SType stackEffect)1136 	string ParseCCodeNF(out SType stackEffect)
1137 	{
1138 		stackEffect = SType.UNKNOWN;
1139 		for (;;) {
1140 			int c = NextChar();
1141 			if (c < 0) {
1142 				return null;
1143 			}
1144 			if (!IsWS(c)) {
1145 				if (c == '(') {
1146 					if (stackEffect.IsKnown) {
1147 						Unread(c);
1148 						return null;
1149 					}
1150 					stackEffect = ParseStackEffectNF();
1151 					if (!stackEffect.IsKnown) {
1152 						return null;
1153 					}
1154 					continue;
1155 				} else if (c != '{') {
1156 					Unread(c);
1157 					return null;
1158 				}
1159 				break;
1160 			}
1161 		}
1162 		StringBuilder sb = new StringBuilder();
1163 		int count = 1;
1164 		for (;;) {
1165 			int c = NextChar();
1166 			if (c < 0) {
1167 				return null;
1168 			}
1169 			switch (c) {
1170 			case '{':
1171 				count ++;
1172 				break;
1173 			case '}':
1174 				if (-- count == 0) {
1175 					return sb.ToString();
1176 				}
1177 				break;
1178 			}
1179 			sb.Append((char)c);
1180 		}
1181 	}
1182 
1183 	/*
1184 	 * Parse a stack effect declaration. This method assumes that the
1185 	 * opening parenthesis has just been read. If the parsing fails,
1186 	 * then this method returns SType.UNKNOWN.
1187 	 */
ParseStackEffectNF()1188 	SType ParseStackEffectNF()
1189 	{
1190 		bool seenSep = false;
1191 		bool seenBang = false;
1192 		int din = 0, dout = 0;
1193 		for (;;) {
1194 			string t = Next();
1195 			if (t == null) {
1196 				return SType.UNKNOWN;
1197 			}
1198 			if (t == "--") {
1199 				if (seenSep) {
1200 					return SType.UNKNOWN;
1201 				}
1202 				seenSep = true;
1203 			} else if (t == ")") {
1204 				if (seenSep) {
1205 					if (seenBang && dout == 1) {
1206 						dout = -1;
1207 					}
1208 					return new SType(din, dout);
1209 				} else {
1210 					return SType.UNKNOWN;
1211 				}
1212 			} else {
1213 				if (seenSep) {
1214 					if (dout == 0 && t == "!") {
1215 						seenBang = true;
1216 					}
1217 					dout ++;
1218 				} else {
1219 					din ++;
1220 				}
1221 			}
1222 		}
1223 	}
1224 
ParseString()1225 	string ParseString()
1226 	{
1227 		StringBuilder sb = new StringBuilder();
1228 		sb.Append('"');
1229 		bool lcwb = false;
1230 		int hexNum = 0;
1231 		int acc = 0;
1232 		for (;;) {
1233 			int c = NextChar();
1234 			if (c < 0) {
1235 				throw new Exception(
1236 					"Unfinished literal string");
1237 			}
1238 			if (hexNum > 0) {
1239 				int d = HexVal(c);
1240 				if (d < 0) {
1241 					throw new Exception(String.Format(
1242 						"not an hex digit: U+{0:X4}",
1243 						c));
1244 				}
1245 				acc = (acc << 4) + d;
1246 				if (-- hexNum == 0) {
1247 					sb.Append((char)acc);
1248 					acc = 0;
1249 				}
1250 			} else if (lcwb) {
1251 				lcwb = false;
1252 				switch (c) {
1253 				case '\n': SkipNL(); break;
1254 				case 'x':
1255 					hexNum = 2;
1256 					break;
1257 				case 'u':
1258 					hexNum = 4;
1259 					break;
1260 				default:
1261 					sb.Append(SingleCharEscape(c));
1262 					break;
1263 				}
1264 			} else {
1265 				switch (c) {
1266 				case '"':
1267 					return sb.ToString();
1268 				case '\\':
1269 					lcwb = true;
1270 					break;
1271 				default:
1272 					sb.Append((char)c);
1273 					break;
1274 				}
1275 			}
1276 		}
1277 	}
1278 
SingleCharEscape(int c)1279 	static char SingleCharEscape(int c)
1280 	{
1281 		switch (c) {
1282 		case 'n': return '\n';
1283 		case 'r': return '\r';
1284 		case 't': return '\t';
1285 		case 's': return ' ';
1286 		default:
1287 			return (char)c;
1288 		}
1289 	}
1290 
1291 	/*
1292 	 * A backslash+newline sequence occurred in a literal string; we
1293 	 * check and consume the newline escape sequence (whitespace at
1294 	 * start of next line, then a double-quote character).
1295 	 */
SkipNL()1296 	void SkipNL()
1297 	{
1298 		for (;;) {
1299 			int c = NextChar();
1300 			if (c < 0) {
1301 				throw new Exception("EOF in literal string");
1302 			}
1303 			if (c == '\n') {
1304 				throw new Exception(
1305 					"Unescaped newline in literal string");
1306 			}
1307 			if (IsWS(c)) {
1308 				continue;
1309 			}
1310 			if (c == '"') {
1311 				return;
1312 			}
1313 			throw new Exception(
1314 				"Invalid newline escape in literal string");
1315 		}
1316 	}
1317 
DecodeCharConst(string t)1318 	static char DecodeCharConst(string t)
1319 	{
1320 		if (t.Length == 1 && t[0] != '\\') {
1321 			return t[0];
1322 		}
1323 		if (t.Length >= 2 && t[0] == '\\') {
1324 			switch (t[1]) {
1325 			case 'x':
1326 				if (t.Length == 4) {
1327 					int x = DecHex(t.Substring(2));
1328 					if (x >= 0) {
1329 						return (char)x;
1330 					}
1331 				}
1332 				break;
1333 			case 'u':
1334 				if (t.Length == 6) {
1335 					int x = DecHex(t.Substring(2));
1336 					if (x >= 0) {
1337 						return (char)x;
1338 					}
1339 				}
1340 				break;
1341 			default:
1342 				if (t.Length == 2) {
1343 					return SingleCharEscape(t[1]);
1344 				}
1345 				break;
1346 			}
1347 		}
1348 		throw new Exception("Invalid literal char: `" + t);
1349 	}
1350 
DecHex(string s)1351 	static int DecHex(string s)
1352 	{
1353 		int acc = 0;
1354 		foreach (char c in s) {
1355 			int d = HexVal(c);
1356 			if (d < 0) {
1357 				return -1;
1358 			}
1359 			acc = (acc << 4) + d;
1360 		}
1361 		return acc;
1362 	}
1363 
HexVal(int c)1364 	static int HexVal(int c)
1365 	{
1366 		if (c >= '0' && c <= '9') {
1367 			return c - '0';
1368 		} else if (c >= 'A' && c <= 'F') {
1369 			return c - ('A' - 10);
1370 		} else if (c >= 'a' && c <= 'f') {
1371 			return c - ('a' - 10);
1372 		} else {
1373 			return -1;
1374 		}
1375 	}
1376 
ReadTerm(int ct)1377 	string ReadTerm(int ct)
1378 	{
1379 		StringBuilder sb = new StringBuilder();
1380 		for (;;) {
1381 			int c = NextChar();
1382 			if (c < 0) {
1383 				throw new Exception(String.Format(
1384 					"EOF reached before U+{0:X4}", ct));
1385 			}
1386 			if (c == ct) {
1387 				return sb.ToString();
1388 			}
1389 			sb.Append((char)c);
1390 		}
1391 	}
1392 
IsWS(int c)1393 	static bool IsWS(int c)
1394 	{
1395 		return c <= 32;
1396 	}
1397 
ProcessInput(TextReader tr)1398 	void ProcessInput(TextReader tr)
1399 	{
1400 		this.currentInput = tr;
1401 		delayedChar = -1;
1402 		Word w = new WordNative(this, "toplevel",
1403 			xcpu => { CompileStep(xcpu); });
1404 		CPU cpu = new CPU();
1405 		Opcode[] code = new Opcode[] {
1406 			new OpcodeCall(w),
1407 			new OpcodeJumpUncond(-2)
1408 		};
1409 		quitRunLoop = false;
1410 		cpu.Enter(code, 0);
1411 		for (;;) {
1412 			if (quitRunLoop) {
1413 				break;
1414 			}
1415 			Opcode op = cpu.ipBuf[cpu.ipOff ++];
1416 			op.Run(cpu);
1417 		}
1418 	}
1419 
CompileStep(CPU cpu)1420 	void CompileStep(CPU cpu)
1421 	{
1422 		string tt = Next();
1423 		if (tt == null) {
1424 			if (compiling) {
1425 				throw new Exception("EOF while compiling");
1426 			}
1427 			quitRunLoop = true;
1428 			return;
1429 		}
1430 		TValue v;
1431 		bool isVal = TryParseLiteral(tt, out v);
1432 		Word w = LookupNF(tt);
1433 		if (isVal && w != null) {
1434 			throw new Exception(String.Format(
1435 				"Ambiguous: both defined word and literal: {0}",
1436 				tt));
1437 		}
1438 		if (compiling) {
1439 			if (isVal) {
1440 				wordBuilder.Literal(v);
1441 			} else if (w != null) {
1442 				if (w.Immediate) {
1443 					w.Run(cpu);
1444 				} else {
1445 					wordBuilder.CallExt(w);
1446 				}
1447 			} else {
1448 				wordBuilder.Call(tt);
1449 			}
1450 		} else {
1451 			if (isVal) {
1452 				cpu.Push(v);
1453 			} else if (w != null) {
1454 				w.Run(cpu);
1455 			} else {
1456 				throw new Exception(String.Format(
1457 					"Unknown word: '{0}'", tt));
1458 			}
1459 		}
1460 	}
1461 
GetCCode(string name)1462 	string GetCCode(string name)
1463 	{
1464 		string ccode;
1465 		allCCode.TryGetValue(name, out ccode);
1466 		return ccode;
1467 	}
1468 
Generate(string outBase, string coreRun, params string[] entryPoints)1469 	void Generate(string outBase, string coreRun,
1470 		params string[] entryPoints)
1471 	{
1472 		/*
1473 		 * Gather all words that are part of the generated
1474 		 * code. This is done by exploring references
1475 		 * transitively. All such words are thus implicitly
1476 		 * resolved.
1477 		 */
1478 		IDictionary<string, Word> wordSet =
1479 			new SortedDictionary<string, Word>(
1480 				StringComparer.Ordinal);
1481 		Queue<Word> tx = new Queue<Word>();
1482 		foreach (string ep in entryPoints) {
1483 			if (wordSet.ContainsKey(ep)) {
1484 				continue;
1485 			}
1486 			Word w = Lookup(ep);
1487 			wordSet[w.Name] = w;
1488 			tx.Enqueue(w);
1489 		}
1490 		while (tx.Count > 0) {
1491 			Word w = tx.Dequeue();
1492 			foreach (Word w2 in w.GetReferences()) {
1493 				if (wordSet.ContainsKey(w2.Name)) {
1494 					continue;
1495 				}
1496 				wordSet[w2.Name] = w2;
1497 				tx.Enqueue(w2);
1498 			}
1499 		}
1500 
1501 		/*
1502 		 * Do flow analysis.
1503 		 */
1504 		if (enableFlowAnalysis) {
1505 			foreach (string ep in entryPoints) {
1506 				Word w = wordSet[ep];
1507 				w.AnalyseFlow();
1508 				Console.WriteLine("{0}: ds={1} rs={2}",
1509 					ep, w.MaxDataStack, w.MaxReturnStack);
1510 				if (w.MaxDataStack > dsLimit) {
1511 					throw new Exception("'" + ep
1512 						+ "' exceeds data stack limit");
1513 				}
1514 				if (w.MaxReturnStack > rsLimit) {
1515 					throw new Exception("'" + ep
1516 						+ "' exceeds return stack"
1517 						+ " limit");
1518 				}
1519 			}
1520 		}
1521 
1522 		/*
1523 		 * Gather referenced data areas and compute their
1524 		 * addresses in the generated data block. The address
1525 		 * 0 in the data block is unaffected so that no
1526 		 * valid runtime pointer is equal to null.
1527 		 */
1528 		IDictionary<long, ConstData> blocks =
1529 			new SortedDictionary<long, ConstData>();
1530 		foreach (Word w in wordSet.Values) {
1531 			foreach (ConstData cd in w.GetDataBlocks()) {
1532 				blocks[cd.ID] = cd;
1533 			}
1534 		}
1535 		int dataLen = 1;
1536 		foreach (ConstData cd in blocks.Values) {
1537 			cd.Address = dataLen;
1538 			dataLen += cd.Length;
1539 		}
1540 
1541 		/*
1542 		 * Generated code is a sequence of "slot numbers", each
1543 		 * referencing either a piece of explicit C code, or an
1544 		 * entry in the table of interpreted words.
1545 		 *
1546 		 * Opcodes other than "call" get the slots 0 to 6:
1547 		 *
1548 		 *   0   ret           no argument
1549 		 *   1   const         signed value
1550 		 *   2   get local     local number
1551 		 *   3   put local     local number
1552 		 *   4   jump          signed offset
1553 		 *   5   jump if       signed offset
1554 		 *   6   jump if not   signed offset
1555 		 *
1556 		 * The argument, if any, is in "7E" format: the value is
1557 		 * encoded in 7-bit chunk, with big-endian signed
1558 		 * convention. Each 7-bit chunk is encoded over one byte;
1559 		 * the upper bit is 1 for all chunks except the last one.
1560 		 *
1561 		 * Words with explicit C code get the slot numbers
1562 		 * immediately after 6. Interpreted words come afterwards.
1563 		 */
1564 		IDictionary<string, int> slots = new Dictionary<string, int>();
1565 		int curSlot = 7;
1566 
1567 		/*
1568 		 * Get explicit C code for words which have such code.
1569 		 * We use string equality on C code so that words with
1570 		 * identical implementations get merged.
1571 		 *
1572 		 * We also check that words with no explicit C code are
1573 		 * interpreted.
1574 		 */
1575 		IDictionary<string, int> ccodeUni =
1576 			new Dictionary<string, int>();
1577 		IDictionary<int, string> ccodeNames =
1578 			new Dictionary<int, string>();
1579 		foreach (Word w in wordSet.Values) {
1580 			string ccode = GetCCode(w.Name);
1581 			if (ccode == null) {
1582 				if (w is WordNative) {
1583 					throw new Exception(String.Format(
1584 						"No C code for native '{0}'",
1585 						w.Name));
1586 				}
1587 				continue;
1588 			}
1589 			int sn;
1590 			if (ccodeUni.ContainsKey(ccode)) {
1591 				sn = ccodeUni[ccode];
1592 				ccodeNames[sn] += " " + EscapeCComment(w.Name);
1593 			} else {
1594 				sn = curSlot ++;
1595 				ccodeUni[ccode] = sn;
1596 				ccodeNames[sn] = EscapeCComment(w.Name);
1597 			}
1598 			slots[w.Name] = sn;
1599 			w.Slot = sn;
1600 		}
1601 
1602 		/*
1603 		 * Assign slot values to all remaining words; we know they
1604 		 * are all interpreted.
1605 		 */
1606 		int slotInterpreted = curSlot;
1607 		foreach (Word w in wordSet.Values) {
1608 			if (GetCCode(w.Name) != null) {
1609 				continue;
1610 			}
1611 			int sn = curSlot ++;
1612 			slots[w.Name] = sn;
1613 			w.Slot = sn;
1614 		}
1615 		int numInterpreted = curSlot - slotInterpreted;
1616 
1617 		/*
1618 		 * Verify that all entry points are interpreted words.
1619 		 */
1620 		foreach (string ep in entryPoints) {
1621 			if (GetCCode(ep) != null) {
1622 				throw new Exception(
1623 					"Non-interpreted entry point");
1624 			}
1625 		}
1626 
1627 		/*
1628 		 * Compute the code block. Each word (without any C code)
1629 		 * yields some CodeElement instances.
1630 		 */
1631 		List<CodeElement> gcodeList = new List<CodeElement>();
1632 		CodeElement[] interpretedEntry =
1633 			new CodeElement[numInterpreted];
1634 		foreach (Word w in wordSet.Values) {
1635 			if (GetCCode(w.Name) != null) {
1636 				continue;
1637 			}
1638 			int n = gcodeList.Count;
1639 			w.GenerateCodeElements(gcodeList);
1640 			interpretedEntry[w.Slot - slotInterpreted] =
1641 				gcodeList[n];
1642 		}
1643 		CodeElement[] gcode = gcodeList.ToArray();
1644 
1645 		/*
1646 		 * If there are less than 256 words in total (C +
1647 		 * interpreted) then we can use "one-byte code" which is
1648 		 * more compact when the number of words is in the
1649 		 * 128..255 range.
1650 		 */
1651 		bool oneByteCode;
1652 		if (slotInterpreted + numInterpreted >= 256) {
1653 			Console.WriteLine("WARNING: more than 255 words");
1654 			oneByteCode = false;
1655 		} else {
1656 			oneByteCode = true;
1657 		}
1658 
1659 		/*
1660 		 * Compute all addresses and offsets. This loops until
1661 		 * the addresses stabilize.
1662 		 */
1663 		int totalLen = -1;
1664 		int[] gcodeLen = new int[gcode.Length];
1665 		for (;;) {
1666 			for (int i = 0; i < gcode.Length; i ++) {
1667 				gcodeLen[i] = gcode[i].GetLength(oneByteCode);
1668 			}
1669 			int off = 0;
1670 			for (int i = 0; i < gcode.Length; i ++) {
1671 				gcode[i].Address = off;
1672 				gcode[i].LastLength = gcodeLen[i];
1673 				off += gcodeLen[i];
1674 			}
1675 			if (off == totalLen) {
1676 				break;
1677 			}
1678 			totalLen = off;
1679 		}
1680 
1681 		/*
1682 		 * Produce output file.
1683 		 */
1684 		using (TextWriter tw = File.CreateText(outBase + ".c")) {
1685 			tw.NewLine = "\n";
1686 
1687 			tw.WriteLine("{0}",
1688 @"/* Automatically generated code; do not modify directly. */
1689 
1690 #include <stddef.h>
1691 #include <stdint.h>
1692 
1693 typedef struct {
1694 	uint32_t *dp;
1695 	uint32_t *rp;
1696 	const unsigned char *ip;
1697 } t0_context;
1698 
1699 static uint32_t
1700 t0_parse7E_unsigned(const unsigned char **p)
1701 {
1702 	uint32_t x;
1703 
1704 	x = 0;
1705 	for (;;) {
1706 		unsigned y;
1707 
1708 		y = *(*p) ++;
1709 		x = (x << 7) | (uint32_t)(y & 0x7F);
1710 		if (y < 0x80) {
1711 			return x;
1712 		}
1713 	}
1714 }
1715 
1716 static int32_t
1717 t0_parse7E_signed(const unsigned char **p)
1718 {
1719 	int neg;
1720 	uint32_t x;
1721 
1722 	neg = ((**p) >> 6) & 1;
1723 	x = (uint32_t)-neg;
1724 	for (;;) {
1725 		unsigned y;
1726 
1727 		y = *(*p) ++;
1728 		x = (x << 7) | (uint32_t)(y & 0x7F);
1729 		if (y < 0x80) {
1730 			if (neg) {
1731 				return -(int32_t)~x - 1;
1732 			} else {
1733 				return (int32_t)x;
1734 			}
1735 		}
1736 	}
1737 }
1738 
1739 #define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80)
1740 #define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F)
1741 #define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)
1742 #define T0_INT1(x)       T0_FBYTE(x, 0)
1743 #define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1744 #define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1745 #define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1746 #define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1747 
1748 /* static const unsigned char t0_datablock[]; */
1749 ");
1750 
1751 			/*
1752 			 * Add declarations (not definitions) for the
1753 			 * entry point initialisation functions, and the
1754 			 * runner.
1755 			 */
1756 			tw.WriteLine();
1757 			foreach (string ep in entryPoints) {
1758 				tw.WriteLine("void {0}_init_{1}(void *t0ctx);",
1759 					coreRun, ep);
1760 			}
1761 			tw.WriteLine();
1762 			tw.WriteLine("void {0}_run(void *t0ctx);", coreRun);
1763 
1764 			/*
1765 			 * Add preamble elements here. They may be needed
1766 			 * for evaluating constant expressions in the
1767 			 * code block.
1768 			 */
1769 			foreach (string pp in extraCode) {
1770 				tw.WriteLine();
1771 				tw.WriteLine("{0}", pp);
1772 			}
1773 
1774 			BlobWriter bw;
1775 			tw.WriteLine();
1776 			tw.Write("static const unsigned char"
1777 				+ " t0_datablock[] = {");
1778 			bw = new BlobWriter(tw, 78, 1);
1779 			bw.Append((byte)0);
1780 			foreach (ConstData cd in blocks.Values) {
1781 				cd.Encode(bw);
1782 			}
1783 			tw.WriteLine();
1784 			tw.WriteLine("};");
1785 
1786 			tw.WriteLine();
1787 			tw.Write("static const unsigned char"
1788 				+ " t0_codeblock[] = {");
1789 			bw = new BlobWriter(tw, 78, 1);
1790 			foreach (CodeElement ce in gcode) {
1791 				ce.Encode(bw, oneByteCode);
1792 			}
1793 			tw.WriteLine();
1794 			tw.WriteLine("};");
1795 
1796 			tw.WriteLine();
1797 			tw.Write("static const uint16_t t0_caddr[] = {");
1798 			for (int i = 0; i < interpretedEntry.Length; i ++) {
1799 				if (i != 0) {
1800 					tw.Write(',');
1801 				}
1802 				tw.WriteLine();
1803 				tw.Write("\t{0}", interpretedEntry[i].Address);
1804 			}
1805 			tw.WriteLine();
1806 			tw.WriteLine("};");
1807 
1808 			tw.WriteLine();
1809 			tw.WriteLine("#define T0_INTERPRETED   {0}",
1810 				slotInterpreted);
1811 			tw.WriteLine();
1812 			tw.WriteLine("{0}",
1813 @"#define T0_ENTER(ip, rp, slot)   do { \
1814 		const unsigned char *t0_newip; \
1815 		uint32_t t0_lnum; \
1816 		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \
1817 		t0_lnum = t0_parse7E_unsigned(&t0_newip); \
1818 		(rp) += t0_lnum; \
1819 		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \
1820 		(ip) = t0_newip; \
1821 	} while (0)");
1822 			tw.WriteLine();
1823 			tw.WriteLine("{0}",
1824 @"#define T0_DEFENTRY(name, slot) \
1825 void \
1826 name(void *ctx) \
1827 { \
1828 	t0_context *t0ctx = ctx; \
1829 	t0ctx->ip = &t0_codeblock[0]; \
1830 	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
1831 }");
1832 
1833 			tw.WriteLine();
1834 			foreach (string ep in entryPoints) {
1835 				tw.WriteLine("T0_DEFENTRY({0}, {1})",
1836 					coreRun + "_init_" + ep,
1837 					wordSet[ep].Slot);
1838 			}
1839 			tw.WriteLine();
1840 			if (oneByteCode) {
1841 				tw.WriteLine("{0}",
1842 @"#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++)");
1843 			} else {
1844 				tw.WriteLine("{0}",
1845 @"#define T0_NEXT(t0ipp)   t0_parse7E_unsigned(t0ipp)");
1846 			}
1847 			tw.WriteLine();
1848 			tw.WriteLine("void");
1849 			tw.WriteLine("{0}_run(void *t0ctx)", coreRun);
1850 			tw.WriteLine("{0}",
1851 @"{
1852 	uint32_t *dp, *rp;
1853 	const unsigned char *ip;
1854 
1855 #define T0_LOCAL(x)    (*(rp - 2 - (x)))
1856 #define T0_POP()       (*-- dp)
1857 #define T0_POPi()      (*(int32_t *)(-- dp))
1858 #define T0_PEEK(x)     (*(dp - 1 - (x)))
1859 #define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
1860 #define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
1861 #define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
1862 #define T0_RPOP()      (*-- rp)
1863 #define T0_RPOPi()     (*(int32_t *)(-- rp))
1864 #define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
1865 #define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
1866 #define T0_ROLL(x)     do { \
1867 	size_t t0len = (size_t)(x); \
1868 	uint32_t t0tmp = *(dp - 1 - t0len); \
1869 	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
1870 	*(dp - 1) = t0tmp; \
1871 } while (0)
1872 #define T0_SWAP()      do { \
1873 	uint32_t t0tmp = *(dp - 2); \
1874 	*(dp - 2) = *(dp - 1); \
1875 	*(dp - 1) = t0tmp; \
1876 } while (0)
1877 #define T0_ROT()       do { \
1878 	uint32_t t0tmp = *(dp - 3); \
1879 	*(dp - 3) = *(dp - 2); \
1880 	*(dp - 2) = *(dp - 1); \
1881 	*(dp - 1) = t0tmp; \
1882 } while (0)
1883 #define T0_NROT()       do { \
1884 	uint32_t t0tmp = *(dp - 1); \
1885 	*(dp - 1) = *(dp - 2); \
1886 	*(dp - 2) = *(dp - 3); \
1887 	*(dp - 3) = t0tmp; \
1888 } while (0)
1889 #define T0_PICK(x)      do { \
1890 	uint32_t t0depth = (x); \
1891 	T0_PUSH(T0_PEEK(t0depth)); \
1892 } while (0)
1893 #define T0_CO()         do { \
1894 	goto t0_exit; \
1895 } while (0)
1896 #define T0_RET()        goto t0_next
1897 
1898 	dp = ((t0_context *)t0ctx)->dp;
1899 	rp = ((t0_context *)t0ctx)->rp;
1900 	ip = ((t0_context *)t0ctx)->ip;
1901 	goto t0_next;
1902 	for (;;) {
1903 		uint32_t t0x;
1904 
1905 	t0_next:
1906 		t0x = T0_NEXT(&ip);
1907 		if (t0x < T0_INTERPRETED) {
1908 			switch (t0x) {
1909 				int32_t t0off;
1910 
1911 			case 0: /* ret */
1912 				t0x = T0_RPOP();
1913 				rp -= (t0x >> 16);
1914 				t0x &= 0xFFFF;
1915 				if (t0x == 0) {
1916 					ip = NULL;
1917 					goto t0_exit;
1918 				}
1919 				ip = &t0_codeblock[t0x];
1920 				break;
1921 			case 1: /* literal constant */
1922 				T0_PUSHi(t0_parse7E_signed(&ip));
1923 				break;
1924 			case 2: /* read local */
1925 				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
1926 				break;
1927 			case 3: /* write local */
1928 				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
1929 				break;
1930 			case 4: /* jump */
1931 				t0off = t0_parse7E_signed(&ip);
1932 				ip += t0off;
1933 				break;
1934 			case 5: /* jump if */
1935 				t0off = t0_parse7E_signed(&ip);
1936 				if (T0_POP()) {
1937 					ip += t0off;
1938 				}
1939 				break;
1940 			case 6: /* jump if not */
1941 				t0off = t0_parse7E_signed(&ip);
1942 				if (!T0_POP()) {
1943 					ip += t0off;
1944 				}
1945 				break;");
1946 
1947 			SortedDictionary<int, string> nccode =
1948 				new SortedDictionary<int, string>();
1949 			foreach (string k in ccodeUni.Keys) {
1950 				nccode[ccodeUni[k]] = k;
1951 			}
1952 			foreach (int sn in nccode.Keys) {
1953 				tw.WriteLine(
1954 @"			case {0}: {{
1955 				/* {1} */
1956 {2}
1957 				}}
1958 				break;", sn, ccodeNames[sn], nccode[sn]);
1959 			}
1960 
1961 			tw.WriteLine(
1962 @"			}
1963 
1964 		} else {
1965 			T0_ENTER(ip, rp, t0x);
1966 		}
1967 	}
1968 t0_exit:
1969 	((t0_context *)t0ctx)->dp = dp;
1970 	((t0_context *)t0ctx)->rp = rp;
1971 	((t0_context *)t0ctx)->ip = ip;
1972 }");
1973 
1974 			/*
1975 			 * Add the "postamblr" elements here. These are
1976 			 * elements that may need access to the data
1977 			 * block or code block, so they must occur after
1978 			 * their definition.
1979 			 */
1980 			foreach (string pp in extraCodeDefer) {
1981 				tw.WriteLine();
1982 				tw.WriteLine("{0}", pp);
1983 			}
1984 		}
1985 
1986 		int codeLen = 0;
1987 		foreach (CodeElement ce in gcode) {
1988 			codeLen += ce.GetLength(oneByteCode);
1989 		}
1990 		int dataBlockLen = 0;
1991 		foreach (ConstData cd in blocks.Values) {
1992 			dataBlockLen += cd.Length;
1993 		}
1994 
1995 		/*
1996 		 * Write some statistics on produced code.
1997 		 */
1998 		Console.WriteLine("code length: {0,6} byte(s)", codeLen);
1999 		Console.WriteLine("data length: {0,6} byte(s)", dataLen);
2000 		Console.WriteLine("total words: {0} (interpreted: {1})",
2001 			slotInterpreted + numInterpreted, numInterpreted);
2002 	}
2003 
Lookup(string name)2004 	internal Word Lookup(string name)
2005 	{
2006 		Word w = LookupNF(name);
2007 		if (w != null) {
2008 			return w;
2009 		}
2010 		throw new Exception(String.Format("No such word: '{0}'", name));
2011 	}
2012 
LookupNF(string name)2013 	internal Word LookupNF(string name)
2014 	{
2015 		Word w;
2016 		words.TryGetValue(name, out w);
2017 		return w;
2018 	}
2019 
StringToBlob(string s)2020 	internal TValue StringToBlob(string s)
2021 	{
2022 		return new TValue(0, new TPointerBlob(this, s));
2023 	}
2024 
TryParseLiteral(string tt, out TValue tv)2025 	internal bool TryParseLiteral(string tt, out TValue tv)
2026 	{
2027 		tv = new TValue(0);
2028 		if (tt.StartsWith("\"")) {
2029 			tv = StringToBlob(tt.Substring(1));
2030 			return true;
2031 		}
2032 		if (tt.StartsWith("`")) {
2033 			tv = DecodeCharConst(tt.Substring(1));
2034 			return true;
2035 		}
2036 		bool neg = false;
2037 		if (tt.StartsWith("-")) {
2038 			neg = true;
2039 			tt = tt.Substring(1);
2040 		} else if (tt.StartsWith("+")) {
2041 			tt = tt.Substring(1);
2042 		}
2043 		uint radix = 10;
2044 		if (tt.StartsWith("0x") || tt.StartsWith("0X")) {
2045 			radix = 16;
2046 			tt = tt.Substring(2);
2047 		} else if (tt.StartsWith("0b") || tt.StartsWith("0B")) {
2048 			radix = 2;
2049 			tt = tt.Substring(2);
2050 		}
2051 		if (tt.Length == 0) {
2052 			return false;
2053 		}
2054 		uint acc = 0;
2055 		bool overflow = false;
2056 		uint maxV = uint.MaxValue / radix;
2057 		foreach (char c in tt) {
2058 			int d = HexVal(c);
2059 			if (d < 0 || d >= radix) {
2060 				return false;
2061 			}
2062 			if (acc > maxV) {
2063 				overflow = true;
2064 			}
2065 			acc *= radix;
2066 			if ((uint)d > uint.MaxValue - acc) {
2067 				overflow = true;
2068 			}
2069 			acc += (uint)d;
2070 		}
2071 		int x = (int)acc;
2072 		if (neg) {
2073 			if (acc > (uint)0x80000000) {
2074 				overflow = true;
2075 			}
2076 			x = -x;
2077 		}
2078 		if (overflow) {
2079 			throw new Exception(
2080 				"invalid literal integer (overflow)");
2081 		}
2082 		tv = x;
2083 		return true;
2084 	}
2085 
ParseInteger(string tt)2086 	int ParseInteger(string tt)
2087 	{
2088 		TValue tv;
2089 		if (!TryParseLiteral(tt, out tv)) {
2090 			throw new Exception("not an integer: " + ToString());
2091 		}
2092 		return (int)tv;
2093 	}
2094 
CheckCompiling()2095 	void CheckCompiling()
2096 	{
2097 		if (!compiling) {
2098 			throw new Exception("Not in compilation mode");
2099 		}
2100 	}
2101 
EscapeCComment(string s)2102 	static string EscapeCComment(string s)
2103 	{
2104 		StringBuilder sb = new StringBuilder();
2105 		foreach (char c in s) {
2106 			if (c >= 33 && c <= 126 && c != '%') {
2107 				sb.Append(c);
2108 			} else if (c < 0x100) {
2109 				sb.AppendFormat("%{0:X2}", (int)c);
2110 			} else if (c < 0x800) {
2111 				sb.AppendFormat("%{0:X2}%{0:X2}",
2112 					((int)c >> 6) | 0xC0,
2113 					((int)c & 0x3F) | 0x80);
2114 			} else {
2115 				sb.AppendFormat("%{0:X2}%{0:X2}%{0:X2}",
2116 					((int)c >> 12) | 0xE0,
2117 					(((int)c >> 6) & 0x3F) | 0x80,
2118 					((int)c & 0x3F) | 0x80);
2119 			}
2120 		}
2121 		return sb.ToString().Replace("*/", "%2A/");
2122 	}
2123 }
2124