1; Copyright 2021 Piotr Meyer <aniou@smutek.pl>
2;
3; Permission to use, copy, modify, and/or distribute this
4; software for any purpose with or without fee is hereby
5; granted, provided that the above copyright notice and
6; this permission notice appear in all copies.
7
8; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS
9; ALL WARRANTIES WITH REGARD TO THIS SOFTWARE  INCLUDING ALL
10; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
11; EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
14; WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
15; TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
16; USE OR PERFORMANCE OF THIS SOFTWARE.
17
18.cpu "65816"
19
20.include "macros_inc.asm"
21
22;----------------------------------------------------------
23; # Description
24;
25; RETRO/816 - a port of RETRO Forth to C256 Foenix
26; RETRO Forth was created by Charles Childers (crc)
27; see: http://retroforth.org/
28;
29; Program is created for C256 Foenix computer but should be
30; able to run on almost any compatible system.
31;
32; At this moment we provide single NGA machine with fixed
33; addresses at ZP and in main memory, but there is a room
34; for multiple, independent VMs
35
36; ## porting
37;
38; Current version is designed to be run on C256 Foenix
39; computer with Foenix Kernel loaded
40; We need two functions to be supported:
41; C256_GETCHW - "wait for char and return it in A"
42; C256_PUTC   - "print char from A to screen"
43;
44; ## memory layout in C256
45;
46; $0040 - begin of shared regions, used by various routines
47; $00E0 - end   of shared regions
48; $00F0 - 16 bytes of 'temporary user variables;
49;
50; $2000 - begin of free space (test code in Foenix)
51; $7FFF - end   of free space
52; $8000 - begin of CPU stack
53; $FEFF - end   of CPU stack
54;
55; XXX - move it to ZP
56; $01:0000 - beginning NGA (memory), single segment (64k)
57;     0000 - begin of data stack
58;     03ff - end of data stack
59;     0400 - beign of return
60;     07ff - end of return stack
61;     .... - unused
62; $02:0000 - start of main NGA memory
63; $05:FFFF - end of main NGA memory
64;
65; $3a:0000 - beginning of NGA code (overwrites BASIC)
66; ..
67;
68; ## implementation-specific notes
69;
70; There are few shortcuts and many inefficiences in code,
71; it should be corrected or extend in future releases
72;
73; At this moment main pointers are somewhat inconsistent
74; - IP counts CELLS when SP i RP count BYTES. It simplify
75; code a lot
76;
77; IP    - 16bit instruction pointer, thats means that
78;         system is able to use only FFFF cells
79;         unit: CELLS
80;
81; IPPTR - 24bit in-memory pointer, allows to use
82;         4*FFFF memory. It should be equal to IP << 2
83;         unit: BYTES
84;
85; SP    - 16bit, stack pointer
86;         unit: BYTES (inc/dec by 4 bytes)
87;
88; RP    - 16bit, return addres stack pointer,
89;         unit: BYTES (inc/dec by 4 bytes)
90;
91; ## booting
92;
93; C256 boot process, as remainder:
94;
95; 1. after boot CPU PC gets addr from $FFFC,
96; 2. that value points to $FF00 and following code
97;    CLC
98;    XCE
99;    JML $1000 - BOOT vector of Foenix Kernel
100; 3. JML IBOOT - internal boot routine
101; 4. ... and, finally JML $03:A0000 to init BASIC
102
103;----------------------------------------------------------
104; # constants
105;
106; note - unlike in 65c816 stacks in original NGA grows up,
107STACK_DEPTH =    $0400 ; bytes: depth of data stack
108ADDRESSES   =    $0400 ; bytes: depth of address stack
109IMAGE_ADDR  = $02_0000 ; bytes: base address in real memory
110CELL_MAX    =    $FFFF ; max allowed cell, IP is word-sized
111IMAGE_BANKS =        4 ; by 64k, max with word-sized IP
112CELL_SIZE   =        4 ; bytes: single CELL size
113
114; some sanitization checks
115.cerror IMAGE_ADDR  & $00FFFF != $0000, "IMAGE_ADDR  should be bank-aligned!"
116
117; TOS*, NOS*, TRS* and MEM* variant are accessed by indexed
118; modes (,X and ,Y). Different base addresses (+0, +2) are
119; used to access low and high words without extra inx/iny
120DSTACK      = $0000      ; data   stack addr, grows up
121NOSl        = DSTACK     ; second item  (,X)
122NOSh        = DSTACK + 2 ; second item  (,X)
123TOSl        = DSTACK + 4 ; current item, low  word
124TOSh        = DSTACK + 6 ; current item, high word
125
126RSTACK      = $0400      ; return stack addr, grows up
127TRSl        = RSTACK     ; current stack item, low word
128TRSh        = RSTACK + 2 ; current stack item. high word
129
130; XXX - only used for stacks, rip it off
131MEM_SEGMENT = $01        ; memory bank segment: $01:xxxx
132
133; nymber of devices supported by system
134NUM_DEVICES = 2
135
136; ## debug variables (only for go65c816 emulator)
137TRACE_ON    = $10
138TRACE_OFF   = $11
139KILL        = $20
140
141; ## FMX kernel vectors
142C256_GETCHW = $104c              ; get character (wait)
143C256_PUTC   = $1018              ; put character
144
145; ---------------------------------------------------------
146; # local variables
147;
148            * = $60
149IP          .word  0  ; instruction pointer - cells
150IPPTR       .dword 0  ; instruction pointer - bytes
151SP          .word  0  ; data   stack pointer - bytes
152RP          .word  0  ; return stack pointer - bytes
153CMD         .dword 0  ; temporary for OP unboundling
154TMP         .dword 0  ; temporary
155TMPa        = TMP     ; additional identifiers
156TMPb        .dword 0  ; for various cases
157TMPc        .word  0  ; at this moment inst_di
158TMPd        .word  0  ; ...
159
160; ---------------------------------------------------------
161; # main routine
162;
163            * = $03A0000
164
165main
166            clc
167            xce
168
169main0       .setaxl
170            .sdb `msg_banner
171            ldx  #<>msg_banner
172            jsr  prints
173
174            jsr  prepare_vm
175            jsr  execute
176
177            .sdb `msg_end
178            ldx  #<>msg_end
179            jsr  prints
180
181            jsl  C256_GETCHW
182            jml  $1000           ; BOOT
183
184; ## preparing environment
185;
186; 1. clear memory region
187; 2. clear stacks
188; 3. copy image to memory region
189;
190prepare_vm
191            ; 1. clear memory
192            .sdb `msg_mclean
193            ldx  #<>msg_mclean
194            jsr  prints
195
196            .setas
197            .setxl
198            lda  #`IMAGE_ADDR
199            sta  TMP
200            ldy  #IMAGE_BANKS
201
202mclean0     lda  TMP
203            pha
204            plb
205            ldx  #$0000
206mclean1     stz  $0,b,x
207            inx
208            bne  mclean1
209            inc  TMP
210            dey
211            bne  mclean0
212
213            ; 2. clear stacks
214            .sdb `msg_sclean
215            ldx  #<>msg_sclean
216            jsr  prints
217
218            .sdb MEM_SEGMENT
219            .setal
220            ldx  #STACK_DEPTH-2
221prep0       stz  #DSTACK,b,x
222            dex
223            dex
224            bpl  prep0
225
226            ldx  #ADDRESSES-2
227prep1       stz  #RSTACK,b,x
228            dex
229            dex
230            bpl  prep1
231
232
233            ; 4. copy image
234            .sdb `msg_copy
235            ldx  #<>msg_copy
236            jsr  prints
237
238            .setas
239            ldy  #IMAGE_SIZE
240            ldx  #0
241            .databank ?
242copy0       lda  IMAGE_SRC,x
243            sta  IMAGE_ADDR,x
244            inx
245            dey
246            bne  copy0
247
248            ; 4. set DBR to stack area
249            .sdb MEM_SEGMENT                    ; XXX fix it
250
251            rts
252
253; ## main execute loop
254execute
255            .setaxl
256            lda  #CELL_SIZE
257            sta  RP
258            stz  SP
259            stz  IP
260            jsr  update_ipptr
261
262execute0    jsr  process_bundle
263            wdm  #4              ; debugging - op count
264            lda  RP
265            beq  quit
266
267            jsr  next_ipptr
268            inc  IP
269            lda  IP
270            cmp  #CELL_MAX       ; NGA exit condition
271            bcc                  execute0
272quit        rts
273
274; ### process 4 commands in bundle
275process_bundle
276            ldy  #2
277            lda  [IPPTR],y       ; 7 cycles
278            sta  CMD+2
279            lda  [IPPTR]         ; also 7 cycles
280            sta  CMD
281
282            and   #$ff
283            beq   +              ; skip .. (nop)
284            asl   a
285            tax
286            jsr   (#<>op_table,k,x)
287
288+           lda   CMD+1
289            and   #$ff
290            beq   +              ; skip .. (nop)
291            asl   a
292            tax
293            jsr   (#<>op_table,k,x)
294
295+           lda   CMD+2
296            and   #$ff
297            beq   +              ; skip .. (nop)
298            asl   a
299            tax
300            jsr   (#<>op_table,k,x)
301
302+           lda   CMD+3
303            and   #$ff
304            beq   +              ; bne/rts for -1 cycle
305            asl   a
306            tax
307            jsr   (#<>op_table,k,x)
308+           rts
309
310op_table
311            .addr inst_no
312            .addr inst_li
313            .addr inst_du
314            .addr inst_dr
315            .addr inst_sw
316            .addr inst_pu
317            .addr inst_po
318            .addr inst_ju
319            .addr inst_ca
320            .addr inst_cc
321            .addr inst_re
322            .addr inst_eq
323            .addr inst_ne
324            .addr inst_lt
325            .addr inst_gt
326            .addr inst_fe
327            .addr inst_st
328            .addr inst_ad
329            .addr inst_su
330            .addr inst_mu
331            .addr inst_di
332            .addr inst_an
333            .addr inst_or
334            .addr inst_xo
335            .addr inst_sh
336            .addr inst_zr
337            .addr inst_ha
338            .addr inst_ie
339            .addr inst_iq
340            .addr inst_ii
341
342;----------------------------------------------------------
343; ## tooling routines
344
345; ### updates IPPTR (in bytes) from IP field (in cells)
346update_ipptr
347            lda  IP
348            sta  IPPTR
349            stz  IPPTR+2
350
351            asl  IPPTR        ; IPPTR = IP*4
352            rol  IPPTR+2
353
354            asl  IPPTR
355            rol  IPPTR+2
356
357            clc               ; add base
358            lda  IPPTR+2
359            adc  #`IMAGE_ADDR
360            sta  IPPTR+2
361
362            rts
363
364; ### increases in-memory IPPTR pointer by CELL_SIZE
365next_ipptr
366            lda  IPPTR
367            clc
368            adc  #CELL_SIZE
369            sta  IPPTR
370            bcs  +
371            rts
372
373+           inc  IPPTR+2
374            rts
375
376; ### print 0-terminated strings
377; DBR - string segment
378;   X - string address
379prints      .proc
380            php
381            .setas
382            .setxl
383prints0     lda  $0,b,x
384            beq  prints_done
385            jsl  C256_PUTC
386            inx
387            bra  prints0
388
389prints_done plp
390            rts
391            .pend
392
393
394
395;----------------------------------------------------------
396; # NGA VM
397;
398; Implementation of nga VM, based on `vm/nga-c/nga.c` code.
399; Current version may be suboptimal, but the goal is in most
400; accurate implementation.
401;
402
403            .al
404            .xl
405
406; ---------------------------------------------------------
407; ## .. ( 0) stack:   -   |   -    nop
408
409inst_no
410            rts
411
412; ---------------------------------------------------------
413; ## li ( 1) stack:   -n  |   -    lit
414;
415; void inst_li() {
416;   sp++;
417;   ip++;
418;   TOS = memory[ip];
419; }
420
421inst_li
422            lda   SP              ; 4 cycles
423            clc                   ; 1 cycle
424            adc   #CELL_SIZE      ; 3 cycles
425            sta   SP              ; 4 cycles
426            tax                   ; 2 cycles
427
428            inc  IP
429            jsr  next_ipptr
430            lda  [IPPTR]
431            sta  #TOSl,b,x
432            ldy  #2
433            lda  [IPPTR],y
434            sta  #TOSh,b,x
435
436;            lda   IP              ; 4 cycles
437;            clc                   ; 1 cycle
438;            adc   #4              ; 3 cycles
439;            sta   IP              ; 4 cycles
440;            tay                   ; 2 cycles
441
442;            lda   #MEMl,b,y
443;            sta   #TOSl,b,x
444;            lda   #MEMh,b,y
445;            sta   #TOSh,b,x
446
447            rts
448
449; ---------------------------------------------------------
450; ## du ( 2) stack:  n-nn |   -    dup
451;
452; void inst_du() {
453;   sp++;
454;   data[sp] = NOS;               // it means TOS = NOS?
455; }
456
457inst_du
458            lda   SP              ; 4 cycles
459            clc                   ; 1 cycle
460            adc   #CELL_SIZE      ; 3 cycles
461            sta   SP              ; 4 cycles
462            tax                   ; 2 cycles
463
464            lda   #NOSl,b,x
465            sta   #TOSl,b,x
466            lda   #NOSh,b,x
467            sta   #TOSh,b,x
468
469            rts
470
471; ---------------------------------------------------------
472; ## dr ( 3) stack:  n-   |   -    drop
473;
474; void inst_dr() {
475;   data[sp] = 0;                 // it means TOS=0?
476;    if (--sp < 0)
477;      ip = CELL_MAX;
478; }
479
480inst_dr
481            ldx   SP
482            stz   #TOSl,b,x
483            stz   #TOSh,b,x
484
485            txa
486            sec
487            sbc   #4
488            sta   SP
489            bmi   inst_dr0
490            rts
491
492            ; IP+1 in exec loop == LIMIT == EXIT
493inst_dr0    lda   #CELL_MAX-1
494            sta   IP
495            rts
496
497
498; ---------------------------------------------------------
499; ## sw ( 4) stack: xy-xy |   -    swap
500;
501; void inst_dr() {
502;   data[sp] = 0;                 // it means TOS=0?
503;    if (--sp < 0)
504;      ip = CELL_MAX;
505; }
506
507inst_sw
508            ldx   SP
509
510            ldy   #TOSl,b,x        ; TOS -> TMP
511            lda   #NOSl,b,x
512            sta   #TOSl,b,x        ; NOS -> TOS
513            tya
514            sta   #NOSl,b,x        ; TMP -> NOS
515
516            ldy   #TOSh,b,x        ; TOS -> TMP
517            lda   #NOSh,b,x
518            sta   #TOSh,b,x        ; NOS -> TOS
519            tya
520            sta   #NOSh,b,x        ; TMP -> NOS
521
522            rts
523
524; ---------------------------------------------------------
525; ## pu ( 5) stack:  n-   |   -n   push
526;
527; void inst_pu() {
528;  rp++;
529;  TORS = TOS;
530;  inst_dr();
531; }
532
533inst_pu
534            lda   RP              ; 4 cycles
535            clc                   ; 1 cycle
536            adc   #CELL_SIZE      ; 3 cycles
537            sta   RP              ; 4 cycles
538            tay                   ; 2 cycles
539
540            ldx   SP
541            lda   #TOSl,b,x
542            sta   #TRSl,b,y
543            lda   #TOSh,b,x
544            sta   #TRSh,b,y
545
546            jmp   inst_dr
547
548; ---------------------------------------------------------
549; ## po ( 6) stack:   -n  |  n-    pop
550;
551; void inst_po() {
552;   sp++;
553;   TOS = TORS;
554;   rp--;
555; }
556
557inst_po
558            lda   SP
559            clc
560            adc   #CELL_SIZE
561            sta   SP
562            tax
563
564            ldy   RP
565
566            lda   #TRSl,b,y
567            sta   #TOSl,b,x
568            lda   #TRSh,b,y
569            sta   #TOSh,b,x
570
571            tya
572            sec
573            sbc   #4
574            sta   RP
575            rts
576
577; ---------------------------------------------------------
578; ## ju ( 7) stack:  a-   |   -    jump
579;
580; void inst_ju() {
581;   ip = TOS - 1;         // I'm not sure about that '-1'
582;   inst_dr();
583; }
584
585; PROBLEM THERE - SP is 16-bit and argument to JUMP may
586; be 32bit  XXX - check it in already created image
587; BUT - current image < 64k, so there shouldn't be problems
588
589inst_ju
590            ldx  SP
591            lda  #TOSl,b,x
592
593            dec  a
594            sta  IP
595            jsr  update_ipptr
596            jmp  inst_dr
597
598; ---------------------------------------------------------
599; ## ca ( 8) stack:  a-   |   -A   call
600;
601; void inst_ca() {
602;   rp++;
603;   TORS = ip;
604;   ip = TOS - 1;
605;   inst_dr();
606; }
607
608inst_ca
609            lda  RP
610            clc
611            adc  #CELL_SIZE
612            sta  RP
613            tay
614
615            lda  IP
616            sta  #TRSl,b,y
617
618            ; for completness sake
619            ldx  SP
620            lda  #TOSl,b,x
621
622            dec  a
623            sta  IP
624            jsr  update_ipptr
625            jmp  inst_dr
626
627
628; ---------------------------------------------------------
629; ## cc ( 9) stack: af-   |   -A   conditional call
630;
631; void inst_cc() {
632;   CELL a, b;
633;   a = TOS; inst_dr();  /* Target */
634;   b = TOS; inst_dr();  /* Flag   */
635;   if (b != 0) {
636;     rp++;
637;     TORS = ip;
638;     ip = a - 1;
639;   }
640; }
641
642inst_cc
643            ldx   SP              ; a
644            lda   #TOSl,b,x
645            sta   TMP
646            jsr   inst_dr
647
648            ldx   SP
649            lda   #TOSl,b,x
650            bne   inst_cc_jmp
651            lda   #TOSh,b,x
652            bne   inst_cc_jmp
653            jmp   inst_dr
654
655inst_cc_jmp jsr   inst_dr         ; for compatibility
656            lda   RP
657            clc
658            adc   #CELL_SIZE
659            sta   RP
660            tay
661
662            lda   IP
663            sta   #TRSl,b,y
664            lda   #$0
665            sta   #TRSh,b,y       ; only lower..
666
667            lda   TMP
668            dec   a
669            sta   IP
670            jsr  update_ipptr
671
672            rts
673            ;jmp   inst_dr
674
675; ---------------------------------------------------------
676; ## re (10) stack:   -   |  A-    return
677;
678; void inst_re() {
679;   ip = TORS;
680;   rp--;
681; }
682
683inst_re
684            ldy   RP
685            lda   #TRSl,b,y
686            sta   IP
687            jsr  update_ipptr
688
689            tya
690            sec
691            sbc   #4
692            sta   RP
693
694            rts
695
696; ---------------------------------------------------------
697; ## eq (11) stack: xy-f  |   -    equality
698;
699; void inst_eq() {
700;   NOS = (NOS == TOS) ? -1 : 0;
701;   inst_dr();
702; }
703
704inst_eq
705            ldx   SP
706            lda   #NOSl,b,x
707            cmp   #TOSl,b,x
708            bne   inst_eq_no
709
710            lda   #NOSh,b,x
711            cmp   #TOSh,b,x
712            bne   inst_eq_no
713
714            lda   #<>-1
715            sta   #NOSl,b,x
716            lda   #>`-1
717            sta   #NOSh,b,x
718            jmp   inst_dr
719
720inst_eq_no  stz   #NOSl,b,x
721            stz   #NOSh,b,x
722            jmp   inst_dr
723
724
725; ---------------------------------------------------------
726; ## ne (12) stack: xy-f  |   -    inequality
727;
728; void inst_eq() {
729;   NOS = (NOS != TOS) ? -1 : 0;
730;   inst_dr();
731; }
732
733inst_ne
734            ldx   SP
735            lda   #NOSl,b,x
736            cmp   #TOSl,b,x
737            bne   inst_ne_no
738
739            lda   #NOSh,b,x
740            cmp   #TOSh,b,x
741            bne   inst_ne_no
742
743            stz   #NOSl,b,x
744            stz   #NOSh,b,x
745            jmp   inst_dr
746
747inst_ne_no  lda   #<>-1
748            sta   #NOSl,b,x
749            lda   #>`-1
750            sta   #NOSh,b,x
751            jmp   inst_dr
752
753
754; ---------------------------------------------------------
755; ## lt (13) stack: xy-f  |   -    less than
756;
757; void inst_eq() {
758;   NOS = (NOS < TOS) ? -1 : 0;
759;   inst_dr();
760; }
761
762; it should be a signed comparison then
763; http://www.6502.org/tutorials/compare_beyond.html#5
764
765inst_lt
766            ldx   SP
767            lda   #NOSl,b,x
768            cmp   #TOSl,b,x
769            lda   #NOSh,b,x
770            sbc   #TOSh,b,x
771            bvc   inst_lt0        ; N eor V
772            eor   #$80
773inst_lt0    bmi   inst_lt_lt
774
775            stz   #NOSl,b,x
776            stz   #NOSh,b,x
777            jmp   inst_dr
778
779inst_lt_lt  lda   #<>-1
780            sta   #NOSl,b,x
781            lda   #>`-1
782            sta   #NOSh,b,x
783            jmp   inst_dr
784
785; ---------------------------------------------------------
786; ## gt (14) stack: xy-f  |   -    greater than
787;
788; void inst_eq() {
789;   NOS = (NOS > TOS) ? -1 : 0;
790;   inst_dr();
791; }
792
793; it should be a signed comparison then
794; http://www.6502.org/tutorials/compare_beyond.html#5
795
796inst_gt
797            ldx   SP
798            lda   #TOSl,b,x
799            cmp   #NOSl,b,x
800            lda   #TOSh,b,x
801            sbc   #NOSh,b,x
802            bvc   inst_gt0        ; N eor V
803            eor   #$80
804inst_gt0    bmi   inst_gt_gt
805
806            stz   #NOSl,b,x
807            stz   #NOSh,b,x
808            jmp   inst_dr
809
810inst_gt_gt  lda   #<>-1
811            sta   #NOSl,b,x
812            lda   #>`-1
813            sta   #NOSh,b,x
814            jmp   inst_dr
815
816; ---------------------------------------------------------
817; ## fe (15) stack:  a-n  |   -    fetch
818;
819; void inst_fe() {
820; #ifndef NOCHECKS
821;   if (TOS >= CELL_MAX || TOS < -5) {
822;     ip = CELL_MAX;
823;     printf("\nERROR (nga/inst_fe): Fetch beyond valid memory range\n");
824;     exit(1);
825;   } else {
826; #endif
827;     switch (TOS) {
828;       case -1: TOS = sp - 1; break;
829;       case -2: TOS = rp; break;
830;       case -3: TOS = CELL_MAX; break;
831;       case -4: TOS = CELL_MIN_VAL; break;
832;       case -5: TOS = CELL_MAX_VAL; break;
833;       default: TOS = memory[TOS]; break;
834;     }
835; #ifndef NOCHECKS
836;   }
837; #endif
838; }
839
840; XXX - there no checks now, as we don't have a way
841;       to report them
842;
843
844inst_fe
845            ldx   SP
846            lda   #TOSh,b,x
847            bmi   inst_fe0       ; special values
848
849            lda   #TOSl,b,x      ; only 16 bit
850            sta  TMP
851            stz  TMP+2
852
853            asl  TMP             ; IPPTR = IP*4
854            rol  TMP+2
855            asl  TMP
856            rol  TMP+2
857
858            clc                  ; add base
859            lda  TMP+2
860            adc  #`IMAGE_ADDR
861            sta  TMP+2
862
863            lda  TMP
864
865            lda  [TMP]
866            sta  #TOSl,b,x
867            ldy  #2
868            lda  [TMP],y
869            sta  #TOSh,b,x
870            rts
871
872inst_fe0    lda   #TOSl,b,x
873            inc   a               ; it was -1?
874            bne   inst_fe1        ; no
875            lda   SP              ; "TOS = sp-1"
876            dec   a
877            lsr   a               ; SP in bytes
878            lsr   a               ; stack uses cells
879            sta   #TOSl,b,x
880            stz   #TOSh,b,x
881            rts
882
883inst_fe1    inc   a               ; it was -2?
884            bne   inst_fe2
885            lda   RP              ; "TOS = rp"
886            lsr   a
887            lsr   a
888            sta   #TOSl,b,x
889            stz   #TOSh,b,x
890            rts
891
892inst_fe2    inc   a               ; it was -3?
893            bne   inst_fe3
894            lda   #CELL_MAX       ; "TOS = CELL_MAX"
895            sta   #TOSl,b,x
896            stz   #TOSh,b,x       ; XXX - we uses max 64k
897            rts
898
899inst_fe3    inc   a               ; it was -4?
900            bne   inst_fe4
901            lda   #$0000          ; "TOS = CELL_MIN_VAL"
902            sta   #TOSl,b,x
903            lda   #$8000          ; XXX - check it
904            sta   #TOSh,b,x
905            rts
906
907inst_fe4    inc   a               ; it was -5?
908            bne   inst_bad
909            lda   #$ffff          ; "TOS = CELL_MAX_VAL"
910            sta   #TOSl,b,x
911            lda   #$7fff          ; XXX - check it
912            sta   #TOSh,b,x
913            rts
914
915inst_bad                          ; XXX - message to interpreter
916            .sdb `err_memuf
917            ldx  #<>err_memuf
918            jsr  prints
919            .setaxl
920            lda  #CELL_MAX-1
921            sta  IP
922            rts
923
924
925; ---------------------------------------------------------
926; ## st (16) stack: na-   |   -    store
927;
928; void inst_st() {
929; #ifndef NOCHECKS
930;   if (TOS <= CELL_MAX && TOS >= 0) {
931; #endif
932;     memory[TOS] = NOS;
933;     inst_dr();
934;     inst_dr();
935; #ifndef NOCHECKS
936;   } else {
937;     ip = CELL_MAX;
938;     printf("\nERROR (nga/inst_st): Store beyond valid memory range\n");
939;     exit(1);
940;   }
941; #endif
942; }
943
944; XXX - no checks now
945
946inst_st
947            ldx  SP
948            lda  #TOSl,b,x       ; XXX only low word in use
949
950            sta  TMP
951            stz  TMP+2
952
953            asl  TMP             ; IPPTR = IP*4
954            rol  TMP+2
955            asl  TMP
956            rol  TMP+2
957
958            clc                  ; add base
959            lda  TMP+2
960            adc  #`IMAGE_ADDR
961            sta  TMP+2
962
963            lda  #NOSl,b,x
964            sta  [TMP]
965            ldy  #2
966            lda  #NOSh,b,x
967            sta  [TMP],y
968
969            jsr  inst_dr
970            jmp  inst_dr
971
972; ---------------------------------------------------------
973; ## ad (17) stack: xy-n  |   -    addition
974;
975; void inst_ad() {
976;   NOS += TOS;
977;   inst_dr();
978; }
979
980inst_ad
981            ldx  SP
982            clc
983            lda   #NOSl,b,x
984            adc  #TOSl,b,x
985            sta  #NOSl,b,x
986            lda  #NOSh,b,x
987            adc  #TOSh,b,x
988            sta  #NOSh,b,x
989            jmp  inst_dr
990
991; ---------------------------------------------------------
992; ## su (18) stack: xy-n  |   -    subtraction
993;
994; void inst_su() {
995;   NOS -= TOS;
996;   inst_dr();
997; }
998
999inst_su
1000            ldx  SP
1001            sec
1002            lda   #NOSl,b,x
1003            sbc  #TOSl,b,x
1004            sta  #NOSl,b,x
1005            lda  #NOSh,b,x
1006            sbc  #TOSh,b,x
1007            sta  #NOSh,b,x
1008            jmp  inst_dr
1009
1010; ---------------------------------------------------------
1011; ## mu (19) stack: xy-n  |   -    multiplication
1012;
1013; void inst_mu() {
1014;   NOS *= TOS;
1015;   inst_dr();
1016; }
1017
1018; taken almost verbatim from of816 forth:
1019; 32-bit unsigned multiplication with 64-bit result
1020; right-shifting version by dclxvi
1021
1022N = TMP
1023
1024inst_mu
1025            ldx   SP
1026            ; only for 1:1 with original nga
1027            lda   #TOSh,b,x
1028            pha
1029            lda   #TOSl,b,x
1030            pha
1031            ; end
1032
1033            lda   N+2
1034            pha
1035            lda   N
1036            pha
1037            lda   #$00
1038            sta   N
1039            ldy   #32
1040            lsr   #TOSh,b,x      ; STACKBASE+6,x
1041            ror   #TOSl,b,x      ; STACKBASE+4,x
1042l1:         bcc   l2
1043            clc
1044            sta   N+2
1045            lda   N
1046            adc   #NOSl,b,x      ; STACKBASE+0,x
1047            sta   N
1048            lda   N+2
1049            adc   #NOSh,b,x      ; STACKBASE+2,x
1050l2:         ror   a
1051            ror   N
1052            ror   #TOSh,b,x      ; STACKBASE+6,x
1053            ror   #TOSl,b,x      ; STACKBASE+4,x
1054            dey
1055            bne   l1
1056            sta   #NOSh,b,x      ; STACKBASE+2,x
1057            lda   N
1058            sta   #NOSl,b,x      ; STACKBASE+0,x
1059            pla
1060            sta   N
1061            pla
1062            sta   N+2
1063
1064            ; only for 1:1 with original nga - XXX - fix it
1065            lda   #TOSl,b,x
1066            sta   #NOSl,b,x
1067            lda   #TOSh,b,x
1068            sta   #NOSh,b,x
1069            ;
1070            pla
1071            sta   #TOSl,b,x
1072            pla
1073            sta   #TOSh,b,x
1074            ; end
1075
1076            jmp   inst_dr
1077
1078; ---------------------------------------------------------
1079; ## di (20) stack: xy-rq |   -    divide & remainder
1080;
1081; void inst_di() {
1082;   CELL a, b;
1083;   a = TOS;
1084;   b = NOS;
1085; #ifndef NOCHECKS
1086;   if (a == 0) {
1087;     printf("\nERROR (nga/inst_di): Division by zero\n");
1088;     exit(1);
1089;   }
1090; #endif
1091;   TOS = b / a;
1092;   NOS = b % a;
1093; }
1094
1095; XXX - now all operation are unsigned, change this!
1096            .warn "inst_di is unsigned, change it"
1097
1098inst_di
1099            ldx  SP
1100
1101            lda  #TOSl,b,x
1102            bne  di4
1103            lda  #TOSh,b,x
1104            bne  di4
1105
1106            .sdb `err_0div
1107            ldx  #<>err_0div
1108            jsr  prints
1109            .sdb MEM_SEGMENT
1110            .setaxl
1111            lda  #CELL_MAX-1
1112            sta  IP
1113            rts
1114
1115di4         lda  #NOSl,b,x       ; NOS to TMPb via TMP(a)
1116            sta  TMP
1117            lda  #NOSh,b,x
1118            sta  TMP+2
1119            bit  TMP+2
1120            bpl  di3
1121
1122            inc  TMPd
1123            jsr  negate_tmp
1124di3         lda  TMP             ; NOS from TMP(a) to TMPb
1125            sta  TMPb
1126            lda  TMP+2
1127            sta  TMPb+2
1128
1129            lda  #TOSl,b,x       ; TOS to TMP(a)
1130            sta  TMP
1131            lda  #TOSh,b,x
1132            sta  TMP+2
1133            bit  TMP+2
1134            bpl  di2
1135
1136            inc  TMPd
1137            jsr  negate_tmp
1138di2         stz  #TOSl,b,x       ; prepare result space
1139            stz  #TOSh,b,x
1140            stz  #NOSl,b,x
1141            stz  #NOSh,b,x
1142
1143di0         lda  TMPb            ; is NOS<TOS?
1144            cmp  TMPa
1145            lda  TMPb+2
1146            sbc  TMPa+2
1147            bvc  di1
1148            eor  #$80
1149di1         bmi  finish          ; yes, NOS<TOS
1150
1151            sec
1152            lda  TMPb
1153            sbc  TMPa
1154            sta  TMPb
1155            lda  TMPb+2
1156            sbc  TMPa+2
1157            sta  TMPb+2
1158
1159            ; increase result
1160            inc  #TOSl,b,x
1161            bne  di0             ; overflow means high+=1
1162            inc  #TOSh,b,x
1163            bra  di0
1164
1165finish      lda  TMPb            ; remainder
1166            sta  #NOSl,b,x
1167            lda  TMPb+2
1168            sta  #NOSh,b,x
1169            rts
1170
1171            ; additional routines for shifting ------------
1172negate_tmp
1173            jsr   invert_tmp
1174            inc   TMP
1175            bne   +
1176            inc   TMP+2
1177+           rts
1178
1179invert_tmp
1180            lda   TMP
1181            eor   #$FFFF
1182            sta   TMP
1183            lda   TMP+2
1184            eor   #$FFFF
1185            sta   TMP+2
1186            rts
1187
1188
1189
1190; ---------------------------------------------------------
1191; ## an (21) stack: xy-n  |   -    bitwise and
1192;
1193; void inst_an() {
1194;   NOS = TOS & NOS;
1195;   inst_dr();
1196; }
1197
1198inst_an
1199            ldx  SP
1200
1201            lda   #NOSl,b,x
1202            and   #TOSl,b,x
1203            sta  #NOSl,b,x
1204
1205            lda   #NOSh,b,x
1206            and   #TOSh,b,x
1207            sta  #NOSh,b,x
1208
1209            jmp  inst_dr
1210
1211; ---------------------------------------------------------
1212; ## or (22) stack: xy-n  |   -    bitwise or
1213;
1214; void inst_an() {
1215;   NOS = TOS | NOS;
1216;   inst_dr();
1217; }
1218
1219inst_or
1220            ldx  SP
1221
1222            lda   #NOSl,b,x
1223            ora   #TOSl,b,x
1224            sta  #NOSl,b,x
1225
1226            lda   #NOSh,b,x
1227            ora   #TOSh,b,x
1228            sta  #NOSh,b,x
1229
1230            jmp  inst_dr
1231
1232; ---------------------------------------------------------
1233; ## xo (23) stack: xy-n  |   -    bitwise xor
1234;
1235; void inst_an() {
1236;   NOS = TOS ^ NOS;
1237;   inst_dr();
1238; }
1239
1240inst_xo
1241            ldx  SP
1242
1243            lda   #NOSl,b,x
1244            eor   #TOSl,b,x
1245            sta  #NOSl,b,x
1246
1247            lda   #NOSh,b,x
1248            eor   #TOSh,b,x
1249            sta  #NOSh,b,x
1250
1251            jmp  inst_dr
1252
1253; ---------------------------------------------------------
1254; ## sh (24) stack: xy-n  |   -    shift
1255;
1256; void inst_sh() {
1257;   CELL y = TOS;
1258;   CELL x = NOS;
1259;   if (TOS < 0)
1260;     NOS = NOS << (TOS * -1);
1261;   else {
1262;     if (x < 0 && y > 0)
1263;       NOS = x >> y | ~(~0U >> y);
1264;     else
1265;       NOS = x >> y;
1266;   }
1267;   inst_dr();
1268; }
1269
1270; 1. because effective shift for a 32bit value is... 32
1271;    there is no need in using high word in shift count,
1272;    low word is sufficient for 65535 shifts
1273; 2. because there is no need in shifting more than 32
1274;    times, then value of low word is masked to six lower
1275;    bits
1276
1277; NOTE: code would be simpler if in case of separate shift
1278;       operations (i.e. shift right/shift left) in muri
1279;
1280
1281inst_sh
1282            ldx  SP
1283
1284            ; check if shift count is positive or negative
1285            bit  #TOSh,b,x
1286            bpl  shr_main        ; shift to right
1287
1288            ; we shifting left, so we need to negate arg
1289            jsr  negate_tos
1290
1291            lda   #TOSl,b,x
1292            and   #63            ; we need only low 6 bits
1293            bne   +              ; do something if > 0
1294            jmp  inst_dr
1295+           tay
1296
1297            ; shifting left is the same for neg and pos vals
1298shl_main    asl  #NOSl,b,x
1299            rol   #NOSh,b,x
1300            dey
1301            bne   shl_main
1302            jmp  inst_dr
1303
1304
1305            ; shift right ---------------------------------
1306shr_main    lda   #TOSl,b,x
1307            and   #63            ; we need only low 6 bits
1308            bne   +              ; do something if > 0
1309            jmp  inst_dr
1310+           tay
1311
1312            ; did we shifting negative or positive value?
1313            bit  #NOSh,b,x
1314            bmi  shr_neg
1315
1316shr_pos     lsr   #NOSh,b,x
1317            ror  #NOSl,b,x
1318            dey
1319            bne   shr_pos
1320            jmp  inst_dr
1321
1322shr_neg     clc
1323            ror   #NOSh,b,x
1324            ror  #NOSl,b,x
1325            dey
1326            bne   shr_neg
1327            jmp  inst_dr
1328
1329            ; additional routines for shifting ------------
1330negate_tos nop
1331            jsr   invert_tos
1332            inc   #TOSl,b,x       ; STACKBASE+0,x
1333            bne   +
1334            inc   #TOSh,b,x       ; STACKBASE+2,x
1335+           rts
1336
1337invert_tos
1338            lda   #TOSl,b,x      ; STACKBASE+0,x
1339            eor   #$FFFF
1340            sta   #TOSl,b,x      ; STACKBASE+0,x
1341            lda   #TOSh,b,x       ; STACKBASE+2,x
1342            eor   #$FFFF
1343            sta   #TOSh,b,x       ; STACKBASE+2,x
1344            rts
1345
1346; ---------------------------------------------------------
1347; ## zr (25) stack:  n-?  |   -    zero return
1348;
1349; returns from a subroutine if the top item on the stack is zero.
1350; If not, it acts like a NOP instead.
1351;
1352; void inst_zr() {
1353;   if (TOS == 0) {
1354;     inst_dr();
1355;     ip = TORS;
1356;     rp--;
1357;   }
1358; }
1359
1360inst_zr
1361            ldx  SP
1362            lda  #TOSl,b,x
1363            bne  zr_quit
1364            lda  #TOSh,b,x
1365            bne  zr_quit
1366
1367do_zr       jsr  inst_dr
1368            ldy  RP
1369            lda  #TRSl,b,y
1370            sta  IP
1371            jsr  update_ipptr
1372
1373            tya
1374            sec
1375            sbc   #CELL_SIZE
1376            sta   RP
1377zr_quit     rts
1378
1379; ---------------------------------------------------------
1380; ## ha (26) stack:   -   |   -    halt
1381
1382inst_ha
1383            lda  #CELL_MAX-1     ; XXX - change it
1384            sta  IP
1385            rts
1386
1387; ---------------------------------------------------------
1388; ## ie (27) stack:   -n  |   -    i/o enumerate
1389
1390inst_ie
1391            lda  SP
1392            clc
1393            adc  #CELL_SIZE      ; 4 bytes
1394            sta  SP
1395            tax
1396
1397            lda  #NUM_DEVICES
1398            sta  #TOSl,b,x
1399            stz  #TOSh,b,x
1400            rts
1401
1402; ---------------------------------------------------------
1403; ## iq (28) stack:  n-xy |   -    i/o query
1404;
1405; void inst_iq() {
1406;   CELL Device = TOS;
1407;   inst_dr();
1408;   IO_queryHandlers[Device]();
1409; }
1410
1411inst_iq
1412            ldx  SP
1413            lda  #TOSl,b,x
1414            pha
1415            jsr  inst_dr
1416            jmp  io_query
1417
1418
1419; ## ii (29) stack: ...n- |   -    i/o invoke
1420;
1421; void inst_ii() {
1422;   CELL Device = TOS;
1423;   inst_dr();
1424;   IO_deviceHandlers[Device]();
1425; }
1426;
1427; void generic_output() {
1428;  putc(stack_pop(), stdout);
1429;  fflush(stdout);
1430;}
1431
1432
1433inst_ii
1434            ldx  SP
1435            lda  #TOSl,b,x
1436            pha
1437            jsr  inst_dr
1438            jmp  io_handle
1439
1440; ---------------------------------------------------------
1441; device support
1442; ---------------------------------------------------------
1443
1444; number of device on stack
1445io_query
1446            ; numbers are counted from 0, so val should be
1447            ; lower than number of devices
1448            ply
1449            cpy  #NUM_DEVICES
1450            bcc  query
1451
1452            lda  #CELL_MAX-1
1453            sta  IP
1454            ;wdm  #KILL                          ; effective error
1455
1456query       lda  SP
1457            clc
1458            adc  #4
1459            sta  SP
1460            tax
1461
1462            tya
1463            sta  #TOSl,b,x       ; device number
1464            stz  #TOSh,b,x
1465
1466            bne  is_key
1467is_output   jmp  version0        ; output ver0
1468
1469is_key      cmp   #1
1470            bne  unknown
1471            jmp  version0        ; keyboard ver0
1472
1473unknown     jsr  inst_dr         ; drop already put dev no
1474            rts                  ; never reachable
1475
1476version0    stz  #NOSl,b,x
1477            stz  #NOSh,b,x
1478            rts
1479
1480
1481; ---------------------------------------------------------
1482; number of device on stack
1483io_handle
1484            ; numbers are counted from 0, so val should be
1485            ; lower than number of devices
1486            ply
1487            cpy  #NUM_DEVICES
1488            bcc  interact
1489            lda  #CELL_MAX-1
1490            sta  IP
1491            ;wdm  #KILL            ; stop - error
1492
1493interact    cpy  #0
1494            beq  screen          ; crude, but should work
1495
1496            ; keyboard input
1497            lda  SP
1498            clc
1499            adc  #4
1500            sta  SP
1501            tax
1502
1503            ;wdm   #TRACE_OFF
1504            jsl   C256_GETCHW    ; all regs preserved here
1505            ;wdm   #TRACE_ON
1506            .setaxl              ; redundant
1507            and   #$00ff          ; only byte lower is needed
1508            cmp   #$0d            ; change 0d to 0a
1509            bne   +
1510            lda   #$0a
1511+           nop
1512            sta  #TOSl,b,x
1513            stz  #TOSh,b,x
1514            rts
1515
1516            ; screen output
1517screen      ldx  SP
1518            txa
1519            sec
1520            sbc  #4
1521            sta  SP
1522
1523            ; SP is new but X point to previous element
1524            lda  #TOSl,b,x
1525            and  #$00ff
1526            cmp  #$0a
1527            bne  +
1528            lda  #$0d
1529+           nop
1530            ;wdm  #TRACE_OFF
1531            jsl  C256_PUTC
1532            ;wdm  #TRACE_ON
1533            .setaxl              ; redundant
1534            rts
1535
1536
1537            .warn "Code size: ", repr(* - main)
1538
1539; ---------------------------------------------------------
1540; # messages
1541
1542; a counted-string experiment
1543pstring     .macro txt
1544            .word(len(\txt))
1545            .text \txt
1546            .endm
1547
1548; zero-terminated strings
1549msg_banner  .text $d, "RETRO/816 - NGA/816-32 2021-02-21", $d, $0
1550msg_mclean  .text "cleaning memory...", $d, $0
1551msg_sclean  .text "cleaning stack...", $d, $0
1552msg_copy    .text "copying image...", $d, $0
1553msg_end     .text "NGA finished, press any key to restart", $d, $0
1554
1555err_uf      .text "ERROR: stack underflow, re-starting system!", $d, $0
1556err_0div    .text "ERROR: division-by-zero, re-starting system!", $d, $0
1557err_halt    .text "INFO: halt op called! Going to infinite loop.", $d, $0
1558err_memuf   .text "ERROR: read from unknown bad, negative mem addr!", $d, $0
1559
1560; ---------------------------------------------------------
1561; # forth image
1562
1563; image create by standard RETRO tools
1564; cp ngaImage barebones.image
1565; ./bin/retro-extend barebones.image interface/barebones.forth
1566;
1567
1568IMAGE_SRC   .binary "barebones.image"
1569IMAGE_END   = *
1570IMAGE_SIZE  = IMAGE_END - IMAGE_SRC
1571
1572; eof
1573