1 /* This file defines a number of threading schemes.
2 
3   Copyright (C) 1995, 1996,1997,1999,2003,2004,2005,2007,2008 Free Software Foundation, Inc.
4 
5   This file is part of Gforth.
6 
7   Gforth is free software; you can redistribute it and/or
8   modify it under the terms of the GNU General Public License
9   as published by the Free Software Foundation, either version 3
10   of the License, or (at your option) any later version.
11 
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16 
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, see http://www.gnu.org/licenses/.
19 
20 
21   This file defines macros for threading. Many sets of macros are
22   defined. Functionally they have only one difference: Some implement
23   direct threading, some indirect threading. The other differences are
24   just variations to help GCC generate faster code for various
25   machines.
26 
27   (Well, to tell the truth, there actually is another functional
28   difference in some pathological cases: e.g., a '!' stores into the
29   cell where the next executed word comes from; or, the next word
30   executed comes from the top-of-stack. These differences are one of
31   the reasons why GCC cannot produce the right variation by itself. We
32   chose disallowing such practices and using the added implementation
33   freedom to achieve a significant speedup, because these practices
34   are not common in Forth (I have never heard of or seen anyone using
35   them), and it is easy to circumvent problems: A control flow change
36   will flush any prefetched words; you may want to do a "0
37   drop" before that to write back the top-of-stack cache.)
38 
39   These macro sets are used in the following ways: After translation
40   to C a typical primitive looks like
41 
42   ...
43   {
44   DEF_CA
45   other declarations
46   NEXT_P0;
47   main part of the primitive
48   NEXT_P1;
49   store results to stack
50   NEXT_P2;
51   }
52 
53   DEF_CA and all the NEXT_P* together must implement NEXT. In the main
54   part the instruction pointer can be read with IP, changed with
55   INC_IP(const_inc), and the cell right behind the presently executing
56   word (i.e. the value of *IP) is accessed with NEXT_INST.
57 
58   If a primitive does not fall through the main part, it has to do the
59   rest by itself. If it changes ip, it has to redo NEXT_P0 (perhaps we
60   should define a macro SET_IP).
61 
62   Some primitives (execute, dodefer) do not end with NEXT, but with
63   EXEC(.). If NEXT_P0 has been called earlier, it has to perform
64   "ip=IP;" to ensure that ip has the right value (NEXT_P0 may change
65   it).
66 
67   Finally, there are NEXT1_P1 and NEXT1_P2, which are parts of EXEC
68   (EXEC(XT) could be defined as "cfa=XT; NEXT1_P1; NEXT1_P2;" (is this
69   true?)) and are used for making docol faster.
70 
71   We can define the ways in which these macros are used with a regular
72   expression:
73 
74   For a primitive
75 
76   DEF_CA NEXT_P0 ( IP | INC_IP | NEXT_INST | ip=...; NEXT_P0 ) * ( NEXT_P1 NEXT_P2 | EXEC(...) )
77 
78   For a run-time routine, e.g., docol:
79   PFA1(cfa) ( NEXT_P0 NEXT | cfa=...; NEXT1_P1; NEXT1_P2 | EXEC(...) )
80 
81   This comment does not yet describe all the dependences that the
82   macros have to satisfy.
83 
84   To organize the former ifdef chaos, each path is separated.
85   This gives a quite impressive number of paths, but you clearly
86   find things that go together.
87 
88   It should be possible to organize the whole thing in a way that
89   contains less redundancy and allows a simpler description.
90 
91 */
92 
93 #if !defined(GCC_PR15242_WORKAROUND) /* keep any value defined before this point; otherwise pick a default */
94 #if __GNUC__ == 3
95 /* default for gcc 3.x: enable the workaround.  Various gcc-3.x versions
96    have problems (including PR15242) that are solved with it */
97 #define GCC_PR15242_WORKAROUND 1
98 #else
99 /* default otherwise: disable it.  Other gcc versions are better off
100    without the workaround for primitives that are not relocatable */
101 #define GCC_PR15242_WORKAROUND 0
102 #endif
103 #endif /* !defined(GCC_PR15242_WORKAROUND) */
104 
105 #if GCC_PR15242_WORKAROUND /* DO_GOTO performs the actual jump of a dispatch; real_ca must already hold the target */
106 #define DO_GOTO goto before_goto /* go to the before_goto label emitted by FIRST_NEXT_P2, where the indirect jump through real_ca is done */
107 #else
108 #define DO_GOTO goto *real_ca /* indirect jump emitted at the place where DO_GOTO is expanded */
109 #endif
110 
111 #ifndef GOTO_ALIGN /* default: GOTO_ALIGN expands to nothing unless it was defined before this point */
112 #define GOTO_ALIGN
113 #endif /* GOTO_ALIGN is expanded in FIRST_NEXT_P2, directly in front of the before_goto label */
114 
115 #define GOTO(target) do {(real_ca=(target));} while(0) /* only stores the target in real_ca; the jump itself is done by DO_GOTO */
116 #define NEXT_P2 do {NEXT_P1_5; DO_GOTO;} while(0) /* last part of NEXT: the scheme's NEXT_P1_5, then the jump */
117 #define EXEC(XT) do { real_ca=EXEC1(XT); DO_GOTO;} while (0) /* jump to the code address that the scheme's EXEC1 yields for XT */
118 #define VM_JUMP(target) do {GOTO(target);} while (0) /* same as GOTO: sets real_ca, does not jump by itself */
119 #define NEXT do {DEF_CA NEXT_P1; NEXT_P2;} while(0) /* DEF_CA, NEXT_P1 and NEXT_P2 in one go (NEXT_P0 is not included) */
120 #define FIRST_NEXT_P2 NEXT_P1_5; GOTO_ALIGN; \
121 before_goto: goto *real_ca; after_goto: /* variant of NEXT_P2 that defines the labels before_goto and after_goto around the indirect jump */
122 #define FIRST_NEXT do {DEF_CA NEXT_P1; FIRST_NEXT_P2;} while(0) /* like NEXT, but built on FIRST_NEXT_P2, so it defines the before_goto label that DO_GOTO may target */
123 #define IPTOS NEXT_INST /* alias for NEXT_INST */
124 
125 
126 #ifdef DOUBLY_INDIRECT /* doubly indirect threading: the code address is obtained as **cfa; cfa is kept equal to *ip as a prefetched copy */
127 # ifndef DEBUG_DITC
128 #  define DEBUG_DITC 0
129 # endif
130 /* define DEBUG_DITC to 1 if you want NEXT_P1 and EXEC1 to check consistency; they report to stderr */
131 #  define NEXT_P0	do {cfa1=cfa; cfa=*ip;} while(0) /* save the current cfa in cfa1, then load cfa from *ip */
132 #  define CFA		cfa1 /* after NEXT_P0, cfa holds *ip, so the saved value in cfa1 is used as CFA */
133 #  define MORE_VARS     Xt cfa1; /* declaration of cfa1; NOTE(review): presumably expanded among the engine's local variables - confirm at the use site */
134 #  define IP		(ip)
135 #  define SET_IP(p)	do {ip=(p); cfa=*ip;} while(0) /* changing ip also reloads cfa from the new *ip */
136 #  define NEXT_INST	(cfa) /* cfa is maintained as a copy of *ip by NEXT_P0, SET_IP and INC_IP */
137 #  define INC_IP(const_inc)	do {cfa=IP[const_inc]; ip+=(const_inc);} while(0) /* advance ip and keep cfa equal to the new *ip */
138 #  define DEF_CA	Label MAYBE_UNUSED ca; /* declares ca, which NEXT_P1_5 and EXEC1 assign */
139 #  define NEXT_P1	do {\
140   if (DEBUG_DITC && (cfa<=vm_prims+DOESJUMP || cfa>=vm_prims+npriminfos)) \
141     fprintf(stderr,"NEXT encountered prim %p at ip=%p\n", cfa, ip); \
142   ip++;} while(0) /* with DEBUG_DITC, reports a cfa that is not strictly between vm_prims+DOESJUMP and vm_prims+npriminfos; always advances ip */
143 #  define NEXT_P1_5	do {ca=**cfa; GOTO(ca);} while(0) /* two indirections from cfa give the code address, which is stored in real_ca */
144 #  define EXEC1(XT)	({DEF_CA cfa=(XT);\
145   if (DEBUG_DITC && (cfa>vm_prims+DOESJUMP && cfa<vm_prims+npriminfos)) \
146     fprintf(stderr,"EXEC encountered xt %p at ip=%p, vm_prims=%p, xts=%p\n", cfa, ip, vm_prims, xts); \
147  ca=**cfa; ca;}) /* statement expression: sets cfa to XT and yields **cfa; with DEBUG_DITC, reports a cfa strictly between vm_prims+DOESJUMP and vm_prims+npriminfos */
148 
149 #elif defined(NO_IP) /* scheme in which NEXT does not use ip: dispatch jumps to next_code */
150 
151 #define NEXT_P0
152 #  define CFA		cfa
153 #define SET_IP(target)	assert(0) /* must never be executed in this scheme: expands to a failing assertion */
154 #define INC_IP(n)	((void)0) /* no-op */
155 #define DEF_CA
156 #define NEXT_P1
157 #define NEXT_P1_5		do {goto *next_code;} while(0) /* jumps directly to next_code, so the DO_GOTO that follows it in NEXT_P2 is never reached */
158 /* set next_code to the return address before performing EXEC */
159 /* original definition of EXEC1: */
160 /* #define EXEC1(XT)	do {cfa=(XT); goto **cfa;} while(0) */
161 /* fake definition that yields a value, as EXEC expects (real_ca=EXEC1(XT)), to make the syntax check work */
162 #define EXEC1(XT)	({cfa=(XT); *cfa;})
163 
164 #else  /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
165 
166 #if defined(DIRECT_THREADED)
167 
168 /* KILLS lets the compiler know that cfa is dead before it; we place it at
169    "goto *"s that perform direct threaded dispatch (i.e., not EXECUTE
170    etc.), and thus do not reach doers, which would use cfa; the only
171    way to a doer is through EXECUTE etc., which set the cfa
172    themselves.
173 
174    Some of these direct threaded schemes use "cfa" to hold the code
175    address in normal direct threaded code.  Of course we cannot use
176    KILLS there.
177 
178    KILLS works by having an empty asm instruction, and claiming to the
179    compiler that it writes to cfa.
180 
181    KILLS is optional.  You can write
182 
183 #define KILLS
184 
185    and lose just a little performance.
186 */
187 #define KILLS asm("":"=X"(cfa)); /* the trailing ';' is part of the macro, so uses are written "KILLS" without a semicolon */
188 
189 /* direct threading scheme 8 (cfa dead, i386 hack): the content of a threaded code cell is used directly as the jump target */
190 #  define NEXT_P0 /* empty in this scheme */
191 #  define CFA		cfa
192 #  define IP		(ip)
193 #  define SET_IP(p)	do {ip=(p); NEXT_P0;} while(0) /* set ip, then redo NEXT_P0 (empty here) */
194 #  define NEXT_INST	(*IP)
195 #  define INC_IP(const_inc)	do { ip+=(const_inc);} while(0)
196 #  define DEF_CA
197 #  define NEXT_P1	(ip++) /* step past the current cell; NEXT_P1_5 reads it back as *(ip-1) */
198 #  define NEXT_P1_5	do {KILLS GOTO(*(ip-1));} while(0) /* declare cfa dead, then store the cell's content in real_ca as jump target */
199 #  define EXEC1(XT)	({cfa=(XT); *cfa;}) /* statement expression: sets cfa to XT and yields *cfa as the code address */
200 
201 /* end of the direct threaded scheme */
202 #else /* !defined(DIRECT_THREADED) */
203 /* indirect threaded: a threaded code cell holds a cfa, and *cfa is the code address */
204 
205 /* indirect threading scheme 8 (low latency, cisc) */
206 #  define NEXT_P0 /* empty in this scheme */
207 #  define CFA		cfa
208 #  define IP		(ip)
209 #  define SET_IP(p)	do {ip=(p); NEXT_P0;} while(0) /* set ip, then redo NEXT_P0 (empty here) */
210 #  define NEXT_INST	(*ip)
211 #  define INC_IP(const_inc)	do {ip+=(const_inc);} while(0)
212 #  define DEF_CA
213 #  define NEXT_P1 /* empty: ip is advanced in NEXT_P1_5 instead */
214 #  define NEXT_P1_5	do {cfa=*ip++; GOTO(*cfa);} while(0) /* fetch the next cfa, advance ip, store *cfa in real_ca as jump target */
215 #  define EXEC1(XT)	({cfa=(XT); *cfa;}) /* statement expression: sets cfa to XT and yields *cfa as the code address */
216 
217 /* end of the indirect threaded scheme */
218 #endif /* defined(DIRECT_THREADED) */
219 
220 #endif /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
221 
222