/* This file defines a number of threading schemes.

  Copyright (C) 1995, 1996,1997,1999,2003,2004,2005,2007,2008 Free Software Foundation, Inc.

  This file is part of Gforth.

  Gforth is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License
  as published by the Free Software Foundation, either version 3
  of the License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, see http://www.gnu.org/licenses/.


  This file defines macros for threading. Many sets of macros are
  defined. Functionally they have only one difference: Some implement
  direct threading, some indirect threading. The other differences are
  just variations to help GCC generate faster code for various
  machines.

  (Well, to tell the truth, there actually is another functional
  difference in some pathological cases: e.g., a '!' stores into the
  cell where the next executed word comes from; or, the next word
  executed comes from the top-of-stack. These differences are one of
  the reasons why GCC cannot produce the right variation by itself. We
  chose disallowing such practices and using the added implementation
  freedom to achieve a significant speedup, because these practices
  are not common in Forth (I have never heard of or seen anyone using
  them), and it is easy to circumvent problems: A control flow change
  will flush any prefetched words; you may want to do a "0 drop"
  before that to write back the top-of-stack cache.)

  These macro sets are used in the following ways: After translation
  to C a typical primitive looks like

  ...
  {
  DEF_CA
  other declarations
  NEXT_P0;
  main part of the primitive
  NEXT_P1;
  store results to stack
  NEXT_P2;
  }

  DEF_CA and all the NEXT_P* together must implement NEXT; In the main
  part the instruction pointer can be read with IP, changed with
  INC_IP(const_inc), and the cell right behind the presently executing
  word (i.e. the value of *IP) is accessed with NEXT_INST.

  If a primitive does not fall through the main part, it has to do the
  rest by itself. If it changes ip, it has to redo NEXT_P0 (perhaps we
  should define a macro SET_IP).

  Some primitives (execute, dodefer) do not end with NEXT, but with
  EXEC(.). If NEXT_P0 has been called earlier, it has to perform
  "ip=IP;" to ensure that ip has the right value (NEXT_P0 may change
  it).

  Finally, there is NEXT1_P1 and NEXT1_P2, which are parts of EXEC
  (EXEC(XT) could be defined as "cfa=XT; NEXT1_P1; NEXT1_P2;" (is this
  true?)) and are used for making docol faster.

  We can define the ways in which these macros are used with a regular
  expression:

  For a primitive

  DEF_CA NEXT_P0 ( IP | INC_IP | NEXT_INST | ip=...; NEXT_P0 ) * ( NEXT_P1 NEXT_P2 | EXEC(...) )

  For a run-time routine, e.g., docol:
  PFA1(cfa) ( NEXT_P0 NEXT | cfa=...; NEXT1_P1; NEXT1_P2 | EXEC(...) )

  This comment does not yet describe all the dependencies that the
  macros have to satisfy.

  To organize the former ifdef chaos, each path is separated.
  This gives a quite impressive number of paths, but you clearly
  find things that go together.

  It should be possible to organize the whole thing in a way that
  contains less redundancy and allows a simpler description.

*/

/* Select whether every dispatch jumps through one shared "goto *"
   (workaround on) or performs its own "goto *real_ca" (workaround
   off).  The setting can be forced by defining GCC_PR15242_WORKAROUND
   before this point. */
#if !defined(GCC_PR15242_WORKAROUND)
#if __GNUC__ == 3
/* various gcc-3.x version have problems (including PR15242) that are
   solved with this workaround */
#define GCC_PR15242_WORKAROUND 1
#else
/* other gcc versions are better off without the workaround for
   primitives that are not relocatable */
#define GCC_PR15242_WORKAROUND 0
#endif
#endif

/* DO_GOTO performs the actual transfer of control.  With the
   workaround it jumps to the label before_goto, which FIRST_NEXT_P2
   places in front of the one real "goto *real_ca". */
#if GCC_PR15242_WORKAROUND
#define DO_GOTO goto before_goto
#else
#define DO_GOTO goto *real_ca
#endif

/* GOTO_ALIGN expands to nothing unless it was defined earlier. */
#ifndef GOTO_ALIGN
#define GOTO_ALIGN
#endif

/* Scheme-independent part.  The macros below are built from DEF_CA,
   NEXT_P1, NEXT_P1_5, EXEC1 and NEXT_INST, which each threading
   scheme further down supplies.

   GOTO(target)    stores target in real_ca; it does not jump.
   NEXT_P2         NEXT_P1_5 followed by DO_GOTO.
   EXEC(XT)        real_ca=EXEC1(XT), then DO_GOTO.
   VM_JUMP(target) same as GOTO(target).
   NEXT            DEF_CA NEXT_P1; NEXT_P2 in one statement.
   FIRST_NEXT_P2   like NEXT_P2, but contains the "goto *real_ca"
                   itself, bracketed by the labels before_goto and
                   after_goto; it is not wrapped in do..while and ends
                   in a label, so it must be followed by a statement
                   (FIRST_NEXT supplies the ';').
   FIRST_NEXT      DEF_CA NEXT_P1; FIRST_NEXT_P2 in one statement.
   IPTOS           alias for NEXT_INST. */
#define GOTO(target) do {(real_ca=(target));} while(0)
#define NEXT_P2 do {NEXT_P1_5; DO_GOTO;} while(0)
#define EXEC(XT) do { real_ca=EXEC1(XT); DO_GOTO;} while (0)
#define VM_JUMP(target) do {GOTO(target);} while (0)
#define NEXT do {DEF_CA NEXT_P1; NEXT_P2;} while(0)
#define FIRST_NEXT_P2 NEXT_P1_5; GOTO_ALIGN; \
before_goto: goto *real_ca; after_goto:
#define FIRST_NEXT do {DEF_CA NEXT_P1; FIRST_NEXT_P2;} while(0)
#define IPTOS NEXT_INST


#ifdef DOUBLY_INDIRECT

/* Doubly indirect scheme: the code address is fetched with "**cfa".
   NEXT_P0 prefetches *ip into cfa and keeps the previous cfa in cfa1
   (declared through MORE_VARS), which is what CFA refers to here. */

/* define to 1 if you want to check consistency */
# ifndef DEBUG_DITC
#  define DEBUG_DITC 0
# endif
#  define NEXT_P0 do {cfa1=cfa; cfa=*ip;} while(0)
#  define CFA cfa1
#  define MORE_VARS Xt cfa1;
#  define IP (ip)
#  define SET_IP(p) do {ip=(p); cfa=*ip;} while(0)
#  define NEXT_INST (cfa)
#  define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
#  define DEF_CA Label MAYBE_UNUSED ca;
/* When DEBUG_DITC is on, report a cfa lying outside the range
   (vm_prims+DOESJUMP, vm_prims+npriminfos).  "%p" requires a void*
   argument, hence the casts. */
#  define NEXT_P1 do {\
  if (DEBUG_DITC && (cfa<=vm_prims+DOESJUMP || cfa>=vm_prims+npriminfos)) \
    fprintf(stderr,"NEXT encountered prim %p at ip=%p\n", (void *)cfa, (void *)ip); \
  ip++;} while(0)
#  define NEXT_P1_5 do {ca=**cfa; GOTO(ca);} while(0)
/* When DEBUG_DITC is on, report an xt lying inside the range
   (vm_prims+DOESJUMP, vm_prims+npriminfos).  The value of the
   statement expression is the code address **cfa. */
#  define EXEC1(XT) ({DEF_CA cfa=(XT);\
  if (DEBUG_DITC && (cfa>vm_prims+DOESJUMP && cfa<vm_prims+npriminfos)) \
    fprintf(stderr,"EXEC encountered xt %p at ip=%p, vm_prims=%p, xts=%p\n", (void *)cfa, (void *)ip, (void *)vm_prims, (void *)xts); \
  ca=**cfa; ca;})

#elif defined(NO_IP)

/* Scheme without an instruction pointer: NEXT_P0, DEF_CA and NEXT_P1
   are empty, INC_IP does nothing, SET_IP must never be reached, and
   NEXT_P1_5 jumps to next_code.  IP and NEXT_INST are not defined in
   this scheme. */
#define NEXT_P0
# define CFA cfa
#define SET_IP(target) assert(0)
#define INC_IP(n) ((void)0)
#define DEF_CA
#define NEXT_P1
#define NEXT_P1_5 do {goto *next_code;} while(0)
/* set next_code to the return address before performing EXEC */
/* original: */
/* #define EXEC1(XT) do {cfa=(XT); goto **cfa;} while(0) */
/* fake, to make syntax check work */
#define EXEC1(XT) ({cfa=(XT); *cfa;})

#else /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */

#if defined(DIRECT_THREADED)

/* This lets the compiler know that cfa is dead before; we place it at
   "goto *"s that perform direct threaded dispatch (i.e., not EXECUTE
   etc.), and thus do not reach doers, which would use cfa; the only
   way to a doer is through EXECUTE etc., which set the cfa
   themselves.

   Some of these direct threaded schemes use "cfa" to hold the code
   address in normal direct threaded code.  Of course we cannot use
   KILLS there.

   KILLS works by having an empty asm instruction, and claiming to the
   compiler that it writes to cfa.

   KILLS is optional.  You can write

   #define KILLS

   and lose just a little performance.

   The __asm__ spelling is used because GCC does not recognize the
   plain "asm" keyword under strict -std=c99/-std=c11.
*/
#define KILLS __asm__("":"=X"(cfa));

/* #warning direct threading scheme 8: cfa dead, i386 hack */
/* NEXT_P1 advances ip; NEXT_P1_5 then dispatches on the cell that ip
   pointed to before the increment, i.e. *(ip-1). */
#  define NEXT_P0
#  define CFA cfa
#  define IP (ip)
#  define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
#  define NEXT_INST (*IP)
#  define INC_IP(const_inc) do { ip+=(const_inc);} while(0)
#  define DEF_CA
#  define NEXT_P1 (ip++)
#  define NEXT_P1_5 do {KILLS GOTO(*(ip-1));} while(0)
#  define EXEC1(XT) ({cfa=(XT); *cfa;})

/* direct threaded */
#else
/* indirect THREADED */

/* #warning indirect threading scheme 8: low latency,cisc */
/* NEXT_P0 and NEXT_P1 are empty; NEXT_P1_5 does all the work: it
   loads cfa from *ip, advances ip, and dispatches on *cfa. */
#  define NEXT_P0
#  define CFA cfa
#  define IP (ip)
#  define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
#  define NEXT_INST (*ip)
#  define INC_IP(const_inc) do {ip+=(const_inc);} while(0)
#  define DEF_CA
#  define NEXT_P1
#  define NEXT_P1_5 do {cfa=*ip++; GOTO(*cfa);} while(0)
#  define EXEC1(XT) ({cfa=(XT); *cfa;})

/* indirect threaded */
#endif

#endif /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */