1 /* Jitter: fast-branch header. 2 3 Copyright (C) 2017, 2019, 2020 Luca Saiu 4 Written by Luca Saiu 5 6 This file is part of Jitter. 7 8 Jitter is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 Jitter is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with Jitter. If not, see <http://www.gnu.org/licenses/>. */ 20 21 22 #ifndef JITTER_FAST_BRANCH_H_ 23 #define JITTER_FAST_BRANCH_H_ 24 25 #include <jitter/jitter-arithmetic.h> 26 27 28 /* Check whether patch-ins are needed, and supported on this machine. 29 * ************************************************************************** */ 30 31 /* This among the rest includes <jitter/machine/jitter-machine.h> , which is 32 enough to see the definition of JITTER_MACHINE_SUPPORTS_BRANCH_AND_LINK used 33 below, and all the machine-specific branch definitions, which are optional; 34 in case patch-ins are used what is missing will be defined here. */ 35 #include <jitter/jitter-patch-in.h> 36 37 38 39 40 /* Low-level versus high-level fast-branches. 41 * ************************************************************************** */ 42 43 /* The low-level machine-specific definition for conditional fast branches, when 44 available, "assume the worst case" in terms of constantness. This means that 45 low-level macros expand to inline assembly code actually performing a 46 conditional, even when there is no need to because the condition is actually 47 a compile-time constant. 48 49 Low-level macros can assume they receive trivial arguments with no side 50 effects, always either constants or variables, even if not necessarily known 51 at compile time. Argument evaluation happens *out* of low-level macros. 52 53 Low-level macros are conceived for easy portability and make machine-specific 54 code more declarative than algorithmic, but are not intended for the user. 55 The user should always use high-level fast branches, defined in a 56 machine-generated header as explained below (using machine-specific low-level 57 fast-branches when available), which evaluate every comparison argument 58 exactly once, rely on statements containing conditionals based on complex 59 constant expression in order to eventually generate optimized code containing 60 only a translation of the appropriate expansion of low-level macros. 61 62 It is not necessary to protect low-level fast-branch operators with do..while 63 (false). The high-level definitions already wrap their uses. Like elsewhere 64 in Jitter, a macro whose name ends with "_" is unsafe in this respect, but 65 is only used internally. */ 66 67 /* This one low-level fast branch has a machine-independent definition, only 68 relying on JITTER_PATCH_IN_SIZE_FAST_BRANCH_UNCONDITIONAL and 69 JITTER_PATCH_IN_SIZE_FAST_BRANCH_UNCONDITIONAL , which of course are to 70 be supplied in the machine-specific header. 71 Low-level fast-branch unconditionally, going to tgt. */ 72 #ifdef JITTER_HAVE_PATCH_IN 73 # define _JITTER_LOW_LEVEL_BRANCH_FAST_(tgt) \ 74 JITTER_PATCH_IN_PLACEHOLDER_GOTO_ \ 75 (JITTER_PATCH_IN_SIZE_FAST_BRANCH_UNCONDITIONAL, \ 76 JITTER_PATCH_IN_CASE_FAST_BRANCH_UNCONDITIONAL, \ 77 tgt, \ 78 0, 0, 0); /* Unused for this case. */ \ 79 __builtin_unreachable () 80 #else /* not using fast branches */ 81 # define _JITTER_LOW_LEVEL_BRANCH_FAST_(tgt) \ 82 /* If fast branches are not used this expands to an ordinary \ 83 branch. The arguments of fast and non-fast branches are \ 84 not at all compatible, but the user will not see the \ 85 difference. */ \ 86 JITTER_BRANCH(tgt) 87 #endif // #ifdef JITTER_HAVE_PATCH_IN 88 89 /* This is a good default solution for overflow-checking on sum and subtraction, 90 which appears to yield optimal code on machines without special support for 91 overflow checks such as RISC-V and pre-r6 MIPS, provided that the 92 fast-branch-on-negative operation is efficient. Instead of using the builtin 93 plus inline asm (which would need to generate a Boolean and then test that 94 same Boolean in another conditional) I can generate better code by computing 95 the condition as a signed word in simple C and then using a low-level 96 fast-branch primitive, presumably written in assembly, only for 97 fast-branching on its sign. */ 98 #if (! defined (_JITTER_LOW_LEVEL_BRANCH_FAST_IF_PLUS_OVERFLOWS_) \ 99 && ! defined (_JITTER_LOW_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_)) 100 # define _JITTER_LOW_LEVEL_BRANCH_FAST_IF_PLUS_OVERFLOWS_(opd0, opd1, tgt) \ 101 _JITTER_LOW_LEVEL_BRANCH_FAST_IF_NEGATIVE_ \ 102 (JITTER_WOULD_PLUS_OVERFLOW_SIGNED_WORD (jitter_uint, \ 103 jitter_int, \ 104 (jitter_int) (opd0), \ 105 (jitter_int) (opd1), \ 106 JITTER_BITS_PER_WORD), \ 107 (tgt)) 108 #endif // no machine-specific support for sum overflow 109 #if (! defined (_JITTER_LOW_LEVEL_BRANCH_FAST_IF_MINUS_OVERFLOWS_) \ 110 && ! defined (_JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_)) 111 # define _JITTER_LOW_LEVEL_BRANCH_FAST_IF_MINUS_OVERFLOWS_(opd0, opd1, tgt) \ 112 _JITTER_LOW_LEVEL_BRANCH_FAST_IF_NEGATIVE_ \ 113 (JITTER_WOULD_MINUS_OVERFLOW_SIGNED_WORD (jitter_uint, \ 114 jitter_int, \ 115 (jitter_int) (opd0), \ 116 (jitter_int) (opd1), \ 117 JITTER_BITS_PER_WORD), \ 118 (tgt)) 119 #endif // no machine-specific support for subtraction overflow 120 121 /* No equally good solution for multiplication overflow is obvious. By default 122 the machine-generated header will use GCC's builtin when available, or a C 123 macro otherwise. */ 124 125 126 127 128 /* Middle-level operate-and-branch-on-overflow. 129 * ************************************************************************** */ 130 131 /* The following definitions rewrite low-level operate-and-branch-on-overflow 132 branches into other low-level operate-and-branch-on-overflow which are 133 cheaper to compute, where possible. These constitute a thin abstraction over 134 low-level operate-and-branch-on-overflow, and are used internally in the 135 machine-generated header descrbied below in order to implement high-level 136 branches, of both the operate-and-branch-on-overflow and branch-on-overflow 137 kinds. Middle-level primitives do not optimize away overflow checks: that is 138 done by high-level primitives, which will not resort to middle-level 139 primitives (as there is be no need to: GCC will be able to optimize the 140 operations written in C, in those cases) when the overflow checks yield 141 results known at compile time, either always or never branching. 142 Middle-level primitives are used in the "worst" case by high-level 143 primitives, where the overflow condition is not known, but the operation 144 might still be optimizable; the overflow check will remain in the rewrite. 145 Of course nothing of this is intended for the user, who should only ever see 146 high-level operations. */ 147 148 /* These macros may all evaluate their arguments more than once. They do not 149 even protect their arguments with parentheses in the expansion: they are 150 meant to be only used in machine-generated code, with variables or constants 151 as arguments. */ 152 #define _JITTER_MIDDLE_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt) \ 153 /* Nothing to do here. */ \ 154 _JITTER_LOW_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt) 155 #define _JITTER_MIDDLE_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt) \ 156 /* Nothing to do here. */ \ 157 _JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt) 158 #define _JITTER_MIDDLE_LEVEL_TIMES_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt) \ 159 if (JITTER_KNOWN_TO_BE (a, -1)) \ 160 { \ 161 /* -1 * b ==> 0 - b */ \ 162 _JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_ (res, 0, b, tgt); \ 163 } \ 164 else if (JITTER_KNOWN_TO_BE (b, -1)) \ 165 { \ 166 /* a * -1 ==> 0 - a */ \ 167 _JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_ (res, 0, a, tgt); \ 168 } \ 169 else if (JITTER_KNOWN_TO_BE (a, 2)) \ 170 { \ 171 /* 2 * b ==> b + b */ \ 172 _JITTER_LOW_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_ (res, b, b, tgt); \ 173 } \ 174 else if (JITTER_KNOWN_TO_BE (b, 2)) \ 175 { \ 176 /* a * 2 ==> a + a */ \ 177 _JITTER_LOW_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_ (res, a, a, tgt); \ 178 } \ 179 else \ 180 { \ 181 /* Worst case: a * b is not rewritten. */ \ 182 _JITTER_LOW_LEVEL_TIMES_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt); \ 183 } 184 #define _JITTER_MIDDLE_LEVEL_DIVIDED_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt) \ 185 if (JITTER_KNOWN_TO_BE (b, -1)) \ 186 { \ 187 /* a / -1 ==> 0 - a */ \ 188 _JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_ (res, 0, a, tgt); \ 189 } \ 190 /* No need to treat the a / 2 case here: it cannot overflow, and the \ 191 high-level primitive can handle that. */ \ 192 else if (JITTER_KNOWN_TO_BE_EQUAL (a, b)) \ 193 { \ 194 /* a / a ==> if a == 0 then overflow else 1 */ \ 195 _JITTER_LOW_LEVEL_BRANCH_FAST_IF_ZERO_ (a, tgt); \ 196 (res) = 1; \ 197 } \ 198 else if (JITTER_KNOWN_TO_BE (a, 1)) \ 199 { \ 200 /* 1 / b ==> case b of 0: overflow, 1: 1, -1: -1 else 0 */ \ 201 /* The benefit of this case may be somewhat questionable. This code \ 202 yields a couple of conditionals or conditional moves, but course no \ 203 divisions. Without this case I get possibly a little fewer \ 204 instructions, still with conditionals, but including one division. \ 205 Notice that b is not known and therefore the tests against b will \ 206 survive optimization into run time, or otherwise we would not be here \ 207 in the middle-level primitive. */ \ 208 _JITTER_LOW_LEVEL_BRANCH_FAST_IF_ZERO_ (b, tgt); \ 209 if (b == 1) \ 210 (res) = 1; \ 211 else if (b == -1) \ 212 (res) = -1; \ 213 else \ 214 (res) = 0; \ 215 } \ 216 else if (JITTER_KNOWN_TO_BE (a, -1)) \ 217 { \ 218 /* -1 / b ==> case b of 0: overflow, -1: 1, 1: -1 else 0 */ \ 219 /* The comment about efficiency in the previous case applies here as \ 220 well. */ \ 221 _JITTER_LOW_LEVEL_BRANCH_FAST_IF_ZERO_ (b, tgt); \ 222 if (b == -1) \ 223 (res) = 1; \ 224 else if (b == 1) \ 225 (res) = -1; \ 226 else \ 227 (res) = 0; \ 228 } \ 229 else \ 230 { \ 231 /* Worst case: a / b is not rewritten. */ \ 232 _JITTER_LOW_LEVEL_DIVIDED_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt); \ 233 } 234 #define _JITTER_MIDDLE_LEVEL_REMAINDER_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt) \ 235 /* No rewrite here. */ \ 236 _JITTER_LOW_LEVEL_REMAINDER_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt) 237 238 239 240 241 /* High-level negation overflow primitives. 242 * ************************************************************************** */ 243 244 /* The integer negation operation is only a user convenience hiding a first zero 245 operand in a subtraction. In this case it is enough to define high-level 246 primitives, based on the subtraction which have been defined already. */ 247 #define _JITTER_NEGATE_BRANCH_FAST_IF_OVERFLOW(res, opd0, tgt) \ 248 _JITTER_MINUS_BRANCH_FAST_IF_OVERFLOW((res), 0, (opd0), tgt) 249 #define _JITTER_BRANCH_FAST_IF_NEGATE_OVERFLOWS(opd0, tgt) \ 250 _JITTER_BRANCH_FAST_IF_MINUS_OVERFLOWS(0, (opd0), tgt) 251 252 253 254 255 /* High-level unconditional fast branch. 256 * ************************************************************************** */ 257 258 /* High-level unconditional branch. This definition is machine-independent, 259 even if of course it relies on a low-level machine-specific primitive (itself 260 having a fallback definition in case it is missing). */ 261 # define _JITTER_BRANCH_FAST(tgt) \ 262 do \ 263 { \ 264 _JITTER_LOW_LEVEL_BRANCH_FAST_ (tgt); \ 265 } \ 266 while (false) 267 268 269 270 271 /* The machine-generated header for fast branches. 272 * ************************************************************************** */ 273 274 /* In order to make porting easier a machine specification does not need to 275 include a definition for every possible low-level fast branch; moreover, in 276 the case of overflow checking, a machine specification is free to either 277 define operate-and-branch-on-overflow or branch-on-overflow primitives; the 278 missing kind will be defined based on the other. 279 280 This machine-generated header completes the missing definitions for low-level 281 fast branches and defines high-level fast branches. */ 282 # include <jitter/jitter-fast-branch-machine-generated.h> 283 284 285 286 287 /* Fallback fast branch-and-link definition. 288 * ************************************************************************** */ 289 290 /* Provide a fallback definition of _JITTER_BRANCH_FAST_AND_LINK_INTERNAL , if 291 needed. The definition will either use the slow JITTER_BRANCH_AND_LINK if 292 fast labels are not used in this configuration, or use a generic 293 branch-and-link implementation but do the actual branch as a fast branch. 294 295 No fallback is provided if the machine already defiens its own 296 _JITTER_BRANCH_FAST_AND_LINK_INTERNAL : this means that the dispatching model 297 is no-threading, and the machine supports both patch-ins and procedures. 298 299 The user should never call _JITTER_BRANCH_FAST_AND_LINK_INTERNAL , or even 300 JITTER_BRANCH_AND_LINK_INTERNAL . Suitable user macros, without the initial 301 underscore prefix and without the _INTERNAL suffix, are automatically defined 302 to be only visible in caller instructions . This forces the user to correctly 303 declare callers so that return labels can be handled in every case. */ 304 305 /* Provide a definition of _JITTER_BRANCH_FAST_AND_LINK_INTERNAL if needed. */ 306 #if ! defined(JITTER_DISPATCH_NO_THREADING) 307 /* With any dispatching model different from no-threading fast branches revert 308 to generic slow branches; in the same way fast branch-and-link operations 309 revert to generic slow branch-and-link operations. */ 310 # define _JITTER_BRANCH_FAST_AND_LINK_INTERNAL(target) \ 311 JITTER_BRANCH_AND_LINK_INTERNAL(target) 312 #elif ! defined(JITTER_HAVE_PATCH_IN) 313 # if defined(JITTER_MACHINE_SUPPORTS_PROCEDURE) 314 /* Currently machine-specific support must also provide patch-ins if it 315 provides procedures. */ 316 # error "the machine supports procedures but not patch-ins" 317 # else 318 /* The machine supports neither patch-ins, nor procedures. We can use the 319 generic implementation of branch-and-link with slow branches. */ 320 # define _JITTER_BRANCH_FAST_AND_LINK_INTERNAL(target) \ 321 JITTER_BRANCH_AND_LINK_INTERNAL(target) 322 # endif // if defined(JITTER_MACHINE_SUPPORTS_PROCEDURE) 323 #elif ! defined(JITTER_MACHINE_SUPPORTS_PROCEDURE) 324 /* The machine supports patch-ins, but not procedures. The best we can do is 325 using a generic branch-and-link implementation (relying on a hidden 326 residual parameter for caller instructions) which performs the actual 327 branch with a fast branch. */ 328 # define _JITTER_BRANCH_FAST_AND_LINK_INTERNAL(target) \ 329 do \ 330 { \ 331 /* Use the return address from the implicit specialized argument */ \ 332 jitter_state_runtime._jitter_link = JITTER_RETURN_ADDRESS; \ 333 _JITTER_BRANCH_FAST (target); \ 334 __builtin_unreachable (); \ 335 } \ 336 while (false) 337 #elif ! defined(_JITTER_BRANCH_FAST_AND_LINK_INTERNAL) 338 # error "The machine claims to support procedures but _JITTER_BRANCH_FAST_AND_LINK_INTERNAL is not defiend." 339 #endif 340 341 342 343 344 /* Ignore the rest of this header if not using patch-ins. 345 * ************************************************************************** */ 346 347 /* The rest of this header expands to nothing if fast branches are not supported 348 in this configuration. The previous CPP includes suffices to make the 349 JITTER_HAVE_PATCH_IN visible, if any. */ 350 351 #ifdef JITTER_HAVE_PATCH_IN 352 353 354 355 356 /* Include headers. 357 * ************************************************************************** */ 358 359 #include <stdio.h> 360 #include <stdbool.h> 361 362 #include <jitter/jitter.h> 363 #include <jitter/jitter-cpp.h> 364 365 366 367 368 /* Fast branch types. 369 * ************************************************************************** */ 370 371 /* The type of a fast branch as a patch-in case, each corresponding to different 372 assembly non-zero natural code to insert in assembly code. 373 374 These are CPP macros rather than enum cases because the case values are used 375 in inline assembly code (actually data), where a known value is used. An 376 assembly "immediate" constraint is not usable, as in some platforms immediate 377 literals are always prefixed by a character such as $ or #, which is not 378 needed or correct in data definitions. 379 380 This complication is a pity: GCC documents the "x86 Operand Modifiers" as 381 non-portable; having the equivalent of the 'c' modifier on every platform 382 would allow for cleaner code. 383 384 It is not necessary to use every case on any given machine, or to use only 385 these. Still it is convenient to have a centralized definition to reuse. */ 386 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_UNCONDITIONAL 1 387 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_ZERO 2 388 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NONZERO 3 389 390 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_POSITIVE 4 391 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NONPOSITIVE 5 392 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NEGATIVE 6 393 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NONNEGATIVE 7 394 395 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_EQUAL 8 396 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTEQUAL 9 397 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_LESS_UNSIGNED 10 398 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_LESS_SIGNED 11 399 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTGREATER_UNSIGNED 12 400 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTGREATER_SIGNED 13 401 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_GREATER_UNSIGNED 14 402 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_GREATER_SIGNED 15 403 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTLESS_UNSIGNED 16 404 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTLESS_SIGNED 17 405 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_AND 18 406 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTAND 19 407 408 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_ANY 20 409 410 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_BRANCH_AND_LINK 21 411 /* These should be extended in the future with floating-point conditionals. */ 412 413 /* The other values up to 999 included are reserved. Fast branch cases starting 414 at 1000 are available for architecture-specific code to define. */ 415 416 #endif // #ifdef JITTER_HAVE_PATCH_IN 417 #endif // #ifndef JITTER_FAST_BRANCH_H_ 418