1 /* Jitter: fast-branch header.
2 
3    Copyright (C) 2017, 2019, 2020 Luca Saiu
4    Written by Luca Saiu
5 
6    This file is part of Jitter.
7 
8    Jitter is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    Jitter is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with Jitter.  If not, see <http://www.gnu.org/licenses/>. */
20 
21 
22 #ifndef JITTER_FAST_BRANCH_H_
23 #define JITTER_FAST_BRANCH_H_
24 
25 #include <jitter/jitter-arithmetic.h>
26 
27 
28 /* Check whether patch-ins are needed, and supported on this machine.
29  * ************************************************************************** */
30 
31 /* This among the rest includes <jitter/machine/jitter-machine.h> , which is
32    enough to see the definition of JITTER_MACHINE_SUPPORTS_BRANCH_AND_LINK used
33    below, and all the machine-specific branch definitions, which are optional;
34    in case patch-ins are used what is missing will be defined here. */
35 #include <jitter/jitter-patch-in.h>
36 
37 
38 
39 
40 /* Low-level versus high-level fast-branches.
41  * ************************************************************************** */
42 
43 /* The low-level machine-specific definition for conditional fast branches, when
44    available, "assume the worst case" in terms of constantness.  This means that
45    low-level macros expand to inline assembly code actually performing a
46    conditional, even when there is no need to because the condition is actually
47    a compile-time constant.
48 
49    Low-level macros can assume they receive trivial arguments with no side
50    effects, always either constants or variables, even if not necessarily known
51    at compile time.  Argument evaluation happens *out* of low-level macros.
52 
53    Low-level macros are conceived for easy portability and make machine-specific
54    code more declarative than algorithmic, but are not intended for the user.
55    The user should always use high-level fast branches, defined in a
56    machine-generated header as explained below (using machine-specific low-level
57    fast-branches when available), which evaluate every comparison argument
58    exactly once, rely on statements containing conditionals based on complex
59    constant expression in order to eventually generate optimized code containing
60    only a translation of the appropriate expansion of low-level macros.
61 
62    It is not necessary to protect low-level fast-branch operators with do..while
63    (false).  The high-level definitions already wrap their uses.  Like elsewhere
64    in Jitter, a macro whose name ends with "_" is unsafe in this respect, but
65    is only used internally. */
66 
67 /* This one low-level fast branch has a machine-independent definition, only
68    relying on JITTER_PATCH_IN_SIZE_FAST_BRANCH_UNCONDITIONAL and
69    JITTER_PATCH_IN_SIZE_FAST_BRANCH_UNCONDITIONAL , which of course are to
70    be supplied in the machine-specific header.
71    Low-level fast-branch unconditionally, going to tgt. */
72 #ifdef JITTER_HAVE_PATCH_IN
73 # define _JITTER_LOW_LEVEL_BRANCH_FAST_(tgt)             \
74     JITTER_PATCH_IN_PLACEHOLDER_GOTO_                    \
75        (JITTER_PATCH_IN_SIZE_FAST_BRANCH_UNCONDITIONAL,  \
76         JITTER_PATCH_IN_CASE_FAST_BRANCH_UNCONDITIONAL,  \
77         tgt,                                             \
78         0, 0, 0); /* Unused for this case. */            \
79     __builtin_unreachable ()
80 #else /* not using fast branches */
81 # define _JITTER_LOW_LEVEL_BRANCH_FAST_(tgt)                      \
82     /* If fast branches are not used this expands to an ordinary  \
83        branch.  The arguments of fast and non-fast branches are   \
84        not at all compatible, but the user will not see the       \
85        difference. */                                             \
86     JITTER_BRANCH(tgt)
87 #endif // #ifdef JITTER_HAVE_PATCH_IN
88 
89 /* This is a good default solution for overflow-checking on sum and subtraction,
90    which appears to yield optimal code on machines without special support for
91    overflow checks such as RISC-V and pre-r6 MIPS, provided that the
92    fast-branch-on-negative operation is efficient.  Instead of using the builtin
93    plus inline asm (which would need to generate a Boolean and then test that
94    same Boolean in another conditional) I can generate better code by computing
95    the condition as a signed word in simple C and then using a low-level
96    fast-branch primitive, presumably written in assembly, only for
97    fast-branching on its sign. */
98 #if (! defined (_JITTER_LOW_LEVEL_BRANCH_FAST_IF_PLUS_OVERFLOWS_)     \
99      && ! defined (_JITTER_LOW_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_))
100 # define _JITTER_LOW_LEVEL_BRANCH_FAST_IF_PLUS_OVERFLOWS_(opd0, opd1, tgt)  \
101     _JITTER_LOW_LEVEL_BRANCH_FAST_IF_NEGATIVE_                              \
102        (JITTER_WOULD_PLUS_OVERFLOW_SIGNED_WORD (jitter_uint,                \
103                                                 jitter_int,                 \
104                                                 (jitter_int) (opd0),        \
105                                                 (jitter_int) (opd1),        \
106                                                 JITTER_BITS_PER_WORD),      \
107         (tgt))
108 #endif // no machine-specific support for sum overflow
109 #if (! defined (_JITTER_LOW_LEVEL_BRANCH_FAST_IF_MINUS_OVERFLOWS_)     \
110      && ! defined (_JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_))
111 # define _JITTER_LOW_LEVEL_BRANCH_FAST_IF_MINUS_OVERFLOWS_(opd0, opd1, tgt)  \
112     _JITTER_LOW_LEVEL_BRANCH_FAST_IF_NEGATIVE_                               \
113        (JITTER_WOULD_MINUS_OVERFLOW_SIGNED_WORD (jitter_uint,                \
114                                                  jitter_int,                 \
115                                                  (jitter_int) (opd0),        \
116                                                  (jitter_int) (opd1),        \
117                                                  JITTER_BITS_PER_WORD),      \
118         (tgt))
119 #endif // no machine-specific support for subtraction overflow
120 
121 /* No equally good solution for multiplication overflow is obvious.  By default
122    the machine-generated header will use GCC's builtin when available, or a C
123    macro otherwise. */
124 
125 
126 
127 
128 /* Middle-level operate-and-branch-on-overflow.
129  * ************************************************************************** */
130 
131 /* The following definitions rewrite low-level operate-and-branch-on-overflow
132    branches into other low-level operate-and-branch-on-overflow which are
133    cheaper to compute, where possible.  These constitute a thin abstraction over
134    low-level operate-and-branch-on-overflow, and are used internally in the
135    machine-generated header descrbied below in order to implement high-level
136    branches, of both the operate-and-branch-on-overflow and branch-on-overflow
137    kinds.  Middle-level primitives do not optimize away overflow checks: that is
138    done by high-level primitives, which will not resort to middle-level
139    primitives (as there is be no need to: GCC will be able to optimize the
140    operations written in C, in those cases) when the overflow checks yield
141    results known at compile time, either always or never branching.
142    Middle-level primitives are used in the "worst" case by high-level
143    primitives, where the overflow condition is not known, but the operation
144    might still be optimizable; the overflow check will remain in the rewrite.
145    Of course nothing of this is intended for the user, who should only ever see
146    high-level operations. */
147 
148 /* These macros may all evaluate their arguments more than once.  They do not
149    even protect their arguments with parentheses in the expansion: they are
150    meant to be only used in machine-generated code, with variables or constants
151    as arguments. */
152 #define _JITTER_MIDDLE_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt)  \
153   /* Nothing to do here. */                                                 \
154   _JITTER_LOW_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt)
155 #define _JITTER_MIDDLE_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt)  \
156   /* Nothing to do here. */                                                  \
157   _JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt)
158 #define _JITTER_MIDDLE_LEVEL_TIMES_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt)  \
159   if (JITTER_KNOWN_TO_BE (a, -1))                                            \
160     {                                                                        \
161       /* -1 * b  ==>  0 - b */                                               \
162       _JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_ (res, 0, b, tgt);     \
163     }                                                                        \
164   else if (JITTER_KNOWN_TO_BE (b, -1))                                       \
165     {                                                                        \
166       /* a * -1  ==>  0 - a */                                               \
167       _JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_ (res, 0, a, tgt);     \
168     }                                                                        \
169   else if (JITTER_KNOWN_TO_BE (a, 2))                                        \
170     {                                                                        \
171       /* 2 * b  ==>  b + b */                                                \
172       _JITTER_LOW_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_ (res, b, b, tgt);      \
173     }                                                                        \
174   else if (JITTER_KNOWN_TO_BE (b, 2))                                        \
175     {                                                                        \
176       /* a * 2  ==>  a + a */                                                \
177       _JITTER_LOW_LEVEL_PLUS_BRANCH_FAST_IF_OVERFLOW_ (res, a, a, tgt);      \
178     }                                                                        \
179   else                                                                       \
180     {                                                                        \
181       /* Worst case: a * b is not rewritten. */                              \
182       _JITTER_LOW_LEVEL_TIMES_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt);     \
183     }
184 #define _JITTER_MIDDLE_LEVEL_DIVIDED_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt)  \
185   if (JITTER_KNOWN_TO_BE (b, -1))                                              \
186     {                                                                          \
187       /* a / -1  ==>  0 - a */                                                 \
188       _JITTER_LOW_LEVEL_MINUS_BRANCH_FAST_IF_OVERFLOW_ (res, 0, a, tgt);       \
189     }                                                                          \
190   /* No need to treat the a / 2 case here: it cannot overflow, and the         \
191      high-level primitive can handle that. */                                  \
192   else if (JITTER_KNOWN_TO_BE_EQUAL (a, b))                                    \
193     {                                                                          \
194       /* a / a  ==>  if a == 0 then overflow else 1 */                         \
195       _JITTER_LOW_LEVEL_BRANCH_FAST_IF_ZERO_ (a, tgt);                         \
196       (res) = 1;                                                               \
197     }                                                                          \
198   else if (JITTER_KNOWN_TO_BE (a, 1))                                          \
199     {                                                                          \
200       /* 1 / b  ==>  case b of 0: overflow, 1: 1, -1: -1 else 0 */             \
201       /* The benefit of this case may be somewhat questionable.  This code     \
202          yields a couple of conditionals or conditional moves, but course no   \
203          divisions.  Without this case I get possibly a little fewer           \
204          instructions, still with conditionals, but including one division.    \
205          Notice that b is not known and therefore the tests against b will     \
206          survive optimization into run time, or otherwise we would not be here \
207          in the middle-level primitive. */                                     \
208       _JITTER_LOW_LEVEL_BRANCH_FAST_IF_ZERO_ (b, tgt);                         \
209       if (b == 1)                                                              \
210         (res) = 1;                                                             \
211       else if (b == -1)                                                        \
212         (res) = -1;                                                            \
213       else                                                                     \
214         (res) = 0;                                                             \
215     }                                                                          \
216   else if (JITTER_KNOWN_TO_BE (a, -1))                                         \
217     {                                                                          \
218       /* -1 / b  ==>  case b of 0: overflow, -1: 1, 1: -1 else 0 */            \
219       /* The comment about efficiency in the previous case applies here as     \
220          well. */                                                              \
221       _JITTER_LOW_LEVEL_BRANCH_FAST_IF_ZERO_ (b, tgt);                         \
222       if (b == -1)                                                             \
223         (res) = 1;                                                             \
224       else if (b == 1)                                                         \
225         (res) = -1;                                                            \
226       else                                                                     \
227         (res) = 0;                                                             \
228     }                                                                          \
229   else                                                                         \
230     {                                                                          \
231       /* Worst case: a / b is not rewritten. */                                \
232       _JITTER_LOW_LEVEL_DIVIDED_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt);     \
233     }
234 #define _JITTER_MIDDLE_LEVEL_REMAINDER_BRANCH_FAST_IF_OVERFLOW_(res, a, b, tgt) \
235   /* No rewrite here. */                                                        \
236   _JITTER_LOW_LEVEL_REMAINDER_BRANCH_FAST_IF_OVERFLOW_ (res, a, b, tgt)
237 
238 
239 
240 
241 /* High-level negation overflow primitives.
242  * ************************************************************************** */
243 
244 /* The integer negation operation is only a user convenience hiding a first zero
245    operand in a subtraction.  In this case it is enough to define high-level
246    primitives, based on the subtraction which have been defined already. */
247 #define _JITTER_NEGATE_BRANCH_FAST_IF_OVERFLOW(res, opd0, tgt)  \
248   _JITTER_MINUS_BRANCH_FAST_IF_OVERFLOW((res), 0, (opd0), tgt)
249 #define _JITTER_BRANCH_FAST_IF_NEGATE_OVERFLOWS(opd0, tgt)  \
250   _JITTER_BRANCH_FAST_IF_MINUS_OVERFLOWS(0, (opd0), tgt)
251 
252 
253 
254 
255 /* High-level unconditional fast branch.
256  * ************************************************************************** */
257 
258 /* High-level unconditional branch.  This definition is machine-independent,
259    even if of course it relies on a low-level machine-specific primitive (itself
260    having a fallback definition in case it is missing). */
261 # define _JITTER_BRANCH_FAST(tgt)            \
262   do                                         \
263     {                                        \
264       _JITTER_LOW_LEVEL_BRANCH_FAST_ (tgt);  \
265     }                                        \
266   while (false)
267 
268 
269 
270 
271 /* The machine-generated header for fast branches.
272  * ************************************************************************** */
273 
274 /* In order to make porting easier a machine specification does not need to
275    include a definition for every possible low-level fast branch; moreover, in
276    the case of overflow checking, a machine specification is free to either
277    define operate-and-branch-on-overflow or branch-on-overflow primitives; the
278    missing kind will be defined based on the other.
279 
280    This machine-generated header completes the missing definitions for low-level
281    fast branches and defines high-level fast branches. */
282 # include <jitter/jitter-fast-branch-machine-generated.h>
283 
284 
285 
286 
287 /* Fallback fast branch-and-link definition.
288  * ************************************************************************** */
289 
290 /* Provide a fallback definition of _JITTER_BRANCH_FAST_AND_LINK_INTERNAL , if
291    needed.  The definition will either use the slow JITTER_BRANCH_AND_LINK if
292    fast labels are not used in this configuration, or use a generic
293    branch-and-link implementation but do the actual branch as a fast branch.
294 
295    No fallback is provided if the machine already defiens its own
296    _JITTER_BRANCH_FAST_AND_LINK_INTERNAL : this means that the dispatching model
297    is no-threading, and the machine supports both patch-ins and procedures.
298 
299    The user should never call _JITTER_BRANCH_FAST_AND_LINK_INTERNAL , or even
300    JITTER_BRANCH_AND_LINK_INTERNAL .  Suitable user macros, without the initial
301    underscore prefix and without the _INTERNAL suffix, are automatically defined
302    to be only visible in caller instructions .  This forces the user to correctly
303    declare callers so that return labels can be handled in every case. */
304 
305 /* Provide a definition of _JITTER_BRANCH_FAST_AND_LINK_INTERNAL if needed. */
306 #if ! defined(JITTER_DISPATCH_NO_THREADING)
307   /* With any dispatching model different from no-threading fast branches revert
308      to generic slow branches; in the same way fast branch-and-link operations
309      revert to generic slow branch-and-link operations. */
310 # define _JITTER_BRANCH_FAST_AND_LINK_INTERNAL(target) \
311   JITTER_BRANCH_AND_LINK_INTERNAL(target)
312 #elif ! defined(JITTER_HAVE_PATCH_IN)
313 # if defined(JITTER_MACHINE_SUPPORTS_PROCEDURE)
314     /* Currently machine-specific support must also provide patch-ins if it
315        provides procedures. */
316 #   error "the machine supports procedures but not patch-ins"
317 # else
318     /* The machine supports neither patch-ins, nor procedures.  We can use the
319        generic implementation of branch-and-link with slow branches. */
320 #   define _JITTER_BRANCH_FAST_AND_LINK_INTERNAL(target) \
321       JITTER_BRANCH_AND_LINK_INTERNAL(target)
322 # endif // if defined(JITTER_MACHINE_SUPPORTS_PROCEDURE)
323 #elif ! defined(JITTER_MACHINE_SUPPORTS_PROCEDURE)
324   /* The machine supports patch-ins, but not procedures.  The best we can do is
325      using a generic branch-and-link implementation (relying on a hidden
326      residual parameter for caller instructions) which performs the actual
327      branch with a fast branch. */
328 # define _JITTER_BRANCH_FAST_AND_LINK_INTERNAL(target)                       \
329     do                                                                       \
330       {                                                                      \
331         /* Use the return address from the implicit specialized argument */  \
332         jitter_state_runtime._jitter_link = JITTER_RETURN_ADDRESS;           \
333         _JITTER_BRANCH_FAST (target);                                        \
334         __builtin_unreachable ();                                            \
335       }                                                                      \
336     while (false)
337 #elif ! defined(_JITTER_BRANCH_FAST_AND_LINK_INTERNAL)
338 # error "The machine claims to support procedures but _JITTER_BRANCH_FAST_AND_LINK_INTERNAL is not defiend."
339 #endif
340 
341 
342 
343 
344 /* Ignore the rest of this header if not using patch-ins.
345  * ************************************************************************** */
346 
347 /* The rest of this header expands to nothing if fast branches are not supported
348    in this configuration.  The previous CPP includes suffices to make the
349    JITTER_HAVE_PATCH_IN visible, if any. */
350 
351 #ifdef JITTER_HAVE_PATCH_IN
352 
353 
354 
355 
356 /* Include headers.
357  * ************************************************************************** */
358 
359 #include <stdio.h>
360 #include <stdbool.h>
361 
362 #include <jitter/jitter.h>
363 #include <jitter/jitter-cpp.h>
364 
365 
366 
367 
368 /* Fast branch types.
369  * ************************************************************************** */
370 
371 /* The type of a fast branch as a patch-in case, each corresponding to different
372    assembly non-zero natural code to insert in assembly code.
373 
374    These are CPP macros rather than enum cases because the case values are used
375    in inline assembly code (actually data), where a known value is used.  An
376    assembly "immediate" constraint is not usable, as in some platforms immediate
377    literals are always prefixed by a character such as $ or #, which is not
378    needed or correct in data definitions.
379 
380    This complication is a pity: GCC documents the "x86 Operand Modifiers" as
381    non-portable; having the equivalent of the 'c' modifier on every platform
382    would allow for cleaner code.
383 
384    It is not necessary to use every case on any given machine, or to use only
385    these.  Still it is convenient to have a centralized definition to reuse. */
386 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_UNCONDITIONAL                    1
387 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_ZERO                 2
388 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NONZERO              3
389 
390 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_POSITIVE             4
391 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NONPOSITIVE          5
392 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NEGATIVE             6
393 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NONNEGATIVE          7
394 
395 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_EQUAL                8
396 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTEQUAL             9
397 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_LESS_UNSIGNED        10
398 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_LESS_SIGNED          11
399 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTGREATER_UNSIGNED  12
400 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTGREATER_SIGNED    13
401 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_GREATER_UNSIGNED     14
402 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_GREATER_SIGNED       15
403 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTLESS_UNSIGNED     16
404 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTLESS_SIGNED       17
405 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_AND                  18
406 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_NOTAND               19
407 
408 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_CONDITIONAL_ANY                  20
409 
410 #define JITTER_PATCH_IN_CASE_FAST_BRANCH_BRANCH_AND_LINK                  21
411 /* These should be extended in the future with floating-point conditionals. */
412 
413 /* The other values up to 999 included are reserved.  Fast branch cases starting
414    at 1000 are available for architecture-specific code to define. */
415 
416 #endif // #ifdef JITTER_HAVE_PATCH_IN
417 #endif // #ifndef JITTER_FAST_BRANCH_H_
418