
/*---------------------------------------------------------------*/
/*--- begin                               guest_arm_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_arm.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm_defs.h"
#include "guest_arm64_defs.h"  /* for crypto helper functions */


/* This file contains helper functions for arm guest code.  Calls to
   these functions are generated by the back end.  These calls are of
   course in the host machine code and this file will be compiled to
   host machine code, so that all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change a signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest_arm_toIR.c.
*/

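/* Orientation: rather than keeping materialised N/Z/C/V bits, the
   guest state holds a lazy four-word "flags thunk" (CC_OP, CC_DEP1,
   CC_DEP2, CC_NDEP), and the helpers below recompute individual
   flags from it on demand.  A minimal illustrative sketch (field
   roles as documented per-case in the helpers below): after the
   guest executes SUBS r0, r1, r2, toIR leaves

      CC_OP   = ARMG_CC_OP_SUB
      CC_DEP1 = r1   (argL)
      CC_DEP2 = r2   (argR)
      CC_NDEP = 0    (unused for SUB)

   and a subsequent "BHS ..." turns into a call like

      armg_calculate_condition((ARMCondHS << 4) | ARMG_CC_OP_SUB,
                               CC_DEP1, CC_DEP2, CC_NDEP)

   which guest_arm_spechelper (further down) will normally fold into
   the plain IR comparison CmpLE32U(argR, argL). */
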

/* Set to 1 to get detailed profiling info about individual N, Z, C
   and V flag evaluation. */
#define PROFILE_NZCV_FLAGS 0

#if PROFILE_NZCV_FLAGS

static UInt tab_n_eval[ARMG_CC_OP_NUMBER];
static UInt tab_z_eval[ARMG_CC_OP_NUMBER];
static UInt tab_c_eval[ARMG_CC_OP_NUMBER];
static UInt tab_v_eval[ARMG_CC_OP_NUMBER];
static UInt initted = 0;
static UInt tot_evals = 0;

static void initCounts ( void )
{
   UInt i;
   for (i = 0; i < ARMG_CC_OP_NUMBER; i++) {
      tab_n_eval[i] = tab_z_eval[i] = tab_c_eval[i] = tab_v_eval[i] = 0;
   }
   initted = 1;
}

static void showCounts ( void )
{
   UInt i;
   vex_printf("\n                 N          Z          C          V\n");
   vex_printf(  "---------------------------------------------------\n");
   for (i = 0; i < ARMG_CC_OP_NUMBER; i++) {
      vex_printf("CC_OP=%d  %9d  %9d  %9d  %9d\n",
                 i,
                 tab_n_eval[i], tab_z_eval[i],
                 tab_c_eval[i], tab_v_eval[i] );
   }
}

#define NOTE_N_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_n_eval)
#define NOTE_Z_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_z_eval)
#define NOTE_C_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_c_eval)
#define NOTE_V_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_v_eval)

#define NOTE_EVAL(_cc_op, _tab) \
   do { \
      if (!initted) initCounts(); \
      vassert( ((UInt)(_cc_op)) < ARMG_CC_OP_NUMBER); \
      _tab[(UInt)(_cc_op)]++; \
      tot_evals++; \
      if (0 == (tot_evals & 0xFFFFF)) \
        showCounts(); \
   } while (0)

#endif /* PROFILE_NZCV_FLAGS */


/* Calculate the N flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 31:1 are zero. */
static
UInt armg_calculate_flag_n ( UInt cc_op, UInt cc_dep1,
                             UInt cc_dep2, UInt cc_dep3 )
{
#  if PROFILE_NZCV_FLAGS
   NOTE_N_EVAL(cc_op);
#  endif

   switch (cc_op) {
      case ARMG_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         UInt nf   = (cc_dep1 >> ARMG_CC_SHIFT_N) & 1;
         return nf;
      }
      case ARMG_CC_OP_ADD: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL + argR;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_SUB: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL - argR;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_ADC: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL + argR + oldC;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_SBB: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL - argR - (oldC ^ 1);
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_LOGIC: {
         /* (res, shco, oldV) */
         UInt res  = cc_dep1;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt res  = cc_dep1;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt resHi32 = cc_dep2;
         UInt nf      = resHi32 >> 31;
         return nf;
      }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_flag_n"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_flag_n");
   }
}


/* Calculate the Z flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 31:1 are zero. */
static
UInt armg_calculate_flag_z ( UInt cc_op, UInt cc_dep1,
                             UInt cc_dep2, UInt cc_dep3 )
{
#  if PROFILE_NZCV_FLAGS
   NOTE_Z_EVAL(cc_op);
#  endif

   switch (cc_op) {
      case ARMG_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         UInt zf   = (cc_dep1 >> ARMG_CC_SHIFT_Z) & 1;
         return zf;
      }
      case ARMG_CC_OP_ADD: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL + argR;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_SUB: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL - argR;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_ADC: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL + argR + oldC;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_SBB: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL - argR - (oldC ^ 1);
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_LOGIC: {
         /* (res, shco, oldV) */
         UInt res  = cc_dep1;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt res  = cc_dep1;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt resLo32 = cc_dep1;
         UInt resHi32 = cc_dep2;
         UInt zf      = (resHi32|resLo32) == 0;
         return zf;
      }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_flag_z"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_flag_z");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the C flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 31:1 are zero. */
UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
                             UInt cc_dep2, UInt cc_dep3 )
{
#  if PROFILE_NZCV_FLAGS
   NOTE_C_EVAL(cc_op);
#  endif

   switch (cc_op) {
      case ARMG_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         UInt cf   = (cc_dep1 >> ARMG_CC_SHIFT_C) & 1;
         return cf;
      }
      case ARMG_CC_OP_ADD: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL + argR;
         UInt cf   = res < argL;
         return cf;
      }
      case ARMG_CC_OP_SUB: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt cf   = argL >= argR;
         return cf;
      }
      case ARMG_CC_OP_ADC: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL + argR + oldC;
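         /* Carry-out of the add: with no carry-in, the sum wraps iff
            res < argL; with carry-in, a wrap can also land exactly on
            argL (when argR == 0xFFFFFFFF), hence <= . */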
         UInt cf   = oldC ? (res <= argL) : (res < argL);
         return cf;
      }
      case ARMG_CC_OP_SBB: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
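         /* ARM's C after a subtract-with-carry is NOT(borrow), where
            the borrow-in is oldC ^ 1; so C is set iff argL is large
            enough that no borrow occurs. */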
         UInt cf   = oldC ? (argL >= argR) : (argL > argR);
         return cf;
      }
      case ARMG_CC_OP_LOGIC: {
         /* (res, shco, oldV) */
         UInt shco = cc_dep2;
         vassert((shco & ~1) == 0);
         UInt cf   = shco;
         return cf;
      }
      case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt oldC = (cc_dep3 >> 1) & 1;
         vassert((cc_dep3 & ~3) == 0);
         UInt cf   = oldC;
         return cf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt oldC    = (cc_dep3 >> 1) & 1;
         vassert((cc_dep3 & ~3) == 0);
         UInt cf      = oldC;
         return cf;
      }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_flag_c"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_flag_c");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the V flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 31:1 are zero. */
UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
                             UInt cc_dep2, UInt cc_dep3 )
{
#  if PROFILE_NZCV_FLAGS
   NOTE_V_EVAL(cc_op);
#  endif

   switch (cc_op) {
      case ARMG_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         UInt vf   = (cc_dep1 >> ARMG_CC_SHIFT_V) & 1;
         return vf;
      }
      case ARMG_CC_OP_ADD: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL + argR;
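         /* Signed overflow iff the operands have the same sign and
            the result's sign differs from both, i.e. bit 31 is set
            in both (res ^ argL) and (res ^ argR). */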
         UInt vf   = ((res ^ argL) & (res ^ argR)) >> 31;
         return vf;
      }
      case ARMG_CC_OP_SUB: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL - argR;
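         /* Signed overflow iff the operands differ in sign and the
            result's sign differs from argL's. */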
         UInt vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
         return vf;
      }
      case ARMG_CC_OP_ADC: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL + argR + oldC;
         UInt vf   = ((res ^ argL) & (res ^ argR)) >> 31;
         return vf;
      }
      case ARMG_CC_OP_SBB: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL - argR - (oldC ^ 1);
         UInt vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
         return vf;
      }
      case ARMG_CC_OP_LOGIC: {
         /* (res, shco, oldV) */
         UInt oldV = cc_dep3;
         vassert((oldV & ~1) == 0);
         UInt vf   = oldV;
         return vf;
      }
      case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt oldV = (cc_dep3 >> 0) & 1;
         vassert((cc_dep3 & ~3) == 0);
         UInt vf   = oldV;
         return vf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt oldV    = (cc_dep3 >> 0) & 1;
         vassert((cc_dep3 & ~3) == 0);
         UInt vf      = oldV;
         return vf;
      }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_flag_v"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_flag_v");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate NZCV from the supplied thunk components, in the positions
   they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
   Returned bits 27:0 are zero. */
UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
                                 UInt cc_dep2, UInt cc_dep3 )
{
   UInt f;
   UInt res = 0;
   f = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARMG_CC_SHIFT_N);
   f = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARMG_CC_SHIFT_Z);
   f = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARMG_CC_SHIFT_C);
   f = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARMG_CC_SHIFT_V);
   return res;
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the QC flag from the arguments, in the lowest bit
   of the word (bit 0).  Urr, having this out of line is bizarre.
   Push back inline. */
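/* The callers are expected to pass the two 64-bit halves of a result
   computed with saturation (resL1:resL2) and of the same computation
   done without saturation (resR1:resR2); any difference means
   saturation occurred, which is exactly when QC must be set. */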
UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
                              UInt resR1, UInt resR2 )
{
   if (resL1 != resR1 || resL2 != resR2)
      return 1;
   else
      return 0;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the specified condition from the thunk components, in the
   lowest bit of the word (bit 0).  Returned bits 31:1 are zero. */
UInt armg_calculate_condition ( UInt cond_n_op /* (ARMCondcode << 4) | cc_op */,
                                UInt cc_dep1,
                                UInt cc_dep2, UInt cc_dep3 )
{
   UInt cond  = cond_n_op >> 4;
   UInt cc_op = cond_n_op & 0xF;
   UInt nf, zf, vf, cf, inv;
   //   vex_printf("XXXXXXXX %x %x %x %x\n",
   //              cond_n_op, cc_dep1, cc_dep2, cc_dep3);

   // skip flags computation in this case
   if (cond == ARMCondAL) return 1;

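   /* ARM condition codes come in complementary pairs (EQ/NE, HS/LO,
      MI/PL, ...), with the negated member at the odd encoding, so
      bit 0 of the condition number selects whether to invert the
      primary test computed below. */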
   inv  = cond & 1;

   switch (cond) {
      case ARMCondEQ:    // Z=1         => z
      case ARMCondNE:    // Z=0
         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ zf;

      case ARMCondHS:    // C=1         => c
      case ARMCondLO:    // C=0
         cf = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ cf;

      case ARMCondMI:    // N=1         => n
      case ARMCondPL:    // N=0
         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ nf;

      case ARMCondVS:    // V=1         => v
      case ARMCondVC:    // V=0
         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ vf;

      case ARMCondHI:    // C=1 && Z=0   => c & ~z
      case ARMCondLS:    // C=0 || Z=1
         cf = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & (cf & ~zf));

      case ARMCondGE:    // N=V          => ~(n^v)
      case ARMCondLT:    // N!=V
         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & ~(nf ^ vf));

      case ARMCondGT:    // Z=0 && N=V   => ~z & ~(n^v)  =>  ~(z | (n^v))
      case ARMCondLE:    // Z=1 || N!=V
         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & ~(zf | (nf ^ vf)));

      case ARMCondAL: // handled above
      case ARMCondNV: // should never get here: Illegal instr
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_condition(ARM)"
                    "( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_condition(ARM)");
   }
}


/*---------------------------------------------------------------*/
/*--- Crypto instruction helpers                              ---*/
/*---------------------------------------------------------------*/

/* DIRTY HELPERS for doing AES support:
   * AESE (SubBytes, then ShiftRows)
   * AESD (InvShiftRows, then InvSubBytes)
   * AESMC (MixColumns)
   * AESIMC (InvMixColumns)
   These don't actually have to be dirty helpers -- they could be
   clean, but for the fact that they return a V128 and a clean helper
   can't do that.

   These just call onwards to the implementations of the same in
   guest_arm64_helpers.c.  In all of these cases, we expect |res| to
   be at least 8 aligned.
*/
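/* Argument convention for all the dirty helpers below: on this
   32-bit guest, each 128-bit vector operand arrives as four 32-bit
   words, most significant (_3) first, and is reassembled here into
   the two 64-bit halves that the arm64 implementations expect. */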
/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_AESE (
        /*OUT*/V128* res,
        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
   arm64g_dirtyhelper_AESE(res, argHi, argLo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_AESD (
        /*OUT*/V128* res,
        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
   arm64g_dirtyhelper_AESD(res, argHi, argLo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_AESMC (
        /*OUT*/V128* res,
        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
   arm64g_dirtyhelper_AESMC(res, argHi, argLo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_AESIMC (
        /*OUT*/V128* res,
        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
   arm64g_dirtyhelper_AESIMC(res, argHi, argLo);
}


/* DIRTY HELPERS for the SHA instruction family.  Same comments
   as for the AES group above apply.
*/

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1C (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1C(res, argDhi, argDlo,
                                 argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1P (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1P(res, argDhi, argDlo,
                                 argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1M (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1M(res, argDhi, argDlo,
                                 argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1SU0 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1SU0(res, argDhi, argDlo,
                                   argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA256H (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA256H(res, argDhi, argDlo,
                                   argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA256H2 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA256H2(res, argDhi, argDlo,
                                    argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA256SU1 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA256SU1(res, argDhi, argDlo,
                                     argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1SU1 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1SU1(res, argDhi, argDlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA256SU0 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA256SU0(res, argDhi, argDlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1H (
        /*OUT*/V128* res,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1H(res, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_VMULLP64 (
        /*OUT*/V128* res,
        UInt argN1, UInt argN0, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argN = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argM = (((ULong)argM1) << 32) | ((ULong)argM0);
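   /* VMULL.P64 performs the same 64x64 -> 128 carryless (polynomial)
      multiply as AArch64's PMULL, so delegate to that helper. */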
   arm64g_dirtyhelper_PMULLQ(res, argN, argM);
}


/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers.    ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- flags functions.                                        ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

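/* An example of the folding done below: the call

      armg_calculate_condition((ARMCondEQ << 4) | ARMG_CC_OP_SUB,
                               argL, argR, unused)

   denotes "EQ after SUBS argL, argR" and is rewritten to the IR
   expression 1Uto32(CmpEQ32(argL, argR)). */
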
static Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_arm_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "armg_calculate_condition" --------- */

   if (vex_streq(function_name, "armg_calculate_condition")) {

      /* specialise calls to the "armg_calculate_condition" function.
         Not sure whether this is strictly necessary, but: the
         replacement IR must produce only the values 0 or 1.  Bits
         31:1 are required to be zero. */
      IRExpr *cond_n_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cond_n_op = args[0]; /* (ARMCondcode << 4)  |  ARMG_CC_OP_* */
      cc_dep1   = args[1];
      cc_dep2   = args[2];
      cc_ndep   = args[3];

      /*---------------- SUB ----------------*/

      if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_SUB)) {
         /* EQ after SUB --> test argL == argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_SUB)) {
         /* NE after SUB --> test argL != argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cond_n_op, (ARMCondGT << 4) | ARMG_CC_OP_SUB)) {
         /* GT after SUB --> test argL >s argR
                         --> test argR <s argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep2, cc_dep1));
      }
      if (isU32(cond_n_op, (ARMCondLE << 4) | ARMG_CC_OP_SUB)) {
         /* LE after SUB --> test argL <=s argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }

      if (isU32(cond_n_op, (ARMCondLT << 4) | ARMG_CC_OP_SUB)) {
         /* LT after SUB --> test argL <s argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }

      if (isU32(cond_n_op, (ARMCondGE << 4) | ARMG_CC_OP_SUB)) {
         /* GE after SUB --> test argL >=s argR
                         --> test argR <=s argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep2, cc_dep1));
      }

      if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SUB)) {
         /* HS after SUB --> test argL >=u argR
                         --> test argR <=u argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
      }
      if (isU32(cond_n_op, (ARMCondLO << 4) | ARMG_CC_OP_SUB)) {
         /* LO after SUB --> test argL <u argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }

      if (isU32(cond_n_op, (ARMCondLS << 4) | ARMG_CC_OP_SUB)) {
         /* LS after SUB --> test argL <=u argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cond_n_op, (ARMCondHI << 4) | ARMG_CC_OP_SUB)) {
         /* HI after SUB --> test argL >u argR
                         --> test argR <u argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep2, cc_dep1));
      }

      /*---------------- SBB ----------------*/

      if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
         /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
         /* HS after SBB (same as C after SBB below)
            --> oldC ? (argL >=u argR) : (argL >u argR)
            --> oldC ? (argR <=u argL) : (argR <u argL)
         */
         return
            IRExpr_ITE(
               binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
               /* case oldC != 0 */
               unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
               /* case oldC == 0 */
               unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
            );
      }

      /*---------------- LOGIC ----------------*/

      if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
         /* EQ after LOGIC --> test res == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
         /* NE after LOGIC --> test res != 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
         /* PL after LOGIC --> test (res >> 31) == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shr32, cc_dep1, mkU8(31)),
                           mkU32(0)));
      }
      if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
         /* MI after LOGIC --> test (res >> 31) == 1 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shr32, cc_dep1, mkU8(31)),
                           mkU32(1)));
      }

      /*---------------- COPY ----------------*/

      /* --- 0,1 --- */
      if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_COPY)) {
         /* EQ after COPY --> (cc_dep1 >> ARMG_CC_SHIFT_Z) & 1 */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1,
                            mkU8(ARMG_CC_SHIFT_Z)),
                      mkU32(1));
      }
      if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_COPY)) {
         /* NE after COPY --> ((cc_dep1 >> ARMG_CC_SHIFT_Z) ^ 1) & 1 */
         return binop(Iop_And32,
                      binop(Iop_Xor32,
                            binop(Iop_Shr32, cc_dep1,
                                             mkU8(ARMG_CC_SHIFT_Z)),
                            mkU32(1)),
                      mkU32(1));
      }

      /* --- 4,5 --- */
      if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_COPY)) {
         /* MI after COPY --> (cc_dep1 >> ARMG_CC_SHIFT_N) & 1 */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1,
                            mkU8(ARMG_CC_SHIFT_N)),
                      mkU32(1));
      }
      if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_COPY)) {
         /* PL after COPY --> ((cc_dep1 >> ARMG_CC_SHIFT_N) ^ 1) & 1 */
         return binop(Iop_And32,
                      binop(Iop_Xor32,
                            binop(Iop_Shr32, cc_dep1,
                                             mkU8(ARMG_CC_SHIFT_N)),
                            mkU32(1)),
                      mkU32(1));
      }

      /* --- 12,13 --- */
      if (isU32(cond_n_op, (ARMCondGT << 4) | ARMG_CC_OP_COPY)) {
         /* GT after COPY --> ((z | (n^v)) & 1) ^ 1 */
         IRExpr* n = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_N));
         IRExpr* v = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_V));
         IRExpr* z = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_Z));
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Or32, z, binop(Iop_Xor32, n, v)),
                            mkU32(1)),
                      mkU32(1));
      }
      if (isU32(cond_n_op, (ARMCondLE << 4) | ARMG_CC_OP_COPY)) {
         /* LE after COPY --> ((z | (n^v)) & 1) ^ 0 */
         IRExpr* n = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_N));
         IRExpr* v = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_V));
         IRExpr* z = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_Z));
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Or32, z, binop(Iop_Xor32, n, v)),
                            mkU32(1)),
                      mkU32(0));
      }

      /*----------------- AL -----------------*/

      /* A critically important case for Thumb code.

         What we're trying to spot is the case where cond_n_op is an
         expression of the form Or32(..., 0xE0) since that means the
         caller is asking for CondAL and we can simply return 1
         without caring what the ... part is.  This is a potentially
         dodgy kludge in that it assumes that the ... part has zeroes
         in bits 7:4, so that the result of the Or32 is guaranteed to
         be 0xE in bits 7:4.  Given that the places where this first
         arg is constructed (in guest_arm_toIR.c) are very
         constrained, we can get away with this.  To make this
         guaranteed safe would require a new primop, Slice44 or some
         such, thusly

         Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]

         and we would then look for Slice44(0xE0, ...)
         which would give the required safety property.

         It would be infeasibly expensive to scan backwards through
         the entire block looking for an assignment to the temp, so
         just look at the previous 16 statements.  That should find it
         if it is an interesting case, as a result of how the
         boilerplate guff at the start of each Thumb insn translation
         is made.
      */
      if (cond_n_op->tag == Iex_RdTmp) {
         Int    j;
         IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
         Int    limit    = n_precedingStmts - 16;
         if (limit < 0) limit = 0;
         if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
         for (j = n_precedingStmts - 1; j >= limit; j--) {
            IRStmt* st = precedingStmts[j];
            if (st->tag == Ist_WrTmp
                && st->Ist.WrTmp.tmp == look_for
                && st->Ist.WrTmp.data->tag == Iex_Binop
                && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
                && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
               return mkU32(1);
         }
         /* Didn't find any useful binding to the first arg
            in the previous 16 stmts. */
      }
   }

   /* --------- specialising "armg_calculate_flag_c" --------- */

   else
   if (vex_streq(function_name, "armg_calculate_flag_c")) {

      /* specialise calls to the "armg_calculate_flag_c" function.
         Note that the returned value must be either 0 or 1; nonzero
         bits 31:1 are not allowed.  In turn, incoming oldV and oldC
         values (from the thunk) are assumed to have bits 31:1
         clear. */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0]; /* ARMG_CC_OP_* */
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
         /* Thunk args are (result, shco, oldV) */
         /* C after LOGIC --> shco */
         return cc_dep2;
      }

      if (isU32(cc_op, ARMG_CC_OP_SUB)) {
         /* Thunk args are (argL, argR, unused) */
         /* C after SUB --> argL >=u argR
                        --> argR <=u argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
      }

      if (isU32(cc_op, ARMG_CC_OP_SBB)) {
         /* This happens occasionally in softfloat code, eg __divdf3+140 */
         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
         /* C after SBB (same as HS after SBB above)
            --> oldC ? (argL >=u argR) : (argL >u argR)
            --> oldC ? (argR <=u argL) : (argR <u argL)
         */
         return
            IRExpr_ITE(
               binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
               /* case oldC != 0 */
               unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
               /* case oldC == 0 */
               unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
            );
      }

   }

   /* --------- specialising "armg_calculate_flag_v" --------- */

   else
   if (vex_streq(function_name, "armg_calculate_flag_v")) {

      /* specialise calls to the "armg_calculate_flag_v" function.
         Note that the returned value must be either 0 or 1; nonzero
         bits 31:1 are not allowed.  In turn, incoming oldV and oldC
         values (from the thunk) are assumed to have bits 31:1
         clear. */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0]; /* ARMG_CC_OP_* */
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
         /* Thunk args are (result, shco, oldV) */
         /* V after LOGIC --> oldV */
         return cc_ndep;
      }

      if (isU32(cc_op, ARMG_CC_OP_SUB)) {
         /* Thunk args are (argL, argR, unused) */
         /* V after SUB
            --> let res = argL - argR
                in ((argL ^ argR) & (argL ^ res)) >> 31
            --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
         */
         IRExpr* argL = cc_dep1;
         IRExpr* argR = cc_dep2;
         return
            binop(Iop_Shr32,
                  binop(Iop_And32,
                        binop(Iop_Xor32, argL, argR),
                        binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
                  ),
                  mkU8(31)
            );
      }

      if (isU32(cc_op, ARMG_CC_OP_SBB)) {
         /* This happens occasionally in softfloat code, eg __divdf3+140 */
         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
         /* V after SBB
            --> let res = argL - argR - (oldC ^ 1)
                in  (argL ^ argR) & (argL ^ res) & 1
         */
         return
            binop(
               Iop_And32,
               binop(
                  Iop_And32,
                  // argL ^ argR
                  binop(Iop_Xor32, cc_dep1, cc_dep2),
                  // argL ^ (argL - argR - (oldC ^ 1))
                  binop(Iop_Xor32,
                        cc_dep1,
                        binop(Iop_Sub32,
                              binop(Iop_Sub32, cc_dep1, cc_dep2),
                              binop(Iop_Xor32, cc_ndep, mkU32(1)))
                  )
               ),
               mkU32(1)
            );
      }

   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}


/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* VISIBLE TO LIBVEX CLIENT */
#if 0
void LibVEX_GuestARM_put_flags ( UInt flags_native,
                                 /*OUT*/VexGuestARMState* vex_state )
{
   vassert(0); // FIXME

   /* Mask out everything except N Z V C. */
   flags_native
      &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);

   vex_state->guest_CC_OP   = ARMG_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = flags_native;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
#endif

/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestARM_get_cpsr ( /*IN*/const VexGuestARMState* vex_state )
{
   UInt cpsr = 0;
   // NZCV
   cpsr |= armg_calculate_flags_nzcv(
               vex_state->guest_CC_OP,
               vex_state->guest_CC_DEP1,
               vex_state->guest_CC_DEP2,
               vex_state->guest_CC_NDEP
            );
   vassert(0 == (cpsr & 0x0FFFFFFF));
   // Q
   if (vex_state->guest_QFLAG32 > 0)
      cpsr |= (1 << 27);
   // GE
   if (vex_state->guest_GEFLAG0 > 0)
      cpsr |= (1 << 16);
   if (vex_state->guest_GEFLAG1 > 0)
      cpsr |= (1 << 17);
   if (vex_state->guest_GEFLAG2 > 0)
      cpsr |= (1 << 18);
   if (vex_state->guest_GEFLAG3 > 0)
      cpsr |= (1 << 19);
   // M
   cpsr |= (1 << 4); // 0b10000 means user-mode
   // J,T   J (bit 24) is zero by initialisation above
   // T  we copy from R15T[0]
   if (vex_state->guest_R15T & 1)
      cpsr |= (1 << 5);
   // ITSTATE we punt on for the time being.  Could compute it
   // if needed though.
   // E, endianness, 0 (littleendian) from initialisation above
   // A,I,F disable some async exceptions.  Not sure about these.
   // Leave as zero for the time being.
   return cpsr;
}

/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestARM_initialise ( /*OUT*/VexGuestARMState* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   vex_state->guest_R0  = 0;
   vex_state->guest_R1  = 0;
   vex_state->guest_R2  = 0;
   vex_state->guest_R3  = 0;
   vex_state->guest_R4  = 0;
   vex_state->guest_R5  = 0;
   vex_state->guest_R6  = 0;
   vex_state->guest_R7  = 0;
   vex_state->guest_R8  = 0;
   vex_state->guest_R9  = 0;
   vex_state->guest_R10 = 0;
   vex_state->guest_R11 = 0;
   vex_state->guest_R12 = 0;
   vex_state->guest_R13 = 0;
   vex_state->guest_R14 = 0;
   vex_state->guest_R15T = 0;  /* NB: implies ARM mode */

   vex_state->guest_CC_OP   = ARMG_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_QFLAG32 = 0;
   vex_state->guest_GEFLAG0 = 0;
   vex_state->guest_GEFLAG1 = 0;
   vex_state->guest_GEFLAG2 = 0;
   vex_state->guest_GEFLAG3 = 0;

   vex_state->guest_EMNOTE  = EmNote_NONE;
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;
   vex_state->guest_NRADDR  = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->guest_D0  = 0;
   vex_state->guest_D1  = 0;
   vex_state->guest_D2  = 0;
   vex_state->guest_D3  = 0;
   vex_state->guest_D4  = 0;
   vex_state->guest_D5  = 0;
   vex_state->guest_D6  = 0;
   vex_state->guest_D7  = 0;
   vex_state->guest_D8  = 0;
   vex_state->guest_D9  = 0;
   vex_state->guest_D10 = 0;
   vex_state->guest_D11 = 0;
   vex_state->guest_D12 = 0;
   vex_state->guest_D13 = 0;
   vex_state->guest_D14 = 0;
   vex_state->guest_D15 = 0;
   vex_state->guest_D16 = 0;
   vex_state->guest_D17 = 0;
   vex_state->guest_D18 = 0;
   vex_state->guest_D19 = 0;
   vex_state->guest_D20 = 0;
   vex_state->guest_D21 = 0;
   vex_state->guest_D22 = 0;
   vex_state->guest_D23 = 0;
   vex_state->guest_D24 = 0;
   vex_state->guest_D25 = 0;
   vex_state->guest_D26 = 0;
   vex_state->guest_D27 = 0;
   vex_state->guest_D28 = 0;
   vex_state->guest_D29 = 0;
   vex_state->guest_D30 = 0;
   vex_state->guest_D31 = 0;

   /* ARM encoded; zero is the default as it happens (result flags
      (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
      all exns masked, all exn sticky bits cleared). */
   vex_state->guest_FPSCR = 0;

   vex_state->guest_TPIDRURO = 0;
   vex_state->guest_TPIDRURW = 0;

   /* Not in a Thumb IT block. */
   vex_state->guest_ITSTATE = 0;
}


/*-----------------------------------------------------------*/
/*--- Describing the arm guest state, for the benefit     ---*/
/*--- of iropt and instrumenters.                         ---*/
/*-----------------------------------------------------------*/

/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   We enforce precise exns for guest R13(sp), R15T(pc), R7, R11.

   Only R13(sp) is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_arm_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int sp_min = offsetof(VexGuestARMState, guest_R13);
   Int sp_max = sp_min + 4 - 1;
   Int pc_min = offsetof(VexGuestARMState, guest_R15T);
   Int pc_max = pc_min + 4 - 1;

   if (maxoff < sp_min || minoff > sp_max) {
      /* no overlap with sp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < pc_min || minoff > pc_max) {
      /* no overlap with pc */
   } else {
      return True;
   }

   /* We appear to need precise updates of R11 in order to get proper
      stacktraces from non-optimised code. */
   Int r11_min = offsetof(VexGuestARMState, guest_R11);
   Int r11_max = r11_min + 4 - 1;

   if (maxoff < r11_min || minoff > r11_max) {
      /* no overlap with r11 */
   } else {
      return True;
   }

   /* Ditto R7, particularly needed for proper stacktraces in Thumb
      code. */
   Int r7_min = offsetof(VexGuestARMState, guest_R7);
   Int r7_max = r7_min + 4 - 1;

   if (maxoff < r7_min || minoff > r7_max) {
      /* no overlap with r7 */
   } else {
      return True;
   }

   return False;
}



#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestARMState, field),            \
      (sizeof ((VexGuestARMState*)0)->field) }

VexGuestLayout
   armGuest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestARMState),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestARMState,guest_R13),
          .sizeof_SP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestARMState,guest_R15T),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 10,

          /* flags thunk: OP is always defd, whereas DEP1 and DEP2
             have to be tracked.  See detailed comment in gdefs.h on
             meaning of thunk fields. */
          .alwaysDefd
             = { /* 0 */ ALWAYSDEFD(guest_R15T),
                 /* 1 */ ALWAYSDEFD(guest_CC_OP),
                 /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
                 /* 3 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 4 */ ALWAYSDEFD(guest_CMSTART),
                 /* 5 */ ALWAYSDEFD(guest_CMLEN),
                 /* 6 */ ALWAYSDEFD(guest_NRADDR),
                 /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
                 /* 8 */ ALWAYSDEFD(guest_TPIDRURO),
                 /* 9 */ ALWAYSDEFD(guest_ITSTATE)
               }
        };


/*---------------------------------------------------------------*/
/*--- end                                 guest_arm_helpers.c ---*/
/*---------------------------------------------------------------*/