1
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_generic_bb_to_IR.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "guest_generic_bb_to_IR.h"
42
43
44 /* Forwards .. */
45 VEX_REGPARM(2)
46 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s );
47 VEX_REGPARM(1)
48 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
49 VEX_REGPARM(1)
50 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
51 VEX_REGPARM(1)
52 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
53 VEX_REGPARM(1)
54 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
55 VEX_REGPARM(1)
56 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
57 VEX_REGPARM(1)
58 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
59 VEX_REGPARM(1)
60 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
61 VEX_REGPARM(1)
62 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
63 VEX_REGPARM(1)
64 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
65 VEX_REGPARM(1)
66 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
67 VEX_REGPARM(1)
68 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
69 VEX_REGPARM(1)
70 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
71
72 VEX_REGPARM(2)
73 static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s );
74 VEX_REGPARM(1)
75 static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 );
76 VEX_REGPARM(1)
77 static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 );
78 VEX_REGPARM(1)
79 static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 );
80 VEX_REGPARM(1)
81 static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 );
82 VEX_REGPARM(1)
83 static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 );
84 VEX_REGPARM(1)
85 static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 );
86 VEX_REGPARM(1)
87 static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 );
88 VEX_REGPARM(1)
89 static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 );
90 VEX_REGPARM(1)
91 static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 );
92 VEX_REGPARM(1)
93 static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 );
94 VEX_REGPARM(1)
95 static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 );
96 VEX_REGPARM(1)
97 static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 );
98
99 /* Small helpers */
const_False(void * callback_opaque,Addr a)100 static Bool const_False ( void* callback_opaque, Addr a ) {
101 return False;
102 }
103
104 /* Disassemble a complete basic block, starting at guest_IP_start,
105 returning a new IRSB. The disassembler may chase across basic
106 block boundaries if it wishes and if chase_into_ok allows it.
107 The precise guest address ranges from which code has been taken
108 are written into vge. guest_IP_bbstart is taken to be the IP in
109 the guest's address space corresponding to the instruction at
110 &guest_code[0].
111
   dis_instr_fn is the arch-specific function that disassembles one
   instruction; it is this that does the real work.
114
115 needs_self_check is a callback used to ask the caller which of the
116 extents, if any, a self check is required for. The returned value
117 is a bitmask with a 1 in position i indicating that the i'th extent
118 needs a check. Since there can be at most 3 extents, the returned
119 values must be between 0 and 7.
120
121 The number of extents which did get a self check (0 to 3) is put in
122 n_sc_extents. The caller already knows this because it told us
123 which extents to add checks for, via the needs_self_check callback,
124 but we ship the number back out here for the caller's convenience.
125
126 preamble_function is a callback which allows the caller to add
127 its own IR preamble (following the self-check, if any). May be
128 NULL. If non-NULL, the IRSB under construction is handed to
129 this function, which presumably adds IR statements to it. The
130 callback may optionally complete the block and direct bb_to_IR
131 not to disassemble any instructions into it; this is indicated
132 by the callback returning True.
133
   offB_GUEST_CMSTART and offB_GUEST_CMLEN are the offsets of
   guest_CMSTART and guest_CMLEN.  Since this routine has to work for
   any guest state, without knowing what it is, those offsets have to
   be passed in.
137
138 callback_opaque is a caller-supplied pointer to data which the
139 callbacks may want to see. Vex has no idea what it is.
140 (In fact it's a VgInstrumentClosure.)
141 */
142
143 /* Regarding IP updating. dis_instr_fn (that does the guest specific
144 work of disassembling an individual instruction) must finish the
145 resulting IR with "PUT(guest_IP) = ". Hence in all cases it must
146 state the next instruction address.
147
148 If the block is to be ended at that point, then this routine
149 (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to
150 make a transfer (of the right kind) to "GET(guest_IP)". Hence if
151 dis_instr_fn generates incorrect IP updates we will see it
152 immediately (due to jumping to the wrong next guest address).
153
154 However it is also necessary to set this up so it can be optimised
155 nicely. The IRSB exit is defined to update the guest IP, so that
156 chaining works -- since the chain_me stubs expect the chain-to
157 address to be in the guest state. Hence what the IRSB next fields
158 will contain initially is (implicitly)
159
160 PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next]
161
   which looks pretty strange at first.  E.g. an unconditional branch
   to some address 0x123456 looks like this:
164
165 PUT(guest_IP) = 0x123456; // dis_instr_fn generates this
166 // the exit
167 PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring
168
169 after redundant-GET and -PUT removal by iropt, we get what we want:
170
171 // the exit
172 PUT(guest_IP) [implicitly] = 0x123456; exit-Boring
173
174 This makes the IRSB-end case the same as the side-exit case: update
175 IP, then transfer. There is no redundancy of representation for
176 the destination, and we use the destination specified by
177 dis_instr_fn, so any errors it makes show up sooner.
178 */
179
bb_to_IR(VexGuestExtents * vge,UInt * n_sc_extents,UInt * n_guest_instrs,VexRegisterUpdates * pxControl,void * callback_opaque,DisOneInstrFn dis_instr_fn,const UChar * guest_code,Addr guest_IP_bbstart,Bool (* chase_into_ok)(void *,Addr),VexEndness host_endness,Bool sigill_diag,VexArch arch_guest,const VexArchInfo * archinfo_guest,const VexAbiInfo * abiinfo_both,IRType guest_word_type,UInt (* needs_self_check)(void *,VexRegisterUpdates *,const VexGuestExtents *),Bool (* preamble_function)(void *,IRSB *),Int offB_GUEST_CMSTART,Int offB_GUEST_CMLEN,Int offB_GUEST_IP,Int szB_GUEST_IP)180 IRSB* bb_to_IR (
181 /*OUT*/VexGuestExtents* vge,
182 /*OUT*/UInt* n_sc_extents,
183 /*OUT*/UInt* n_guest_instrs, /* stats only */
184 /*MOD*/VexRegisterUpdates* pxControl,
185 /*IN*/ void* callback_opaque,
186 /*IN*/ DisOneInstrFn dis_instr_fn,
187 /*IN*/ const UChar* guest_code,
188 /*IN*/ Addr guest_IP_bbstart,
189 /*IN*/ Bool (*chase_into_ok)(void*,Addr),
190 /*IN*/ VexEndness host_endness,
191 /*IN*/ Bool sigill_diag,
192 /*IN*/ VexArch arch_guest,
193 /*IN*/ const VexArchInfo* archinfo_guest,
194 /*IN*/ const VexAbiInfo* abiinfo_both,
195 /*IN*/ IRType guest_word_type,
196 /*IN*/ UInt (*needs_self_check)
197 (void*, /*MB_MOD*/VexRegisterUpdates*,
198 const VexGuestExtents*),
199 /*IN*/ Bool (*preamble_function)(void*,IRSB*),
200 /*IN*/ Int offB_GUEST_CMSTART,
201 /*IN*/ Int offB_GUEST_CMLEN,
202 /*IN*/ Int offB_GUEST_IP,
203 /*IN*/ Int szB_GUEST_IP
204 )
205 {
206 Long delta;
207 Int i, n_instrs, first_stmt_idx;
208 Bool resteerOK, debug_print;
209 DisResult dres;
210 IRStmt* imark;
211 IRStmt* nop;
212 static Int n_resteers = 0;
213 Int d_resteers = 0;
214 Int selfcheck_idx = 0;
215 IRSB* irsb;
216 Addr guest_IP_curr_instr;
217 IRConst* guest_IP_bbstart_IRConst = NULL;
218 Int n_cond_resteers_allowed = 2;
219 UShort tmpsize;
220
221 Bool (*resteerOKfn)(void*,Addr) = NULL;
222
223 debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
224
225 /* check sanity .. */
226 vassert(sizeof(HWord) == sizeof(void*));
227 vassert(vex_control.guest_max_insns >= 1);
228 vassert(vex_control.guest_max_insns <= 100);
229 vassert(vex_control.guest_max_bytes >= 1);
230 vassert(vex_control.guest_max_bytes <= 5000);
231 vassert(vex_control.guest_chase_thresh >= 0);
232 vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
233 vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
234
235 if (guest_word_type == Ity_I32) {
236 vassert(szB_GUEST_IP == 4);
237 vassert((offB_GUEST_IP % 4) == 0);
238 } else {
239 vassert(szB_GUEST_IP == 8);
240 vassert((offB_GUEST_IP % 8) == 0);
241 }
242
243 /* Although we will try to disassemble up to vex_control.guest_max_insns
244 insns into the block, the individual insn assemblers may hint to us that a
245 disassembled instruction is verbose. In that case we will lower the limit
246 so as to ensure that the JIT doesn't run out of space. See bug 375839 for
247 the motivating example. */
248 Int guest_max_insns_really = vex_control.guest_max_insns;
249
250 /* Start a new, empty extent. */
251 vge->n_used = 1;
252 vge->base[0] = guest_IP_bbstart;
253 vge->len[0] = 0;
254 *n_sc_extents = 0;
255
256 /* And a new IR superblock to dump the result into. */
257 irsb = emptyIRSB();
258
259 /* Delta keeps track of how far along the guest_code array we have
260 so far gone. */
261 delta = 0;
262 n_instrs = 0;
263 *n_guest_instrs = 0;
264
265 /* Guest addresses as IRConsts. Used in self-checks to specify the
266 restart-after-discard point. */
267 guest_IP_bbstart_IRConst
268 = guest_word_type==Ity_I32
269 ? IRConst_U32(toUInt(guest_IP_bbstart))
270 : IRConst_U64(guest_IP_bbstart);
271
272 /* Leave 15 spaces in which to put the check statements for a self
273 checking translation (up to 3 extents, and 5 stmts required for
274 each). We won't know until later the extents and checksums of
275 the areas, if any, that need to be checked. */
276 nop = IRStmt_NoOp();
277 selfcheck_idx = irsb->stmts_used;
278 for (i = 0; i < 3 * 5; i++)
279 addStmtToIRSB( irsb, nop );
280
281 /* If the caller supplied a function to add its own preamble, use
282 it now. */
283 if (preamble_function) {
284 Bool stopNow = preamble_function( callback_opaque, irsb );
285 if (stopNow) {
286 /* The callback has completed the IR block without any guest
287 insns being disassembled into it, so just return it at
288 this point, even if a self-check was requested - as there
289 is nothing to self-check. The 15 self-check no-ops will
290 still be in place, but they are harmless. */
291 return irsb;
292 }
293 }
294
295 /* Process instructions. */
296 while (True) {
297 vassert(n_instrs < guest_max_insns_really);
298
299 /* Regardless of what chase_into_ok says, is chasing permissible
300 at all right now? Set resteerOKfn accordingly. */
301 resteerOK
302 = toBool(
303 n_instrs < vex_control.guest_chase_thresh
304 /* we can't afford to have a resteer once we're on the
305 last extent slot. */
306 && vge->n_used < 3
307 );
308
309 resteerOKfn
310 = resteerOK ? chase_into_ok : const_False;
311
312 /* n_cond_resteers_allowed keeps track of whether we're still
313 allowing dis_instr_fn to chase conditional branches. It
314 starts (at 2) and gets decremented each time dis_instr_fn
315 tells us it has chased a conditional branch. We then
316 decrement it, and use it to tell later calls to dis_instr_fn
317 whether or not it is allowed to chase conditional
318 branches. */
319 vassert(n_cond_resteers_allowed >= 0 && n_cond_resteers_allowed <= 2);
320
321 /* This is the IP of the instruction we're just about to deal
322 with. */
323 guest_IP_curr_instr = guest_IP_bbstart + delta;
324
325 /* This is the irsb statement array index of the first stmt in
326 this insn. That will always be the instruction-mark
327 descriptor. */
328 first_stmt_idx = irsb->stmts_used;
329
330 /* Add an instruction-mark statement. We won't know until after
331 disassembling the instruction how long it instruction is, so
332 just put in a zero length and we'll fix it up later.
333
334 On ARM, the least significant bit of the instr address
335 distinguishes ARM vs Thumb instructions. All instructions
336 actually start on at least 2-aligned addresses. So we need
337 to ignore the bottom bit of the insn address when forming the
338 IMark's address field, but put that bottom bit in the delta
339 field, so that comparisons against guest_R15T for Thumb can
340 be done correctly. By inspecting the delta field,
341 instruction processors can determine whether the instruction
342 was originally Thumb or ARM. For more details of this
343 convention, see comments on definition of guest_R15T in
344 libvex_guest_arm.h. */
345 if (arch_guest == VexArchARM && (guest_IP_curr_instr & 1)) {
346 /* Thumb insn => mask out the T bit, but put it in delta */
347 addStmtToIRSB( irsb,
348 IRStmt_IMark(guest_IP_curr_instr & ~(Addr)1,
349 0, /* len */
350 1 /* delta */
351 )
352 );
353 } else {
354 /* All other targets: store IP as-is, and set delta to zero. */
355 addStmtToIRSB( irsb,
356 IRStmt_IMark(guest_IP_curr_instr,
357 0, /* len */
358 0 /* delta */
359 )
360 );
361 }
362
363 if (debug_print && n_instrs > 0)
364 vex_printf("\n");
365
366 /* Finally, actually disassemble an instruction. */
367 vassert(irsb->next == NULL);
368 dres = dis_instr_fn ( irsb,
369 resteerOKfn,
370 toBool(n_cond_resteers_allowed > 0),
371 callback_opaque,
372 guest_code,
373 delta,
374 guest_IP_curr_instr,
375 arch_guest,
376 archinfo_guest,
377 abiinfo_both,
378 host_endness,
379 sigill_diag );
380
381 /* stay sane ... */
382 vassert(dres.whatNext == Dis_StopHere
383 || dres.whatNext == Dis_Continue
384 || dres.whatNext == Dis_ResteerU
385 || dres.whatNext == Dis_ResteerC);
386 /* ... disassembled insn length is sane ... */
387 vassert(dres.len >= 0 && dres.len <= 24);
388 /* ... continueAt is zero if no resteer requested ... */
389 if (dres.whatNext != Dis_ResteerU && dres.whatNext != Dis_ResteerC)
390 vassert(dres.continueAt == 0);
391 /* ... if we disallowed conditional resteers, check that one
392 didn't actually happen anyway ... */
393 if (n_cond_resteers_allowed == 0)
394 vassert(dres.whatNext != Dis_ResteerC);
395
396 /* If the disassembly function passed us a hint, take note of it. */
397 if (LIKELY(dres.hint == Dis_HintNone)) {
398 /* Do nothing */
399 } else {
400 vassert(dres.hint == Dis_HintVerbose);
401 /* The current insn is known to be verbose. Lower the max insns limit
402 if necessary so as to avoid running the JIT out of space in the
403 event that we've encountered the start of a long sequence of them.
404 This is expected to be a very rare event. In any case the remaining
405 limit (30 insns) is still so high that most blocks will terminate
406 anyway before then. So this is very unlikely to give a perf hit in
407 practice. See bug 375839 for the motivating example. */
408 if (guest_max_insns_really > 30) {
409 guest_max_insns_really = 30;
410 }
411 }
412
413 /* Fill in the insn-mark length field. */
414 vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts_used);
415 imark = irsb->stmts[first_stmt_idx];
416 vassert(imark);
417 vassert(imark->tag == Ist_IMark);
418 vassert(imark->Ist.IMark.len == 0);
419 imark->Ist.IMark.len = dres.len;
420
421 /* Print the resulting IR, if needed. */
422 if (vex_traceflags & VEX_TRACE_FE) {
423 for (i = first_stmt_idx; i < irsb->stmts_used; i++) {
424 vex_printf(" ");
425 ppIRStmt(irsb->stmts[i]);
426 vex_printf("\n");
427 }
428 }
429
430 /* Individual insn disassembly may not mess with irsb->next.
431 This function is the only place where it can be set. */
432 vassert(irsb->next == NULL);
433 vassert(irsb->jumpkind == Ijk_Boring);
434 vassert(irsb->offsIP == 0);
435
436 /* Update the VexGuestExtents we are constructing. */
437 /* If vex_control.guest_max_insns is required to be < 100 and
438 each insn is at max 20 bytes long, this limit of 5000 then
439 seems reasonable since the max possible extent length will be
440 100 * 20 == 2000. */
441 vassert(vge->len[vge->n_used-1] < 5000);
442 tmpsize = toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
443
444 /* If we've gone over the maximum lift size, roll back and abort */
445 if (tmpsize > vex_control.guest_max_bytes) {
446 if (n_instrs == 0) {
447 vpanic("Not enough bytes given to decode even a single instruction");
448 }
449 irsb->stmts_used = first_stmt_idx;
450 /* first_stmt_idx is never read from again except to sanity check,
451 so it's safe to set a fake value here */
452 first_stmt_idx--;
453 dres.whatNext = Dis_StopHere;
454 dres.jk_StopHere = Ijk_Boring;
455 } else {
456 n_instrs++;
457 vge->len[vge->n_used-1] = tmpsize;
458 }
459
460 /* Individual insn disassembly must finish the IR for each
461 instruction with an assignment to the guest PC. */
462 vassert(first_stmt_idx < irsb->stmts_used);
463 /* it follows that irsb->stmts_used must be > 0 */
464 { IRStmt* st = irsb->stmts[irsb->stmts_used-1];
465 vassert(st);
466 vassert(st->tag == Ist_Put);
467 vassert(st->Ist.Put.offset == offB_GUEST_IP);
468 /* Really we should also check that the type of the Put'd data
469 == guest_word_type, but that's a bit expensive. */
470 }
471
472 /* Advance delta (inconspicuous but very important :-) */
473 delta += (Long)dres.len;
474
475 switch (dres.whatNext) {
476 case Dis_Continue:
477 vassert(dres.continueAt == 0);
478 vassert(dres.jk_StopHere == Ijk_INVALID);
479 if (n_instrs < guest_max_insns_really &&
480 vge->len[vge->n_used-1] < vex_control.guest_max_bytes) {
481 /* keep going */
482 } else {
483 /* We have to stop. See comment above re irsb field
484 settings here. */
485 irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
486 /* irsb->jumpkind must already by Ijk_Boring */
487 irsb->offsIP = offB_GUEST_IP;
488 goto done;
489 }
490 break;
491 case Dis_StopHere:
492 vassert(dres.continueAt == 0);
493 vassert(dres.jk_StopHere != Ijk_INVALID);
494 /* See comment above re irsb field settings here. */
495 irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
496 irsb->jumpkind = dres.jk_StopHere;
497 irsb->offsIP = offB_GUEST_IP;
498 goto done;
499
500 case Dis_ResteerU:
501 case Dis_ResteerC:
502 /* Check that we actually allowed a resteer .. */
503 vassert(resteerOK);
504 if (dres.whatNext == Dis_ResteerC) {
505 vassert(n_cond_resteers_allowed > 0);
506 n_cond_resteers_allowed--;
507 }
508 /* figure out a new delta to continue at. */
509 vassert(resteerOKfn(callback_opaque,dres.continueAt));
510 delta = dres.continueAt - guest_IP_bbstart;
511 /* we now have to start a new extent slot. */
512 vge->n_used++;
513 vassert(vge->n_used <= 3);
514 vge->base[vge->n_used-1] = dres.continueAt;
515 vge->len[vge->n_used-1] = 0;
516 n_resteers++;
517 d_resteers++;
518 if (0 && (n_resteers & 0xFF) == 0)
519 vex_printf("resteer[%d,%d] to 0x%llx (delta = %lld)\n",
520 n_resteers, d_resteers,
521 dres.continueAt, delta);
522 break;
523 default:
524 vpanic("bb_to_IR");
525 }
526 }
527 /*NOTREACHED*/
528 vassert(0);
529
530 done:
531 /* We're done. The only thing that might need attending to is that
532 a self-checking preamble may need to be created. If so it gets
533 placed in the 15 slots reserved above.
534
535 The scheme is to compute a rather crude checksum of the code
536 we're making a translation of, and add to the IR a call to a
537 helper routine which recomputes the checksum every time the
538 translation is run, and requests a retranslation if it doesn't
539 match. This is obviously very expensive and considerable
540 efforts are made to speed it up:
541
542 * the checksum is computed from all the naturally aligned
543 host-sized words that overlap the translated code. That means
544 it could depend on up to 7 bytes before and 7 bytes after
545 which aren't part of the translated area, and so if those
546 change then we'll unnecessarily have to discard and
547 retranslate. This seems like a pretty remote possibility and
548 it seems as if the benefit of not having to deal with the ends
549 of the range at byte precision far outweigh any possible extra
550 translations needed.
551
552 * there's a generic routine and 12 specialised cases, which
553 handle the cases of 1 through 12-word lengths respectively.
554 They seem to cover about 90% of the cases that occur in
555 practice.
556
557 We ask the caller, via needs_self_check, which of the 3 vge
558 extents needs a check, and only generate check code for those
559 that do.
560 */
561 {
562 Addr base2check;
563 UInt len2check;
564 HWord expectedhW;
565 IRTemp tistart_tmp, tilen_tmp;
566 HWord VEX_REGPARM(2) (*fn_generic)(HWord, HWord);
567 HWord VEX_REGPARM(1) (*fn_spec)(HWord);
568 const HChar* nm_generic;
569 const HChar* nm_spec;
570 HWord fn_generic_entry = 0;
571 HWord fn_spec_entry = 0;
572 UInt host_word_szB = sizeof(HWord);
573 IRType host_word_type = Ity_INVALID;
574
575 UInt extents_needing_check
576 = needs_self_check(callback_opaque, pxControl, vge);
577
578 if (host_word_szB == 4) host_word_type = Ity_I32;
579 if (host_word_szB == 8) host_word_type = Ity_I64;
580 vassert(host_word_type != Ity_INVALID);
581
582 vassert(vge->n_used >= 1 && vge->n_used <= 3);
583
584 /* Caller shouldn't claim that nonexistent extents need a
585 check. */
586 vassert((extents_needing_check >> vge->n_used) == 0);
587
588 for (i = 0; i < vge->n_used; i++) {
589
590 /* Do we need to generate a check for this extent? */
591 if ((extents_needing_check & (1 << i)) == 0)
592 continue;
593
594 /* Tell the caller */
595 (*n_sc_extents)++;
596
597 /* the extent we're generating a check for */
598 base2check = vge->base[i];
599 len2check = vge->len[i];
600
601 /* stay sane */
602 vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
603
604 /* Skip the check if the translation involved zero bytes */
605 if (len2check == 0)
606 continue;
607
608 HWord first_hW = ((HWord)base2check)
609 & ~(HWord)(host_word_szB-1);
610 HWord last_hW = (((HWord)base2check) + len2check - 1)
611 & ~(HWord)(host_word_szB-1);
612 vassert(first_hW <= last_hW);
613 HWord hW_diff = last_hW - first_hW;
614 vassert(0 == (hW_diff & (host_word_szB-1)));
615 HWord hWs_to_check = (hW_diff + host_word_szB) / host_word_szB;
616 vassert(hWs_to_check > 0
617 && hWs_to_check < 1004/*arbitrary*/ / host_word_szB);
618
619 /* vex_printf("%lx %lx %ld\n", first_hW, last_hW, hWs_to_check); */
620
621 if (host_word_szB == 8) {
622 fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
623 genericg_compute_checksum_8al;
624 nm_generic = "genericg_compute_checksum_8al";
625 } else {
626 fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
627 genericg_compute_checksum_4al;
628 nm_generic = "genericg_compute_checksum_4al";
629 }
630
631 fn_spec = NULL;
632 nm_spec = NULL;
633
634 if (host_word_szB == 8) {
635 const HChar* nm = NULL;
636 ULong VEX_REGPARM(1) (*fn)(HWord) = NULL;
637 switch (hWs_to_check) {
638 case 1: fn = genericg_compute_checksum_8al_1;
639 nm = "genericg_compute_checksum_8al_1"; break;
640 case 2: fn = genericg_compute_checksum_8al_2;
641 nm = "genericg_compute_checksum_8al_2"; break;
642 case 3: fn = genericg_compute_checksum_8al_3;
643 nm = "genericg_compute_checksum_8al_3"; break;
644 case 4: fn = genericg_compute_checksum_8al_4;
645 nm = "genericg_compute_checksum_8al_4"; break;
646 case 5: fn = genericg_compute_checksum_8al_5;
647 nm = "genericg_compute_checksum_8al_5"; break;
648 case 6: fn = genericg_compute_checksum_8al_6;
649 nm = "genericg_compute_checksum_8al_6"; break;
650 case 7: fn = genericg_compute_checksum_8al_7;
651 nm = "genericg_compute_checksum_8al_7"; break;
652 case 8: fn = genericg_compute_checksum_8al_8;
653 nm = "genericg_compute_checksum_8al_8"; break;
654 case 9: fn = genericg_compute_checksum_8al_9;
655 nm = "genericg_compute_checksum_8al_9"; break;
656 case 10: fn = genericg_compute_checksum_8al_10;
657 nm = "genericg_compute_checksum_8al_10"; break;
658 case 11: fn = genericg_compute_checksum_8al_11;
659 nm = "genericg_compute_checksum_8al_11"; break;
660 case 12: fn = genericg_compute_checksum_8al_12;
661 nm = "genericg_compute_checksum_8al_12"; break;
662 default: break;
663 }
664 fn_spec = (VEX_REGPARM(1) HWord(*)(HWord)) fn;
665 nm_spec = nm;
666 } else {
667 const HChar* nm = NULL;
668 UInt VEX_REGPARM(1) (*fn)(HWord) = NULL;
669 switch (hWs_to_check) {
670 case 1: fn = genericg_compute_checksum_4al_1;
671 nm = "genericg_compute_checksum_4al_1"; break;
672 case 2: fn = genericg_compute_checksum_4al_2;
673 nm = "genericg_compute_checksum_4al_2"; break;
674 case 3: fn = genericg_compute_checksum_4al_3;
675 nm = "genericg_compute_checksum_4al_3"; break;
676 case 4: fn = genericg_compute_checksum_4al_4;
677 nm = "genericg_compute_checksum_4al_4"; break;
678 case 5: fn = genericg_compute_checksum_4al_5;
679 nm = "genericg_compute_checksum_4al_5"; break;
680 case 6: fn = genericg_compute_checksum_4al_6;
681 nm = "genericg_compute_checksum_4al_6"; break;
682 case 7: fn = genericg_compute_checksum_4al_7;
683 nm = "genericg_compute_checksum_4al_7"; break;
684 case 8: fn = genericg_compute_checksum_4al_8;
685 nm = "genericg_compute_checksum_4al_8"; break;
686 case 9: fn = genericg_compute_checksum_4al_9;
687 nm = "genericg_compute_checksum_4al_9"; break;
688 case 10: fn = genericg_compute_checksum_4al_10;
689 nm = "genericg_compute_checksum_4al_10"; break;
690 case 11: fn = genericg_compute_checksum_4al_11;
691 nm = "genericg_compute_checksum_4al_11"; break;
692 case 12: fn = genericg_compute_checksum_4al_12;
693 nm = "genericg_compute_checksum_4al_12"; break;
694 default: break;
695 }
696 fn_spec = (VEX_REGPARM(1) HWord(*)(HWord))fn;
697 nm_spec = nm;
698 }
699
700 expectedhW = fn_generic( first_hW, hWs_to_check );
701 /* If we got a specialised version, check it produces the same
702 result as the generic version! */
703 if (fn_spec) {
704 vassert(nm_spec);
705 vassert(expectedhW == fn_spec( first_hW ));
706 } else {
707 vassert(!nm_spec);
708 }
709
710 /* Set CMSTART and CMLEN. These will describe to the despatcher
711 the area of guest code to invalidate should we exit with a
712 self-check failure. */
713
714 tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
715 tilen_tmp = newIRTemp(irsb->tyenv, guest_word_type);
716
717 IRConst* base2check_IRConst
718 = guest_word_type==Ity_I32 ? IRConst_U32(toUInt(base2check))
719 : IRConst_U64(base2check);
720 IRConst* len2check_IRConst
721 = guest_word_type==Ity_I32 ? IRConst_U32(len2check)
722 : IRConst_U64(len2check);
723
724 irsb->stmts[selfcheck_idx + i * 5 + 0]
725 = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(base2check_IRConst) );
726
727 irsb->stmts[selfcheck_idx + i * 5 + 1]
728 = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
729
730 irsb->stmts[selfcheck_idx + i * 5 + 2]
731 = IRStmt_Put( offB_GUEST_CMSTART, IRExpr_RdTmp(tistart_tmp) );
732
733 irsb->stmts[selfcheck_idx + i * 5 + 3]
734 = IRStmt_Put( offB_GUEST_CMLEN, IRExpr_RdTmp(tilen_tmp) );
735
736 /* Generate the entry point descriptors */
737 if (abiinfo_both->host_ppc_calls_use_fndescrs) {
738 HWord* descr = (HWord*)fn_generic;
739 fn_generic_entry = descr[0];
740 if (fn_spec) {
741 descr = (HWord*)fn_spec;
742 fn_spec_entry = descr[0];
743 } else {
744 fn_spec_entry = (HWord)NULL;
745 }
746 } else {
747 fn_generic_entry = (HWord)fn_generic;
748 if (fn_spec) {
749 fn_spec_entry = (HWord)fn_spec;
750 } else {
751 fn_spec_entry = (HWord)NULL;
752 }
753 }
754
755 IRExpr* callexpr = NULL;
756 if (fn_spec) {
757 callexpr = mkIRExprCCall(
758 host_word_type, 1/*regparms*/,
759 nm_spec, (void*)fn_spec_entry,
760 mkIRExprVec_1(
761 mkIRExpr_HWord( (HWord)first_hW )
762 )
763 );
764 } else {
765 callexpr = mkIRExprCCall(
766 host_word_type, 2/*regparms*/,
767 nm_generic, (void*)fn_generic_entry,
768 mkIRExprVec_2(
769 mkIRExpr_HWord( (HWord)first_hW ),
770 mkIRExpr_HWord( (HWord)hWs_to_check )
771 )
772 );
773 }
774
775 irsb->stmts[selfcheck_idx + i * 5 + 4]
776 = IRStmt_Exit(
777 IRExpr_Binop(
778 host_word_type==Ity_I64 ? Iop_CmpNE64 : Iop_CmpNE32,
779 callexpr,
780 host_word_type==Ity_I64
781 ? IRExpr_Const(IRConst_U64(expectedhW))
782 : IRExpr_Const(IRConst_U32(expectedhW))
783 ),
784 Ijk_InvalICache,
785 /* Where we must restart if there's a failure: at the
786 first extent, regardless of which extent the
787 failure actually happened in. */
788 guest_IP_bbstart_IRConst,
789 offB_GUEST_IP
790 );
791 } /* for (i = 0; i < vge->n_used; i++) */
792 }
793
794 /* irsb->next must now be set, since we've finished the block.
795 Print it if necessary.*/
796 vassert(irsb->next != NULL);
797 if (debug_print) {
798 vex_printf(" ");
799 vex_printf( "PUT(%d) = ", irsb->offsIP);
800 ppIRExpr( irsb->next );
801 vex_printf( "; exit-");
802 ppIRJumpKind(irsb->jumpkind);
803 vex_printf( "\n");
804 vex_printf( "\n");
805 }
806
807 *n_guest_instrs = n_instrs;
808 return irsb;
809 }
810
811
812 /*-------------------------------------------------------------
813 A support routine for doing self-checking translations.
814 -------------------------------------------------------------*/
815
816 /* CLEAN HELPER */
817 /* CALLED FROM GENERATED CODE */
818
/* Compute a checksum of host memory at [addr .. addr+len-1], as fast
   as possible.  All _4al versions assume that the supplied address is
   4 aligned.  All length values are in 4-byte chunks.  These fns
   are called once for every use of a self-checking translation, so
   they need to be as fast as possible. */
824
825 /* --- 32-bit versions, used only on 32-bit hosts --- */
826
/* Rotate the 32-bit value w left by n bit positions and return the
   result.  Only the low five bits of n are used, so the rotation
   count is taken modulo 32.  Masking both shift counts keeps them in
   the range 0..31; the unmasked form (w >> (32-n)) would shift by
   the full width, which is undefined in C, when n is 0.  For n in
   1..31 the result is the same as the unmasked form. */
static inline UInt ROL32 ( UInt w, Int n ) {
   const UInt s = (UInt)n & 31u;
   return (w << s) | (w >> ((32u - s) & 31u));
}
831
832 VEX_REGPARM(2)
genericg_compute_checksum_4al(HWord first_w32,HWord n_w32s)833 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
834 {
835 UInt sum1 = 0, sum2 = 0;
836 UInt* p = (UInt*)first_w32;
837 /* unrolled */
838 while (n_w32s >= 4) {
839 UInt w;
840 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
841 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
842 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
843 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
844 p += 4;
845 n_w32s -= 4;
846 sum1 ^= sum2;
847 }
848 while (n_w32s >= 1) {
849 UInt w;
850 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
851 p += 1;
852 n_w32s -= 1;
853 sum1 ^= sum2;
854 }
855 return sum1 + sum2;
856 }
857
858 /* Specialised versions of the above function */
859
860 VEX_REGPARM(1)
genericg_compute_checksum_4al_1(HWord first_w32)861 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
862 {
863 UInt sum1 = 0, sum2 = 0;
864 UInt* p = (UInt*)first_w32;
865 UInt w;
866 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
867 sum1 ^= sum2;
868 return sum1 + sum2;
869 }
870
871 VEX_REGPARM(1)
genericg_compute_checksum_4al_2(HWord first_w32)872 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
873 {
874 UInt sum1 = 0, sum2 = 0;
875 UInt* p = (UInt*)first_w32;
876 UInt w;
877 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
878 sum1 ^= sum2;
879 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
880 sum1 ^= sum2;
881 return sum1 + sum2;
882 }
883
884 VEX_REGPARM(1)
genericg_compute_checksum_4al_3(HWord first_w32)885 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
886 {
887 UInt sum1 = 0, sum2 = 0;
888 UInt* p = (UInt*)first_w32;
889 UInt w;
890 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
891 sum1 ^= sum2;
892 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
893 sum1 ^= sum2;
894 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
895 sum1 ^= sum2;
896 return sum1 + sum2;
897 }
898
899 VEX_REGPARM(1)
genericg_compute_checksum_4al_4(HWord first_w32)900 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
901 {
902 UInt sum1 = 0, sum2 = 0;
903 UInt* p = (UInt*)first_w32;
904 UInt w;
905 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
906 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
907 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
908 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
909 sum1 ^= sum2;
910 return sum1 + sum2;
911 }
912
913 VEX_REGPARM(1)
genericg_compute_checksum_4al_5(HWord first_w32)914 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
915 {
916 UInt sum1 = 0, sum2 = 0;
917 UInt* p = (UInt*)first_w32;
918 UInt w;
919 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
920 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
921 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
922 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
923 sum1 ^= sum2;
924 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
925 sum1 ^= sum2;
926 return sum1 + sum2;
927 }
928
929 VEX_REGPARM(1)
genericg_compute_checksum_4al_6(HWord first_w32)930 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
931 {
932 UInt sum1 = 0, sum2 = 0;
933 UInt* p = (UInt*)first_w32;
934 UInt w;
935 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
936 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
937 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
938 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
939 sum1 ^= sum2;
940 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
941 sum1 ^= sum2;
942 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
943 sum1 ^= sum2;
944 return sum1 + sum2;
945 }
946
947 VEX_REGPARM(1)
genericg_compute_checksum_4al_7(HWord first_w32)948 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
949 {
950 UInt sum1 = 0, sum2 = 0;
951 UInt* p = (UInt*)first_w32;
952 UInt w;
953 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
954 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
955 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
956 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
957 sum1 ^= sum2;
958 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
959 sum1 ^= sum2;
960 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
961 sum1 ^= sum2;
962 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
963 sum1 ^= sum2;
964 return sum1 + sum2;
965 }
966
967 VEX_REGPARM(1)
genericg_compute_checksum_4al_8(HWord first_w32)968 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
969 {
970 UInt sum1 = 0, sum2 = 0;
971 UInt* p = (UInt*)first_w32;
972 UInt w;
973 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
974 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
975 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
976 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
977 sum1 ^= sum2;
978 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
979 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
980 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
981 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
982 sum1 ^= sum2;
983 return sum1 + sum2;
984 }
985
986 VEX_REGPARM(1)
genericg_compute_checksum_4al_9(HWord first_w32)987 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
988 {
989 UInt sum1 = 0, sum2 = 0;
990 UInt* p = (UInt*)first_w32;
991 UInt w;
992 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
993 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
994 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
995 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
996 sum1 ^= sum2;
997 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
998 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
999 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1000 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1001 sum1 ^= sum2;
1002 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1003 sum1 ^= sum2;
1004 return sum1 + sum2;
1005 }
1006
1007 VEX_REGPARM(1)
genericg_compute_checksum_4al_10(HWord first_w32)1008 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
1009 {
1010 UInt sum1 = 0, sum2 = 0;
1011 UInt* p = (UInt*)first_w32;
1012 UInt w;
1013 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1014 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1015 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1016 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1017 sum1 ^= sum2;
1018 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1019 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1020 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1021 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1022 sum1 ^= sum2;
1023 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1024 sum1 ^= sum2;
1025 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1026 sum1 ^= sum2;
1027 return sum1 + sum2;
1028 }
1029
1030 VEX_REGPARM(1)
genericg_compute_checksum_4al_11(HWord first_w32)1031 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
1032 {
1033 UInt sum1 = 0, sum2 = 0;
1034 UInt* p = (UInt*)first_w32;
1035 UInt w;
1036 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1037 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1038 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1039 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1040 sum1 ^= sum2;
1041 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1042 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1043 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1044 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1045 sum1 ^= sum2;
1046 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1047 sum1 ^= sum2;
1048 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1049 sum1 ^= sum2;
1050 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1051 sum1 ^= sum2;
1052 return sum1 + sum2;
1053 }
1054
1055 VEX_REGPARM(1)
genericg_compute_checksum_4al_12(HWord first_w32)1056 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
1057 {
1058 UInt sum1 = 0, sum2 = 0;
1059 UInt* p = (UInt*)first_w32;
1060 UInt w;
1061 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1062 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1063 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1064 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1065 sum1 ^= sum2;
1066 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1067 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1068 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1069 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1070 sum1 ^= sum2;
1071 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1072 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1073 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1074 w = p[11]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1075 sum1 ^= sum2;
1076 return sum1 + sum2;
1077 }
1078
1079
1080 /* --- 64-bit versions, used only on 64-bit hosts --- */
1081
/* Rotate the 64-bit value |w| left by |n| bit positions and return
   the result.  Both shift counts are reduced modulo 64, so n == 0 (or
   any other count outside 1..63) never causes a shift by the full
   word width or more, which is undefined behaviour in C.  For
   1 <= n <= 63 the result is identical to the plain
   (w << n) | (w >> (64-n)) formulation. */
static inline ULong ROL64 ( ULong w, Int n ) {
   const UInt sh = (UInt)n & 63u;
   return (w << sh) | (w >> ((64u - sh) & 63u));
}
1086
1087 VEX_REGPARM(2)
genericg_compute_checksum_8al(HWord first_w64,HWord n_w64s)1088 static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s )
1089 {
1090 ULong sum1 = 0, sum2 = 0;
1091 ULong* p = (ULong*)first_w64;
1092 /* unrolled */
1093 while (n_w64s >= 4) {
1094 ULong w;
1095 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1096 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1097 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1098 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1099 p += 4;
1100 n_w64s -= 4;
1101 sum1 ^= sum2;
1102 }
1103 while (n_w64s >= 1) {
1104 ULong w;
1105 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1106 p += 1;
1107 n_w64s -= 1;
1108 sum1 ^= sum2;
1109 }
1110 return sum1 + sum2;
1111 }
1112
1113 /* Specialised versions of the above function */
1114
1115 VEX_REGPARM(1)
genericg_compute_checksum_8al_1(HWord first_w64)1116 static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 )
1117 {
1118 ULong sum1 = 0, sum2 = 0;
1119 ULong* p = (ULong*)first_w64;
1120 ULong w;
1121 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1122 sum1 ^= sum2;
1123 return sum1 + sum2;
1124 }
1125
1126 VEX_REGPARM(1)
genericg_compute_checksum_8al_2(HWord first_w64)1127 static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 )
1128 {
1129 ULong sum1 = 0, sum2 = 0;
1130 ULong* p = (ULong*)first_w64;
1131 ULong w;
1132 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1133 sum1 ^= sum2;
1134 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1135 sum1 ^= sum2;
1136 return sum1 + sum2;
1137 }
1138
1139 VEX_REGPARM(1)
genericg_compute_checksum_8al_3(HWord first_w64)1140 static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 )
1141 {
1142 ULong sum1 = 0, sum2 = 0;
1143 ULong* p = (ULong*)first_w64;
1144 ULong w;
1145 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1146 sum1 ^= sum2;
1147 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1148 sum1 ^= sum2;
1149 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1150 sum1 ^= sum2;
1151 return sum1 + sum2;
1152 }
1153
1154 VEX_REGPARM(1)
genericg_compute_checksum_8al_4(HWord first_w64)1155 static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 )
1156 {
1157 ULong sum1 = 0, sum2 = 0;
1158 ULong* p = (ULong*)first_w64;
1159 ULong w;
1160 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1161 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1162 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1163 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1164 sum1 ^= sum2;
1165 return sum1 + sum2;
1166 }
1167
1168 VEX_REGPARM(1)
genericg_compute_checksum_8al_5(HWord first_w64)1169 static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 )
1170 {
1171 ULong sum1 = 0, sum2 = 0;
1172 ULong* p = (ULong*)first_w64;
1173 ULong w;
1174 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1175 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1176 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1177 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1178 sum1 ^= sum2;
1179 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1180 sum1 ^= sum2;
1181 return sum1 + sum2;
1182 }
1183
1184 VEX_REGPARM(1)
genericg_compute_checksum_8al_6(HWord first_w64)1185 static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 )
1186 {
1187 ULong sum1 = 0, sum2 = 0;
1188 ULong* p = (ULong*)first_w64;
1189 ULong w;
1190 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1191 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1192 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1193 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1194 sum1 ^= sum2;
1195 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1196 sum1 ^= sum2;
1197 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1198 sum1 ^= sum2;
1199 return sum1 + sum2;
1200 }
1201
1202 VEX_REGPARM(1)
genericg_compute_checksum_8al_7(HWord first_w64)1203 static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 )
1204 {
1205 ULong sum1 = 0, sum2 = 0;
1206 ULong* p = (ULong*)first_w64;
1207 ULong w;
1208 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1209 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1210 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1211 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1212 sum1 ^= sum2;
1213 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1214 sum1 ^= sum2;
1215 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1216 sum1 ^= sum2;
1217 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1218 sum1 ^= sum2;
1219 return sum1 + sum2;
1220 }
1221
1222 VEX_REGPARM(1)
genericg_compute_checksum_8al_8(HWord first_w64)1223 static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 )
1224 {
1225 ULong sum1 = 0, sum2 = 0;
1226 ULong* p = (ULong*)first_w64;
1227 ULong w;
1228 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1229 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1230 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1231 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1232 sum1 ^= sum2;
1233 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1234 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1235 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1236 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1237 sum1 ^= sum2;
1238 return sum1 + sum2;
1239 }
1240
1241 VEX_REGPARM(1)
genericg_compute_checksum_8al_9(HWord first_w64)1242 static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 )
1243 {
1244 ULong sum1 = 0, sum2 = 0;
1245 ULong* p = (ULong*)first_w64;
1246 ULong w;
1247 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1248 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1249 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1250 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1251 sum1 ^= sum2;
1252 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1253 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1254 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1255 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1256 sum1 ^= sum2;
1257 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1258 sum1 ^= sum2;
1259 return sum1 + sum2;
1260 }
1261
1262 VEX_REGPARM(1)
genericg_compute_checksum_8al_10(HWord first_w64)1263 static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 )
1264 {
1265 ULong sum1 = 0, sum2 = 0;
1266 ULong* p = (ULong*)first_w64;
1267 ULong w;
1268 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1269 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1270 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1271 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1272 sum1 ^= sum2;
1273 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1274 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1275 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1276 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1277 sum1 ^= sum2;
1278 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1279 sum1 ^= sum2;
1280 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1281 sum1 ^= sum2;
1282 return sum1 + sum2;
1283 }
1284
1285 VEX_REGPARM(1)
genericg_compute_checksum_8al_11(HWord first_w64)1286 static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 )
1287 {
1288 ULong sum1 = 0, sum2 = 0;
1289 ULong* p = (ULong*)first_w64;
1290 ULong w;
1291 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1292 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1293 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1294 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1295 sum1 ^= sum2;
1296 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1297 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1298 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1299 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1300 sum1 ^= sum2;
1301 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1302 sum1 ^= sum2;
1303 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1304 sum1 ^= sum2;
1305 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1306 sum1 ^= sum2;
1307 return sum1 + sum2;
1308 }
1309
1310 VEX_REGPARM(1)
genericg_compute_checksum_8al_12(HWord first_w64)1311 static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 )
1312 {
1313 ULong sum1 = 0, sum2 = 0;
1314 ULong* p = (ULong*)first_w64;
1315 ULong w;
1316 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1317 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1318 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1319 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1320 sum1 ^= sum2;
1321 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1322 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1323 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1324 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1325 sum1 ^= sum2;
1326 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1327 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1328 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1329 w = p[11]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1330 sum1 ^= sum2;
1331 return sum1 + sum2;
1332 }
1333
1334 /*--------------------------------------------------------------------*/
1335 /*--- end guest_generic_bb_to_IR.c ---*/
1336 /*--------------------------------------------------------------------*/
1337