/* $Id: sparc-timing.c,v 1.3 2010/02/14 15:57:09 fredette Exp $ */

/* ic/sparc/sparc-timing.c - SPARC instruction timing support: */

/*
 * Copyright (c) 2009 Matt Fredette
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Matt Fredette.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/* includes: */
#include "sparc-impl.h"

_TME_RCSID("$Id: sparc-timing.c,v 1.3 2010/02/14 15:57:09 fredette Exp $");

/* macros: */

/* at or below this maximum number of microseconds, we will spin
   instead of yield: */
#define TME_SPARC_TIMING_SPIN_USEC_MAX  (4096)

/* normally, when we yield we do a plain yield so we are immediately
   runnable again.  this makes timing loops more accurate, at the
   expense of consuming the host CPU.  if this is nonzero, when we
   yield we will instead do a sleep or wait on an external event: */
#define TME_SPARC_TIMING_YIELD_BLOCK    (FALSE)

/* this does a timing loop update: */
static void
_tme_sparc_timing_loop_update(struct tme_sparc *ic,
                              tme_sparc_ireg_umax_t update_count_m1)
{
  tme_uint32_t insn_update;
  unsigned long opcode;
  unsigned int reg_rd;
  signed int immediate;
  tme_sparc_ireg_umax_t addend_total_m1;

  /* get the update instruction: */
  insn_update = ic->_tme_sparc_insn;

  /* get the opcode: */
  opcode = TME_FIELD_MASK_EXTRACTU(insn_update, (0x3f << 19));

  /* get the rd register: */
  reg_rd = TME_FIELD_MASK_EXTRACTU(insn_update, TME_SPARC_FORMAT3_MASK_RD);
  TME_SPARC_REG_INDEX(ic, reg_rd);

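  /* NB: tme_sparc_timing_loop_ok() has already guaranteed that the
     simm13 field of the update instruction is exactly 1 or -1, so
     bit one of the instruction alone distinguishes the two cases:
     it is clear for 1 and set for -1, which makes
     (1 - (insn_update & 2)) the immediate itself: */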
  /* get the immediate: */
  immediate = insn_update & 2;
  immediate = 1 - immediate;

  /* get the total addend: */
  addend_total_m1 = update_count_m1;
  if (ic->tme_sparc_timing_loop_addend < 0) {
    addend_total_m1 = -addend_total_m1;
  }

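  /* NB: because the update instruction is a plain add or subtract of
     a constant, all but the last of the updates can be folded into a
     single direct addition of addend_total_m1 to rd; only the final
     update needs to go through the execute opmap, so that any
     condition codes (for addcc or subcc) are set from the true final
     operands: */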
  /* if this is a v9 CPU: */
  if (TME_SPARC_VERSION(ic) >= 9) {
#ifdef TME_HAVE_INT64_T

    /* save the immediate: */
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_TMP(0)) = immediate;

    /* do all but one of the updates of the rd register directly: */
    ic->tme_sparc_ireg_uint64(reg_rd) += addend_total_m1;

    /* do the final update, including setting any condition codes: */
    (*(ic->_tme_sparc64_execute_opmap[opcode]))
      (ic,
       &ic->tme_sparc_ireg_uint64(reg_rd),
       &ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_TMP(0)),
       &ic->tme_sparc_ireg_uint64(reg_rd));

#endif /* TME_HAVE_INT64_T */
  }

  /* otherwise, this is a v7 or v8 CPU: */
  else {

    /* save the immediate: */
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_TMP(0)) = immediate;

    /* do all but one of the updates of the rd register directly: */
    ic->tme_sparc_ireg_uint32(reg_rd) += addend_total_m1;

    /* do the final update, including setting any condition codes: */
    (*(ic->_tme_sparc32_execute_opmap[opcode]))
      (ic,
       &ic->tme_sparc_ireg_uint32(reg_rd),
       &ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_TMP(0)),
       &ic->tme_sparc_ireg_uint32(reg_rd));
  }
}

/* this returns nonzero if the branch to . instruction and the update
   instruction in its delay slot are a supported timing loop: */
int
tme_sparc_timing_loop_ok(tme_uint32_t insn_branch_dot,
                         tme_uint32_t insn_update)
{
  unsigned int op2;
  tme_uint32_t conds_mask;
  unsigned int cond;

  /* if the update instruction is not an add, addcc, sub, or subcc
     with the i bit set: */
  if ((insn_update
       & ((tme_uint32_t)
          ((0x3 << 30)          /* format */
           + (0x2b << 19)       /* op3 (mask addcc to add, sub to add) */
           + (1 << 13))))       /* i */
      != ((tme_uint32_t)
          ((0x2 << 30)          /* format */
           + (0x00 << 19)       /* op3 (add) */
           + (1 << 13)))) {     /* i */

    /* we only support timing loops with plain add or subtract
       update instructions: */
    return (FALSE);
  }

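  /* NB: the check below accepts exactly the simm13 values 1 and -1.
     for simm13 == 1 the low thirteen bits are 0x0001 and bit one is
     clear, so the sum is 1; for simm13 == -1 they are 0x1fff and bit
     one is set, so the sum is 0x2001, which is 1 after masking.  no
     other thirteen-bit value satisfies the test: */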
  /* if the simm13 is not 1 or -1: */
  if (((insn_update
        + (insn_update & 2))
       & 0x1fff)
      != 1) {

    /* we only support timing loops with plain add or subtract update
       instructions with immediates of 1 or -1: */
    return (FALSE);
  }

  /* if rd is %g0: */
#if TME_SPARC_IREG_G0 != 0
#error "TME_SPARC_IREG_G0 changed"
#endif
  if ((insn_update & TME_SPARC_FORMAT3_MASK_RD) == 0) {

    /* we only support timing loops with plain add or subtract update
       instructions with destination registers other than %g0: */
    return (FALSE);
  }

  /* if rs1 and rd are not the same: */
#if TME_SPARC_FORMAT3_MASK_RD < TME_SPARC_FORMAT3_MASK_RS1
#error "TME_SPARC_FORMAT3_MASK_ values changed"
#endif
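  /* NB: since the rd and rs1 fields are both single contiguous
     bitfields, (TME_SPARC_FORMAT3_MASK_RD / TME_SPARC_FORMAT3_MASK_RS1)
     is a power of two, and the division below simply shifts the rd
     field down into the rs1 position so that the exclusive-or can
     compare the two register numbers: */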
  if ((((insn_update
         / (TME_SPARC_FORMAT3_MASK_RD
            / TME_SPARC_FORMAT3_MASK_RS1))
        ^ insn_update)
       & TME_SPARC_FORMAT3_MASK_RS1) != 0) {

    /* we only support timing loops with plain add or subtract update
       instructions where the source register and destination register
       are the same: */
    return (FALSE);
  }

  /* all branch instructions are format two instructions: */
  assert ((insn_branch_dot & (tme_uint32_t) (0x3 << 30)) == 0);

  /* if this isn't a Bicc or a v9 BPcc instruction: */
  op2 = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0x7 << 22));
  if (__tme_predict_false(op2 != 2 && op2 != 1)) {

    /* we support all timing loops whose branch to . instruction
       doesn't depend on the integer condition codes: */
    return (TRUE);
  }

  /* otherwise, this is a Bicc or a v9 BPcc instruction: */
  else {

    /* if this is not an addcc or subcc instruction: */
    if (__tme_predict_false((insn_update & (0x10 << 19)) == 0)) {

      /* we support timing loops with Bicc and BPcc instructions even
         when the update instruction doesn't change the integer
         condition codes: */
      return (TRUE);
    }

    /* if this is a subcc instruction: */
    if (insn_update & (0x04 << 19)) {

      /* we support timing loops that use subcc with all conditions
         except for vc and vs (the overflow conditions) and never: */
      conds_mask
        = ((1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_N))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_E))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_LE))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_L))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_LEU))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_CS))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_NEG))
           + (1 << TME_SPARC_COND_E)
           + (1 << TME_SPARC_COND_LE)
           + (1 << TME_SPARC_COND_L)
           + (1 << TME_SPARC_COND_LEU)
           + (1 << TME_SPARC_COND_CS)
           + (1 << TME_SPARC_COND_NEG)
           );
    }

    /* otherwise, this is an addcc instruction: */
    else {

      /* we support timing loops that use addcc with only these
         conditions: */
      conds_mask
        = ((1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_N))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_E))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_CS))
           + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_NEG))
           + (1 << TME_SPARC_COND_E)
           + (1 << TME_SPARC_COND_CS)
           + (1 << TME_SPARC_COND_NEG)
           );
    }

    /* if we don't support the condition: */
    cond = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0xf << 25));
    if ((conds_mask & TME_BIT(cond)) == 0) {

      /* we don't support this timing loop: */
      return (FALSE);
    }

    /* otherwise, we support this timing loop: */
    return (TRUE);
  }
}
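
/* for illustration, a typical loop that passes the checks above: the
   branch tests the condition codes set by the subcc in its delay
   slot (rd == rs1 == %o0, simm13 == 1, condition ne):

	1:	bne	1b
		 subcc	%o0, 1, %o0
*/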

/* this starts a timing loop: */
static void
_tme_sparc_timing_loop_start(struct tme_sparc *ic,
                             tme_uint32_t insn_update)
{
  unsigned int reg_rd;
  tme_sparc_ireg_umax_t value_rd;
  signed int addend;
  tme_uint32_t insn_branch_dot;
  const struct timeval *sleep;
  unsigned int op2;
  unsigned int cond;
  tme_sparc_ireg_umax_t value_sign;
  tme_sparc_ireg_umax_t value_zero;
  tme_sparc_ireg_umax_t value_true_greatest;
  tme_sparc_ireg_umax_t value_test;
  tme_sparc_ireg_umax_t branch_taken_count_max_m1;
  unsigned int loop_cycles_each;
  tme_sparc_ireg_umax_t cycles_scaled_max;
  union tme_value64 cycles_finish;
  tme_sparc_ireg_umax_t usec;
  tme_uint32_t usec32;
  static struct timeval sleep_buffer;

  /* at this point, the timing loop branch to . has been taken, and
     the PCs have been updated, so both PC and PC_next_next point to
     the timing loop update instruction (in insn_update), and PC_next
     points to the timing loop branch to . instruction again.

     a taken conditional branch never annuls, and sparc-execute.c and
     sparc-rc-insns.c handle a "ba,a ." instruction specially, so we
     know that the update instruction must execute at least as many
     times as the timing loop branch to . is taken.

     the timing loop branch to . has just been taken (this is why
     PC_next_next is the same as PC).  this first take was when the
     branch to . was detected in sparc-execute.c, or when
     tme_sparc_timing_loop_assist() determined that the recode
     instructions thunk that called it did so after a taken branch.

     this very first take is implicit in the taken branch count that
     we compute and store in
     ic->tme_sparc_timing_loop_branch_taken_count_max_m1 and/or pass
     to _tme_sparc_timing_loop_update() - i.e., we always compute the
     taken branch count minus one.

     this is good because it is possible for the timing loop update
     instruction to be executed 2^cc_width times.  if initially %o3 is
     zero and %icc.Z is clear, this bne will be taken 2^32 times:

	bne	.
	 deccc	%o3

     NB that in this specific case, where the timing loop branch to
     . does not annul, the timing loop update instruction will
     actually be run a total of (2^32)+1 times: 2^32 times
     corresponding to the 2^32 times that the branch is taken, plus
     one final time when the branch is *not* taken, but the update
     instruction is not annulled.

     this function only counts and performs the updates corresponding
     to the times that the branch is *taken*.
     _tme_sparc_timing_loop_update() does the count minus one updates
     directly in the destination register, followed by a true
     instruction execution for the last (to update any condition
     codes).

     whether or not the branch to . instruction annuls, any needed
     "one final time" update instruction will be handled either by
     sparc-execute.c, or by a combination of the recode instructions
     thunk and tme_sparc_timing_loop_assist(): */

  /* NB: our caller has already saved the current host cycles counter
     in ic->tme_sparc_timing_loop_start: */

  /* get the rd register: */
  reg_rd = TME_FIELD_MASK_EXTRACTU(insn_update, TME_SPARC_FORMAT3_MASK_RD);
  TME_SPARC_REG_INDEX(ic, reg_rd);

  /* if this is a v9 CPU: */
  if (TME_SPARC_VERSION(ic) >= 9) {
#ifdef TME_HAVE_INT64_T

    /* get the rd register value: */
    value_rd = ic->tme_sparc_ireg_uint64(reg_rd);

#else /* !TME_HAVE_INT64_T */

    /* silence uninitialized variable warnings: */
    value_rd = 0;

#endif /* !TME_HAVE_INT64_T */
  }

  /* otherwise, this is not a v9 CPU: */
  else {

    /* get the rd register value: */
    value_rd = (tme_int32_t) ic->tme_sparc_ireg_uint32(reg_rd);
  }

  /* assume that this is an add or addcc instruction: */
  addend = insn_update & 2;
  addend = 1 - addend;

  /* if this is a sub or subcc instruction: */
  if (insn_update & (0x04 << 19)) {

    /* complement the addend: */
    addend = -addend;
  }

  /* get the branch to . instruction: */
  insn_branch_dot = ic->_tme_sparc_insn;

  /* save the update instruction: */
  ic->_tme_sparc_insn = insn_update;

  /* save the addend: */
  ic->tme_sparc_timing_loop_addend = addend;

  /* assume that there isn't a maximum number of times that the branch
     to . can be taken (i.e., that the branch to . doesn't depend on
     the value of rd), as if the branch condition were always: */
  cond = TME_SPARC_COND_NOT + TME_SPARC_COND_N;

  /* assume that, if the branch does depend on the value of rd, the
     sign bit in values of rd is the most significant bit: */
  value_sign = 1;
  value_sign <<= ((sizeof(value_sign) * 8) - 1);

  /* silence uninitialized variable warnings: */
  value_zero = 0;
  value_true_greatest = 0;

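  /* NB on the model used in the dispatches below: a branch condition
     (ignoring any TME_SPARC_COND_NOT) is reduced to a single range
     test.  a value of rd makes the condition true after one more
     update instruction if and only if
     ((value - value_zero) mod 2^width) is less than or equal to
     value_true_greatest, where width is the width of rd as seen by
     the condition; value_zero and value_true_greatest are chosen
     per-condition: */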
  /* get the op2 field of the branch to . instruction: */
  op2 = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0x7 << 22));

  /* if this is a v9 BPr: */
  if (op2 == 3) {

    /* if this BPr tests rd: */
    if (((insn_branch_dot
          ^ insn_update)
         & TME_SPARC_FORMAT3_MASK_RS1) == 0) {

      /* get the condition field, and shift the "not" bit from bit two
         to bit three, to match the other branches: */
      cond = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0x7 << 25));
      cond = (cond + 4) & (TME_SPARC_COND_NOT | 3);

      /* dispatch on the condition: */
      if ((cond % TME_SPARC_COND_NOT) == TME_SPARC_COND_E) {
        value_zero = -addend;
        value_true_greatest = 0;
      }
      else {
        assert ((cond % TME_SPARC_COND_NOT) == TME_SPARC_COND_LE
                || (cond % TME_SPARC_COND_NOT) == TME_SPARC_COND_L);
        value_zero = value_sign - addend;
#if (TME_SPARC_COND_L & 1) == 0 || (TME_SPARC_COND_LE & 1) != 0
#error "TME_SPARC_COND_ values changed"
#endif
        value_true_greatest = value_sign - (cond & 1);
      }
    }
  }

  /* otherwise, if this is a Bicc or a v9 BPcc: */
  else if (op2 == 2 || op2 == 1) {

    /* if this is an addcc or subcc instruction: */
    if (insn_update & (0x10 << 19)) {

      /* get the condition: */
      cond = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0xf << 25));

      /* if this is a Bicc, or a BPcc with the cc1 bit clear, the
         sign bit in values of rd is bit 31: */
      if (sizeof(value_sign) > sizeof(tme_uint32_t)
          && ((insn_branch_dot >> 21) & op2 & 1) == 0) {
        value_sign = (((tme_uint32_t) 1) << 31);
      }

      /* if this is a subcc instruction: */
      if (insn_update & (0x04 << 19)) {

        /* dispatch on the condition: */
        switch (cond % TME_SPARC_COND_NOT) {
        default:
          /* we should have caught this unsupported condition in
             tme_sparc_timing_loop_ok(): */
          assert (FALSE);
          /* FALLTHROUGH */
        case TME_SPARC_COND_N:
          /* nothing to do */
          break;
        case TME_SPARC_COND_E:
          value_zero = -addend;
          value_true_greatest = 0;
          break;
        case TME_SPARC_COND_LE:
          value_zero = value_sign;
          value_true_greatest = value_sign - addend;
          break;
        case TME_SPARC_COND_L:
          value_zero = value_sign;
          value_true_greatest = (value_sign - 1) - addend;
          break;
        case TME_SPARC_COND_LEU:
          value_zero = 0;
          value_true_greatest = (value_sign * 2) - addend;
          break;
        case TME_SPARC_COND_CS:
          value_zero = 0;
          value_true_greatest = (value_sign * 2) - (addend + 1);
          break;
        case TME_SPARC_COND_NEG:
          value_zero = value_sign - addend;
          value_true_greatest = value_sign - 1;
          break;
        }
      }

      /* otherwise, this is an addcc instruction: */
      else {

        /* dispatch on the condition: */
        switch (cond % TME_SPARC_COND_NOT) {
        default:
          /* we should have caught this unsupported condition in
             tme_sparc_timing_loop_ok(): */
          assert (FALSE);
          /* FALLTHROUGH */
        case TME_SPARC_COND_N:
          /* nothing to do */
          break;
        case TME_SPARC_COND_E:
          value_zero = -addend;
          value_true_greatest = 0;
          break;
        case TME_SPARC_COND_CS:
          value_zero = -addend;
          value_true_greatest = (value_sign * 2) - (addend - 1);
          break;
        case TME_SPARC_COND_NEG:
          value_zero = value_sign - addend;
          value_true_greatest = value_sign - 1;
          break;
        }
      }
    }
  }

  /* the condition can't be never: */
  assert (cond != TME_SPARC_COND_N);

  /* assume that, if we block, we will block forever: */
  sleep = (const struct timeval *) NULL;

  /* if the condition is always, there is no maximum number of times
     that the branch to . can be taken: */
#if TME_SPARC_COND_N != 0
#error "TME_SPARC_COND_ values changed"
#endif
  ic->tme_sparc_timing_loop_branch_taken_max = (cond % TME_SPARC_COND_NOT);
  if (cond == (TME_SPARC_COND_NOT + TME_SPARC_COND_N)) {

    /* we may never finish: */
    ic->tme_sparc_timing_loop_finish.tme_value64_uint32_lo = (0 - (tme_uint32_t) 1);
    ic->tme_sparc_timing_loop_finish.tme_value64_uint32_hi = (0 - (tme_uint32_t) 1);
  }

  /* otherwise, the condition isn't always, so there is a maximum
     number of times that the branch to . can be taken: */
  else {

    /* it's not possible for all (adjusted-to-zero) values to make
       the condition true; at least the all-bits-one value must make
       it false: */
    assert (value_true_greatest <= ((value_sign - 1) * 2));

    /* test the initial value of rd: */
    value_test = (value_rd - value_zero) & ((value_sign * 2) - 1);

    /* if the initial value of rd will make the condition (ignoring
       TME_SPARC_COND_NOT) true after the first rd update
       instruction: */
    if (value_test <= value_true_greatest) {

      /* if this condition has TME_SPARC_COND_NOT: */
      if (cond & TME_SPARC_COND_NOT) {

        /* the branch to . will only be taken the first time: */
        branch_taken_count_max_m1 = 1 - 1;
      }

      /* otherwise, if the addend is -1: */
      else if (addend < 0) {

        /* the branch to . will be taken the first time, followed by
           at most (value_test + 1) more times when the value of rd
           makes the condition true: */
        branch_taken_count_max_m1 = (1 + (value_test + 1)) - 1;
      }

      /* otherwise, the addend is 1: */
      else {

        /* the branch to . will be taken the first time, followed by
           at most ((value_true_greatest - value_test) + 1) more times
           when the value of rd makes the condition true: */
        branch_taken_count_max_m1 = (1 + ((value_true_greatest - value_test) + 1)) - 1;
      }
    }

    /* otherwise, the initial value of rd will make the condition
       (ignoring TME_SPARC_COND_NOT) false after the first update
       instruction: */
    else {

      /* if this condition doesn't have TME_SPARC_COND_NOT: */
      if ((cond & TME_SPARC_COND_NOT) == 0) {

        /* the branch to . will only be taken the first time: */
        branch_taken_count_max_m1 = 1 - 1;
      }

      /* otherwise, if the addend is -1: */
      else if (addend < 0) {

        /* the branch to . will be taken the first time, followed by
           at most (value_test - value_true_greatest) more times when
           the value of rd makes the condition false: */
        branch_taken_count_max_m1 = (1 + (value_test - value_true_greatest)) - 1;
      }

      /* otherwise, the addend is 1: */
      else {

        /* the branch to . will be taken the first time, followed by
           at most (~value_test + 1) more times when the value of rd
           makes the condition false: */
        branch_taken_count_max_m1 = ((1 + (~value_test + 1)) - 1) & ((value_sign * 2) - 1);
      }
    }

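    /* as a worked example: for the "bne . ; deccc %o3" loop above
       with %o3 initially 3, addend is -1, cond is
       (TME_SPARC_COND_NOT + TME_SPARC_COND_E), value_zero is 1 and
       value_true_greatest is 0, so value_test is 2 and
       branch_taken_count_max_m1 is 2 - the branch is taken three
       times in all (the implicit first take, plus two more until
       %o3 reaches zero and the e condition becomes true): */
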
    /* set the maximum number of times the branch to . can be taken: */
    ic->tme_sparc_timing_loop_branch_taken_count_max_m1 = branch_taken_count_max_m1;

    /* if each loop iteration takes more than one cycle: */
    loop_cycles_each = ic->tme_sparc_timing_loop_cycles_each;
    if (__tme_predict_false(loop_cycles_each != 1)) {

      /* get the maximum number of cycles to loop: */
      /* NB: we try to deal with overflow: */
      if (__tme_predict_false(loop_cycles_each != 2)) {
        cycles_scaled_max
          = (branch_taken_count_max_m1
             * loop_cycles_each);
      }
      else {
        cycles_scaled_max = branch_taken_count_max_m1 * 2;
      }
      cycles_scaled_max += loop_cycles_each;
      if (__tme_predict_false(cycles_scaled_max < ic->tme_sparc_timing_loop_branch_taken_count_max_m1)) {
        cycles_scaled_max = 0 - (tme_sparc_ireg_umax_t) 1;
      }
    }

    /* otherwise, each loop iteration takes one cycle: */
    else {

      /* get the maximum number of cycles to loop: */
      /* NB: we try to deal with overflow: */
      cycles_scaled_max = branch_taken_count_max_m1 + 1;
      cycles_scaled_max -= (cycles_scaled_max == 0);
    }

    /* we can't be looping for zero cycles: */
    assert (cycles_scaled_max > 0);

    /* get the latest host cycle counter when the timing loop must
       finish, if it doesn't finish sooner: */
#ifdef TME_HAVE_INT64_T
    cycles_finish.tme_value64_uint = cycles_scaled_max;
#else /* !TME_HAVE_INT64_T */
    cycles_finish.tme_value64_uint32_lo = cycles_scaled_max;
    cycles_finish.tme_value64_uint32_hi = 0;
#endif /* !TME_HAVE_INT64_T */
    cycles_finish
      = tme_misc_cycles_scaled(&ic->tme_sparc_cycles_unscaling,
                               &cycles_finish);
    (void) tme_value64_add(&cycles_finish, &ic->tme_sparc_timing_loop_start);
    ic->tme_sparc_timing_loop_finish = cycles_finish;

    /* if the number of cycles to spin is small enough that we should
       truly spin, instead of yield: */
    if (cycles_scaled_max
        <= (ic->tme_sparc_cycles_scaled_per_usec
            * TME_SPARC_TIMING_SPIN_USEC_MAX)) {

      /* spin: */
      tme_misc_cycles_spin_until(&ic->tme_sparc_timing_loop_finish);

      /* do the timing loop update: */
      _tme_sparc_timing_loop_update(ic,
                                    ic->tme_sparc_timing_loop_branch_taken_count_max_m1);

      /* unwind back to instruction execution: */
      return;
    }

    /* if we will block until an external event: */
    if (TME_SPARC_TIMING_YIELD_BLOCK) {

      /* if the number of cycles to loop doesn't fit in 32 bits: */
      if (__tme_predict_false(cycles_scaled_max
                              & ~ (tme_sparc_ireg_umax_t) (tme_uint32_t) (0 - (tme_uint32_t) 1))) {

        /* convert cycles into microseconds: */
        usec = cycles_scaled_max / ic->tme_sparc_cycles_scaled_per_usec;

        /* set the sleep time: */
        sleep_buffer.tv_sec = (usec / 1000000);
        sleep_buffer.tv_usec = (usec % 1000000);
      }

      /* otherwise, the number of cycles to loop fits in 32 bits: */
      else {

        /* convert cycles into microseconds: */
        usec32 = ((tme_uint32_t) cycles_scaled_max) / ic->tme_sparc_cycles_scaled_per_usec;

        /* assume that we will sleep for less than one second: */
        sleep_buffer.tv_sec = 0;

        /* if the sleep time is one second or more: */
        if (__tme_predict_false(usec32 >= 1000000)) {

          /* set the sleep time seconds: */
          sleep_buffer.tv_sec = (usec32 / 1000000);

          /* get the microseconds: */
          usec32 = (usec32 % 1000000);
        }

        /* set the sleep time microseconds: */
        sleep_buffer.tv_usec = usec32;
      }

      /* we won't block forever: */
      sleep = &sleep_buffer;
    }
  }

  /* unbusy the instruction TLB entry: */
  assert (ic->_tme_sparc_itlb_current_token != NULL);
  tme_token_unbusy(ic->_tme_sparc_itlb_current_token);

  /* if threads are cooperative: */
  if (TME_THREADS_COOPERATIVE) {

    /* forget the instruction TLB entry: */
    ic->_tme_sparc_itlb_current_token = NULL;

    /* we will redispatch into timing mode: */
    ic->_tme_sparc_mode = TME_SPARC_MODE_TIMING_LOOP;
  }

  /* if we're blocking: */
  if (TME_SPARC_TIMING_YIELD_BLOCK) {

    /* lock the external mutex: */
    tme_mutex_lock(&ic->tme_sparc_external_mutex);

    /* check one last time for any external signal: */
    if (tme_memory_atomic_read_flag(&ic->tme_sparc_external_flag)) {
      tme_memory_atomic_write_flag(&ic->tme_sparc_external_flag, FALSE);
      (*ic->_tme_sparc_external_check)(ic, TME_SPARC_EXTERNAL_CHECK_MUTEX_LOCKED);
    }

    /* block on the external signal condition: */
    if (sleep != NULL) {
      tme_cond_sleep_yield(&ic->tme_sparc_external_cond,
                           &ic->tme_sparc_external_mutex,
                           sleep);
    }
    else {
      tme_cond_wait_yield(&ic->tme_sparc_external_cond,
                          &ic->tme_sparc_external_mutex);
    }

    /* unlock the external mutex: */
    tme_mutex_unlock(&ic->tme_sparc_external_mutex);
  }

  /* otherwise, we're not blocking: */
  else {

    /* do the simple yield: */
    tme_thread_yield();
  }

  /* finish the timing loop: */
  tme_sparc_timing_loop_finish(ic);

  /* relock the instruction TLB entry: */
  tme_sparc_callout_relock(ic);

  /* unwind back to instruction execution: */
  return;
}

/* this possibly starts a timing loop from the instruction
   executor: */
void
tme_sparc_timing_loop_start(struct tme_sparc *ic)
{
  tme_uint32_t insn_update;
  tme_uint32_t insn_branch_dot;
  tme_sparc_ireg_umax_t pc;

  /* save the current host cycles counter: */
  ic->tme_sparc_timing_loop_start = tme_misc_cycles();

  /* get the update instruction from the branch delay slot: */
  insn_update = tme_sparc_fetch_nearby(ic, 1);

  /* get the branch to . instruction: */
  insn_branch_dot = ic->_tme_sparc_insn;

  /* if we don't support this timing loop: */
  if (!tme_sparc_timing_loop_ok(insn_branch_dot,
                                insn_update)) {
    return;
  }

  /* at this point, PC and PC_next_next both point to the branch to .,
     and PC_next points to the update instruction.  we have to advance
     the PCs, because _tme_sparc_timing_loop_update() expects PC and
     PC_next_next to point to the update instruction, and PC_next to
     point to the branch to .: */

  /* if this is a v9 CPU: */
  if (TME_SPARC_VERSION(ic) >= 9) {
#ifdef TME_HAVE_INT64_T

    /* advance the PCs: */
    pc = ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT);
    assert (ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC)
            == ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT_NEXT));
    assert (((ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC)
              + sizeof(tme_uint32_t))
             & ic->tme_sparc_address_mask)
            == pc);
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT)
      = ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT_NEXT);
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC) = pc;
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT_NEXT) = pc;

#endif /* TME_HAVE_INT64_T */
  }

  /* otherwise, this is a v7 or v8 CPU: */
  else {

    /* advance the PCs: */
    pc = ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT);
    assert (ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC)
            == ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT_NEXT));
    assert ((ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC)
             + sizeof(tme_uint32_t))
            == pc);
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT)
      = ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT_NEXT);
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC) = pc;
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT_NEXT) = pc;
  }

  /* start the timing loop: */
  _tme_sparc_timing_loop_start(ic,
                               insn_update);
}

#if TME_HAVE_RECODE

/* the recode assist function for timing loops: */
tme_recode_uguest_t
tme_sparc_timing_loop_assist(struct tme_ic *_ic,
                             tme_recode_uguest_t insn_branch_dot,
                             tme_recode_uguest_t junk)
{
  struct tme_sparc *ic;
  tme_sparc_ireg_umax_t pc_next_next;
  int branch_dot_taken;
  tme_uint32_t insn_update;

  /* recover our ic: */
  ic = (struct tme_sparc *) _ic;

  /* save the branch to . instruction in the normal instruction
     position: */
  /* NB: we do this even though PC currently points to the timing loop
     update instruction: */
  ic->_tme_sparc_insn = insn_branch_dot;

  /* save the current host cycles counter: */
  ic->tme_sparc_timing_loop_start = tme_misc_cycles();

  /* NB: unlike tme_sparc_timing_loop_start(), this function may be
     called after the branch to . has *not* been taken.  this happens
     when the branch to . is conditional and does not annul - this is
     the "one final time" update instruction discussed in
     _tme_sparc_timing_loop_start().

     at this point, PC points to the update instruction, PC_next
     points to the branch to . (if the branch to . was taken) or to
     the instruction following the update instruction (if the branch
     to . was not taken and does not annul): */

  /* if this is a v9 CPU: */
  if (TME_SPARC_VERSION(ic) >= 9) {
#ifdef TME_HAVE_INT64_T

    /* set PC_next_next from PC_next: */
    pc_next_next
      = ((ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT)
          + sizeof(tme_uint32_t))
         & ic->tme_sparc_address_mask);
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT_NEXT) = pc_next_next;

    /* see if the timing loop branch to . instruction was taken: */
    branch_dot_taken = (ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC) == pc_next_next);

    /* get the timing loop update instruction: */
    insn_update = ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_INSN);

#else /* !TME_HAVE_INT64_T */

    /* silence uninitialized variable warnings: */
    branch_dot_taken = 0;
    insn_update = 0;

#endif /* !TME_HAVE_INT64_T */
  }

  /* otherwise, this is not a v9 CPU: */
  else {

    /* set PC_next_next from PC_next: */
    pc_next_next
      = (ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT)
         + sizeof(tme_uint32_t));
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT_NEXT) = pc_next_next;

    /* see if the timing loop branch to . instruction was taken: */
    branch_dot_taken = (ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC) == (tme_uint32_t) pc_next_next);

    /* get the timing loop update instruction: */
    insn_update = ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_INSN);
  }

  /* if the timing loop branch to . instruction was taken: */
  if (branch_dot_taken) {

    /* end any recode verifying: */
    tme_sparc_recode_verify_end_preinstruction(ic);

    /* start the timing loop: */
    _tme_sparc_timing_loop_start(ic,
                                 insn_update);
  }

  /* otherwise, the timing loop branch to . instruction was not
     taken, and it does not annul: */
  else {

    /* do the one final update: */
    ic->_tme_sparc_insn = insn_update;
    _tme_sparc_timing_loop_update(ic, 0);
  }

  /* unwind back to instruction execution: */
  return (0);
}

#endif /* TME_HAVE_RECODE */

/* this finishes a timing loop: */
void
tme_sparc_timing_loop_finish(struct tme_sparc *ic)
{
  union tme_value64 cycles_finish;
  union tme_value64 cycles_scaled_u;
  tme_sparc_ireg_umax_t cycles_scaled;
  unsigned int loop_cycles_each;
  tme_sparc_ireg_umax_t branch_taken_count_m1;

  /* loop forever: */
  for (;;) {

    /* get the current host cycle counter: */
    cycles_finish = tme_misc_cycles();

    /* if the timing loop has finished: */
    if (tme_value64_cmp(&cycles_finish, >=, &ic->tme_sparc_timing_loop_finish)) {
      break;
    }

    /* if an external event has happened: */
    if (tme_memory_atomic_read_flag(&ic->tme_sparc_external_flag)) {
      break;
    }

    /* if we were blocking, we would have slept until the finish time
       or until an external event, so we must be doing plain yields: */
    assert (!TME_SPARC_TIMING_YIELD_BLOCK);

    /* yield: */
    tme_thread_yield();
  }

  /* get the number of cycles elapsed: */
  /* NB: we try to deal with overflow: */
  (void) tme_value64_sub(&cycles_finish, &ic->tme_sparc_timing_loop_start);
  cycles_scaled_u
    = tme_misc_cycles_scaled(&ic->tme_sparc_cycles_scaling,
                             &cycles_finish);
#ifdef TME_HAVE_INT64_T
  cycles_scaled = cycles_scaled_u.tme_value64_uint;
#else /* !TME_HAVE_INT64_T */
  cycles_scaled
    = (cycles_scaled_u.tme_value64_uint32_hi
       ? (tme_uint32_t) (0 - (tme_uint32_t) 1)
       : cycles_scaled_u.tme_value64_uint32_lo);
#endif /* !TME_HAVE_INT64_T */

  /* NB: it's unusual, but actually okay if no cycles have elapsed.
     this just means that the branch to . will only be taken that
     first time.  since we need the count of times the branch to .
     was taken, minus one, dividing the elapsed cycles by the number
     of cycles per loop gets exactly what we need: */

  /* get the count of times the branch to . was taken, minus one: */
  loop_cycles_each = ic->tme_sparc_timing_loop_cycles_each;
  if (__tme_predict_false(loop_cycles_each != 1)) {
    if (__tme_predict_false(loop_cycles_each != 2)) {
      branch_taken_count_m1 = cycles_scaled / loop_cycles_each;
    }
    else {
      branch_taken_count_m1 = cycles_scaled / 2;
    }
  }
  else {
    branch_taken_count_m1 = cycles_scaled;
  }

  /* if there is a maximum count of times the branch to . could be taken: */
  if (ic->tme_sparc_timing_loop_branch_taken_max) {

    /* make sure that the branch to . isn't taken any more than the
       maximum: */
    if (branch_taken_count_m1 > ic->tme_sparc_timing_loop_branch_taken_count_max_m1) {
      branch_taken_count_m1 = ic->tme_sparc_timing_loop_branch_taken_count_max_m1;
    }
  }

  /* do the timing loop update: */
  _tme_sparc_timing_loop_update(ic,
                                branch_taken_count_m1);

  /* zero the instruction burst: */
  ic->_tme_sparc_instruction_burst_remaining = 0;
  ic->_tme_sparc_instruction_burst_other = TRUE;

  /* if threads are cooperative: */
  if (TME_THREADS_COOPERATIVE) {

    /* we will chain into execution mode: */
    ic->_tme_sparc_mode = TME_SPARC_MODE_EXECUTION;

    /* save a redispatch and resume execution directly: */
    (*ic->_tme_sparc_execute)(ic);
    abort();
  }

  /* otherwise, threads are preemptive: */

  /* unwind back to instruction execution: */
  return;
}