/* $Id: sparc-timing.c,v 1.3 2010/02/14 15:57:09 fredette Exp $ */

/* ic/sparc/sparc-timing.c - SPARC instruction timing support: */

/*
 * Copyright (c) 2009 Matt Fredette
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Matt Fredette.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/* includes: */
#include "sparc-impl.h"

_TME_RCSID("$Id: sparc-timing.c,v 1.3 2010/02/14 15:57:09 fredette Exp $");

/* macros: */

/* at or below this maximum number of microseconds, we will spin
   instead of yield: */
#define TME_SPARC_TIMING_SPIN_USEC_MAX		(4096)

/* normally, when we yield we do a plain yield so we are immediately
   runnable again.  this makes timing loops more accurate, at the
   expense of consuming the host CPU.  if this is nonzero, when we
   yield we will instead do a sleep or wait on an external event: */
#define TME_SPARC_TIMING_YIELD_BLOCK		(FALSE)

/* this does a timing loop update: */
static void
_tme_sparc_timing_loop_update(struct tme_sparc *ic,
			      tme_sparc_ireg_umax_t update_count_m1)
{
  tme_uint32_t insn_update;
  unsigned long opcode;
  unsigned int reg_rd;
  signed int immediate;
  tme_sparc_ireg_umax_t addend_total_m1;

  /* get the update instruction: */
  insn_update = ic->_tme_sparc_insn;

  /* get the opcode: */
  opcode = TME_FIELD_MASK_EXTRACTU(insn_update, (0x3f << 19));

  /* get the rd register: */
  reg_rd = TME_FIELD_MASK_EXTRACTU(insn_update, TME_SPARC_FORMAT3_MASK_RD);
  TME_SPARC_REG_INDEX(ic, reg_rd);

  /* get the immediate: */
  immediate = insn_update & 2;
  immediate = 1 - immediate;
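  /* (NB: tme_sparc_timing_loop_ok() only admits immediates of 1
     (simm13 0x0001, bit 1 clear) or -1 (simm13 0x1fff, bit 1 set),
     so bit 1 alone distinguishes them: 1 - 0 == 1 and 1 - 2 == -1.) */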

  /* get the total addend: */
  addend_total_m1 = update_count_m1;
  if (ic->tme_sparc_timing_loop_addend < 0) {
    addend_total_m1 = -addend_total_m1;
  }

  /* if this is a v9 CPU: */
  if (TME_SPARC_VERSION(ic) >= 9) {
#ifdef TME_HAVE_INT64_T

    /* save the immediate: */
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_TMP(0)) = immediate;

    /* do all but one of the updates of the rd register directly: */
    ic->tme_sparc_ireg_uint64(reg_rd) += addend_total_m1;

    /* do the final update, including setting any condition codes: */
    (*(ic->_tme_sparc64_execute_opmap[opcode]))
      (ic,
       &ic->tme_sparc_ireg_uint64(reg_rd),
       &ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_TMP(0)),
       &ic->tme_sparc_ireg_uint64(reg_rd));

#endif /* TME_HAVE_INT64_T */
  }

  /* otherwise, this is a v7 or v8 CPU: */
  else {

    /* save the immediate: */
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_TMP(0)) = immediate;

    /* do all but one of the updates of the rd register directly: */
    ic->tme_sparc_ireg_uint32(reg_rd) += addend_total_m1;

    /* do the final update, including setting any condition codes: */
    (*(ic->_tme_sparc32_execute_opmap[opcode]))
      (ic,
       &ic->tme_sparc_ireg_uint32(reg_rd),
       &ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_TMP(0)),
       &ic->tme_sparc_ireg_uint32(reg_rd));
  }
}
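
/* (an illustration, not from the source: for the guest loop

     1:	bne	1b
	subcc	%o3, 1, %o3

   taken 100 times, update_count_m1 is 99; the 99 intermediate
   subtractions are folded into the single "+= addend_total_m1"
   above, and only the last subcc runs through the opmap, so the
   condition codes end up exactly as if the loop had really
   iterated.) */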

/* this returns nonzero if the branch to . instruction and the update
   instruction in its delay slot are a supported timing loop: */
int
tme_sparc_timing_loop_ok(tme_uint32_t insn_branch_dot,
			 tme_uint32_t insn_update)
{
  unsigned int op2;
  tme_uint32_t conds_mask;
  unsigned int cond;

  /* if the update instruction is not an add, addcc, sub, or subcc
     with the i bit set: */
  if ((insn_update
       & ((tme_uint32_t)
	  ((0x3 << 30)		/* format */
	   + (0x2b << 19)	/* op3 (mask addcc to add, sub to add) */
	   + (1 << 13))))	/* i */
      != ((tme_uint32_t)
	  ((0x2 << 30)		/* format */
	   + (0x00 << 19)	/* op3 (add) */
	   + (1 << 13)))) {	/* i */

    /* we only support timing loops with plain add or subtract
       update instructions: */
    return (FALSE);
  }

  /* if the simm13 is not 1 or -1: */
  if (((insn_update
	+ (insn_update & 2))
       & 0x1fff)
      != 1) {

    /* we only support timing loops with plain add or subtract update
       instructions with immediates of 1 or -1: */
    return (FALSE);
  }
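
  /* (the check above works because, of the 13-bit immediates, only
     0x0001 and 0x1fff map to 1 under x + (x & 2) mod 0x2000:
     1 + 0 == 1, and (0x1fff + 2) & 0x1fff == 1.) */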

  /* if rd is %g0: */
#if TME_SPARC_IREG_G0 != 0
#error "TME_SPARC_IREG_G0 changed"
#endif
  if ((insn_update & TME_SPARC_FORMAT3_MASK_RD) == 0) {

    /* we only support timing loops with plain add or subtract update
       instructions with destination registers other than %g0: */
    return (FALSE);
  }

  /* if rs1 and rd are not the same: */
#if TME_SPARC_FORMAT3_MASK_RD < TME_SPARC_FORMAT3_MASK_RS1
#error "TME_SPARC_FORMAT3_MASK_ values changed"
#endif
  if ((((insn_update
	 / (TME_SPARC_FORMAT3_MASK_RD
	    / TME_SPARC_FORMAT3_MASK_RS1))
	^ insn_update)
       & TME_SPARC_FORMAT3_MASK_RS1) != 0) {

    /* we only support timing loops with plain add or subtract update
       instructions where the source register and destination register
       are the same: */
    return (FALSE);
  }
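
  /* (both masks cover single contiguous bit fields, with rd above
     rs1, so dividing by (MASK_RD / MASK_RS1) shifts the rd field
     down to the rs1 position, where the XOR compares the fields.) */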

  /* all branch instructions are format two instructions: */
  assert ((insn_branch_dot & (tme_uint32_t) (0x3 << 30)) == 0);

  /* if this isn't a Bicc or a v9 BPcc instruction: */
  op2 = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0x7 << 22));
  if (__tme_predict_false(op2 != 2 && op2 != 1)) {

    /* we support all timing loops whose branch to . instruction
       doesn't depend on the integer condition codes: */
    return (TRUE);
  }

  /* otherwise, this is a Bicc or a v9 BPcc instruction: */
  else {

    /* if this is not an addcc or subcc instruction: */
    if (__tme_predict_false((insn_update & (0x10 << 19)) == 0)) {

      /* we support timing loops with Bicc and BPcc instructions even
	 when the update instruction doesn't change the integer
	 condition codes: */
      return (TRUE);
    }

    /* if this is a subcc instruction: */
    if (insn_update & (0x04 << 19)) {

      /* we support timing loops that use subcc with all conditions
	 except for vc and vs (the overflow conditions) and never: */
      conds_mask
	= ((1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_N))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_E))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_LE))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_L))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_LEU))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_CS))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_NEG))
	   + (1 << TME_SPARC_COND_E)
	   + (1 << TME_SPARC_COND_LE)
	   + (1 << TME_SPARC_COND_L)
	   + (1 << TME_SPARC_COND_LEU)
	   + (1 << TME_SPARC_COND_CS)
	   + (1 << TME_SPARC_COND_NEG)
	   );
    }

    /* otherwise, this is an addcc instruction: */
    else {

      /* we support timing loops that use addcc with only these
	 conditions: */
      conds_mask
	= ((1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_N))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_E))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_CS))
	   + (1 << (TME_SPARC_COND_NOT + TME_SPARC_COND_NEG))
	   + (1 << TME_SPARC_COND_E)
	   + (1 << TME_SPARC_COND_CS)
	   + (1 << TME_SPARC_COND_NEG)
	   );
    }

    /* if we don't support the condition: */
    cond = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0xf << 25));
    if ((conds_mask & TME_BIT(cond)) == 0) {

      /* we don't support this timing loop: */
      return (FALSE);
    }

    /* otherwise, we support this timing loop: */
    return (TRUE);
  }
}
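
/* (for example - a sketch, not from the source - "bne .; subcc
   %o3, 1, %o3" is accepted above (subcc with condition ne), while
   "bvs .; addcc %o3, 1, %o3" is rejected, since the overflow
   conditions appear in neither conds_mask.) */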

/* this starts a timing loop: */
static void
_tme_sparc_timing_loop_start(struct tme_sparc *ic,
			     tme_uint32_t insn_update)
{
  unsigned int reg_rd;
  tme_sparc_ireg_umax_t value_rd;
  signed int addend;
  tme_uint32_t insn_branch_dot;
  const struct timeval *sleep;
  unsigned int op2;
  unsigned int cond;
  tme_sparc_ireg_umax_t value_sign;
  tme_sparc_ireg_umax_t value_zero;
  tme_sparc_ireg_umax_t value_true_greatest;
  tme_sparc_ireg_umax_t value_test;
  tme_sparc_ireg_umax_t branch_taken_count_max_m1;
  unsigned int loop_cycles_each;
  tme_sparc_ireg_umax_t cycles_scaled_max;
  union tme_value64 cycles_finish;
  tme_sparc_ireg_umax_t usec;
  tme_uint32_t usec32;
  static struct timeval sleep_buffer;

  /* at this point, the timing loop branch to . has been taken, and
     the PCs have been updated, so both PC and PC_next_next point to
     the timing loop update instruction (in insn_update), and PC_next
     points to the timing loop branch to . instruction again.

     a taken conditional branch never annuls, and sparc-execute.c and
     sparc-rc-insns.c handle a "ba,a ." instruction specially, so we
     know that the update instruction must execute at least as many
     times as the timing loop branch to . is taken.

     the timing loop branch to . has just been taken (this is why
     PC_next_next is the same as PC).  this first take was when the
     branch to . was detected in sparc-execute.c, or when
     tme_sparc_timing_loop_assist() determined that the recode
     instructions thunk that called it did so after a taken branch.

     this very first take is implicit in the taken branch count that
     we compute and store in
     ic->tme_sparc_timing_loop_branch_taken_count_max_m1 and/or pass
     to _tme_sparc_timing_loop_update() - i.e., we always compute the
     taken branch count minus one.

     this is good because it is possible for the timing loop update
     instruction to be executed 2^cc_width times.  if initially %o3 is
     zero and %icc.Z is clear, this bne will be taken 2^32 times:

     bne .
     deccc %o3

     NB that in this specific case, where the timing loop branch to
     . does not annul, the timing loop update instruction will
     actually be run a total of (2^32)+1 times: 2^32 times
     corresponding to the 2^32 times that the branch is taken, plus
     one final time when the branch is *not* taken, but the update
     instruction is not annulled.

     this function only counts and performs the updates corresponding
     to the times that the branch is *taken*.
     _tme_sparc_timing_loop_update() does the count minus one updates
     directly in the destination register, followed by a true
     instruction execution for the last (to update any condition
     codes).

     whether or not the branch to . instruction annuls, and any needed
     "one final time" update instruction, will both be handled either
     by sparc-execute.c, or by a combination of the recode
     instructions thunk and tme_sparc_timing_loop_assist(): */

  /* NB: our caller has already saved the current host cycles counter
     in ic->tme_sparc_timing_loop_start: */

  /* get the rd register: */
  reg_rd = TME_FIELD_MASK_EXTRACTU(insn_update, TME_SPARC_FORMAT3_MASK_RD);
  TME_SPARC_REG_INDEX(ic, reg_rd);

  /* if this is a v9 CPU: */
  if (TME_SPARC_VERSION(ic) >= 9) {
#ifdef TME_HAVE_INT64_T

    /* get the rd register value: */
    value_rd = ic->tme_sparc_ireg_uint64(reg_rd);

#else  /* !TME_HAVE_INT64_T */

    /* silence uninitialized variable warnings: */
    value_rd = 0;

#endif /* !TME_HAVE_INT64_T */
  }

  /* otherwise, this is not a v9 CPU: */
  else {

    /* get the rd register value: */
    value_rd = (tme_int32_t) ic->tme_sparc_ireg_uint32(reg_rd);
  }

  /* assume that this is an add or addcc instruction: */
  addend = insn_update & 2;
  addend = 1 - addend;

  /* if this is a sub or subcc instruction: */
  if (insn_update & (0x04 << 19)) {

    /* complement the addend: */
    addend = -addend;
  }
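
  /* (so addend is the net per-iteration change to rd: the 1 or -1
     immediate for add/addcc, negated for sub/subcc.) */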

  /* get the branch to . instruction: */
  insn_branch_dot = ic->_tme_sparc_insn;

  /* save the update instruction: */
  ic->_tme_sparc_insn = insn_update;

  /* save the addend: */
  ic->tme_sparc_timing_loop_addend = addend;

  /* assume that there isn't a maximum number of times that the branch
     to . can be taken (i.e., that the branch to . doesn't depend on
     the value of rd), as if the branch condition were always: */
  cond = TME_SPARC_COND_NOT + TME_SPARC_COND_N;

  /* assume that if the branch does depend on the value of rd, that
     the sign bit in values of rd is the last bit: */
  value_sign = 1;
  value_sign <<= ((sizeof(value_sign) * 8) - 1);

  /* silence uninitialized variable warnings: */
  value_zero = 0;
  value_true_greatest = 0;
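
  /* (value_zero and value_true_greatest, where used below, describe
     the window of rd values for which the branch condition - ignoring
     TME_SPARC_COND_NOT - is true after one more update: rd is in the
     window iff ((rd - value_zero) & ((value_sign * 2) - 1))
     <= value_true_greatest.) */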

  /* get the op2 field of the branch to . instruction: */
  op2 = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0x7 << 22));

  /* if this is a v9 BPr: */
  if (op2 == 3) {

    /* if this BPr tests rd: */
    if (((insn_branch_dot
	  ^ insn_update)
	 & TME_SPARC_FORMAT3_MASK_RS1) == 0) {

      /* get the condition field, and shift the "not" bit from bit two
	 to bit three, to match the other branches: */
      cond = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0x7 << 25));
      cond = (cond + 4) & (TME_SPARC_COND_NOT | 3);

      /* dispatch on the condition: */
      if ((cond % TME_SPARC_COND_NOT) == TME_SPARC_COND_E) {
	value_zero = -addend;
	value_true_greatest = 0;
      }
      else {
	assert ((cond % TME_SPARC_COND_NOT) == TME_SPARC_COND_LE
		|| (cond % TME_SPARC_COND_NOT) == TME_SPARC_COND_L);
	value_zero = value_sign - addend;
#if (TME_SPARC_COND_L & 1) == 0 || (TME_SPARC_COND_LE & 1) != 0
#error "TME_SPARC_COND_ values changed"
#endif
	value_true_greatest = value_sign - (cond & 1);
      }
    }
  }

  /* otherwise, if this is a Bicc or a v9 BPcc: */
  else if (op2 == 2 || op2 == 1) {

    /* if this is an addcc or subcc instruction: */
    if (insn_update & (0x10 << 19)) {

      /* get the condition: */
      cond = TME_FIELD_MASK_EXTRACTU(insn_branch_dot, (0xf << 25));

      /* if this is a Bicc, or a BPcc with the cc1 bit clear, the
	 sign bit in values of rd is bit 31: */
      if (sizeof(value_sign) > sizeof(tme_uint32_t)
	  && ((insn_branch_dot >> 21) & op2 & 1) == 0) {
	value_sign = (((tme_uint32_t) 1) << 31);
      }

      /* if this is a subcc instruction: */
      if (insn_update & (0x04 << 19)) {

	/* dispatch on the condition: */
	switch (cond % TME_SPARC_COND_NOT) {
	default:
	  /* we should have caught this unsupported condition in
	     tme_sparc_timing_loop_ok(): */
	  assert (FALSE);
	  /* FALLTHROUGH */
	case TME_SPARC_COND_N:
	  /* nothing to do */
	  break;
	case TME_SPARC_COND_E:
	  value_zero = -addend;
	  value_true_greatest = 0;
	  break;
	case TME_SPARC_COND_LE:
	  value_zero = value_sign;
	  value_true_greatest = value_sign - addend;
	  break;
	case TME_SPARC_COND_L:
	  value_zero = value_sign;
	  value_true_greatest = (value_sign - 1) - addend;
	  break;
	case TME_SPARC_COND_LEU:
	  value_zero = 0;
	  value_true_greatest = (value_sign * 2) - addend;
	  break;
	case TME_SPARC_COND_CS:
	  value_zero = 0;
	  value_true_greatest = (value_sign * 2) - (addend + 1);
	  break;
	case TME_SPARC_COND_NEG:
	  value_zero = value_sign - addend;
	  value_true_greatest = value_sign - 1;
	  break;
	}
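	/* (e.g. for "subcc %rd, 1, %rd" with condition e: addend is
	   -1, so value_zero is 1 and value_true_greatest is 0 - only
	   an rd value of exactly 1 leaves zero, with Z set, after one
	   more subtraction.) */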
      }

      /* otherwise, this is an addcc instruction: */
      else {

	/* dispatch on the condition: */
	switch (cond % TME_SPARC_COND_NOT) {
	default:
	  /* we should have caught this unsupported condition in
	     tme_sparc_timing_loop_ok(): */
	  assert (FALSE);
	  /* FALLTHROUGH */
	case TME_SPARC_COND_N:
	  /* nothing to do */
	  break;
	case TME_SPARC_COND_E:
	  value_zero = -addend;
	  value_true_greatest = 0;
	  break;
	case TME_SPARC_COND_CS:
	  value_zero = -addend;
	  value_true_greatest = (value_sign * 2) - (addend - 1);
	  break;
	case TME_SPARC_COND_NEG:
	  value_zero = value_sign - addend;
	  value_true_greatest = value_sign - 1;
	  break;
	}
      }
    }
  }

  /* the condition can't be never: */
  assert (cond != TME_SPARC_COND_N);

  /* assume that, if we block, we will block forever: */
  sleep = (const struct timeval *) NULL;

  /* if the condition is always, there is no maximum number of times
     that the branch to . can be taken: */
#if TME_SPARC_COND_N != 0
#error "TME_SPARC_COND_ values changed"
#endif
  ic->tme_sparc_timing_loop_branch_taken_max = (cond % TME_SPARC_COND_NOT);
  if (cond == (TME_SPARC_COND_NOT + TME_SPARC_COND_N)) {

    /* we may never finish: */
    ic->tme_sparc_timing_loop_finish.tme_value64_uint32_lo = (0 - (tme_uint32_t) 1);
    ic->tme_sparc_timing_loop_finish.tme_value64_uint32_hi = (0 - (tme_uint32_t) 1);
  }

  /* otherwise, the condition isn't always, so there is a maximum
     number of times that the branch to . can be taken: */
  else {

    /* it's not possible for all (adjusted-to-zero) values to be true.
       at least all-bits-one must be false: */
    assert (value_true_greatest <= ((value_sign - 1) * 2));

    /* test the initial value of rd: */
    value_test = (value_rd - value_zero) & ((value_sign * 2) - 1);

    /* if the initial value of rd will make the condition (ignoring
       TME_SPARC_COND_NOT) true after the first rd update
       instruction: */
    if (value_test <= value_true_greatest) {

      /* if this condition has TME_SPARC_COND_NOT: */
      if (cond & TME_SPARC_COND_NOT) {

	/* the branch to . will only be taken the first time: */
	branch_taken_count_max_m1 = 1 - 1;
      }

      /* otherwise, if the addend is -1: */
      else if (addend < 0) {

	/* the branch to . will be taken the first time, followed by
	   at most (value_test + 1) more times when the value of rd
	   makes the condition true: */
	branch_taken_count_max_m1 = (1 + (value_test + 1)) - 1;
      }

      /* otherwise, the addend is 1: */
      else {

	/* the branch to . will be taken the first time, followed by
	   at most ((value_true_greatest - value_test) + 1) more times
	   when the value of rd makes the condition true: */
	branch_taken_count_max_m1 = (1 + ((value_true_greatest - value_test) + 1)) - 1;
      }
    }

    /* otherwise, the initial value of rd will make the condition
       (ignoring TME_SPARC_COND_NOT) false after the first update
       instruction: */
    else {

      /* if this condition doesn't have TME_SPARC_COND_NOT: */
      if ((cond & TME_SPARC_COND_NOT) == 0) {

	/* the branch to . will only be taken the first time: */
	branch_taken_count_max_m1 = 1 - 1;
      }

      /* otherwise, if the addend is -1: */
      else if (addend < 0) {

	/* the branch to . will be taken the first time, followed by
	   at most (value_test - value_true_greatest) more times when
	   the value of rd makes the condition false: */
	branch_taken_count_max_m1 = (1 + (value_test - value_true_greatest)) - 1;
      }

      /* otherwise, the addend is 1: */
      else {

	/* the branch to . will be taken the first time, followed by
	   at most (~value_test + 1) more times when the value of rd
	   makes the condition false: */
	branch_taken_count_max_m1 = ((1 + (~value_test + 1)) - 1) & ((value_sign * 2) - 1);
      }
    }
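
    /* (checking this against the "bne .; deccc %o3" example in the
       comment above: with %o3 initially zero, cond is NOT+E, addend
       is -1, value_zero is 1 and value_true_greatest is 0, so
       value_test is all-bits-one and the condition-false,
       negative-addend case applies, giving branch_taken_count_max_m1
       == 2^cc_width - 1 - i.e., the branch is taken 2^cc_width
       times, as promised.) */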

    /* set the maximum number of times the branch to . can be taken: */
    ic->tme_sparc_timing_loop_branch_taken_count_max_m1 = branch_taken_count_max_m1;

    /* if each loop iteration takes more than one cycle: */
    loop_cycles_each = ic->tme_sparc_timing_loop_cycles_each;
    if (__tme_predict_false(loop_cycles_each != 1)) {

      /* get the maximum number of cycles to loop: */
      /* NB: we try to deal with overflow: */
      if (__tme_predict_false(loop_cycles_each != 2)) {
	cycles_scaled_max
	  = (branch_taken_count_max_m1
	     * loop_cycles_each);
      }
      else {
	cycles_scaled_max = branch_taken_count_max_m1 * 2;
      }
      cycles_scaled_max += loop_cycles_each;
      if (__tme_predict_false(cycles_scaled_max < ic->tme_sparc_timing_loop_branch_taken_count_max_m1)) {
	cycles_scaled_max = 0 - (tme_sparc_ireg_umax_t) 1;
      }
    }

    /* otherwise, each loop iteration takes one cycle: */
    else {

      /* get the maximum number of cycles to loop: */
      /* NB: we try to deal with overflow: */
      cycles_scaled_max = branch_taken_count_max_m1 + 1;
      cycles_scaled_max -= (cycles_scaled_max == 0);
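      /* (the branchless subtraction saturates: if the increment
	 wrapped the all-bits-one maximum around to zero, this backs
	 it off to the maximum again.) */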
    }

    /* we can't be looping for zero cycles: */
    assert (cycles_scaled_max > 0);

    /* get the latest host cycle counter when the timing loop must
       finish, if it doesn't finish sooner: */
#ifdef TME_HAVE_INT64_T
    cycles_finish.tme_value64_uint = cycles_scaled_max;
#else  /* !TME_HAVE_INT64_T */
    cycles_finish.tme_value64_uint32_lo = cycles_scaled_max;
    cycles_finish.tme_value64_uint32_hi = 0;
#endif /* !TME_HAVE_INT64_T */
    cycles_finish
      = tme_misc_cycles_scaled(&ic->tme_sparc_cycles_unscaling,
			       &cycles_finish);
    (void) tme_value64_add(&cycles_finish, &ic->tme_sparc_timing_loop_start);
    ic->tme_sparc_timing_loop_finish = cycles_finish;

    /* if the number of cycles to spin is small enough that we should
       truly spin, instead of yield: */
    if (cycles_scaled_max
	<= (ic->tme_sparc_cycles_scaled_per_usec
	    * TME_SPARC_TIMING_SPIN_USEC_MAX)) {
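
      /* (i.e., with TME_SPARC_TIMING_SPIN_USEC_MAX at 4096, a wait
	 of about four milliseconds or less burns host cycles rather
	 than risking the latency of giving up the processor.) */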

      /* spin: */
      tme_misc_cycles_spin_until(&ic->tme_sparc_timing_loop_finish);

      /* do the timing loop update: */
      _tme_sparc_timing_loop_update(ic,
				    ic->tme_sparc_timing_loop_branch_taken_count_max_m1);

      /* unwind back to instruction execution: */
      return;
    }

    /* if we will block until an external event: */
    if (TME_SPARC_TIMING_YIELD_BLOCK) {

      /* if the number of cycles to loop doesn't fit in 32 bits: */
      if (__tme_predict_false(cycles_scaled_max
			      & ~ (tme_sparc_ireg_umax_t) (tme_uint32_t) (0 - (tme_uint32_t) 1))) {

	/* convert cycles into microseconds: */
	usec = cycles_scaled_max / ic->tme_sparc_cycles_scaled_per_usec;

	/* set the sleep time: */
	sleep_buffer.tv_sec = (usec / 1000000);
	sleep_buffer.tv_usec = (usec % 1000000);
      }

      /* otherwise, the number of cycles to loop fits in 32 bits: */
      else {

	/* convert cycles into microseconds: */
	usec32 = ((tme_uint32_t) cycles_scaled_max) / ic->tme_sparc_cycles_scaled_per_usec;

	/* assume that we will sleep for less than one second: */
	sleep_buffer.tv_sec = 0;

	/* if the sleep time is one second or more: */
	if (__tme_predict_false(usec32 >= 1000000)) {

	  /* set the sleep time seconds: */
	  sleep_buffer.tv_sec = (usec32 / 1000000);

	  /* get the microseconds: */
	  usec32 = (usec32 % 1000000);
	}

	/* set the sleep time microseconds: */
	sleep_buffer.tv_usec = usec32;
      }

      /* we won't block forever: */
      sleep = &sleep_buffer;
    }
  }

  /* unbusy the instruction TLB entry: */
  assert (ic->_tme_sparc_itlb_current_token != NULL);
  tme_token_unbusy(ic->_tme_sparc_itlb_current_token);

  /* if threads are cooperative: */
  if (TME_THREADS_COOPERATIVE) {

    /* forget the instruction TLB entry: */
    ic->_tme_sparc_itlb_current_token = NULL;

    /* we will redispatch into timing mode: */
    ic->_tme_sparc_mode = TME_SPARC_MODE_TIMING_LOOP;
  }

  /* if we're blocking: */
  if (TME_SPARC_TIMING_YIELD_BLOCK) {

    /* lock the external mutex: */
    tme_mutex_lock(&ic->tme_sparc_external_mutex);

    /* check one last time for any external signal: */
    if (tme_memory_atomic_read_flag(&ic->tme_sparc_external_flag)) {
      tme_memory_atomic_write_flag(&ic->tme_sparc_external_flag, FALSE);
      (*ic->_tme_sparc_external_check)(ic, TME_SPARC_EXTERNAL_CHECK_MUTEX_LOCKED);
    }

    /* block on the external signal condition: */
    if (sleep != NULL) {
      tme_cond_sleep_yield(&ic->tme_sparc_external_cond,
			   &ic->tme_sparc_external_mutex,
			   sleep);
    }
    else {
      tme_cond_wait_yield(&ic->tme_sparc_external_cond,
			  &ic->tme_sparc_external_mutex);
    }

    /* unlock the external mutex: */
    tme_mutex_unlock(&ic->tme_sparc_external_mutex);
  }

  /* otherwise, we're not blocking: */
  else {

    /* do the simple yield: */
    tme_thread_yield();
  }

  /* finish the timing loop: */
  tme_sparc_timing_loop_finish(ic);

  /* relock the instruction TLB entry: */
  tme_sparc_callout_relock(ic);

  /* unwind back to instruction execution: */
  return;
}

/* this possibly starts a timing loop from the instruction
   executor: */
void
tme_sparc_timing_loop_start(struct tme_sparc *ic)
{
  tme_uint32_t insn_update;
  tme_uint32_t insn_branch_dot;
  tme_sparc_ireg_umax_t pc;

  /* save the current host cycles counter: */
  ic->tme_sparc_timing_loop_start = tme_misc_cycles();

  /* get the update instruction from the branch delay slot: */
  insn_update = tme_sparc_fetch_nearby(ic, 1);

  /* get the branch to . instruction: */
  insn_branch_dot = ic->_tme_sparc_insn;

  /* if we don't support this timing loop: */
  if (!tme_sparc_timing_loop_ok(insn_branch_dot,
				insn_update)) {
    return;
  }

  /* at this point, PC and PC_next_next both point to the branch to .,
     and PC_next points to the update instruction.  we have to advance
     the PCs, because _tme_sparc_timing_loop_update() expects PC and
     PC_next_next to point to the update instruction, and PC_next to
     point to the branch to .: */

  /* if this is a v9 CPU: */
  if (TME_SPARC_VERSION(ic) >= 9) {
#ifdef TME_HAVE_INT64_T

    /* advance the PCs: */
    pc = ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT);
    assert (ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC)
	    == ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT_NEXT));
    assert (((ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC)
	      + sizeof(tme_uint32_t))
	     & ic->tme_sparc_address_mask)
	    == pc);
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT)
      = ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT_NEXT);
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC) = pc;
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT_NEXT) = pc;

#endif /* TME_HAVE_INT64_T */
  }

  /* otherwise, this is a v7 or v8 CPU: */
  else {

    /* advance the PCs: */
    pc = ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT);
    assert (ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC)
	    == ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT_NEXT));
    assert ((ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC)
	      + sizeof(tme_uint32_t))
	    == pc);
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT)
      = ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT_NEXT);
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC) = pc;
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT_NEXT) = pc;
  }

  /* start the timing loop: */
  _tme_sparc_timing_loop_start(ic,
			       insn_update);
}

#if TME_HAVE_RECODE

/* the recode assist function for timing loops: */
tme_recode_uguest_t
tme_sparc_timing_loop_assist(struct tme_ic *_ic,
			     tme_recode_uguest_t insn_branch_dot,
			     tme_recode_uguest_t junk)
{
  struct tme_sparc *ic;
  tme_sparc_ireg_umax_t pc_next_next;
  int branch_dot_taken;
  tme_uint32_t insn_update;

  /* recover our ic: */
  ic = (struct tme_sparc *) _ic;

  /* save the branch to . instruction in the normal instruction
     position: */
  /* NB: we do this even though PC currently points to the timing loop
     update instruction: */
  ic->_tme_sparc_insn = insn_branch_dot;

  /* save the current host cycles counter: */
  ic->tme_sparc_timing_loop_start = tme_misc_cycles();

  /* NB: unlike tme_sparc_timing_loop_start(), this function may be
     called after the branch to . has *not* been taken.  this happens
     when the branch to . is conditional and does not annul - this is
     the "one final time" update instruction discussed in
     _tme_sparc_timing_loop_start().

     at this point, PC points to the update instruction, PC_next
     points to the branch to . (if the branch to . was taken) or to
     the instruction following the update instruction (if the branch
     to . was not taken and does not annul): */

  /* if this is a v9 CPU: */
  if (TME_SPARC_VERSION(ic) >= 9) {
#ifdef TME_HAVE_INT64_T

    /* set PC_next_next from PC_next: */
    pc_next_next
      = ((ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT)
	  + sizeof(tme_uint32_t))
	 & ic->tme_sparc_address_mask);
    ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC_NEXT_NEXT) = pc_next_next;

    /* see if the timing loop branch to . instruction was taken: */
    branch_dot_taken = (ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_PC) == pc_next_next);

    /* get the timing loop update instruction: */
    insn_update = ic->tme_sparc_ireg_uint64(TME_SPARC_IREG_INSN);

#else  /* !TME_HAVE_INT64_T */

    /* silence uninitialized variable warnings: */
    branch_dot_taken = 0;
    insn_update = 0;

#endif /* !TME_HAVE_INT64_T */
  }

  /* otherwise, this is not a v9 CPU: */
  else {

    /* set PC_next_next from PC_next: */
    pc_next_next
      = (ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT)
	 + sizeof(tme_uint32_t));
    ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC_NEXT_NEXT) = pc_next_next;

    /* see if the timing loop branch to . instruction was taken: */
    branch_dot_taken = (ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_PC) == (tme_uint32_t) pc_next_next);

    /* get the timing loop update instruction: */
    insn_update = ic->tme_sparc_ireg_uint32(TME_SPARC_IREG_INSN);
  }

  /* if the timing loop branch to . instruction was taken: */
  if (branch_dot_taken) {

    /* end any recode verifying: */
    tme_sparc_recode_verify_end_preinstruction(ic);

    /* start the timing loop: */
    _tme_sparc_timing_loop_start(ic,
				 insn_update);
  }

  /* otherwise, the timing loop branch to . instruction was not
     taken, and it does not annul: */
  else {

    /* do the one final update: */
    ic->_tme_sparc_insn = insn_update;
    _tme_sparc_timing_loop_update(ic, 0);
  }

  /* unwind back to instruction execution: */
  return (0);
}

#endif /* TME_HAVE_RECODE */

/* this finishes a timing loop: */
void
tme_sparc_timing_loop_finish(struct tme_sparc *ic)
{
  union tme_value64 cycles_finish;
  union tme_value64 cycles_scaled_u;
  tme_sparc_ireg_umax_t cycles_scaled;
  unsigned int loop_cycles_each;
  tme_sparc_ireg_umax_t branch_taken_count_m1;

  /* loop forever: */
  for (;;) {

    /* get the current host cycle counter: */
    cycles_finish = tme_misc_cycles();

    /* if the timing loop has finished: */
    if (tme_value64_cmp(&cycles_finish, >=, &ic->tme_sparc_timing_loop_finish)) {
      break;
    }

    /* if an external event has happened: */
    if (tme_memory_atomic_read_flag(&ic->tme_sparc_external_flag)) {
      break;
    }

    /* if we block, we were supposed to block until an external event
       happened: */
    assert (!TME_SPARC_TIMING_YIELD_BLOCK);

    /* yield: */
    tme_thread_yield();
  }

  /* get the number of cycles elapsed: */
  /* NB: we try to deal with overflow: */
  (void) tme_value64_sub(&cycles_finish, &ic->tme_sparc_timing_loop_start);
  cycles_scaled_u
    = tme_misc_cycles_scaled(&ic->tme_sparc_cycles_scaling,
			     &cycles_finish);
#ifdef TME_HAVE_INT64_T
  cycles_scaled = cycles_scaled_u.tme_value64_uint;
#else  /* !TME_HAVE_INT64_T */
  cycles_scaled
    = (cycles_scaled_u.tme_value64_uint32_hi
       ? (tme_uint32_t) (0 - (tme_uint32_t) 1)
       : cycles_scaled_u.tme_value64_uint32_lo);
#endif /* !TME_HAVE_INT64_T */

  /* NB: it's unusual, but actually okay if no cycles have elapsed.
     this just means that the branch to . will only be taken that
     first time.  since we need the count of times the branch to .
     was taken, minus one, dividing the elapsed cycles by the number
     of cycles per loop gets exactly what we need: */

  /* get the count of times the branch to . was taken, minus one: */
  loop_cycles_each = ic->tme_sparc_timing_loop_cycles_each;
  if (__tme_predict_false(loop_cycles_each != 1)) {
    if (__tme_predict_false(loop_cycles_each != 2)) {
      branch_taken_count_m1 = cycles_scaled / loop_cycles_each;
    }
    else {
      branch_taken_count_m1 = cycles_scaled / 2;
    }
  }
  else {
    branch_taken_count_m1 = cycles_scaled;
  }
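
  /* (e.g. with two cycles per iteration and 200 scaled cycles
     elapsed, branch_taken_count_m1 is 100, i.e. the branch is
     counted as taken 101 times, subject to the cap below.) */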

  /* if there is a maximum count of times the branch to . could be taken: */
  if (ic->tme_sparc_timing_loop_branch_taken_max) {

    /* make sure that the branch to . isn't taken any more than the
       maximum: */
    if (branch_taken_count_m1 > ic->tme_sparc_timing_loop_branch_taken_count_max_m1) {
      branch_taken_count_m1 = ic->tme_sparc_timing_loop_branch_taken_count_max_m1;
    }
  }

  /* do the timing loop update: */
  _tme_sparc_timing_loop_update(ic,
				branch_taken_count_m1);

  /* zero the instruction burst: */
  ic->_tme_sparc_instruction_burst_remaining = 0;
  ic->_tme_sparc_instruction_burst_other = TRUE;

  /* if threads are cooperative: */
  if (TME_THREADS_COOPERATIVE) {

    /* we will chain into execution mode: */
    ic->_tme_sparc_mode = TME_SPARC_MODE_EXECUTION;

    /* save a redispatch and resume execution directly: */
    (*ic->_tme_sparc_execute)(ic);
    abort();
  }

  /* otherwise, threads are preemptive: */

  /* unwind back to instruction execution: */
  return;
}