1/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1
2 *  with bits [src:src+size) in r2
3 *
4 * bra(n)z annul: no delay slot
5 */
6
7/* Bitfield version of NVC0_3D_VERTEX_ARRAY_PER_INSTANCE[].
8 * Args: size, bitfield
9 */
10.section #mme9097_per_instance_bf
   /* Sends one value per loop iteration to consecutive methods starting at
    * 0x620. $r1 (the macro argument, "size" in the header) counts the
    * iterations left, $r3 holds the bitfield and $r2 the current bit index.
    */
11   parm $r3 /* $r3 = bitfield */
12   mov $r2 0x0 /* start with bit 0 */
13   maddr 0x1620 /* method 0x620; the 0x1000 part is presumably the auto-increment of 1, as in the 0x1d00 / 0xd00 SCRATCH[0] pair used elsewhere in this file */
14loop:
15   mov $r1 (add $r1 -1) /* one fewer element left */
16   send (extrshl $r3 $r2 0x1 0x0) /* presumably the single bit of $r3 at position $r2 - TODO confirm extrshl operand order */
17   exit branz $r1 #loop /* repeat while elements remain. NOTE(review): exit shares this instruction with the branch, presumably ending the macro only once the branch is not taken - confirm */
18   mov $r2 (add $r2 0x1) /* delay slot, executed on every pass: next bit index */
19
20/* The comments above the macros describe what they *should* be doing,
21 * but we use less functionality for now.
22 */
23
24/*
25 * for (i = 0; i < 8; ++i)
26 *    [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg);
27 *
28 * [3428] = arg;
29 *
30 * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0)
31 *    [0d9c] = 0;
32 * else
33 *    [0d9c] = [342c];
34 */
35.section #mme9097_blend_enables
   /* Implemented part of the description above: eight sends, one per bit
    * 0..7 of the argument in $r1, to consecutive methods starting at 0x4d8.
    * Nothing else from the description is done here.
    */
36   maddr 0x14d8 /* method 0x4d8, advancing by one method per send */
37   send (extrinsrt 0x0 $r1 0x0 0x1 0x0) /* bit 0 of arg, placed in bit 0 */
38   send (extrinsrt 0x0 $r1 0x1 0x1 0x0) /* bit 1 */
39   send (extrinsrt 0x0 $r1 0x2 0x1 0x0) /* bit 2 */
40   send (extrinsrt 0x0 $r1 0x3 0x1 0x0) /* bit 3 */
41   send (extrinsrt 0x0 $r1 0x4 0x1 0x0) /* bit 4 */
42   send (extrinsrt 0x0 $r1 0x5 0x1 0x0) /* bit 5 */
43   exit send (extrinsrt 0x0 $r1 0x6 0x1 0x0) /* bit 6; exit is tagged one instruction early, the macro relies on the next send still being executed */
44   send (extrinsrt 0x0 $r1 0x7 0x1 0x0) /* bit 7, last instruction of the macro */
45
46/*
47 * uint64 limit = (parm(0) << 32) | parm(1);
48 * uint64 start = (parm(2) << 32);
49 *
50 * if (limit) {
51 *    start |= parm(3);
52 *    --limit;
53 * } else {
54 *    start |= 1;
55 * }
56 *
57 * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff;
58 * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff;
59 * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff;
60 * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff;
61 */
62.section #mme9097_vertex_array_select
   /* Implemented part of the description above: the four parameters are
    * forwarded unchanged (no limit test, no decrement, no masking) to the
    * start and limit method pairs selected by the low 4 bits of the argument.
    */
63   parm $r2 /* parm(0): limit, high word */
64   parm $r3 /* parm(1): limit, low word */
65   parm $r4 /* parm(2): start, high word */
66   parm $r5 /* parm(3): start, low word */
67   mov $r6 (extrinsrt 0x0 $r1 0x0 0x4 0x2) /* (arg & 0xf) << 2: four methods (16 bytes) per index */
68   mov $r7 (extrinsrt 0x0 $r1 0x0 0x4 0x1) /* (arg & 0xf) << 1: two methods (8 bytes) per index */
69   maddr $r6 (add $r6 0x1701) /* method 0x701 + 4 * index (byte address 0x1c04 + 16 * index), auto-increment 1 */
70   send $r4 /* start high */
71   send $r5 /* start low */
72   maddr $r7 (add $r7 0x17c0) /* method 0x7c0 + 2 * index (byte address 0x1f00 + 8 * index), auto-increment 1 */
73   exit send $r2 /* limit high; the macro ends after the following instruction */
74   send $r3 /* limit low */
75
76/*
77 * [GL_POLYGON_MODE_FRONT] = arg;
78 *
79 * if (BIT(31 of [0x3410]))
80 *    [1a24] = 0x7353;
81 *
82 * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
83 *    [02ec] = 0;
84 * else
85 * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
86 *    [02ec] = BYTE(1 of [0x3410]) << 4;
87 * else
88 *    [02ec] = BYTE(0 of [0x3410]) << 4;
89 */
90.section #mme9097_poly_mode_front
   /* Implemented part of the description above: stores the argument at
    * method 0x36b and sends 0x0, 0x60 or 0x200 to method 0xbb (byte address
    * 0x2ec). All comparisons are reduced to a test of bit 0.
    */
91   read $r2 0x36c /* the other polygon mode register (this macro writes 0x36b) */
92   read $r3 0x830 /* register written by the tep_select macro in this file */
93   mov $r7 (or $r1 $r2) /* new mode (arg) combined with the other mode */
94   read $r4 0x840 /* register written by the gp_select macro in this file */
95   mov $r2 0x1 /* bit-0 mask, used for both tests below */
96   mov $r6 0x60 /* default value to send to 0xbb */
97   mov $r7 (and $r7 $r2)
98   braz $r7 #locn_0a_pmf /* bit 0 clear in both modes: keep 0x60 */
99   maddr 0x36b /* delay slot, executed either way: select the register this macro updates */
100   mov $r6 0x200 /* bit 0 set in at least one mode */
101locn_0a_pmf:
102   mov $r7 (or $r3 $r4)
103   mov $r7 (and $r7 $r2)
104   braz $r7 #locn_0f_pmf /* bit 0 clear in both 0x830 and 0x840: keep $r6 */
105   send $r1 /* delay slot, executed either way: store the argument at 0x36b */
106   mov $r6 0x0 /* bit 0 set in 0x830 or 0x840: send 0 instead */
107locn_0f_pmf:
108   exit maddr 0xbb /* byte address 0x2ec; the macro ends after the following send */
109   send $r6
110
111/*
112 * [GL_POLYGON_MODE_BACK] = arg;
113 *
114 * if (BIT(31 of [0x3410]))
115 *    [1a24] = 0x7353;
116 *
117 * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
118 *    [02ec] = 0;
119 * else
120 * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
121 *    [02ec] = BYTE(1 of [0x3410]) << 4;
122 * else
123 *    [02ec] = BYTE(0 of [0x3410]) << 4;
124 */
125/* NOTE: 0x3410 = 0x80002006 by default,
126 *  POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
127 *  SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
128 */
129.section #mme9097_poly_mode_back
   /* Same sequence as the front-face macro with the two polygon mode
    * registers swapped: stores the argument at method 0x36c and sends 0x0,
    * 0x60 or 0x200 to method 0xbb (byte address 0x2ec).
    */
130   read $r2 0x36b /* the other polygon mode register (this macro writes 0x36c) */
131   read $r3 0x830 /* register written by the tep_select macro in this file */
132   mov $r7 (or $r1 $r2) /* new mode (arg) combined with the other mode */
133   read $r4 0x840 /* register written by the gp_select macro in this file */
134   mov $r2 0x1 /* bit-0 mask, used for both tests below */
135   mov $r6 0x60 /* default: BYTE(0) of 0x80002006, shifted left by 4 */
136   mov $r7 (and $r7 $r2)
137   braz $r7 #locn_0a_pmb /* bit 0 clear in both modes: keep 0x60 */
138   maddr 0x36c /* delay slot, executed either way: select the register this macro updates */
139   mov $r6 0x200 /* BYTE(1) of 0x80002006, shifted left by 4 */
140locn_0a_pmb:
141   mov $r7 (or $r3 $r4)
142   mov $r7 (and $r7 $r2)
143   braz $r7 #locn_0f_pmb /* bit 0 clear in both 0x830 and 0x840: keep $r6 */
144   send $r1 /* delay slot, executed either way: store the argument at 0x36c */
145   mov $r6 0x0 /* bit 0 set in 0x830 or 0x840: send 0 instead */
146locn_0f_pmb:
147   exit maddr 0xbb /* byte address 0x2ec; the macro ends after the following send */
148   send $r6
149
150/*
151 * [NVC0_3D_SP_SELECT(4)] = arg
152 *
153 * if BIT(31 of [0x3410]) == 0
154 *    [1a24] = 0x7353;
155 *
156 * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
157 *    [02ec] = 0
158 * else
159 * if (any POLYGON MODE == LINE)
160 *    [02ec] = BYTE(1 of [3410]) << 4;
161 * else
162 *    [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
163 */
164.section #mme9097_gp_select
   /* Implemented part of the description above: stores the argument at
    * method 0x840 and sends 0x0, 0x60 or 0x200 to method 0xbb (byte address
    * 0x2ec). All comparisons are reduced to a test of bit 0.
    */
165   read $r2 0x36b /* first polygon mode register */
166   read $r3 0x36c /* second polygon mode register */
167   mov $r7 (or $r2 $r3) /* both polygon modes combined */
168   read $r4 0x830 /* register written by the tep_select macro in this file */
169   mov $r2 0x1 /* bit-0 mask, used for both tests below */
170   mov $r6 0x60 /* default value to send to 0xbb */
171   mov $r7 (and $r7 $r2)
172   braz $r7 #locn_0a_gs /* bit 0 clear in both modes: keep 0x60 */
173   maddr 0x840 /* delay slot, executed either way: select the register this macro updates */
174   mov $r6 0x200 /* bit 0 set in at least one mode */
175locn_0a_gs:
176   mov $r7 (or $r1 $r4) /* new value (arg) combined with 0x830 */
177   mov $r7 (and $r7 $r2)
178   braz $r7 #locn_0f_gs /* bit 0 clear in both: keep $r6 */
179   send $r1 /* delay slot, executed either way: store the argument at 0x840 */
180   mov $r6 0x0 /* bit 0 set in arg or 0x830: send 0 instead */
181locn_0f_gs:
182   exit maddr 0xbb /* byte address 0x2ec; the macro ends after the following send */
183   send $r6
184
185/*
186 * [NVC0_3D_SP_SELECT(3)] = arg
187 *
188 * if BIT(31 of [0x3410]) == 0
189 *    [1a24] = 0x7353;
190 *
191 * if (arg == 0x31) {
192 *    if (BIT(2 of [0x3430])) {
193 *       int i = 15; do { --i; } while(i);
194 *       [0x1a2c] = 0;
195 *    }
196 * }
197 *
198 * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31)
199 *    [02ec] = 0
200 * else
201 * if ([any POLYGON_MODE] == GL_LINE)
202 *    [02ec] = BYTE(1 of [3410]) << 4;
203 * else
204 *    [02ec] = BYTE(0 of [3410]) << 4;
205 */
206.section #mme9097_tep_select
   /* Implemented part of the description above: stores the argument at
    * method 0x830 and sends 0x0, 0x60 or 0x200 to method 0xbb (byte address
    * 0x2ec). The 0x3430 / 0x1a2c handling from the description is not
    * present. All comparisons are reduced to a test of bit 0.
    */
207   read $r2 0x36b /* first polygon mode register */
208   read $r3 0x36c /* second polygon mode register */
209   mov $r7 (or $r2 $r3) /* both polygon modes combined */
210   read $r4 0x840 /* register written by the gp_select macro in this file */
211   mov $r2 0x1 /* bit-0 mask, used for both tests below */
212   mov $r6 0x60 /* default value to send to 0xbb */
213   mov $r7 (and $r7 $r2)
214   braz $r7 #locn_0a_ts /* bit 0 clear in both modes: keep 0x60 */
215   maddr 0x830 /* delay slot, executed either way: select the register this macro updates */
216   mov $r6 0x200 /* bit 0 set in at least one mode */
217locn_0a_ts:
218   mov $r7 (or $r1 $r4) /* new value (arg) combined with 0x840 */
219   mov $r7 (and $r7 $r2)
220   braz $r7 #locn_0f_ts /* bit 0 clear in both: keep $r6 */
221   send $r1 /* delay slot, executed either way: store the argument at 0x830 */
222   mov $r6 0x0 /* bit 0 set in arg or 0x840: send 0 instead */
223locn_0f_ts:
224   exit maddr 0xbb /* byte address 0x2ec; the macro ends after the following send */
225   send $r6
226
227/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT
228 *
229 * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
230 * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
231 *
232 * arg     = mode
233 * parm[0] = start_drawid
234 * parm[1] = numparams
235 * parm[2 + 5n + 0] = count
236 * parm[2 + 5n + 1] = instance_count
237 * parm[2 + 5n + 2] = start
238 * parm[2 + 5n + 3] = index_bias
239 * parm[2 + 5n + 4] = start_instance
240 *
241 * SCRATCH[0] = saved VB_ELEMENT_BASE
242 * SCRATCH[1] = saved VB_INSTANCE_BASE
243 */
244.section #mme9097_draw_elts_indirect
   /* Register roles in this macro: $r1 = mode word, $r6 = draw id,
    * $r7 = parameter sets left, $r3 = count, $r2 = instances left,
    * $r4 and $r5 = per-draw temporaries.
    */
245   read $r6 0x50d /* VB_ELEMENT_BASE */
246   read $r7 0x50e /* VB_INSTANCE_BASE */
247   maddr 0x1d00 /* SCRATCH[0], auto-increment 1 */
248   send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
249   send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
250   parm $r6 /* start_drawid */
251   parm $r7 /* numparams */
252dei_draw_again:
253   parm $r3 /* count */
254   parm $r2 /* instance_count */
255   parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
256   parm $r4 send $r4 /* index_bias, send start */
257   maddr 0x18e3 /* CB_POS */
258   send 0x1a0 /* 256 + 160 */
259   braz $r2 #dei_end /* instance_count == 0: nothing to draw for this set */
260   parm $r5 send $r4 /* start_instance, send index_bias */ /* delay slot: runs even when the draw is skipped, so all five parameters of the set are always consumed */
261   send $r5 /* send start_instance */
262   send $r6 /* draw id */
263   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
264   send $r4 /* VB_ELEMENT_BASE = index_bias */
265   send $r5 /* VB_INSTANCE_BASE = start_instance */
266   maddr 0x446 /* presumably VERTEX_ID_BASE, per the header note - confirm */
267   send $r4
268   mov $r4 0x1 /* constant 1: instance decrement and source of the INSTANCE_NEXT bit */
269   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
270dei_again:
271   maddr 0x586 /* VERTEX_BEGIN_GL */
272   send $r1 /* mode */
273   maddr 0x5f8 /* INDEX_BATCH_COUNT */
274   send $r3 /* count */
275   mov $r2 (sub $r2 $r4) /* one instance done */
276   maddrsend 0x585 /* VERTEX_END_GL */
277   branz $r2 #dei_again /* more instances of this draw */
278   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */ /* delay slot: applies to every instance after the first */
279dei_end:
280   mov $r7 (add $r7 -1) /* one parameter set consumed */
281   branz $r7 #dei_draw_again
282   mov $r6 (add $r6 1) /* delay slot: next draw id */
283   read $r6 0xd00 /* saved VB_ELEMENT_BASE from SCRATCH[0] */
284   read $r7 0xd01 /* saved VB_INSTANCE_BASE from SCRATCH[1] */
285   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
286   send $r6
287   send $r7
288   exit maddr 0x446 /* the macro ends after the following send */
289   send $r6 /* 0x446 gets the restored VB_ELEMENT_BASE value as well */
290
291/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT:
292 *
293 * NOTE: Saves and restores VB_INSTANCE_BASE.
294 *
295 * arg     = mode
296 * parm[0] = start_drawid
297 * parm[1] = numparams
298 * parm[2 + 4n + 0] = count
299 * parm[2 + 4n + 1] = instance_count
300 * parm[2 + 4n + 2] = start
301 * parm[2 + 4n + 3] = start_instance
302 */
303.section #mme9097_draw_arrays_indirect
   /* Register roles in this macro: $r1 = mode word, $r5 = saved
    * VB_INSTANCE_BASE, $r6 = draw id, $r7 = parameter sets left,
    * $r2 = count, $r3 = instances left, $r4 = per-draw temporary.
    */
304   read $r5 0x50e /* VB_INSTANCE_BASE */
305   parm $r6 /* start_drawid */
306   parm $r7 /* numparams */
307dai_draw_again:
308   parm $r2 /* count */
309   parm $r3 /* instance_count */
310   parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
311   braz $r3 #dai_end /* instance_count == 0: nothing to draw for this set */
312   parm $r4 send $r4 /* start_instance */ /* delay slot: sends start and consumes the fourth parameter even when the draw is skipped */
313   maddr 0x18e3 /* CB_POS */
314   send 0x1a0 /* 256 + 160 */
315   send 0x0 /* send 0 as base_vertex */
316   send $r4 /* send start_instance */
317   send $r6 /* draw id */
318   maddr 0x50e /* VB_INSTANCE_BASE */
319   send $r4
320   mov $r4 0x1 /* constant 1: instance decrement and source of the INSTANCE_NEXT bit */
321   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
322dai_again:
323   maddr 0x586 /* VERTEX_BEGIN_GL */
324   send $r1 /* mode */
325   maddr 0x35e /* VERTEX_BUFFER_COUNT */
326   send $r2
327   mov $r3 (sub $r3 $r4) /* one instance done */
328   maddrsend 0x585 /* VERTEX_END_GL */
329   branz $r3 #dai_again /* more instances of this draw */
330   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */ /* delay slot: applies to every instance after the first */
331dai_end:
332   mov $r7 (add $r7 -1) /* one parameter set consumed */
333   branz $r7 #dai_draw_again
334   mov $r6 (add $r6 1) /* delay slot: next draw id */
335   exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
336   send $r5 /* value read at macro entry; last instruction of the macro */
337
338/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT
339 *
340 * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
341 * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
342 *
343 * arg     = mode
344 * parm[0] = start_drawid
345 * parm[1] = numparams
346 * parm[2] = totaldraws
347 * parm[3 + 5n + 0] = count
348 * parm[3 + 5n + 1] = instance_count
349 * parm[3 + 5n + 2] = start
350 * parm[3 + 5n + 3] = index_bias
351 * parm[3 + 5n + 4] = start_instance
352 *
353 * SCRATCH[0] = saved VB_ELEMENT_BASE
354 * SCRATCH[1] = saved VB_INSTANCE_BASE
355 * SCRATCH[2] = draws left
356 */
357.section #mme9097_draw_elts_indirect_count
   /* Like the draw_elts_indirect macro, with an extra bound: at most
    * (totaldraws - start_drawid) parameter sets are drawn. Sets beyond that
    * bound are read and discarded in deic_runout so that all numparams sets
    * are consumed. The remaining-draw count is kept in SCRATCH[2].
    */
358   read $r6 0x50d /* VB_ELEMENT_BASE */
359   read $r7 0x50e /* VB_INSTANCE_BASE */
360   maddr 0x1d00 /* SCRATCH[0], auto-increment 1 */
361   send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
362   send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
363   parm $r6 /* start_drawid */
364   parm $r7 /* numparams */
365   parm $r5 /* totaldraws */
366   mov $r5 (sub $r5 $r6) /* draws left */
367   braz $r5 #deic_runout /* no draws left: discard every parameter set */
368   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
369   branz $r3 #deic_runout /* negative difference: discard every parameter set */
370   send $r5 /* delay slot, third send after maddr 0x1d00: SCRATCH[2] = draws left */
371deic_draw_again:
372   parm $r3 /* count */
373   parm $r2 /* instance_count */
374   parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
375   parm $r4 send $r4 /* index_bias, send start */
376   maddr 0x18e3 /* CB_POS */
377   send 0x1a0 /* 256 + 160 */
378   braz $r2 #deic_end /* instance_count == 0: nothing to draw for this set */
379   parm $r5 send $r4 /* start_instance, send index_bias */ /* delay slot: runs even when the draw is skipped, so all five parameters are consumed */
380   send $r5 /* send start_instance */
381   send $r6 /* draw id */
382   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
383   send $r4 /* VB_ELEMENT_BASE = index_bias */
384   send $r5 /* VB_INSTANCE_BASE = start_instance */
385   maddr 0x446 /* presumably VERTEX_ID_BASE, per the header note - confirm */
386   send $r4
387   mov $r4 0x1 /* constant 1: instance decrement and source of the INSTANCE_NEXT bit */
388   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
389deic_again:
390   maddr 0x586 /* VERTEX_BEGIN_GL */
391   send $r1 /* mode */
392   maddr 0x5f8 /* INDEX_BATCH_COUNT */
393   send $r3 /* count */
394   mov $r2 (sub $r2 $r4) /* one instance done */
395   maddrsend 0x585 /* VERTEX_END_GL */
396   branz $r2 #deic_again /* more instances of this draw */
397   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */ /* delay slot: applies to every instance after the first */
398deic_end:
399   read $r5 0xd02 /* draws left, reloaded from SCRATCH[2] because $r5 was reused for start_instance */
400   mov $r5 (add $r5 -1)
401   braz $r5 #deic_runout_check /* draw budget used up: discard whatever sets remain */
402   mov $r7 (add $r7 -1) /* delay slot: one parameter set consumed */
403   maddr 0xd02
404   send $r5 /* SCRATCH[2] = updated draws left */
405   branz $r7 #deic_draw_again
406   mov $r6 (add $r6 1) /* delay slot: next draw id */
407deic_restore:
408   read $r6 0xd00 /* saved VB_ELEMENT_BASE from SCRATCH[0] */
409   read $r7 0xd01 /* saved VB_INSTANCE_BASE from SCRATCH[1] */
410   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
411   send $r6
412   send $r7
413   exit maddr 0x446 /* the macro ends after the following send */
414   send $r6
415deic_runout:
   /* Discard one parameter set: five parameters per draw in this macro. */
416   parm $r2
417   parm $r2
418   parm $r2
419   parm $r2
420   parm $r2
421   mov $r7 (add $r7 -1) /* one parameter set consumed */
422deic_runout_check:
423   branz annul $r7 #deic_runout /* sets remain: keep discarding (annul: no delay slot) */
424   bra annul #deic_restore /* all sets consumed: restore the saved bases and exit */
425
426/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT:
427 *
428 * NOTE: Saves and restores VB_INSTANCE_BASE.
429 *
430 * arg     = mode
431 * parm[0] = start_drawid
432 * parm[1] = numparams
433 * parm[2] = totaldraws
434 * parm[3 + 4n + 0] = count
435 * parm[3 + 4n + 1] = instance_count
436 * parm[3 + 4n + 2] = start
437 * parm[3 + 4n + 3] = start_instance
438 *
439 * SCRATCH[0] = VB_INSTANCE_BASE
440 */
441.section #mme9097_draw_arrays_indirect_count
   /* Like the draw_arrays_indirect macro, with an extra bound: at most
    * (totaldraws - start_drawid) parameter sets are drawn. Sets beyond that
    * bound are read and discarded in daic_runout. The remaining-draw count
    * stays in $r5, so the saved VB_INSTANCE_BASE is kept in SCRATCH[0].
    */
442   read $r5 0x50e /* VB_INSTANCE_BASE */
443   maddr 0xd00 /* SCRATCH[0]; only one value is stored, so no auto-increment */
444   parm $r6 send $r5 /* start_drawid, save VB_INSTANCE_BASE */
445   parm $r7 /* numparams */
446   parm $r5 /* totaldraws */
447   mov $r5 (sub $r5 $r6) /* draws left */
448   braz $r5 #daic_runout /* no draws left: discard every parameter set */
449   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
450   branz annul $r3 #daic_runout /* negative difference: discard every parameter set */
451daic_draw_again:
452   parm $r2 /* count */
453   parm $r3 /* instance_count */
454   parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
455   braz $r3 #daic_end /* instance_count == 0: nothing to draw for this set */
456   parm $r4 send $r4 /* start_instance */ /* delay slot: sends start and consumes the fourth parameter even when the draw is skipped */
457   maddr 0x18e3 /* CB_POS */
458   send 0x1a0 /* 256 + 160 */
459   send 0x0 /* send 0 as base_vertex */
460   send $r4 /* send start_instance */
461   send $r6 /* draw id */
462   maddr 0x50e /* VB_INSTANCE_BASE */
463   send $r4
464   mov $r4 0x1 /* constant 1: instance decrement and source of the INSTANCE_NEXT bit */
465   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
466daic_again:
467   maddr 0x586 /* VERTEX_BEGIN_GL */
468   send $r1 /* mode */
469   maddr 0x35e /* VERTEX_BUFFER_COUNT */
470   send $r2
471   mov $r3 (sub $r3 $r4) /* one instance done */
472   maddrsend 0x585 /* VERTEX_END_GL */
473   branz $r3 #daic_again /* more instances of this draw */
474   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */ /* delay slot: applies to every instance after the first */
475daic_end:
476   mov $r5 (add $r5 -1) /* one draw of the budget used */
477   braz $r5 #daic_runout_check /* draw budget used up: discard whatever sets remain */
478   mov $r7 (add $r7 -1) /* delay slot: one parameter set consumed */
479   branz $r7 #daic_draw_again
480   mov $r6 (add $r6 1) /* delay slot: next draw id */
481daic_restore:
482   read $r5 0xd00 /* saved VB_INSTANCE_BASE from SCRATCH[0] */
483   exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
484   send $r5 /* last instruction of the macro */
485daic_runout:
   /* Discard one parameter set: four parameters per draw in this macro. */
486   parm $r2
487   parm $r2
488   parm $r2
489   parm $r2
490   mov $r7 (add $r7 -1) /* one parameter set consumed */
491daic_runout_check:
492   branz annul $r7 #daic_runout /* sets remain: keep discarding (annul: no delay slot) */
493   bra annul #daic_restore /* all sets consumed: restore VB_INSTANCE_BASE and exit */
494
495/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
496 *
497 * This is a combination macro for all of our query buffer object needs.
498 * It has the option to clamp results to a configurable amount, as well as
499 * to write out one or two words.
500 *
501 * We use the query engine to write out the values, and expect the query
502 * address to point to the right place.
503 *
504 * arg = clamp value (0 means unclamped). A non-zero clamp means only one 32-bit word is written.
505 * parm[0] = LSB of end value
506 * parm[1] = MSB of end value
507 * parm[2] = LSB of start value
508 * parm[3] = MSB of start value
509 * parm[4] = desired sequence
510 * parm[5] = actual sequence
511 * parm[6] = query high address
512 * parm[7] = query low address
513 */
514.section #mme9097_query_buffer_write
   /* Computes (end - start) as a 64-bit value in $r3:$r2, optionally clamps
    * the low word to the argument in $r1, and writes one word (clamped case)
    * or two words (unclamped case) through the query methods at 0x6c0.
    */
515   parm $r2 /* LSB of end value */
516   parm $r3 /* MSB of end value */
517   parm $r4 /* LSB of start value */
518   parm $r5 maddr 0x16c0 /* QUERY_ADDRESS_HIGH */ /* $r5 = MSB of start value */
519   parm $r6 /* desired sequence */
520   parm $r7 /* actual sequence */
521   mov $r6 (sub $r7 $r6) /* actual - desired */
522   mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
523   parm $r7 /* query high address */
524   exit braz $r6 #qbw_ready /* no underflow: go on and write. NOTE(review): exit shares this instruction with the branch, presumably ending the macro only when the branch is not taken - confirm */
525   parm $r6 /* delay slot: query low address */
526qbw_ready:
527   mov $r2 (sub $r2 $r4) /* low word of end - start */
528   braz $r1 #qbw_postclamp /* clamp value 0: unclamped */
529   mov $r3 (sbb $r3 $r5) /* delay slot: high word of end - start, with borrow */
530   branz annul $r3 #qbw_clamp /* high word non-zero: clamp */
531   mov $r4 (sub $r1 $r2) /* clamp - low word */
532   mov $r4 (sbb 0x0 0x0) /* non-zero if that subtraction borrowed, i.e. low word > clamp */
533   braz annul $r4 #qbw_postclamp /* no borrow: value is within the clamp */
534qbw_clamp:
535   mov $r2 $r1 /* result = clamp value */
536qbw_postclamp:
537   send $r7 /* query address high */
538   send $r6 /* query address low */
539   send $r2 /* low result word, sent to the method after the address pair */
540   branz $r1 #qbw_done /* clamped: only one word is written */
541   mov $r4 0x1000 /* delay slot, executed either way */
542   send (extrinsrt 0x0 $r4 0x0 0x10 0x10) /* low 16 bits of $r4 moved to bits 16..31, i.e. 0x10000000 */
543   maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
544   mov $r5 0x4
545   mov $r6 (add $r6 $r5) /* address low + 4 */
546   mov $r7 (adc $r7 0x0) /* carry into address high */
547   send $r7
548   send $r6
549   send $r3 /* high result word */
550qbw_done:
551   exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10) /* same 0x10000000 word for the last write; the macro ends after the next instruction */
552   maddrsend 0x44
553
554/* NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE:
555 *
556 * This sets basically all the conservative rasterization state. It sets
557 * CONSERVATIVE_RASTER to one while doing so.
558 *
559 * arg = biasx | biasy<<4 | (dilation*4)<<8 | mode<<10
560 */
561.section #mme9097_conservative_raster_state
   /* Three steps, all driven by the packed argument in $r1: a value and
    * write-mask pair stored in SCRATCH followed by a FIRMWARE[4] write,
    * 16 sends of the subpixel bias word, then the enable method.
    */
562   /* Mode and dilation */
563   maddr 0x1d00 /* SCRATCH[0] */
564   send 0x0 /* unknown */
565   send (extrinsrt 0x0 $r1 8 3 23) /* value */ /* arg bits 8..10 (dilation and mode) moved to bits 23..25 */
566   mov $r2 0x7
567   send (extrinsrt 0x0 $r2 0 3 23) /* write mask */ /* 0x7 at bits 23..25, the same field as the value */
568   maddr 0x18c4 /* FIRMWARE[4] */
569   mov $r2 0x831
570   send (extrinsrt 0x0 $r2 0 12 11) /* sends 0x418800 */
571   /* Subpixel precision */
572   mov $r2 (extrinsrt 0x0 $r1 0 4 0) /* biasx: arg bits 0..3 kept at bits 0..3 */
573   mov $r2 (extrinsrt $r2 $r1 4 4 8) /* biasy: arg bits 4..7 moved to bits 8..11 */
574   maddr 0x8287 /* SUBPIXEL_PRECISION[0] (incrementing by 8 methods) */
575   mov $r3 16 /* loop counter */
576crs_loop:
577   mov $r3 (add $r3 -1)
578   branz $r3 #crs_loop
579   send $r2 /* delay slot, executed on each of the 16 passes */
580   /* Enable */
581   exit maddr 0x1452 /* CONSERVATIVE_RASTER */
582   send 0x1 /* last instruction of the macro */
583
584/* NVC0_3D_MACRO_COMPUTE_COUNTER
585 *
586 * This macro takes 6 values, num_groups_* and group_size_*, and adds their
587 * product to the current counter value.
588 *
589 * It's used for keeping track of the number of executed indirect
590 * compute invocations for statistics.
591 *
592 * SCRATCH[4] = current counter [low]
593 * SCRATCH[5] = current counter [high]
594 *
595 * arg     = number of parameters to multiply together, ideally 6
596 * parm[0] = num_groups_x
597 * parm[1] = num_groups_y
598 * parm[2] = num_groups_z
599 * parm[3] = group_size_x
600 * parm[4] = group_size_y
601 * parm[5] = group_size_z
602 */
603.section #mme9097_compute_counter
   /* Multiplies the parameters together with a 64-bit shift-and-add loop
    * (result in $r2:$r1), then adds the product to the 64-bit counter in
    * SCRATCH[4] and SCRATCH[5]. $r7 counts the parameters still to be read.
    */
604   mov $r7 $r1 /* number of parameters to multiply */
605   mov $r1 1 /* low result */
606   mov $r2 0 /* high result */
607iic_loop_start:
608   parm $r3 /* val, next integer to multiply in */
609   /* multiplication start - look at low bit, add if set, shift right/left */
610   mov $r4 0 /* low temp */
611   mov $r5 0 /* high temp */
612iic_mul_start: /* temp = result * val */
613   braz annul $r3 #iic_mul_done /* no bits left in val */
614iic_mul_body:
615   mov $r6 (extrinsrt 0x0 $r3 0 1 0) /* val & 1 - check low bit */
616   braz $r6 #iic_mul_cont /* bit not set */
617   mov $r3 (extrinsrt 0x0 $r3 1 31 0) /* val >>= 1 - shift right */ /* delay slot: the shift happens whether or not the bit was set */
618
619   mov $r4 (add $r4 $r1) /* temp += result */
620   mov $r5 (adc $r5 $r2) /* high word, with carry from the low add */
621iic_mul_cont:
622   mov $r1 (add $r1 $r1) /* shift left, part 1 (result *= 2) */
623   bra #iic_mul_start
624   mov $r2 (adc $r2 $r2) /* shift left, part 2 */ /* delay slot of the bra: carries the bit shifted out of the low word */
625iic_mul_done:
626   /* decrease loop counter, keep going if necessary */
627   mov $r7 (add $r7 -1)
628   /* result = temp ( = result * val ) */
629   mov $r1 $r4
630   branz $r7 #iic_loop_start
631   mov $r2 $r5 /* delay slot: high word of the new result */
632
633   /* increment current value by newly-calculated invocation count */
634   read $r3 0xd04 /* SCRATCH[4] */
635   read $r4 0xd05 /* SCRATCH[5] */
636   maddr 0x1d04 /* SCRATCH[4] */
637   exit send (add $r3 $r1) /* new low word; the macro ends after the following send */
638   send (adc $r4 $r2) /* new high word, with carry from the low add */
639
640/* NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY
641 *
642 * This macro writes out the indirect counter plus a direct value to
643 * the given address using QUERY_GET (64-bit value).
644 *
645 * arg     = direct counter low
646 * parm[0] = direct counter high
647 * parm[1] = query address high
648 * parm[2] = query address low
649 */
650.section #mme9097_compute_counter_to_query
   /* Adds the 64-bit counter in SCRATCH[4] and SCRATCH[5] to the 64-bit
    * direct value ($r1 = low, first parameter = high) and writes the sum as
    * two 32-bit words: low word at the given address, high word at +4.
    */
651   parm $r2 /* counter high */
652   read $r3 0xd04 /* SCRATCH[4] */
653   read $r4 0xd05 /* SCRATCH[5] */
654   mov $r1 (add $r1 $r3) /* sum low */
655   mov $r2 (adc $r2 $r4) /* sum high, with carry from the low add */
656
657   parm $r3 maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
658   parm $r4 send $r3 /* $r4 = address low; sends address high */
659   send $r4 /* r3 = addr high, r4 = addr low */
660   send $r1 /* sum low */
661   mov $r5 0x1000
662   send (extrinsrt 0x0 $r5 0x0 0x10 0x10) /* GET_SHORT */
663
664   /* add 4 to the address */
665   mov $r1 0x4
666   mov $r4 (add $r4 $r1) /* addr low */
667   mov $r3 (adc $r3 0x0) /* addr high */
668   maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
669   send $r3 /* addr high */
670   send $r4 /* addr low */
671   exit send $r2 /* sum high */
672   send (extrinsrt 0x0 $r5 0x0 0x10 0x10) /* GET_SHORT */
673