1 /*
2 * Ingenic XBurst Media eXtension Unit (MXU) translation routines.
3 *
4 * Copyright (c) 2004-2005 Jocelyn Mayer
5 * Copyright (c) 2006 Marius Groeger (FPU operations)
6 * Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
7 * Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
8 * Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
9 *
10 * SPDX-License-Identifier: LGPL-2.1-or-later
11 *
12 * Datasheet:
13 *
14 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
15 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
16 */
17
18 #include "qemu/osdep.h"
19 #include "translate.h"
20
21 /*
22 *
23 * AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
24 * ============================================
25 *
26 *
27 * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32
28 * instructions set. It is designed to fit the needs of signal, graphical and
29 * video processing applications. MXU instruction set is used in Xburst family
30 * of microprocessors by Ingenic.
31 *
32 * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
33 * the control register.
34 *
35 *
36 * The notation used in MXU assembler mnemonics
37 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38 *
39 * Register operands:
40 *
41 * XRa, XRb, XRc, XRd - MXU registers
42 * Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
43 *
44 * Non-register operands:
45 *
46 * aptn1 - 1-bit accumulate add/subtract pattern
47 * aptn2 - 2-bit accumulate add/subtract pattern
48 * eptn2 - 2-bit execute add/subtract pattern
49 * optn2 - 2-bit operand pattern
50 * optn3 - 3-bit operand pattern
51 * sft4 - 4-bit shift amount
52 * strd2 - 2-bit stride amount
53 *
54 * Prefixes:
55 *
56 * Level of parallelism: Operand size:
57 * S - single operation at a time 32 - word
58 * D - two operations in parallel 16 - half word
59 * Q - four operations in parallel 8 - byte
60 *
61 * Operations:
62 *
63 * ADD - Add or subtract
64 * ADDC - Add with carry-in
65 * ACC - Accumulate
66 * ASUM - Sum together then accumulate (add or subtract)
67 * ASUMC - Sum together then accumulate (add or subtract) with carry-in
68 * AVG - Average between 2 operands
69 * ABD - Absolute difference
70 * ALN - Align data
71 * AND - Logical bitwise 'and' operation
72 * CPS - Copy sign
73 * EXTR - Extract bits
74 * I2M - Move from GPR register to MXU register
75 * LDD - Load data from memory to XRF
76 * LDI - Load data from memory to XRF (and increase the address base)
77 * LUI - Load unsigned immediate
78 * MUL - Multiply
79 * MULU - Unsigned multiply
80 * MADD - 64-bit operand add 32x32 product
81 * MSUB - 64-bit operand subtract 32x32 product
82 * MAC - Multiply and accumulate (add or subtract)
83 * MAD - Multiply and add or subtract
84 * MAX - Maximum between 2 operands
85 * MIN - Minimum between 2 operands
86 * M2I - Move from MXU register to GPR register
87 * MOVZ - Move if zero
88 * MOVN - Move if non-zero
89 * NOR - Logical bitwise 'nor' operation
90 * OR - Logical bitwise 'or' operation
91 * STD - Store data from XRF to memory
92 * SDI - Store data from XRF to memory (and increase the address base)
93 * SLT - Set of less than comparison
94 * SAD - Sum of absolute differences
95 * SLL - Logical shift left
96 * SLR - Logical shift right
97 * SAR - Arithmetic shift right
98 * SAT - Saturation
99 * SFL - Shuffle
100 * SCOP - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
101 * XOR - Logical bitwise 'exclusive or' operation
102 *
103 * Suffixes:
104 *
105 * E - Expand results
106 * F - Fixed point multiplication
107 * L - Low part result
108 * R - Doing rounding
109 * V - Variable instead of immediate
110 * W - Combine above L and V
111 *
112 *
113 * The list of MXU instructions grouped by functionality
114 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115 *
116 * Load/Store instructions Multiplication instructions
117 * ----------------------- ---------------------------
118 *
119 * S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt
120 * S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt
121 * S32LDDV XRa, Rb, rc, strd2 S32MSUB XRa, XRd, Rs, Rt
122 * S32STDV XRa, Rb, rc, strd2 S32MSUBU XRa, XRd, Rs, Rt
123 * S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt
124 * S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt
125 * S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2
126 * S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2
127 * S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2
128 * S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
129 * S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
130 * S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
131 * S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
132 * S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
133 * S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd
134 * S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd
135 * S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2
136 * S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2
137 * S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2
138 * S16SDI XRa, Rb, s10, eptn2
139 * S8LDD XRa, Rb, s8, eptn3
140 * S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions
141 * S8LDI XRa, Rb, s8, eptn3 -------------------------------------
142 * S8SDI XRa, Rb, s8, eptn3
143 * LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2
144 * LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd
145 * LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2
146 * LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2
147 * LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2
148 * S32CPS XRa, XRb, XRc
149 * Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
150 * Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2
151 * ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2
152 * D16ASUM XRa, XRb, XRc, XRd, eptn2
153 * S32MAX XRa, XRb, XRc D16CPS XRa, XRb,
154 * S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc
155 * S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc
156 * S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2
157 * S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2
158 * D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2
159 * D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc
160 * D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd
161 * D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc
162 * D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc
163 * Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd
164 * Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd
165 * Q8SLT XRa, XRb, XRc
166 * Q8SLTU XRa, XRb, XRc
167 * Q8MOVZ XRa, XRb, XRc Shift instructions
168 * Q8MOVN XRa, XRb, XRc ------------------
169 *
170 * D32SLL XRa, XRb, XRc, XRd, sft4
171 * Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4
172 * -------------------- D32SAR XRa, XRb, XRc, XRd, sft4
173 * D32SARL XRa, XRb, XRc, sft4
174 * S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb
175 * S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb
176 * S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb
177 * S32OR XRa, XRb, XRc D32SARW XRa, XRb, XRc, Rb
178 * Q16SLL XRa, XRb, XRc, XRd, sft4
179 * Q16SLR XRa, XRb, XRc, XRd, sft4
180 * Miscellaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4
181 * ------------------------- Q16SLLV XRa, XRb, Rb
182 * Q16SLRV XRa, XRb, Rb
183 * S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb
184 * S32ALN XRa, XRb, XRc, Rb
185 * S32ALNI XRa, XRb, XRc, s3
186 * S32LUI XRa, s8, optn3 Move instructions
187 * S32EXTR XRa, XRb, Rb, bits5 -----------------
188 * S32EXTRV XRa, XRb, Rs, Rt
189 * Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb
190 * Q16SAT XRa, XRb, XRc S32I2M XRa, Rb
191 *
192 *
193 * The opcode organization of MXU instructions
194 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195 *
196 * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
197 * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
198 * other bits up to the instruction level is as follows:
199 *
200 * bits
201 * 05..00
202 *
203 * ┌─ 000000 ─ OPC_MXU_S32MADD
204 * ├─ 000001 ─ OPC_MXU_S32MADDU
205 * ├─ 000010 ─ <not assigned> (non-MXU OPC_MUL)
206 * │
207 * │ 20..18
208 * ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
209 * │ ├─ 001 ─ OPC_MXU_S32MIN
210 * │ ├─ 010 ─ OPC_MXU_D16MAX
211 * │ ├─ 011 ─ OPC_MXU_D16MIN
212 * │ ├─ 100 ─ OPC_MXU_Q8MAX
213 * │ ├─ 101 ─ OPC_MXU_Q8MIN
214 * │ ├─ 110 ─ OPC_MXU_Q8SLT
215 * │ └─ 111 ─ OPC_MXU_Q8SLTU
216 * ├─ 000100 ─ OPC_MXU_S32MSUB
217 * ├─ 000101 ─ OPC_MXU_S32MSUBU 20..18
218 * ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
219 * │ ├─ 001 ─ OPC_MXU_D16SLT
220 * │ ├─ 010 ─ OPC_MXU_D16AVG
221 * │ ├─ 011 ─ OPC_MXU_D16AVGR
222 * │ ├─ 100 ─ OPC_MXU_Q8AVG
223 * │ ├─ 101 ─ OPC_MXU_Q8AVGR
224 * │ └─ 111 ─ OPC_MXU_Q8ADD
225 * │
226 * │ 20..18
227 * ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
228 * │ ├─ 010 ─ OPC_MXU_D16CPS
229 * │ ├─ 100 ─ OPC_MXU_Q8ABD
230 * │ └─ 110 ─ OPC_MXU_Q16SAT
231 * ├─ 001000 ─ OPC_MXU_D16MUL
232 * │ 25..24
233 * ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
234 * │ └─ 01 ─ OPC_MXU_D16MULE
235 * ├─ 001010 ─ OPC_MXU_D16MAC
236 * ├─ 001011 ─ OPC_MXU_D16MACF
237 * ├─ 001100 ─ OPC_MXU_D16MADL
238 * ├─ 001101 ─ OPC_MXU_S16MAD
239 * ├─ 001110 ─ OPC_MXU_Q16ADD
240 * ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care)
241 * │ ┌─ 0 ─ OPC_MXU_S32LDD
242 * ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
243 * │
244 * │ 20 (13..10 don't care)
245 * ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
246 * │ └─ 1 ─ OPC_MXU_S32STDR
247 * │
248 * │ 13..10
249 * ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
250 * │ └─ 0001 ─ OPC_MXU_S32LDDVR
251 * │
252 * │ 13..10
253 * ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
254 * │ └─ 0001 ─ OPC_MXU_S32STDVR
255 * │
256 * │ 20 (13..10 don't care)
257 * ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
258 * │ └─ 1 ─ OPC_MXU_S32LDIR
259 * │
260 * │ 20 (13..10 don't care)
261 * ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
262 * │ └─ 1 ─ OPC_MXU_S32SDIR
263 * │
264 * │ 13..10
265 * ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
266 * │ └─ 0001 ─ OPC_MXU_S32LDIVR
267 * │
268 * │ 13..10
269 * ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
270 * │ └─ 0001 ─ OPC_MXU_S32SDIVR
271 * ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too)
272 * │ 23..22
273 * MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
274 * opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM
275 * │ └─ 10 ─ OPC_MXU_D32ASUM
276 * ├─ 011010 ─ <not assigned>
277 * │ 23..22
278 * ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
279 * │ ├─ 01 ─ OPC_MXU_Q16ACCM
280 * │ └─ 10 ─ OPC_MXU_D16ASUM
281 * │
282 * │ 23..22
283 * ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
284 * │ ├─ 01 ─ OPC_MXU_D8SUM
285 * ├─ 011101 ─ OPC_MXU_Q8ACCE └─ 10 ─ OPC_MXU_D8SUMC
286 * ├─ 011110 ─ <not assigned>
287 * ├─ 011111 ─ <not assigned>
288 * ├─ 100000 ─ <not assigned> (overlaps with CLZ)
289 * ├─ 100001 ─ <not assigned> (overlaps with CLO)
290 * ├─ 100010 ─ OPC_MXU_S8LDD
291 * ├─ 100011 ─ OPC_MXU_S8STD 15..14
292 * ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL
293 * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU
294 * │ ├─ 10 ─ OPC_MXU_S32EXTR
295 * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
296 * │
297 * │ 20..18
298 * ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
299 * │ ├─ 001 ─ OPC_MXU_S32ALN
300 * │ ├─ 010 ─ OPC_MXU_S32ALNI
301 * │ ├─ 011 ─ OPC_MXU_S32LUI
302 * │ ├─ 100 ─ OPC_MXU_S32NOR
303 * │ ├─ 101 ─ OPC_MXU_S32AND
304 * │ ├─ 110 ─ OPC_MXU_S32OR
305 * │ └─ 111 ─ OPC_MXU_S32XOR
306 * │
307 * │ 8..6
308 * ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
309 * │ ├─ 001 ─ OPC_MXU_LXH
310 * ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW
311 * ├─ 101010 ─ OPC_MXU_S16LDD ├─ 100 ─ OPC_MXU_LXBU
312 * ├─ 101011 ─ OPC_MXU_S16STD └─ 101 ─ OPC_MXU_LXHU
313 * ├─ 101100 ─ OPC_MXU_S16LDI
314 * ├─ 101101 ─ OPC_MXU_S16SDI
315 * ├─ 101110 ─ OPC_MXU_S32M2I
316 * ├─ 101111 ─ OPC_MXU_S32I2M
317 * ├─ 110000 ─ OPC_MXU_D32SLL
318 * ├─ 110001 ─ OPC_MXU_D32SLR 20..18
319 * ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV
320 * ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV
321 * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV
322 * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV
323 * │ ├─ 101 ─ OPC_MXU_Q16SLRV
324 * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
325 * │
326 * ├─ 110111 ─ OPC_MXU_Q16SAR
327 * │ 23..22
328 * ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
329 * │ └─ 10 ─ OPC_MXU_Q8MULSU
330 * │
331 * │ 20..18
332 * ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
333 * │ ├─ 001 ─ OPC_MXU_Q8MOVN
334 * │ ├─ 010 ─ OPC_MXU_D16MOVZ
335 * │ ├─ 011 ─ OPC_MXU_D16MOVN
336 * │ ├─ 100 ─ OPC_MXU_S32MOVZ
337 * │ └─ 101 ─ OPC_MXU_S32MOVN
338 * │
339 * │ 23..22
340 * ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
341 * │ └─ 10 ─ OPC_MXU_Q8MACSU
342 * ├─ 111011 ─ OPC_MXU_Q16SCOP
343 * ├─ 111100 ─ OPC_MXU_Q8MADL
344 * ├─ 111101 ─ OPC_MXU_S32SFL
345 * ├─ 111110 ─ OPC_MXU_Q8SAD
346 * └─ 111111 ─ <not assigned> (overlaps with SDBBP)
347 *
348 *
349 * Compiled after:
350 *
351 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
352 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
353 */
354
355 enum {
356 OPC_MXU_S32MADD = 0x00,
357 OPC_MXU_S32MADDU = 0x01,
358 OPC_MXU__POOL00 = 0x03,
359 OPC_MXU_S32MSUB = 0x04,
360 OPC_MXU_S32MSUBU = 0x05,
361 OPC_MXU__POOL01 = 0x06,
362 OPC_MXU__POOL02 = 0x07,
363 OPC_MXU_D16MUL = 0x08,
364 OPC_MXU__POOL03 = 0x09,
365 OPC_MXU_D16MAC = 0x0A,
366 OPC_MXU_D16MACF = 0x0B,
367 OPC_MXU_D16MADL = 0x0C,
368 OPC_MXU_S16MAD = 0x0D,
369 OPC_MXU_Q16ADD = 0x0E,
370 OPC_MXU_D16MACE = 0x0F,
371 OPC_MXU__POOL04 = 0x10,
372 OPC_MXU__POOL05 = 0x11,
373 OPC_MXU__POOL06 = 0x12,
374 OPC_MXU__POOL07 = 0x13,
375 OPC_MXU__POOL08 = 0x14,
376 OPC_MXU__POOL09 = 0x15,
377 OPC_MXU__POOL10 = 0x16,
378 OPC_MXU__POOL11 = 0x17,
379 OPC_MXU_D32ADD = 0x18,
380 OPC_MXU__POOL12 = 0x19,
381 OPC_MXU__POOL13 = 0x1B,
382 OPC_MXU__POOL14 = 0x1C,
383 OPC_MXU_Q8ACCE = 0x1D,
384 OPC_MXU_S8LDD = 0x22,
385 OPC_MXU_S8STD = 0x23,
386 OPC_MXU_S8LDI = 0x24,
387 OPC_MXU_S8SDI = 0x25,
388 OPC_MXU__POOL15 = 0x26,
389 OPC_MXU__POOL16 = 0x27,
390 OPC_MXU__POOL17 = 0x28,
391 OPC_MXU_S16LDD = 0x2A,
392 OPC_MXU_S16STD = 0x2B,
393 OPC_MXU_S16LDI = 0x2C,
394 OPC_MXU_S16SDI = 0x2D,
395 OPC_MXU_S32M2I = 0x2E,
396 OPC_MXU_S32I2M = 0x2F,
397 OPC_MXU_D32SLL = 0x30,
398 OPC_MXU_D32SLR = 0x31,
399 OPC_MXU_D32SARL = 0x32,
400 OPC_MXU_D32SAR = 0x33,
401 OPC_MXU_Q16SLL = 0x34,
402 OPC_MXU_Q16SLR = 0x35,
403 OPC_MXU__POOL18 = 0x36,
404 OPC_MXU_Q16SAR = 0x37,
405 OPC_MXU__POOL19 = 0x38,
406 OPC_MXU__POOL20 = 0x39,
407 OPC_MXU__POOL21 = 0x3A,
408 OPC_MXU_Q16SCOP = 0x3B,
409 OPC_MXU_Q8MADL = 0x3C,
410 OPC_MXU_S32SFL = 0x3D,
411 OPC_MXU_Q8SAD = 0x3E,
412 };
413
414
415 /*
416 * MXU pool 00
417 */
418 enum {
419 OPC_MXU_S32MAX = 0x00,
420 OPC_MXU_S32MIN = 0x01,
421 OPC_MXU_D16MAX = 0x02,
422 OPC_MXU_D16MIN = 0x03,
423 OPC_MXU_Q8MAX = 0x04,
424 OPC_MXU_Q8MIN = 0x05,
425 OPC_MXU_Q8SLT = 0x06,
426 OPC_MXU_Q8SLTU = 0x07,
427 };
428
429 /*
430 * MXU pool 01
431 */
432 enum {
433 OPC_MXU_S32SLT = 0x00,
434 OPC_MXU_D16SLT = 0x01,
435 OPC_MXU_D16AVG = 0x02,
436 OPC_MXU_D16AVGR = 0x03,
437 OPC_MXU_Q8AVG = 0x04,
438 OPC_MXU_Q8AVGR = 0x05,
439 OPC_MXU_Q8ADD = 0x07,
440 };
441
442 /*
443 * MXU pool 02
444 */
445 enum {
446 OPC_MXU_S32CPS = 0x00,
447 OPC_MXU_D16CPS = 0x02,
448 OPC_MXU_Q8ABD = 0x04,
449 OPC_MXU_Q16SAT = 0x06,
450 };
451
452 /*
453 * MXU pool 03
454 */
455 enum {
456 OPC_MXU_D16MULF = 0x00,
457 OPC_MXU_D16MULE = 0x01,
458 };
459
460 /*
461 * MXU pool 04 05 06 07 08 09 10 11
462 */
463 enum {
464 OPC_MXU_S32LDST = 0x00,
465 OPC_MXU_S32LDSTR = 0x01,
466 };
467
468 /*
469 * MXU pool 12
470 */
471 enum {
472 OPC_MXU_D32ACC = 0x00,
473 OPC_MXU_D32ACCM = 0x01,
474 OPC_MXU_D32ASUM = 0x02,
475 };
476
477 /*
478 * MXU pool 13
479 */
480 enum {
481 OPC_MXU_Q16ACC = 0x00,
482 OPC_MXU_Q16ACCM = 0x01,
483 OPC_MXU_D16ASUM = 0x02,
484 };
485
486 /*
487 * MXU pool 14
488 */
489 enum {
490 OPC_MXU_Q8ADDE = 0x00,
491 OPC_MXU_D8SUM = 0x01,
492 OPC_MXU_D8SUMC = 0x02,
493 };
494
495 /*
496 * MXU pool 15
497 */
498 enum {
499 OPC_MXU_S32MUL = 0x00,
500 OPC_MXU_S32MULU = 0x01,
501 OPC_MXU_S32EXTR = 0x02,
502 OPC_MXU_S32EXTRV = 0x03,
503 };
504
505 /*
506 * MXU pool 16
507 */
508 enum {
509 OPC_MXU_D32SARW = 0x00,
510 OPC_MXU_S32ALN = 0x01,
511 OPC_MXU_S32ALNI = 0x02,
512 OPC_MXU_S32LUI = 0x03,
513 OPC_MXU_S32NOR = 0x04,
514 OPC_MXU_S32AND = 0x05,
515 OPC_MXU_S32OR = 0x06,
516 OPC_MXU_S32XOR = 0x07,
517 };
518
519 /*
520 * MXU pool 17
521 */
522 enum {
523 OPC_MXU_LXB = 0x00,
524 OPC_MXU_LXH = 0x01,
525 OPC_MXU_LXW = 0x03,
526 OPC_MXU_LXBU = 0x04,
527 OPC_MXU_LXHU = 0x05,
528 };
529
530 /*
531 * MXU pool 18
532 */
533 enum {
534 OPC_MXU_D32SLLV = 0x00,
535 OPC_MXU_D32SLRV = 0x01,
536 OPC_MXU_D32SARV = 0x03,
537 OPC_MXU_Q16SLLV = 0x04,
538 OPC_MXU_Q16SLRV = 0x05,
539 OPC_MXU_Q16SARV = 0x07,
540 };
541
542 /*
543 * MXU pool 19
544 */
545 enum {
546 OPC_MXU_Q8MUL = 0x00,
547 OPC_MXU_Q8MULSU = 0x02,
548 };
549
550 /*
551 * MXU pool 20
552 */
553 enum {
554 OPC_MXU_Q8MOVZ = 0x00,
555 OPC_MXU_Q8MOVN = 0x01,
556 OPC_MXU_D16MOVZ = 0x02,
557 OPC_MXU_D16MOVN = 0x03,
558 OPC_MXU_S32MOVZ = 0x04,
559 OPC_MXU_S32MOVN = 0x05,
560 };
561
562 /*
563 * MXU pool 21
564 */
565 enum {
566 OPC_MXU_Q8MAC = 0x00,
567 OPC_MXU_Q8MACSU = 0x02,
568 };
569
570
571 /* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
572 #define MXU_APTN1_A 0
573 #define MXU_APTN1_S 1
574
575 /* MXU accumulate add/subtract 2-bit pattern 'aptn2' */
576 #define MXU_APTN2_AA 0
577 #define MXU_APTN2_AS 1
578 #define MXU_APTN2_SA 2
579 #define MXU_APTN2_SS 3
580
581 /* MXU execute add/subtract 2-bit pattern 'eptn2' */
582 #define MXU_EPTN2_AA 0
583 #define MXU_EPTN2_AS 1
584 #define MXU_EPTN2_SA 2
585 #define MXU_EPTN2_SS 3
586
587 /* MXU operand getting pattern 'optn2' */
588 #define MXU_OPTN2_PTN0 0
589 #define MXU_OPTN2_PTN1 1
590 #define MXU_OPTN2_PTN2 2
591 #define MXU_OPTN2_PTN3 3
592 /* alternative naming scheme for 'optn2' */
593 #define MXU_OPTN2_WW 0
594 #define MXU_OPTN2_LW 1
595 #define MXU_OPTN2_HW 2
596 #define MXU_OPTN2_XW 3
597
598 /* MXU operand getting pattern 'optn3' */
599 #define MXU_OPTN3_PTN0 0
600 #define MXU_OPTN3_PTN1 1
601 #define MXU_OPTN3_PTN2 2
602 #define MXU_OPTN3_PTN3 3
603 #define MXU_OPTN3_PTN4 4
604 #define MXU_OPTN3_PTN5 5
605 #define MXU_OPTN3_PTN6 6
606 #define MXU_OPTN3_PTN7 7
607
608 /* MXU registers */
609 static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
610 static TCGv mxu_CR;
611
612 static const char mxuregnames[NUMBER_OF_MXU_REGISTERS][4] = {
613 "XR1", "XR2", "XR3", "XR4", "XR5", "XR6", "XR7", "XR8",
614 "XR9", "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR",
615 };
616
mxu_translate_init(void)617 void mxu_translate_init(void)
618 {
619 for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
620 mxu_gpr[i] = tcg_global_mem_new(tcg_env,
621 offsetof(CPUMIPSState, active_tc.mxu_gpr[i]),
622 mxuregnames[i]);
623 }
624
625 mxu_CR = tcg_global_mem_new(tcg_env,
626 offsetof(CPUMIPSState, active_tc.mxu_cr),
627 mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
628 }
629
630 /* MXU General purpose registers moves. */
static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
{
    /* XR0 reads as constant zero; registers above XR15 are ignored. */
    if (reg >= 1 && reg <= 15) {
        tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
    } else if (reg == 0) {
        tcg_gen_movi_tl(t, 0);
    }
}
639
static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
{
    /* Writes to XR0 and to out-of-range register numbers are discarded. */
    if (reg >= 1 && reg <= 15) {
        tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
    }
}
646
static inline void gen_extract_mxu_gpr(TCGv t, unsigned int reg,
                                       unsigned int ofs, unsigned int len)
{
    /* Extract 'len' bits at 'ofs' from XRreg; XR0 yields zero. */
    if (reg >= 1 && reg <= 15) {
        tcg_gen_extract_tl(t, mxu_gpr[reg - 1], ofs, len);
    } else if (reg == 0) {
        tcg_gen_movi_tl(t, 0);
    }
}
656
657 /* MXU control register moves. */
/* Copy the MXU control register (XR16) into TCG temporary 't'. */
static inline void gen_load_mxu_cr(TCGv t)
{
    tcg_gen_mov_tl(t, mxu_CR);
}
662
/* Write TCG temporary 't' to the MXU control register (XR16). */
static inline void gen_store_mxu_cr(TCGv t)
{
    /* TODO: Add handling of RW rules for MXU_CR. */
    tcg_gen_mov_tl(mxu_CR, t);
}
668
669 /*
670 * S32I2M XRa, rb - Register move from GRF to XRF
671 */
static void gen_mxu_s32i2m(DisasContext *ctx)
{
    uint32_t XRa = extract32(ctx->opcode, 6, 5);
    uint32_t Rb = extract32(ctx->opcode, 16, 5);
    TCGv src = tcg_temp_new();

    gen_load_gpr(src, Rb);
    /* XRa is a 5-bit field: 0..15 select an XR, 16 selects the CR. */
    if (XRa == 16) {
        gen_store_mxu_cr(src);
    } else if (XRa <= 15) {
        gen_store_mxu_gpr(src, XRa);
    }
}
689
690 /*
691 * S32M2I XRa, rb - Register move from XRF to GRF
692 */
static void gen_mxu_s32m2i(DisasContext *ctx)
{
    uint32_t XRa = extract32(ctx->opcode, 6, 5);
    uint32_t Rb = extract32(ctx->opcode, 16, 5);
    TCGv val = tcg_temp_new();

    /* XRa is a 5-bit field: 0..15 select an XR, 16 selects the CR. */
    if (XRa == 16) {
        gen_load_mxu_cr(val);
    } else if (XRa <= 15) {
        gen_load_mxu_gpr(val, XRa);
    }

    gen_store_gpr(val, Rb);
}
711
712 /*
713 * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
714 *
715 * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
716 * post modify address register
717 */
/*
 * Emit TCG ops for S8LDD/S8LDI: load one byte from memory at Rb + s8
 * and place it into MXU register XRa according to pattern 'optn3'.
 *
 * @ctx:        disassembly context (opcode fields, memory index)
 * @postmodify: true for S8LDI, which writes the effective address back
 *              to the base GPR Rb
 */
static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s8, optn3;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s8 = extract32(ctx->opcode, 10, 8);
    optn3 = extract32(ctx->opcode, 18, 3);
    Rb = extract32(ctx->opcode, 21, 5);

    /* Effective address = Rb + sign-extended 8-bit offset. */
    gen_load_gpr(t0, Rb);
    tcg_gen_addi_tl(t0, t0, (int8_t)s8);
    if (postmodify) {
        gen_store_gpr(t0, Rb);
    }

    switch (optn3) {
    /*
     * PTN0..PTN3 all insert the zero-extended byte into byte lane
     * 'optn3' of XRa; a single deposit at offset 8 * optn3 replaces
     * four otherwise identical cases.
     */
    case MXU_OPTN3_PTN0:
    case MXU_OPTN3_PTN1:
    case MXU_OPTN3_PTN2:
    case MXU_OPTN3_PTN3:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        gen_load_mxu_gpr(t0, XRa);
        tcg_gen_deposit_tl(t0, t0, t1, 8 * optn3, 8);
        break;
    /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
    case MXU_OPTN3_PTN4:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
        break;
    /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
    case MXU_OPTN3_PTN5:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        tcg_gen_shli_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
        break;
    /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
    case MXU_OPTN3_PTN6:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
        tcg_gen_mov_tl(t0, t1);
        /* Clear bits 23..16; the shifted copy refills them with tmp8. */
        tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
        tcg_gen_shli_tl(t1, t1, 16);
        tcg_gen_or_tl(t0, t0, t1);
        break;
    /* XRa = {tmp8, tmp8, tmp8, tmp8} */
    case MXU_OPTN3_PTN7:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
        break;
    }

    gen_store_mxu_gpr(t0, XRa);
}
791
792 /*
793 * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
794 *
795 * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
796 * post modify address register
797 */
/*
 * Emit TCG ops for S8STD/S8SDI: store one byte of MXU register XRa
 * (byte lane selected by 'optn3') to memory at Rb + s8.
 *
 * @ctx:        disassembly context (opcode fields, memory index)
 * @postmodify: true for S8SDI, which writes the effective address back
 *              to the base GPR Rb
 */
static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s8, optn3;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s8 = extract32(ctx->opcode, 10, 8);
    optn3 = extract32(ctx->opcode, 18, 3);
    Rb = extract32(ctx->opcode, 21, 5);

    if (optn3 > 3) {
        /* reserved, do nothing */
        return;
    }

    /* Effective address = Rb + sign-extended 8-bit offset. */
    gen_load_gpr(t0, Rb);
    tcg_gen_addi_tl(t0, t0, (int8_t)s8);
    if (postmodify) {
        gen_store_gpr(t0, Rb);
    }
    gen_load_mxu_gpr(t1, XRa);

    /*
     * PTN0..PTN3 each select byte lane 'optn3' of XRa; one extract at
     * offset 8 * optn3 replaces the former four-way switch.
     */
    tcg_gen_extract_tl(t1, t1, 8 * optn3, 8);

    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
}
844
845 /*
846 * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
847 *
848 * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
849 * post modify address register
850 */
/*
 * Emit TCG ops for S16LDD/S16LDI: load a halfword from memory at
 * Rb + s10 and merge it into MXU register XRa per pattern 'optn2'.
 *
 * @ctx:        disassembly context (opcode fields, memory index)
 * @postmodify: true for S16LDI, which writes the effective address back
 *              to the base GPR Rb
 */
static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, optn2;
    int32_t s10;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    /* The encoded 9-bit signed offset is in halfword units. */
    s10 = sextract32(ctx->opcode, 10, 9) * 2;
    optn2 = extract32(ctx->opcode, 19, 2);
    Rb = extract32(ctx->opcode, 21, 5);

    gen_load_gpr(t0, Rb);
    tcg_gen_addi_tl(t0, t0, s10);
    if (postmodify) {
        gen_store_gpr(t0, Rb);
    }

    switch (optn2) {
    /* XRa[15:0] = tmp16 */
    case MXU_OPTN2_PTN0:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
        gen_load_mxu_gpr(t0, XRa);
        tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
        break;
    /* XRa[31:16] = tmp16 */
    case MXU_OPTN2_PTN1:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
        gen_load_mxu_gpr(t0, XRa);
        tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
        break;
    /* XRa = sign_extend(tmp16) */
    case MXU_OPTN2_PTN2:
        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
        break;
    /* XRa = {tmp16, tmp16} */
    case MXU_OPTN2_PTN3:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
        /*
         * One deposit suffices: base t1 supplies the low half, the
         * deposited copy fills the high half.  (A previous redundant
         * deposit of the low half has been dropped.)
         */
        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
        break;
    }

    gen_store_mxu_gpr(t0, XRa);
}
898
/*
 * S16STD XRa, Rb, s10, optn2 - Store a halfword from XRF to memory
 *
 * S16SDI XRa, Rb, s10, optn2 - Store a halfword from XRF to memory,
 *                              post modify address register
 */
/*
 * Emit TCG ops for S16STD/S16SDI: store one halfword of MXU register
 * XRa (half selected by 'optn2') to memory at Rb + s10.
 *
 * @ctx:        disassembly context (opcode fields, memory index)
 * @postmodify: true for S16SDI, which writes the effective address back
 *              to the base GPR Rb
 */
static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, optn2;
    int32_t s10;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    /* The encoded 9-bit signed offset is in halfword units. */
    s10 = sextract32(ctx->opcode, 10, 9) * 2;
    optn2 = extract32(ctx->opcode, 19, 2);
    Rb = extract32(ctx->opcode, 21, 5);

    if (optn2 > 1) {
        /* reserved, do nothing */
        return;
    }

    gen_load_gpr(t0, Rb);
    tcg_gen_addi_tl(t0, t0, s10);
    if (postmodify) {
        gen_store_gpr(t0, Rb);
    }
    gen_load_mxu_gpr(t1, XRa);

    /*
     * PTN0 selects XRa[15:0], PTN1 selects XRa[31:16]; one extract at
     * offset 16 * optn2 replaces the former two-way switch.
     */
    tcg_gen_extract_tl(t1, t1, 16 * optn2, 16);

    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
}
944
/*
 * S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
 * of GPR's and stores result into pair of MXU registers.
 * The 64-bit product is also written to the HI and LO registers.
 *
 * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
 * of GPR's and stores result into pair of MXU registers.
 * The 64-bit product is also written to the HI and LO registers.
 */
static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
{
    uint32_t XRa = extract32(ctx->opcode, 6, 4);
    uint32_t XRd = extract32(ctx->opcode, 10, 4);
    uint32_t rs = extract32(ctx->opcode, 16, 5);
    uint32_t rt = extract32(ctx->opcode, 21, 5);
    TCGv lo = tcg_temp_new();
    TCGv hi = tcg_temp_new();

    if (unlikely(rs == 0 || rt == 0)) {
        /* Any factor being $0 makes the 64-bit product zero. */
        tcg_gen_movi_tl(lo, 0);
        tcg_gen_movi_tl(hi, 0);
    } else {
        gen_load_gpr(lo, rs);
        gen_load_gpr(hi, rt);

        if (mulu) {
            tcg_gen_mulu2_tl(lo, hi, lo, hi);
        } else {
            tcg_gen_muls2_tl(lo, hi, lo, hi);
        }
    }
    /* The product goes both to HI/LO and to the XRa/XRd pair. */
    tcg_gen_mov_tl(cpu_HI[0], hi);
    tcg_gen_mov_tl(cpu_LO[0], lo);
    gen_store_mxu_gpr(hi, XRa);
    gen_store_mxu_gpr(lo, XRd);
}
985
986 /*
987 * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
988 * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
989 * with rounding and packing result
990 * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
991 * multiplication with rounding
992 */
/*
 * Emit TCG ops for D16MUL (!fractional), D16MULE (fractional,
 * !packed_result) and D16MULF (fractional, packed_result).
 *
 * Multiplies two signed 16-bit halves of XRb/XRc chosen by 'optn2';
 * the "left" product lands in t3, the "right" product in t2.  In the
 * fractional (Q15) variants the products are doubled and optionally
 * rounded under control of MXU_CR bits.  Results go to XRa/XRd, or
 * packed into XRa alone for D16MULF.
 */
static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
                           bool packed_result)
{
    TCGv t0, t1, t2, t3;
    uint32_t XRa, XRb, XRc, XRd, optn2;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    optn2 = extract32(ctx->opcode, 22, 2);

    /*
     * TODO: XRd field isn't used for D16MULF
     * There's no knowledge how this field affect
     * instruction decoding/behavior
     */

    /* Split XRb into sign-extended halves: t0 = XRb.L, t1 = XRb.H. */
    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_sextract_tl(t0, t1, 0, 16);
    tcg_gen_sextract_tl(t1, t1, 16, 16);
    /* Split XRc likewise: t2 = XRc.L, t3 = XRc.H. */
    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_sextract_tl(t2, t3, 0, 16);
    tcg_gen_sextract_tl(t3, t3, 16, 16);

    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    }
    if (fractional) {
        TCGLabel *l_done = gen_new_label();
        TCGv rounding = tcg_temp_new();

        /* Q15 fraction: double both products (shift left by one). */
        tcg_gen_shli_tl(t3, t3, 1);
        tcg_gen_shli_tl(t2, t2, 1);
        /* MXU_CR bit 1 enables rounding; when clear, skip it entirely. */
        tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
        tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
        if (packed_result) {
            TCGLabel *l_apply_bias_l = gen_new_label();
            TCGLabel *l_apply_bias_r = gen_new_label();
            TCGLabel *l_half_done = gen_new_label();
            TCGv bias = tcg_temp_new();

            /*
             * D16MULF supports unbiased rounding aka "bankers rounding",
             * "round to even", "convergent rounding"
             */
            /*
             * MXU_CR bit 2 selects biased rounding.  With unbiased
             * rounding (bit clear), the 0x8000 bias is NOT added when
             * the low 17 bits are exactly 0x8000 (the halfway case).
             */
            tcg_gen_andi_tl(bias, mxu_CR, 0x4);
            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
            tcg_gen_andi_tl(t0, t3, 0x1ffff);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
            gen_set_label(l_apply_bias_l);
            tcg_gen_addi_tl(t3, t3, 0x8000);
            gen_set_label(l_half_done);
            /* Same treatment for the "right" product in t2. */
            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
            tcg_gen_andi_tl(t0, t2, 0x1ffff);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
            gen_set_label(l_apply_bias_r);
            tcg_gen_addi_tl(t2, t2, 0x8000);
        } else {
            /* D16MULE doesn't support unbiased rounding */
            tcg_gen_addi_tl(t3, t3, 0x8000);
            tcg_gen_addi_tl(t2, t2, 0x8000);
        }
        gen_set_label(l_done);
    }
    if (!packed_result) {
        /* D16MUL/D16MULE: full results to XRa (left) and XRd (right). */
        gen_store_mxu_gpr(t3, XRa);
        gen_store_mxu_gpr(t2, XRd);
    } else {
        /* D16MULF: pack the two high halves into XRa. */
        tcg_gen_andi_tl(t3, t3, 0xffff0000);
        tcg_gen_shri_tl(t2, t2, 16);
        tcg_gen_or_tl(t3, t3, t2);
        gen_store_mxu_gpr(t3, XRa);
    }
}
1088
1089 /*
1090 * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
1091 * Signed 16 bit pattern multiply and accumulate
1092 * D16MACF XRa, XRb, XRc, aptn2, optn2
1093 * Signed Q15 fraction pattern multiply accumulate and pack
1094 * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
1095 * Signed Q15 fraction pattern multiply and accumulate
1096 */
static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
                           bool packed_result)
{
    TCGv t0, t1, t2, t3;
    uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    optn2 = extract32(ctx->opcode, 22, 2);
    aptn2 = extract32(ctx->opcode, 24, 2);

    /* Split XRb into sign-extended 16-bit halves: t0 = low, t1 = high. */
    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_sextract_tl(t0, t1, 0, 16);
    tcg_gen_sextract_tl(t1, t1, 16, 16);

    /* Split XRc into sign-extended 16-bit halves: t2 = low, t3 = high. */
    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_sextract_tl(t2, t3, 0, 16);
    tcg_gen_sextract_tl(t3, t3, 16, 16);

    /* Form the two products selected by optn2: t3 = lop, t2 = rop. */
    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    }

    if (fractional) {
        /* Q15 x Q15 leaves a duplicated sign bit: shift left into Q31. */
        tcg_gen_shli_tl(t3, t3, 1);
        tcg_gen_shli_tl(t2, t2, 1);
    }
    gen_load_mxu_gpr(t0, XRa);
    gen_load_mxu_gpr(t1, XRd);

    /* Accumulate: lop with XRa's old value, rop with XRd's old value. */
    switch (aptn2) {
    case MXU_APTN2_AA:
        tcg_gen_add_tl(t3, t0, t3);
        tcg_gen_add_tl(t2, t1, t2);
        break;
    case MXU_APTN2_AS:
        tcg_gen_add_tl(t3, t0, t3);
        tcg_gen_sub_tl(t2, t1, t2);
        break;
    case MXU_APTN2_SA:
        tcg_gen_sub_tl(t3, t0, t3);
        tcg_gen_add_tl(t2, t1, t2);
        break;
    case MXU_APTN2_SS:
        tcg_gen_sub_tl(t3, t0, t3);
        tcg_gen_sub_tl(t2, t1, t2);
        break;
    }

    if (fractional) {
        TCGLabel *l_done = gen_new_label();
        TCGv rounding = tcg_temp_new();

        /* MXU_CR bit 1 enables rounding; if it's clear, skip it entirely. */
        tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
        tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
        if (packed_result) {
            TCGLabel *l_apply_bias_l = gen_new_label();
            TCGLabel *l_apply_bias_r = gen_new_label();
            TCGLabel *l_half_done = gen_new_label();
            TCGv bias = tcg_temp_new();

            /*
             * D16MACF supports unbiased rounding aka "bankers rounding",
             * "round to even", "convergent rounding".
             * MXU_CR bit 2 set means biased rounding: always add 0x8000.
             * Otherwise skip the increment when the low 17 bits equal
             * exactly 0x8000 (an exact tie with an even result bit).
             */
            tcg_gen_andi_tl(bias, mxu_CR, 0x4);
            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
            tcg_gen_andi_tl(t0, t3, 0x1ffff);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
            gen_set_label(l_apply_bias_l);
            tcg_gen_addi_tl(t3, t3, 0x8000);
            gen_set_label(l_half_done);
            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
            tcg_gen_andi_tl(t0, t2, 0x1ffff);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
            gen_set_label(l_apply_bias_r);
            tcg_gen_addi_tl(t2, t2, 0x8000);
        } else {
            /* D16MACE doesn't support unbiased rounding */
            tcg_gen_addi_tl(t3, t3, 0x8000);
            tcg_gen_addi_tl(t2, t2, 0x8000);
        }
        gen_set_label(l_done);
    }

    if (!packed_result) {
        /* D16MAC/D16MACE: both 32-bit results kept, in XRa and XRd. */
        gen_store_mxu_gpr(t3, XRa);
        gen_store_mxu_gpr(t2, XRd);
    } else {
        /* D16MACF: pack the two high halfwords into XRa. */
        tcg_gen_andi_tl(t3, t3, 0xffff0000);
        tcg_gen_shri_tl(t2, t2, 16);
        tcg_gen_or_tl(t3, t3, t2);
        gen_store_mxu_gpr(t3, XRa);
    }
}
1214
1215 /*
1216 * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
1217 * unsigned 16 bit pattern multiply and add/subtract.
1218 */
static void gen_mxu_d16madl(DisasContext *ctx)
{
    TCGv t0, t1, t2, t3;
    uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    optn2 = extract32(ctx->opcode, 22, 2);
    aptn2 = extract32(ctx->opcode, 24, 2);

    /* Split XRb into sign-extended 16-bit halves: t0 = low, t1 = high. */
    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_sextract_tl(t0, t1, 0, 16);
    tcg_gen_sextract_tl(t1, t1, 16, 16);

    /* Split XRc into sign-extended 16-bit halves: t2 = low, t3 = high. */
    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_sextract_tl(t2, t3, 0, 16);
    tcg_gen_sextract_tl(t3, t3, 16, 16);

    /* Form the two products selected by optn2: t3 = lop, t2 = rop. */
    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    }
    /* Only the low 16 bits of each product take part in the sum. */
    tcg_gen_extract_tl(t2, t2, 0, 16);
    tcg_gen_extract_tl(t3, t3, 0, 16);

    /* Split accumulator XRa into 16-bit halves: t0 = low, t1 = high. */
    gen_load_mxu_gpr(t1, XRa);
    tcg_gen_extract_tl(t0, t1, 0, 16);
    tcg_gen_extract_tl(t1, t1, 16, 16);

    switch (aptn2) {
    case MXU_APTN2_AA:
        tcg_gen_add_tl(t3, t1, t3);
        tcg_gen_add_tl(t2, t0, t2);
        break;
    case MXU_APTN2_AS:
        tcg_gen_add_tl(t3, t1, t3);
        tcg_gen_sub_tl(t2, t0, t2);
        break;
    case MXU_APTN2_SA:
        tcg_gen_sub_tl(t3, t1, t3);
        tcg_gen_add_tl(t2, t0, t2);
        break;
    case MXU_APTN2_SS:
        tcg_gen_sub_tl(t3, t1, t3);
        tcg_gen_sub_tl(t2, t0, t2);
        break;
    }

    /*
     * Repack the two 16-bit results into XRd.  Store through
     * gen_store_mxu_gpr() instead of writing mxu_gpr[XRd - 1] directly:
     * XRd == 0 encodes the always-zero register XR0, and a direct write
     * would index mxu_gpr[] out of bounds (XRd - 1 underflows) as well
     * as violate XR0 semantics.
     */
    tcg_gen_andi_tl(t2, t2, 0xffff);
    tcg_gen_shli_tl(t3, t3, 16);
    tcg_gen_or_tl(t3, t3, t2);
    gen_store_mxu_gpr(t3, XRd);
}
1292
1293 /*
1294 * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
1295 * signed 16 bit pattern multiply and 32-bit add/subtract.
1296 */
static void gen_mxu_s16mad(DisasContext *ctx)
{
    TCGv t0, t1;
    uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
    uint32_t opb_ofs, opc_ofs;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    optn2 = extract32(ctx->opcode, 22, 2);
    aptn1 = extract32(ctx->opcode, 24, 1);
    pad = extract32(ctx->opcode, 25, 1);

    if (pad) {
        /* FIXME check if it influence the result */
    }

    /* Select which halfword of each source enters the product. */
    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H*XRC.H */
        opb_ofs = 16;
        opc_ofs = 16;
        break;
    case MXU_OPTN2_LW: /* XRB.L*XRC.L */
        opb_ofs = 0;
        opc_ofs = 0;
        break;
    case MXU_OPTN2_HW: /* XRB.H*XRC.L */
        opb_ofs = 16;
        opc_ofs = 0;
        break;
    case MXU_OPTN2_XW: /* XRB.L*XRC.H */
    default:
        opb_ofs = 0;
        opc_ofs = 16;
        break;
    }

    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t1, XRc);
    tcg_gen_sextract_tl(t0, t0, opb_ofs, 16);
    tcg_gen_sextract_tl(t1, t1, opc_ofs, 16);

    /* 32-bit product of the two signed halfwords. */
    tcg_gen_mul_tl(t0, t0, t1);

    gen_load_mxu_gpr(t1, XRa);

    /* Accumulate the product onto XRa's value per aptn1. */
    if (aptn1 == MXU_APTN1_S) {
        tcg_gen_sub_tl(t1, t1, t0);
    } else {
        tcg_gen_add_tl(t1, t1, t0);
    }

    gen_store_mxu_gpr(t1, XRd);
}
1353
1354 /*
1355 * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1356 * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1357 * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
1358 * and accumulate
1359 * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
1360 * and accumulate
1361 */
static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
{
    TCGv t0, t1, t2, t3, t4, t5, t6, t7;
    uint32_t XRa, XRb, XRc, XRd, aptn2;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();
    t4 = tcg_temp_new();
    t5 = tcg_temp_new();
    t6 = tcg_temp_new();
    t7 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    aptn2 = extract32(ctx->opcode, 24, 2);

    gen_load_mxu_gpr(t3, XRb);
    gen_load_mxu_gpr(t7, XRc);

    /* Unpack the four bytes of XRb into t0..t3 (low byte first). */
    if (su) {
        /* Q8MULSU / Q8MACSU: XRb bytes are treated as signed. */
        tcg_gen_sextract_tl(t0, t3, 0, 8);
        tcg_gen_sextract_tl(t1, t3, 8, 8);
        tcg_gen_sextract_tl(t2, t3, 16, 8);
        tcg_gen_sextract_tl(t3, t3, 24, 8);
    } else {
        /* Q8MUL / Q8MAC: XRb bytes are treated as unsigned. */
        tcg_gen_extract_tl(t0, t3, 0, 8);
        tcg_gen_extract_tl(t1, t3, 8, 8);
        tcg_gen_extract_tl(t2, t3, 16, 8);
        tcg_gen_extract_tl(t3, t3, 24, 8);
    }

    /* XRc bytes are always zero-extended into t4..t7. */
    tcg_gen_extract_tl(t4, t7, 0, 8);
    tcg_gen_extract_tl(t5, t7, 8, 8);
    tcg_gen_extract_tl(t6, t7, 16, 8);
    tcg_gen_extract_tl(t7, t7, 24, 8);

    /* Four independent byte products. */
    tcg_gen_mul_tl(t0, t0, t4);
    tcg_gen_mul_tl(t1, t1, t5);
    tcg_gen_mul_tl(t2, t2, t6);
    tcg_gen_mul_tl(t3, t3, t7);

    if (mac) {
        /*
         * Q8MAC / Q8MACSU: accumulate with the 16-bit lanes of XRd
         * (low product pair) and XRa (high product pair); aptn2 bit 0
         * selects add/subtract for the low pair, bit 1 for the high.
         */
        gen_load_mxu_gpr(t4, XRd);
        gen_load_mxu_gpr(t5, XRa);
        tcg_gen_extract_tl(t6, t4, 0, 16);
        tcg_gen_extract_tl(t7, t4, 16, 16);
        if (aptn2 & 1) {
            tcg_gen_sub_tl(t0, t6, t0);
            tcg_gen_sub_tl(t1, t7, t1);
        } else {
            tcg_gen_add_tl(t0, t6, t0);
            tcg_gen_add_tl(t1, t7, t1);
        }
        tcg_gen_extract_tl(t6, t5, 0, 16);
        tcg_gen_extract_tl(t7, t5, 16, 16);
        if (aptn2 & 2) {
            tcg_gen_sub_tl(t2, t6, t2);
            tcg_gen_sub_tl(t3, t7, t3);
        } else {
            tcg_gen_add_tl(t2, t6, t2);
            tcg_gen_add_tl(t3, t7, t3);
        }
    }

    /* Pack as 16-bit lanes: low pair goes to XRd, high pair to XRa. */
    tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
    tcg_gen_deposit_tl(t1, t2, t3, 16, 16);

    gen_store_mxu_gpr(t0, XRd);
    gen_store_mxu_gpr(t1, XRa);
}
1438
1439 /*
1440 * Q8MADL XRd, XRa, XRb, XRc
1441 * Parallel quad unsigned 8 bit multiply and accumulate.
1442 * e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
1443 */
static void gen_mxu_q8madl(DisasContext *ctx)
{
    TCGv t0, t1, t2, t3, t4, t5, t6, t7;
    uint32_t XRa, XRb, XRc, XRd, aptn2;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();
    t4 = tcg_temp_new();
    t5 = tcg_temp_new();
    t6 = tcg_temp_new();
    t7 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    aptn2 = extract32(ctx->opcode, 24, 2);

    gen_load_mxu_gpr(t3, XRb);
    gen_load_mxu_gpr(t7, XRc);

    /* Unpack the four unsigned bytes of XRb into t0..t3. */
    tcg_gen_extract_tl(t0, t3, 0, 8);
    tcg_gen_extract_tl(t1, t3, 8, 8);
    tcg_gen_extract_tl(t2, t3, 16, 8);
    tcg_gen_extract_tl(t3, t3, 24, 8);

    /* Unpack the four unsigned bytes of XRc into t4..t7. */
    tcg_gen_extract_tl(t4, t7, 0, 8);
    tcg_gen_extract_tl(t5, t7, 8, 8);
    tcg_gen_extract_tl(t6, t7, 16, 8);
    tcg_gen_extract_tl(t7, t7, 24, 8);

    /* Four independent byte products. */
    tcg_gen_mul_tl(t0, t0, t4);
    tcg_gen_mul_tl(t1, t1, t5);
    tcg_gen_mul_tl(t2, t2, t6);
    tcg_gen_mul_tl(t3, t3, t7);

    /*
     * Accumulate with the bytes of XRa: aptn2 bit 0 selects
     * add/subtract for the low byte pair, bit 1 for the high pair.
     */
    gen_load_mxu_gpr(t4, XRa);
    tcg_gen_extract_tl(t6, t4, 0, 8);
    tcg_gen_extract_tl(t7, t4, 8, 8);
    if (aptn2 & 1) {
        tcg_gen_sub_tl(t0, t6, t0);
        tcg_gen_sub_tl(t1, t7, t1);
    } else {
        tcg_gen_add_tl(t0, t6, t0);
        tcg_gen_add_tl(t1, t7, t1);
    }
    tcg_gen_extract_tl(t6, t4, 16, 8);
    tcg_gen_extract_tl(t7, t4, 24, 8);
    if (aptn2 & 2) {
        tcg_gen_sub_tl(t2, t6, t2);
        tcg_gen_sub_tl(t3, t7, t3);
    } else {
        tcg_gen_add_tl(t2, t6, t2);
        tcg_gen_add_tl(t3, t7, t3);
    }

    /* Keep only the low byte of each lane and pack them into XRd. */
    tcg_gen_andi_tl(t5, t0, 0xff);
    tcg_gen_deposit_tl(t5, t5, t1, 8, 8);
    tcg_gen_deposit_tl(t5, t5, t2, 16, 8);
    tcg_gen_deposit_tl(t5, t5, t3, 24, 8);

    gen_store_mxu_gpr(t5, XRd);
}
1509
1510 /*
1511 * S32LDD XRa, Rb, S12 - Load a word from memory to XRF
1512 * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
1513 * in reversed byte seq.
1514 * S32LDI XRa, Rb, S12 - Load a word from memory to XRF,
1515 * post modify base address GPR.
1516 * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
1517 * post modify base address GPR and load in reversed byte seq.
1518 */
static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s12;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s12 = sextract32(ctx->opcode, 10, 10);
    Rb = extract32(ctx->opcode, 21, 5);

    /*
     * Effective address: GPR[Rb] + sign-extended s12 scaled by 4.
     * Use tcg_gen_addi_tl directly rather than materialising the
     * constant with a movi + add pair.
     */
    gen_load_gpr(t0, Rb);
    tcg_gen_addi_tl(t0, t0, s12 * 4);

    /* Load a word, optionally byte-reversed, into XRa. */
    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
                       MO_SL | mo_endian_rev(ctx, reversed) |
                        ctx->default_tcg_memop_mask);
    gen_store_mxu_gpr(t1, XRa);

    if (postinc) {
        /* S32LDI/S32LDIR: write the updated address back to Rb. */
        gen_store_gpr(t0, Rb);
    }
}
1544
1545 /*
1546 * S32STD XRa, Rb, S12 - Store a word from XRF to memory
1547 * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
1548 * in reversed byte seq.
1549 * S32SDI XRa, Rb, S12 - Store a word from XRF to memory,
1550 * post modify base address GPR.
1551 * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
1552 * post modify base address GPR and store in reversed byte seq.
1553 */
static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s12;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s12 = sextract32(ctx->opcode, 10, 10);
    Rb = extract32(ctx->opcode, 21, 5);

    /*
     * Effective address: GPR[Rb] + sign-extended s12 scaled by 4.
     * Use tcg_gen_addi_tl directly rather than materialising the
     * constant with a movi + add pair.
     */
    gen_load_gpr(t0, Rb);
    tcg_gen_addi_tl(t0, t0, s12 * 4);

    /* Store XRa as a word, optionally byte-reversed. */
    gen_load_mxu_gpr(t1, XRa);
    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
                       MO_SL | mo_endian_rev(ctx, reversed) |
                        ctx->default_tcg_memop_mask);

    if (postinc) {
        /* S32SDI/S32SDIR: write the updated address back to Rb. */
        gen_store_gpr(t0, Rb);
    }
}
1579
1580 /*
1581 * S32LDDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1582 * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
1583 * in reversed byte seq.
1584 * S32LDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1585 * post modify base address GPR.
1586 * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
1587 * post modify base address GPR and load in reversed byte seq.
1588 */
static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
                             bool postinc, uint32_t strd2)
{
    uint32_t XRa, Rb, Rc;
    TCGv addr = tcg_temp_new();
    TCGv tmp = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    /* addr = GPR[Rb] + (GPR[Rc] << strd2) */
    gen_load_gpr(addr, Rb);
    gen_load_gpr(tmp, Rc);
    tcg_gen_shli_tl(tmp, tmp, strd2);
    tcg_gen_add_tl(addr, addr, tmp);

    /* Load a word, optionally byte-reversed, into XRa. */
    tcg_gen_qemu_ld_tl(tmp, addr, ctx->mem_idx,
                       MO_SL | mo_endian_rev(ctx, reversed) |
                       ctx->default_tcg_memop_mask);
    gen_store_mxu_gpr(tmp, XRa);

    if (postinc) {
        /* Post-modify addressing: update the base register. */
        gen_store_gpr(addr, Rb);
    }
}
1616
1617 /*
1618 * LXW Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
1619 * LXB Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
1620 * sign extending to GPR size.
 * LXH Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
 * sign extending to GPR size.
 * LXBU Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
 * zero extending to GPR size.
1625 * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
1626 * zero extending to GPR size.
1627 */
static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
{
    uint32_t Ra, Rb, Rc;
    TCGv addr = tcg_temp_new();
    TCGv tmp = tcg_temp_new();

    Ra = extract32(ctx->opcode, 11, 5);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    /* addr = GPR[Rb] + (GPR[Rc] << strd2) */
    gen_load_gpr(addr, Rb);
    gen_load_gpr(tmp, Rc);
    tcg_gen_shli_tl(tmp, tmp, strd2);
    tcg_gen_add_tl(addr, addr, tmp);

    /* Access width and sign/zero extension come entirely from 'mop'. */
    tcg_gen_qemu_ld_tl(tmp, addr, ctx->mem_idx,
                       mop | ctx->default_tcg_memop_mask);
    gen_store_gpr(tmp, Ra);
}
1648
1649 /*
 * S32STDV XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
 * S32STDVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
 * in reversed byte seq.
 * S32SDIV XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
 * post modify base address GPR.
 * S32SDIVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
 * post modify base address GPR and store in reversed byte seq.
1657 */
static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
                             bool postinc, uint32_t strd2)
{
    uint32_t XRa, Rb, Rc;
    TCGv addr = tcg_temp_new();
    TCGv tmp = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    /* addr = GPR[Rb] + (GPR[Rc] << strd2) */
    gen_load_gpr(addr, Rb);
    gen_load_gpr(tmp, Rc);
    tcg_gen_shli_tl(tmp, tmp, strd2);
    tcg_gen_add_tl(addr, addr, tmp);

    /* Store XRa as a word, optionally byte-reversed. */
    gen_load_mxu_gpr(tmp, XRa);
    tcg_gen_qemu_st_tl(tmp, addr, ctx->mem_idx,
                       MO_SL | mo_endian_rev(ctx, reversed) |
                       ctx->default_tcg_memop_mask);

    if (postinc) {
        /* Post-modify addressing: update the base register. */
        gen_store_gpr(addr, Rb);
    }
}
1685
1686 /*
1687 * MXU instruction category: logic
1688 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1689 *
1690 * S32NOR S32AND S32OR S32XOR
1691 */
1692
1693 /*
1694 * S32NOR XRa, XRb, XRc
1695 * Update XRa with the result of logical bitwise 'nor' operation
1696 * applied to the content of XRb and XRc.
1697 */
static void gen_mxu_S32NOR(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    /* mxu_gpr[] is indexed by XR number minus one; XR0 has no backing. */
    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 1s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
    } else if (unlikely(XRb == 0)) {
        /* XRb zero register -> just set destination to the negation of XRc */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
    } else if (unlikely(XRc == 0)) {
        /* XRc zero register -> just set destination to the negation of XRb */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to the negation of XRb */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}
1728
1729 /*
1730 * S32AND XRa, XRb, XRc
1731 * Update XRa with the result of logical bitwise 'and' operation
1732 * applied to the content of XRb and XRc.
1733 */
static void gen_mxu_S32AND(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0 || XRa == 0)) {
        /* Bad opcode padding, or destination is XR0: no effect. */
        return;
    }

    if (unlikely(XRb == 0 || XRc == 0)) {
        /* Anything ANDed with the zero register is zero. */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* x & x == x: a plain move suffices. */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* The general case. */
        tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}
1758
1759 /*
1760 * S32OR XRa, XRb, XRc
1761 * Update XRa with the result of logical bitwise 'or' operation
1762 * applied to the content of XRb and XRc.
1763 */
static void gen_mxu_S32OR(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0 || XRa == 0)) {
        /* Bad opcode padding, or destination is XR0: no effect. */
        return;
    }

    if (unlikely(XRb == 0 && XRc == 0)) {
        /* XR0 | XR0 == 0. */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == 0)) {
        /* 0 | x == x: copy XRc. */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
    } else if (unlikely(XRc == 0 || XRb == XRc)) {
        /* x | 0 == x and x | x == x: copy XRb. */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* The general case. */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}
1794
1795 /*
1796 * S32XOR XRa, XRb, XRc
1797 * Update XRa with the result of logical bitwise 'xor' operation
1798 * applied to the content of XRb and XRc.
1799 */
static void gen_mxu_S32XOR(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0 || XRa == 0)) {
        /* Bad opcode padding, or destination is XR0: no effect. */
        return;
    }

    if (unlikely(XRb == XRc)) {
        /* x ^ x == 0 (this also covers XR0 ^ XR0). */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == 0)) {
        /* 0 ^ x == x: copy XRc. */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
    } else if (unlikely(XRc == 0)) {
        /* x ^ 0 == x: copy XRb. */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* The general case. */
        tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}
1830
1831 /*
1832 * MXU instruction category: shift
1833 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1834 *
1835 * D32SLL D32SLR D32SAR D32SARL
1836 * D32SLLV D32SLRV D32SARV D32SARW
1837 * Q16SLL Q16SLR Q16SAR
1838 * Q16SLLV Q16SLRV Q16SARV
1839 */
1840
1841 /*
1842 * D32SLL XRa, XRd, XRb, XRc, SFT4
1843 * Dual 32-bit shift left from XRb and XRc to SFT4
1844 * bits (0..15). Store to XRa and XRd respectively.
1845 * D32SLR XRa, XRd, XRb, XRc, SFT4
1846 * Dual 32-bit shift logic right from XRb and XRc
1847 * to SFT4 bits (0..15). Store to XRa and XRd respectively.
1848 * D32SAR XRa, XRd, XRb, XRc, SFT4
1849 * Dual 32-bit shift arithmetic right from XRb and XRc
1850 * to SFT4 bits (0..15). Store to XRa and XRd respectively.
1851 */
static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
{
    uint32_t XRa, XRb, XRc, XRd, sft4;
    TCGv v0 = tcg_temp_new();
    TCGv v1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    sft4 = extract32(ctx->opcode, 22, 4);

    gen_load_mxu_gpr(v0, XRb);
    gen_load_mxu_gpr(v1, XRc);

    /* Shift both words by the 4-bit immediate. */
    if (!right) {
        tcg_gen_shli_tl(v0, v0, sft4);
        tcg_gen_shli_tl(v1, v1, sft4);
    } else if (arithmetic) {
        tcg_gen_sari_tl(v0, v0, sft4);
        tcg_gen_sari_tl(v1, v1, sft4);
    } else {
        tcg_gen_shri_tl(v0, v0, sft4);
        tcg_gen_shri_tl(v1, v1, sft4);
    }
    gen_store_mxu_gpr(v0, XRa);
    gen_store_mxu_gpr(v1, XRd);
}
1883
1884 /*
1885 * D32SLLV XRa, XRd, rs
1886 * Dual 32-bit shift left from XRa and XRd to rs[3:0]
1887 * bits. Store back to XRa and XRd respectively.
1888 * D32SLRV XRa, XRd, rs
1889 * Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
1890 * bits. Store back to XRa and XRd respectively.
1891 * D32SARV XRa, XRd, rs
1892 * Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
1893 * bits. Store back to XRa and XRd respectively.
1894 */
static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
{
    uint32_t XRa, XRd, rs;
    TCGv v0 = tcg_temp_new();
    TCGv v1 = tcg_temp_new();
    TCGv sft = tcg_temp_new();

    XRa = extract32(ctx->opcode, 10, 4);
    XRd = extract32(ctx->opcode, 14, 4);
    rs = extract32(ctx->opcode, 21, 5);

    gen_load_mxu_gpr(v0, XRa);
    gen_load_mxu_gpr(v1, XRd);
    /* Only rs[3:0] contributes to the shift amount. */
    gen_load_gpr(sft, rs);
    tcg_gen_andi_tl(sft, sft, 0x0f);

    /* Shift both words by the register-supplied amount. */
    if (!right) {
        tcg_gen_shl_tl(v0, v0, sft);
        tcg_gen_shl_tl(v1, v1, sft);
    } else if (arithmetic) {
        tcg_gen_sar_tl(v0, v0, sft);
        tcg_gen_sar_tl(v1, v1, sft);
    } else {
        tcg_gen_shr_tl(v0, v0, sft);
        tcg_gen_shr_tl(v1, v1, sft);
    }
    gen_store_mxu_gpr(v0, XRa);
    gen_store_mxu_gpr(v1, XRd);
}
1927
1928 /*
1929 * D32SARL XRa, XRb, XRc, SFT4
1930 * Dual shift arithmetic right 32-bit integers in XRb and XRc
1931 * to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
1932 *
1933 * D32SARW XRa, XRb, XRc, rb
1934 * Dual shift arithmetic right 32-bit integers in XRb and XRc
1935 * to rb[3:0] bits. Pack 16 LSBs of each into XRa.
1936 */
static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
{
    uint32_t XRa, XRb, XRc, rb;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    rb = extract32(ctx->opcode, 21, 5);

    if (unlikely(XRa == 0)) {
        /* Writes to XR0 are discarded. */
        return;
    }

    TCGv lop = tcg_temp_new();
    TCGv rop = tcg_temp_new();
    TCGv sft = tcg_temp_new();

    if (sarw) {
        /* D32SARW: the shift amount comes from rb's GPR, bits [3:0]. */
        gen_load_gpr(sft, rb);
        tcg_gen_andi_tl(sft, sft, 0x0f);
    } else {
        /* D32SARL: SFT4 is encoded in the upper bits of the rb field. */
        tcg_gen_movi_tl(sft, rb >> 1);
    }
    gen_load_mxu_gpr(lop, XRb);
    gen_load_mxu_gpr(rop, XRc);
    tcg_gen_sar_tl(lop, lop, sft);
    tcg_gen_sar_tl(rop, rop, sft);
    /* Pack the 16 LSBs of each result: XRb's above XRc's. */
    tcg_gen_extract_tl(sft, rop, 0, 16);
    tcg_gen_deposit_tl(sft, sft, lop, 16, 16);
    gen_store_mxu_gpr(sft, XRa);
}
1969
1970 /*
1971 * Q16SLL XRa, XRd, XRb, XRc, SFT4
1972 * Quad 16-bit shift left from XRb and XRc to SFT4
1973 * bits (0..15). Store to XRa and XRd respectively.
1974 * Q16SLR XRa, XRd, XRb, XRc, SFT4
1975 * Quad 16-bit shift logic right from XRb and XRc
1976 * to SFT4 bits (0..15). Store to XRa and XRd respectively.
1977 * Q16SAR XRa, XRd, XRb, XRc, SFT4
1978 * Quad 16-bit shift arithmetic right from XRb and XRc
1979 * to SFT4 bits (0..15). Store to XRa and XRd respectively.
1980 */
static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
{
    uint32_t XRa, XRb, XRc, XRd, sft4;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    sft4 = extract32(ctx->opcode, 22, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();

    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t2, XRc);

    /*
     * Unpack the four 16-bit lanes (t0/t1 from XRb, t2/t3 from XRc);
     * sign-extend them only when the shift is arithmetic.
     */
    if (arithmetic) {
        tcg_gen_sextract_tl(t1, t0, 16, 16);
        tcg_gen_sextract_tl(t0, t0, 0, 16);
        tcg_gen_sextract_tl(t3, t2, 16, 16);
        tcg_gen_sextract_tl(t2, t2, 0, 16);
    } else {
        tcg_gen_extract_tl(t1, t0, 16, 16);
        tcg_gen_extract_tl(t0, t0, 0, 16);
        tcg_gen_extract_tl(t3, t2, 16, 16);
        tcg_gen_extract_tl(t2, t2, 0, 16);
    }

    /* Shift each lane by the 4-bit immediate. */
    if (right) {
        if (arithmetic) {
            tcg_gen_sari_tl(t0, t0, sft4);
            tcg_gen_sari_tl(t1, t1, sft4);
            tcg_gen_sari_tl(t2, t2, sft4);
            tcg_gen_sari_tl(t3, t3, sft4);
        } else {
            tcg_gen_shri_tl(t0, t0, sft4);
            tcg_gen_shri_tl(t1, t1, sft4);
            tcg_gen_shri_tl(t2, t2, sft4);
            tcg_gen_shri_tl(t3, t3, sft4);
        }
    } else {
        tcg_gen_shli_tl(t0, t0, sft4);
        tcg_gen_shli_tl(t1, t1, sft4);
        tcg_gen_shli_tl(t2, t2, sft4);
        tcg_gen_shli_tl(t3, t3, sft4);
    }
    /* Repack lanes: XRb's results go to XRa, XRc's results go to XRd. */
    tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);

    gen_store_mxu_gpr(t0, XRa);
    gen_store_mxu_gpr(t2, XRd);
}
2035
2036 /*
2037 * Q16SLLV XRa, XRd, rs
2038 * Quad 16-bit shift left from XRa and XRd to rs[3:0]
2039 * bits. Store to XRa and XRd respectively.
2040 * Q16SLRV XRa, XRd, rs
2041 * Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
2042 * bits. Store to XRa and XRd respectively.
2043 * Q16SARV XRa, XRd, rs
2044 * Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
2045 * bits. Store to XRa and XRd respectively.
2046 */
static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
{
    uint32_t XRa, XRd, rs;

    XRa = extract32(ctx->opcode, 10, 4);
    XRd = extract32(ctx->opcode, 14, 4);
    rs = extract32(ctx->opcode, 21, 5);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv t5 = tcg_temp_new();

    gen_load_mxu_gpr(t0, XRa);
    gen_load_mxu_gpr(t2, XRd);
    /* Only rs[3:0] contributes to the shift amount. */
    gen_load_gpr(t5, rs);
    tcg_gen_andi_tl(t5, t5, 0x0f);

    /*
     * Unpack the four 16-bit lanes (t0/t1 from XRa, t2/t3 from XRd);
     * sign-extend them only when the shift is arithmetic.
     */
    if (arithmetic) {
        tcg_gen_sextract_tl(t1, t0, 16, 16);
        tcg_gen_sextract_tl(t0, t0, 0, 16);
        tcg_gen_sextract_tl(t3, t2, 16, 16);
        tcg_gen_sextract_tl(t2, t2, 0, 16);
    } else {
        tcg_gen_extract_tl(t1, t0, 16, 16);
        tcg_gen_extract_tl(t0, t0, 0, 16);
        tcg_gen_extract_tl(t3, t2, 16, 16);
        tcg_gen_extract_tl(t2, t2, 0, 16);
    }

    /* Shift each lane by the register-supplied amount. */
    if (right) {
        if (arithmetic) {
            tcg_gen_sar_tl(t0, t0, t5);
            tcg_gen_sar_tl(t1, t1, t5);
            tcg_gen_sar_tl(t2, t2, t5);
            tcg_gen_sar_tl(t3, t3, t5);
        } else {
            tcg_gen_shr_tl(t0, t0, t5);
            tcg_gen_shr_tl(t1, t1, t5);
            tcg_gen_shr_tl(t2, t2, t5);
            tcg_gen_shr_tl(t3, t3, t5);
        }
    } else {
        tcg_gen_shl_tl(t0, t0, t5);
        tcg_gen_shl_tl(t1, t1, t5);
        tcg_gen_shl_tl(t2, t2, t5);
        tcg_gen_shl_tl(t3, t3, t5);
    }
    /* Repack lanes and write both destinations back in place. */
    tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);

    gen_store_mxu_gpr(t0, XRa);
    gen_store_mxu_gpr(t2, XRd);
}
2103
2104 /*
2105 * MXU instruction category max/min/avg
2106 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2107 *
2108 * S32MAX D16MAX Q8MAX
2109 * S32MIN D16MIN Q8MIN
2110 * S32SLT D16SLT Q8SLT
2111 * Q8SLTU
2112 * D16AVG Q8AVG
2113 * D16AVGR Q8AVGR
2114 * S32MOVZ D16MOVZ Q8MOVZ
2115 * S32MOVN D16MOVN Q8MOVN
2116 */
2117
2118 /*
2119 * S32MAX XRa, XRb, XRc
2120 * Update XRa with the maximum of signed 32-bit integers contained
2121 * in XRb and XRc.
2122 *
2123 * S32MIN XRa, XRb, XRc
2124 * Update XRa with the minimum of signed 32-bit integers contained
2125 * in XRb and XRc.
2126 */
static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
{
    uint32_t pad, opc, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    opc = extract32(ctx->opcode, 18, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* exactly one operand is zero register - find which one is not...*/
        uint32_t XRx = XRb ? XRb : XRc;
        /*
         * ...and do max/min operation with one operand 0.
         * smax/smin take a TCGv_i32 operand, so a zero-valued constant
         * temp must be passed rather than a bare 0 (a bare 0 is a NULL
         * TCGv and would crash at translation time); this matches how
         * gen_mxu_D16MAX_D16MIN handles the same case.
         */
        TCGv_i32 zero = tcg_constant_i32(0);
        if (opc == OPC_MXU_S32MAX) {
            tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], zero);
        } else {
            tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], zero);
        }
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        if (opc == OPC_MXU_S32MAX) {
            tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
                             mxu_gpr[XRc - 1]);
        } else {
            tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
                             mxu_gpr[XRc - 1]);
        }
    }
}
2167
2168 /*
2169 * D16MAX
2170 * Update XRa with the 16-bit-wise maximums of signed integers
2171 * contained in XRb and XRc.
2172 *
2173 * D16MIN
2174 * Update XRa with the 16-bit-wise minimums of signed integers
2175 * contained in XRb and XRc.
2176 */
static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
{
    uint32_t pad, opc, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    opc = extract32(ctx->opcode, 18, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* exactly one operand is zero register - find which one is not...*/
        uint32_t XRx = XRb ? XRb : XRc;
        /* ...and do half-word-wise max/min with one operand 0 */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_constant_i32(0);
        TCGv_i32 t2 = tcg_temp_new();

        /*
         * The left half-word first: with the low 16 bits masked to
         * zero the half-word occupies the sign-significant position,
         * so a full 32-bit signed max/min against 0 is equivalent to
         * a 16-bit signed max/min of that half-word.
         */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* the right half-word */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
        /* move half-words to the leftmost position for the same trick */
        tcg_gen_shli_i32(t0, t0, 16);
        /* t0 will be max/min of t0 and t1 */
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t0, t0, t1);
        } else {
            tcg_gen_smin_i32(t0, t0, t1);
        }
        /* return resulting half-words to its original position */
        tcg_gen_shri_i32(t0, t0, 16);
        /* finally combine both halves into the destination */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_temp_new();
        TCGv_i32 t2 = tcg_temp_new();

        /* the left half-word first (see masking trick above) */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* the right half-word */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
        /* move half-words to the leftmost position */
        tcg_gen_shli_i32(t0, t0, 16);
        tcg_gen_shli_i32(t1, t1, 16);
        /* t0 will be max/min of t0 and t1 */
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t0, t0, t1);
        } else {
            tcg_gen_smin_i32(t0, t0, t1);
        }
        /* return resulting half-words to its original position */
        tcg_gen_shri_i32(t0, t0, 16);
        /* finally combine both halves into the destination */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
    }
}
2260
2261 /*
2262 * Q8MAX
2263 * Update XRa with the 8-bit-wise maximums of signed integers
2264 * contained in XRb and XRc.
2265 *
2266 * Q8MIN
2267 * Update XRa with the 8-bit-wise minimums of signed integers
2268 * contained in XRb and XRc.
2269 */
static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
{
    uint32_t pad, opc, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    opc = extract32(ctx->opcode, 18, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* exactly one operand is zero register - make it be the first...*/
        uint32_t XRx = XRb ? XRb : XRc;
        /* ...and do byte-wise max/min with one operand 0 */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_constant_i32(0);
        TCGv_i32 t2 = tcg_temp_new();
        int32_t i;

        /*
         * The leftmost byte (byte 3) first: with the lower bits masked
         * to zero the byte sits in the sign-significant position, so a
         * 32-bit signed max/min is equivalent to an 8-bit signed one.
         */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
        if (opc == OPC_MXU_Q8MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* bytes 2, 1, 0 */
        for (i = 2; i >= 0; i--) {
            /* extract the byte */
            tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
            /* move the byte to the leftmost position (same trick) */
            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
            /* t0 will be max/min of t0 and t1 */
            if (opc == OPC_MXU_Q8MAX) {
                tcg_gen_smax_i32(t0, t0, t1);
            } else {
                tcg_gen_smin_i32(t0, t0, t1);
            }
            /* return resulting byte to its original position */
            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
            /* accumulate this byte lane into the result */
            tcg_gen_or_i32(t2, t2, t0);
        }
        gen_store_mxu_gpr(t2, XRa);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_temp_new();
        TCGv_i32 t2 = tcg_temp_new();
        int32_t i;

        /* the leftmost bytes (byte 3 of each operand) first */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
        if (opc == OPC_MXU_Q8MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* bytes 2, 1, 0 */
        for (i = 2; i >= 0; i--) {
            /* extract corresponding bytes */
            tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
            tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
            /* move the bytes to the leftmost position */
            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
            tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
            /* t0 will be max/min of t0 and t1 */
            if (opc == OPC_MXU_Q8MAX) {
                tcg_gen_smax_i32(t0, t0, t1);
            } else {
                tcg_gen_smin_i32(t0, t0, t1);
            }
            /* return resulting byte to its original position */
            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
            /* accumulate this byte lane into the result */
            tcg_gen_or_i32(t2, t2, t0);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}
2363
2364 /*
2365 * Q8SLT
2366 * Update XRa with the signed "set less than" comparison of XRb and XRc
2367 * on per-byte basis.
2368 * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2369 *
2370 * Q8SLTU
2371 * Update XRa with the unsigned "set less than" comparison of XRb and XRc
2372 * on per-byte basis.
2373 * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
2374 */
static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* general case: per-byte "set less than", signed or unsigned */
        TCGv lhs = tcg_temp_new();
        TCGv rhs = tcg_temp_new();
        TCGv res = tcg_temp_new();
        TCGv vb = tcg_temp_new();
        TCGv vc = tcg_temp_new();

        gen_load_mxu_gpr(vb, XRb);
        gen_load_mxu_gpr(vc, XRc);
        tcg_gen_movi_tl(res, 0);

        for (int i = 0; i < 4; i++) {
            int ofs = 8 * i;

            if (sltu) {
                /* zero-extend the byte lanes: signed LT on the
                   widened values equals an unsigned byte compare */
                tcg_gen_extract_tl(lhs, vb, ofs, 8);
                tcg_gen_extract_tl(rhs, vc, ofs, 8);
            } else {
                /* sign-extend the byte lanes for a signed compare */
                tcg_gen_sextract_tl(lhs, vb, ofs, 8);
                tcg_gen_sextract_tl(rhs, vc, ofs, 8);
            }
            tcg_gen_setcond_tl(TCG_COND_LT, lhs, lhs, rhs);
            tcg_gen_deposit_tl(res, res, lhs, ofs, 8);
        }
        gen_store_mxu_gpr(res, XRa);
    }
}
2420
2421 /*
2422 * S32SLT
2423 * Update XRa with the signed "set less than" comparison of XRb and XRc.
2424 * a.k.a. XRa = XRb < XRc ? 1 : 0;
2425 */
static void gen_mxu_S32SLT(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* general case: XRa = (XRb < XRc) signed ? 1 : 0 */
        TCGv lhs = tcg_temp_new();
        TCGv rhs = tcg_temp_new();

        gen_load_mxu_gpr(lhs, XRb);
        gen_load_mxu_gpr(rhs, XRc);
        tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1], lhs, rhs);
    }
}
2455
2456 /*
2457 * D16SLT
2458 * Update XRa with the signed "set less than" comparison of XRb and XRc
 * on per half-word basis.
2460 * a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
2461 */
static void gen_mxu_D16SLT(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        /* high half-words: sign-extend, compare, park 0/1 in bits 31..16 */
        tcg_gen_sextract_tl(t0, t3, 16, 16);
        tcg_gen_sextract_tl(t1, t4, 16, 16);
        tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
        tcg_gen_shli_tl(t2, t0, 16);
        /* low half-words: same signed comparison in bits 15..0 */
        tcg_gen_sextract_tl(t0, t3, 0, 16);
        tcg_gen_sextract_tl(t1, t4, 0, 16);
        tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
        /* combine both lane results into the destination */
        tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
    }
}
2501
2502 /*
2503 * D16AVG
2504 * Update XRa with the signed average of XRb and XRc
 * on per half-word basis, rounding down.
2506 * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
2507 *
2508 * D16AVGR
2509 * Update XRa with the signed average of XRb and XRc
 * on per half-word basis, math rounding 4/5.
2511 * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
2512 */
static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to same */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        /* high half-words: sign-extend and sum (a 17-bit result) */
        tcg_gen_sextract_tl(t0, t3, 16, 16);
        tcg_gen_sextract_tl(t1, t4, 16, 16);
        tcg_gen_add_tl(t0, t0, t1);
        if (round45) {
            /* AVGR variant rounds to nearest by adding 1 before halving */
            tcg_gen_addi_tl(t0, t0, 1);
        }
        /*
         * A single shift left by 15 both divides the sum by two and
         * moves it into bits 31..16; the mask then drops whatever
         * leaked into the low half-word's bits.
         */
        tcg_gen_shli_tl(t2, t0, 15);
        tcg_gen_andi_tl(t2, t2, 0xffff0000);
        /* low half-words: same computation, halved by a plain shift */
        tcg_gen_sextract_tl(t0, t3, 0, 16);
        tcg_gen_sextract_tl(t1, t4, 0, 16);
        tcg_gen_add_tl(t0, t0, t1);
        if (round45) {
            tcg_gen_addi_tl(t0, t0, 1);
        }
        tcg_gen_shri_tl(t0, t0, 1);
        /* deposit truncates the low lane back to 16 bits */
        tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
        gen_store_mxu_gpr(t2, XRa);
    }
}
2561
2562 /*
2563 * Q8AVG
2564 * Update XRa with the signed average of XRb and XRc
2565 * on per-byte basis, rounding down.
2566 * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
2567 *
2568 * Q8AVGR
2569 * Update XRa with the signed average of XRb and XRc
 * on per-byte basis, math rounding 4/5.
2571 * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
2572 */
static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to same */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        tcg_gen_movi_tl(t2, 0);

        /*
         * NOTE(review): the byte lanes are zero-extended here, i.e.
         * the average is computed on unsigned byte values despite the
         * "signed average" wording in the header comment - confirm
         * against the MXU programming manual.
         */
        for (int i = 0; i < 4; i++) {
            tcg_gen_extract_tl(t0, t3, 8 * i, 8);
            tcg_gen_extract_tl(t1, t4, 8 * i, 8);
            tcg_gen_add_tl(t0, t0, t1);
            if (round45) {
                /* AVGR variant rounds by adding 1 before halving */
                tcg_gen_addi_tl(t0, t0, 1);
            }
            tcg_gen_shri_tl(t0, t0, 1);
            /* deposit truncates the lane back to 8 bits */
            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}
2617
2618 /*
2619 * Q8MOVZ
2620 * Quadruple 8-bit packed conditional move where
2621 * XRb contains conditions, XRc what to move and
2622 * XRa is the destination.
2623 * a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
2624 *
2625 * Q8MOVN
2626 * Quadruple 8-bit packed conditional move where
2627 * XRb contains conditions, XRc what to move and
2628 * XRa is the destination.
2629 * a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
2630 */
static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv vsrc = tcg_temp_new();
    TCGv vcnd = tcg_temp_new();
    TCGv vdst = tcg_temp_new();
    TCGv lane = tcg_temp_new();

    gen_load_mxu_gpr(vsrc, XRc);
    gen_load_mxu_gpr(vcnd, XRb);
    gen_load_mxu_gpr(vdst, XRa);

    /*
     * For each byte lane, from the most to the least significant:
     * branch past the copy when the condition byte from XRb meets
     * "cond vs 0", otherwise replace the destination byte with the
     * corresponding byte from XRc.
     */
    for (int ofs = 24; ofs >= 0; ofs -= 8) {
        TCGLabel *l_skip = gen_new_label();

        tcg_gen_extract_tl(lane, vcnd, ofs, 8);
        tcg_gen_brcondi_tl(cond, lane, 0, l_skip);
        tcg_gen_extract_tl(lane, vsrc, ofs, 8);
        tcg_gen_deposit_tl(vdst, vdst, lane, ofs, 8);
        gen_set_label(l_skip);
    }

    gen_store_mxu_gpr(vdst, XRa);
}
2678
2679 /*
2680 * D16MOVZ
2681 * Double 16-bit packed conditional move where
2682 * XRb contains conditions, XRc what to move and
2683 * XRa is the destination.
2684 * a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
2685 *
2686 * D16MOVN
2687 * Double 16-bit packed conditional move where
2688 * XRb contains conditions, XRc what to move and
2689 * XRa is the destination.
 * a.k.a. if (XRb[0..1] != 0) { XRa[0..1] = XRc[0..1] }
2691 */
static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGLabel *l_halfdone = gen_new_label();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRc);  /* source values */
    gen_load_mxu_gpr(t1, XRb);  /* condition half-words */
    gen_load_mxu_gpr(t2, XRa);  /* current destination (partial update) */

    /* high half: skip the copy when the condition half-word matches */
    tcg_gen_extract_tl(t3, t1, 16, 16);
    tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
    tcg_gen_extract_tl(t3, t0, 16, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);

    gen_set_label(l_halfdone);
    /* low half: same test on the low condition half-word */
    tcg_gen_extract_tl(t3, t1, 0, 16);
    tcg_gen_brcondi_tl(cond, t3, 0, l_done);
    tcg_gen_extract_tl(t3, t0, 0, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 0, 16);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t2, XRa);
}
2725
2726 /*
2727 * S32MOVZ
 * Single 32-bit conditional move where
2729 * XRb contains conditions, XRc what to move and
2730 * XRa is the destination.
2731 * a.k.a. if (XRb == 0) { XRa = XRc }
2732 *
2733 * S32MOVN
2734 * Single 32-bit conditional move where
2735 * XRb contains conditions, XRc what to move and
2736 * XRa is the destination.
2737 * a.k.a. if (XRb != 0) { XRa = XRc }
2738 */
static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv value = tcg_temp_new();
    TCGv guard = tcg_temp_new();
    TCGLabel *l_skip = gen_new_label();

    gen_load_mxu_gpr(value, XRc);
    gen_load_mxu_gpr(guard, XRb);

    /* the move is skipped when "cond" holds for XRb compared to 0 */
    tcg_gen_brcondi_tl(cond, guard, 0, l_skip);
    gen_store_mxu_gpr(value, XRa);
    gen_set_label(l_skip);
}
2758
2759 /*
2760 * MXU instruction category: Addition and subtraction
2761 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2762 *
2763 * S32CPS D16CPS
2764 * Q8ADD
2765 */
2766
2767 /*
2768 * S32CPS
2769 * Update XRa if XRc < 0 by value of 0 - XRb
2770 * else XRa = XRb
2771 */
static void gen_mxu_S32CPS(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely(XRb == 0)) {
        /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRc == 0)) {
        /* condition always false -> just move XRb to XRa */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* general case: XRa = (XRc < 0) ? -XRb : XRb */
        TCGv t0 = tcg_temp_new();
        TCGLabel *l_not_less = gen_new_label();
        TCGLabel *l_done = gen_new_label();

        /* if XRc >= 0 take XRb unchanged, otherwise negate it */
        tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
        tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
        tcg_gen_br(l_done);
        gen_set_label(l_not_less);
        gen_load_mxu_gpr(t0, XRb);
        gen_set_label(l_done);
        gen_store_mxu_gpr(t0, XRa);
    }
}
2806
2807 /*
2808 * D16CPS
2809 * Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
2810 * else XRa[0..1] = XRb[0..1]
2811 */
static void gen_mxu_D16CPS(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely(XRb == 0)) {
        /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRc == 0)) {
        /* condition always false -> just move XRb to XRa */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGLabel *l_done_hi = gen_new_label();
        TCGLabel *l_not_less_lo = gen_new_label();
        TCGLabel *l_done_lo = gen_new_label();

        /*
         * High half: t1 holds XRb's high half-word, negated in place
         * only when XRc's high half-word is negative.
         */
        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
        tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
        tcg_gen_subfi_tl(t1, 0, t1);

        gen_set_label(l_done_hi);
        /* move the (possibly negated) high result into bits 31..16 */
        tcg_gen_shli_i32(t1, t1, 16);

        /*
         * Low half: either negate XRb's low half-word (sign-extended
         * first so the negation is correct) or take it unchanged.
         * The deposit below keeps only the low 16 bits either way.
         */
        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 0, 16);
        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
        tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
        tcg_gen_subfi_tl(t0, 0, t0);
        tcg_gen_br(l_done_lo);

        gen_set_label(l_not_less_lo);
        tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 16);

        gen_set_label(l_done_lo);
        tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
    }
}
2860
2861 /*
2862 * Q8ABD XRa, XRb, XRc
2863 * Gets absolute difference for quadruple of 8-bit
2864 * packed in XRb to another one in XRc,
2865 * put the result in XRa.
2866 * a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
2867 */
static void gen_mxu_Q8ABD(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        tcg_gen_movi_tl(t2, 0);

        for (int i = 0; i < 4; i++) {
            /* zero-extend each byte lane, then |a - b| over 32 bits */
            tcg_gen_extract_tl(t0, t3, 8 * i, 8);
            tcg_gen_extract_tl(t1, t4, 8 * i, 8);

            tcg_gen_sub_tl(t0, t0, t1);
            tcg_gen_abs_tl(t0, t0);

            /* deposit truncates the result back to 8 bits */
            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}
2908
2909 /*
2910 * Q8ADD XRa, XRb, XRc, ptn2
2911 * Add/subtract quadruple of 8-bit packed in XRb
2912 * to another one in XRc, put the result in XRa.
2913 */
static void gen_mxu_Q8ADD(DisasContext *ctx)
{
    uint32_t aptn2, pad, XRc, XRb, XRa;

    aptn2 = extract32(ctx->opcode, 24, 2);
    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);

        /*
         * Walk the four byte lanes from least to most significant;
         * t3/t4 are shifted right by 8 each round so the current
         * lane is always in the low byte.  aptn2 bit 0 selects
         * subtract for lanes 0-1, bit 1 for lanes 2-3.
         */
        for (int i = 0; i < 4; i++) {
            tcg_gen_andi_tl(t0, t3, 0xff);
            tcg_gen_andi_tl(t1, t4, 0xff);

            if (i < 2) {
                if (aptn2 & 0x01) {
                    tcg_gen_sub_tl(t0, t0, t1);
                } else {
                    tcg_gen_add_tl(t0, t0, t1);
                }
            } else {
                if (aptn2 & 0x02) {
                    tcg_gen_sub_tl(t0, t0, t1);
                } else {
                    tcg_gen_add_tl(t0, t0, t1);
                }
            }
            /* advance the source words to the next lane */
            if (i < 3) {
                tcg_gen_shri_tl(t3, t3, 8);
                tcg_gen_shri_tl(t4, t4, 8);
            }
            if (i > 0) {
                /* deposit truncates the lane result to 8 bits */
                tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
            } else {
                /* first lane initialises the accumulator */
                tcg_gen_andi_tl(t0, t0, 0xff);
                tcg_gen_mov_tl(t2, t0);
            }
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}
2973
2974 /*
2975 * Q8ADDE XRa, XRb, XRc, XRd, aptn2
2976 * Add/subtract quadruple of 8-bit packed in XRb
2977 * to another one in XRc, with zero extending
2978 * to 16-bit and put results as packed 16-bit data
2979 * into XRa and XRd.
2980 * aptn2 manages action add or subtract of pairs of data.
2981 *
2982 * Q8ACCE XRa, XRb, XRc, XRd, aptn2
2983 * Add/subtract quadruple of 8-bit packed in XRb
2984 * to another one in XRc, with zero extending
2985 * to 16-bit and accumulate results as packed 16-bit data
2986 * into XRa and XRd.
2987 * aptn2 manages action add or subtract of pairs of data.
2988 */
static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
{
    uint32_t aptn2, XRd, XRc, XRb, XRa;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        if (XRa != 0) {
            tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
        }
        if (XRd != 0) {
            tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
        }
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();
        TCGv t5 = tcg_temp_new();

        if (XRa != 0) {
            /* upper byte pair (bytes 2 and 3) -> 16-bit lanes of XRa */
            gen_extract_mxu_gpr(t0, XRb, 16, 8);
            gen_extract_mxu_gpr(t1, XRc, 16, 8);
            gen_extract_mxu_gpr(t2, XRb, 24, 8);
            gen_extract_mxu_gpr(t3, XRc, 24, 8);
            if (aptn2 & 2) {
                /* aptn2 bit 1 selects subtract for the upper pair */
                tcg_gen_sub_tl(t0, t0, t1);
                tcg_gen_sub_tl(t2, t2, t3);
            } else {
                tcg_gen_add_tl(t0, t0, t1);
                tcg_gen_add_tl(t2, t2, t3);
            }
            if (accumulate) {
                /* Q8ACCE: add the previous 16-bit lanes of XRa */
                gen_load_mxu_gpr(t5, XRa);
                tcg_gen_extract_tl(t1, t5, 0, 16);
                tcg_gen_extract_tl(t3, t5, 16, 16);
                tcg_gen_add_tl(t0, t0, t1);
                tcg_gen_add_tl(t2, t2, t3);
            }
            /* pack the two 16-bit lane results into t4 */
            tcg_gen_shli_tl(t2, t2, 16);
            tcg_gen_extract_tl(t0, t0, 0, 16);
            tcg_gen_or_tl(t4, t2, t0);
        }
        if (XRd != 0) {
            /* lower byte pair (bytes 0 and 1) -> 16-bit lanes of XRd */
            gen_extract_mxu_gpr(t0, XRb, 0, 8);
            gen_extract_mxu_gpr(t1, XRc, 0, 8);
            gen_extract_mxu_gpr(t2, XRb, 8, 8);
            gen_extract_mxu_gpr(t3, XRc, 8, 8);
            if (aptn2 & 1) {
                /* aptn2 bit 0 selects subtract for the lower pair */
                tcg_gen_sub_tl(t0, t0, t1);
                tcg_gen_sub_tl(t2, t2, t3);
            } else {
                tcg_gen_add_tl(t0, t0, t1);
                tcg_gen_add_tl(t2, t2, t3);
            }
            if (accumulate) {
                /* Q8ACCE: add the previous 16-bit lanes of XRd */
                gen_load_mxu_gpr(t5, XRd);
                tcg_gen_extract_tl(t1, t5, 0, 16);
                tcg_gen_extract_tl(t3, t5, 16, 16);
                tcg_gen_add_tl(t0, t0, t1);
                tcg_gen_add_tl(t2, t2, t3);
            }
            tcg_gen_shli_tl(t2, t2, 16);
            tcg_gen_extract_tl(t0, t0, 0, 16);
            tcg_gen_or_tl(t5, t2, t0);
        }

        /*
         * NOTE(review): t4/t5 are left uninitialised when XRa/XRd is 0;
         * this relies on gen_store_mxu_gpr() discarding stores to
         * register 0 - confirm against its definition.
         */
        gen_store_mxu_gpr(t4, XRa);
        gen_store_mxu_gpr(t5, XRd);
    }
}
3067
3068 /*
3069 * D8SUM XRa, XRb, XRc
3070 * Double parallel add of quadruple unsigned 8-bit together
3071 * with zero extending to 16-bit data.
3072 * D8SUMC XRa, XRb, XRc
3073 * Double parallel add of quadruple unsigned 8-bit together
3074 * with zero extending to 16-bit data and adding 2 to each
3075 * parallel result.
3076 */
static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
{
    uint32_t pad, pad2, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 24, 2);
    pad2 = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0 || pad2 != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();  /* sum of the four bytes of XRb */
        TCGv t5 = tcg_temp_new();  /* sum of the four bytes of XRc */

        if (XRb != 0) {
            tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
            tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8);
            tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
            tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
            tcg_gen_add_tl(t4, t0, t1);
            tcg_gen_add_tl(t4, t4, t2);
            tcg_gen_add_tl(t4, t4, t3);
        } else {
            /*
             * XRb is the zero register, so its byte sum is 0.
             * Bugfix: the constant must be loaded with the immediate
             * form tcg_gen_movi_tl(); tcg_gen_mov_tl() expects a TCGv
             * source, not an integer.
             */
            tcg_gen_movi_tl(t4, 0);
        }
        if (XRc != 0) {
            tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8);
            tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8);
            tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
            tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
            tcg_gen_add_tl(t5, t0, t1);
            tcg_gen_add_tl(t5, t5, t2);
            tcg_gen_add_tl(t5, t5, t3);
        } else {
            /* same immediate-form fix as above */
            tcg_gen_movi_tl(t5, 0);
        }

        if (sumc) {
            /* D8SUMC variant: bias each 16-bit result by 2 */
            tcg_gen_addi_tl(t4, t4, 2);
            tcg_gen_addi_tl(t5, t5, 2);
        }
        /* pack: XRa = (sum(XRb) << 16) | sum(XRc) */
        tcg_gen_shli_tl(t4, t4, 16);

        tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
    }
}
3135
3136 /*
3137 * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
3138 * 16-bit pattern addition.
3139 */
static void gen_mxu_q16add(DisasContext *ctx)
{
    uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    optn2 = extract32(ctx->opcode, 22, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv t4 = tcg_temp_new();
    TCGv t5 = tcg_temp_new();

    /* split XRb into low (t0) and high (t1) halfwords */
    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_extract_tl(t0, t1, 0, 16);
    tcg_gen_extract_tl(t1, t1, 16, 16);

    /* split XRc into low (t2) and high (t3) halfwords */
    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_extract_tl(t2, t3, 0, 16);
    tcg_gen_extract_tl(t3, t3, 16, 16);

    /* optn2 selects which XRb halfword feeds each operation */
    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
        tcg_gen_mov_tl(t4, t1);
        tcg_gen_mov_tl(t5, t0);
        break;
    case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
        tcg_gen_mov_tl(t4, t0);
        tcg_gen_mov_tl(t5, t0);
        break;
    case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
        tcg_gen_mov_tl(t4, t1);
        tcg_gen_mov_tl(t5, t1);
        break;
    case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
        tcg_gen_mov_tl(t4, t0);
        tcg_gen_mov_tl(t5, t1);
        break;
    }

    /*
     * t0/t1 take the result pair destined for XRd, t4/t5 the pair
     * destined for XRa.  Fixed the case comments, which all read
     * "lop +, rop +" -- the intended mapping matches gen_mxu_q16acc.
     */
    switch (aptn2) {
    case MXU_APTN2_AA: /* lop +, rop + */
        tcg_gen_add_tl(t0, t4, t3);
        tcg_gen_add_tl(t1, t5, t2);
        tcg_gen_add_tl(t4, t4, t3);
        tcg_gen_add_tl(t5, t5, t2);
        break;
    case MXU_APTN2_AS: /* lop +, rop - */
        tcg_gen_sub_tl(t0, t4, t3);
        tcg_gen_sub_tl(t1, t5, t2);
        tcg_gen_add_tl(t4, t4, t3);
        tcg_gen_add_tl(t5, t5, t2);
        break;
    case MXU_APTN2_SA: /* lop -, rop + */
        tcg_gen_add_tl(t0, t4, t3);
        tcg_gen_add_tl(t1, t5, t2);
        tcg_gen_sub_tl(t4, t4, t3);
        tcg_gen_sub_tl(t5, t5, t2);
        break;
    case MXU_APTN2_SS: /* lop -, rop - */
        tcg_gen_sub_tl(t0, t4, t3);
        tcg_gen_sub_tl(t1, t5, t2);
        tcg_gen_sub_tl(t4, t4, t3);
        tcg_gen_sub_tl(t5, t5, t2);
        break;
    }

    /* repack each pair of 16-bit results into a 32-bit word */
    tcg_gen_shli_tl(t0, t0, 16);
    tcg_gen_extract_tl(t1, t1, 0, 16);
    tcg_gen_shli_tl(t4, t4, 16);
    tcg_gen_extract_tl(t5, t5, 0, 16);

    /*
     * Bugfix: XRa/XRd are 4-bit fields and may be 0 (the always-zero
     * register); the stores were unguarded, indexing mxu_gpr[-1].
     */
    if (XRa != 0) {
        tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
    }
    if (XRd != 0) {
        tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
    }
}
3220
3221 /*
3222 * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
3223 * 16-bit addition/subtraction with accumulate.
3224 */
static void gen_mxu_q16acc(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv b_lo = tcg_temp_new();
    TCGv b_hi = tcg_temp_new();
    TCGv c_lo = tcg_temp_new();
    TCGv c_hi = tcg_temp_new();
    TCGv d_hi = tcg_temp_new();
    TCGv d_lo = tcg_temp_new();
    TCGv a_hi = tcg_temp_new();
    TCGv a_lo = tcg_temp_new();

    /* split both sources into 16-bit halves */
    gen_load_mxu_gpr(b_hi, XRb);
    tcg_gen_extract_tl(b_lo, b_hi, 0, 16);
    tcg_gen_extract_tl(b_hi, b_hi, 16, 16);

    gen_load_mxu_gpr(c_hi, XRc);
    tcg_gen_extract_tl(c_lo, c_hi, 0, 16);
    tcg_gen_extract_tl(c_hi, c_hi, 16, 16);

    /* a_* feed the XRa accumulator, d_* feed the XRd accumulator */
    switch (aptn2) {
    case MXU_APTN2_AA: /* lop +, rop + */
        tcg_gen_add_tl(d_hi, b_hi, c_hi);
        tcg_gen_add_tl(d_lo, b_lo, c_lo);
        tcg_gen_add_tl(a_hi, b_hi, c_hi);
        tcg_gen_add_tl(a_lo, b_lo, c_lo);
        break;
    case MXU_APTN2_AS: /* lop +, rop - */
        tcg_gen_sub_tl(d_hi, b_hi, c_hi);
        tcg_gen_sub_tl(d_lo, b_lo, c_lo);
        tcg_gen_add_tl(a_hi, b_hi, c_hi);
        tcg_gen_add_tl(a_lo, b_lo, c_lo);
        break;
    case MXU_APTN2_SA: /* lop -, rop + */
        tcg_gen_add_tl(d_hi, b_hi, c_hi);
        tcg_gen_add_tl(d_lo, b_lo, c_lo);
        tcg_gen_sub_tl(a_hi, b_hi, c_hi);
        tcg_gen_sub_tl(a_lo, b_lo, c_lo);
        break;
    case MXU_APTN2_SS: /* lop -, rop - */
        tcg_gen_sub_tl(d_hi, b_hi, c_hi);
        tcg_gen_sub_tl(d_lo, b_lo, c_lo);
        tcg_gen_sub_tl(a_hi, b_hi, c_hi);
        tcg_gen_sub_tl(a_lo, b_lo, c_lo);
        break;
    }

    /* accumulate lane-wise into XRa, skipping the zero register */
    if (XRa != 0) {
        tcg_gen_add_tl(b_lo, mxu_gpr[XRa - 1], a_lo);
        tcg_gen_extract_tl(b_lo, b_lo, 0, 16);
        tcg_gen_extract_tl(b_hi, mxu_gpr[XRa - 1], 16, 16);
        tcg_gen_add_tl(b_hi, b_hi, a_hi);
        tcg_gen_shli_tl(b_hi, b_hi, 16);
        tcg_gen_or_tl(mxu_gpr[XRa - 1], b_hi, b_lo);
    }

    /* accumulate lane-wise into XRd, skipping the zero register */
    if (XRd != 0) {
        tcg_gen_add_tl(b_lo, mxu_gpr[XRd - 1], d_lo);
        tcg_gen_extract_tl(b_lo, b_lo, 0, 16);
        tcg_gen_extract_tl(b_hi, mxu_gpr[XRd - 1], 16, 16);
        tcg_gen_add_tl(b_hi, b_hi, d_hi);
        tcg_gen_shli_tl(b_hi, b_hi, 16);
        tcg_gen_or_tl(mxu_gpr[XRd - 1], b_hi, b_lo);
    }
}
3297
3298 /*
3299 * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
3300 * 16-bit accumulate.
3301 */
static void gen_mxu_q16accm(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv lo = tcg_temp_new();
    TCGv hi = tcg_temp_new();
    TCGv val_b = tcg_temp_new();
    TCGv val_c = tcg_temp_new();

    gen_load_mxu_gpr(val_b, XRb);
    gen_load_mxu_gpr(val_c, XRc);

    /* XRa accumulates the halfwords of XRb; aptn2 bit 1 = subtract */
    if (XRa != 0) {
        TCGv acc_lo = tcg_temp_new();
        TCGv acc_hi = tcg_temp_new();

        tcg_gen_extract_tl(lo, val_b, 0, 16);
        tcg_gen_extract_tl(hi, val_b, 16, 16);

        gen_load_mxu_gpr(acc_hi, XRa);
        tcg_gen_extract_tl(acc_lo, acc_hi, 0, 16);
        tcg_gen_extract_tl(acc_hi, acc_hi, 16, 16);

        if (aptn2 & 2) {
            tcg_gen_sub_tl(acc_lo, acc_lo, lo);
            tcg_gen_sub_tl(acc_hi, acc_hi, hi);
        } else {
            tcg_gen_add_tl(acc_lo, acc_lo, lo);
            tcg_gen_add_tl(acc_hi, acc_hi, hi);
        }
        /* repack the two 16-bit lanes */
        tcg_gen_extract_tl(acc_lo, acc_lo, 0, 16);
        tcg_gen_shli_tl(acc_hi, acc_hi, 16);
        tcg_gen_or_tl(mxu_gpr[XRa - 1], acc_hi, acc_lo);
    }

    /* XRd accumulates the halfwords of XRc; aptn2 bit 0 = subtract */
    if (XRd != 0) {
        TCGv acc_lo = tcg_temp_new();
        TCGv acc_hi = tcg_temp_new();

        tcg_gen_extract_tl(lo, val_c, 0, 16);
        tcg_gen_extract_tl(hi, val_c, 16, 16);

        gen_load_mxu_gpr(acc_hi, XRd);
        tcg_gen_extract_tl(acc_lo, acc_hi, 0, 16);
        tcg_gen_extract_tl(acc_hi, acc_hi, 16, 16);

        if (aptn2 & 1) {
            tcg_gen_sub_tl(acc_lo, acc_lo, lo);
            tcg_gen_sub_tl(acc_hi, acc_hi, hi);
        } else {
            tcg_gen_add_tl(acc_lo, acc_lo, lo);
            tcg_gen_add_tl(acc_hi, acc_hi, hi);
        }
        tcg_gen_extract_tl(acc_lo, acc_lo, 0, 16);
        tcg_gen_shli_tl(acc_hi, acc_hi, 16);
        tcg_gen_or_tl(mxu_gpr[XRd - 1], acc_hi, acc_lo);
    }
}
3366
3367
3368 /*
3369 * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
3370 * 16-bit sign extended addition and accumulate.
3371 */
static void gen_mxu_d16asum(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv sum = tcg_temp_new();
    TCGv half = tcg_temp_new();
    TCGv val_b = tcg_temp_new();
    TCGv val_c = tcg_temp_new();

    gen_load_mxu_gpr(val_b, XRb);
    gen_load_mxu_gpr(val_c, XRc);

    /* XRa += / -= the sum of XRb's sign-extended halfwords */
    if (XRa != 0) {
        tcg_gen_sextract_tl(sum, val_b, 0, 16);
        tcg_gen_sextract_tl(half, val_b, 16, 16);
        tcg_gen_add_tl(sum, sum, half);
        if (aptn2 & 2) {
            tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], sum);
        } else {
            tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], sum);
        }
    }

    /* XRd += / -= the sum of XRc's sign-extended halfwords */
    if (XRd != 0) {
        tcg_gen_sextract_tl(sum, val_c, 0, 16);
        tcg_gen_sextract_tl(half, val_c, 16, 16);
        tcg_gen_add_tl(sum, sum, half);
        if (aptn2 & 1) {
            tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], sum);
        } else {
            tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], sum);
        }
    }
}
3412
3413 /*
3414 * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
3415 * 32 bit pattern addition/subtraction, set carry.
3416 *
3417 * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
3418 * 32 bit pattern addition/subtraction with carry.
3419 */
static void gen_mxu_d32add(DisasContext *ctx)
{
    uint32_t aptn2, addc, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    addc = extract32(ctx->opcode, 22, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv cr = tcg_temp_new();

    if (unlikely(addc > 1)) {
        /* opcode incorrect -> do nothing */
    } else if (addc == 1) {
        /*
         * D32ADDC variant: consume the carry bits previously saved in
         * the MXU control register (bit 31 for the XRa lane, bit 30
         * for the XRd lane) and accumulate XRb/XRc plus carry.
         */
        if (unlikely(XRa == 0 && XRd == 0)) {
            /* destinations are zero register -> do nothing */
        } else {
            /* FIXME ??? What if XRa == XRd ??? */
            /* aptn2 is unused here */
            gen_load_mxu_gpr(t0, XRb);
            gen_load_mxu_gpr(t1, XRc);
            gen_load_mxu_cr(cr);
            if (XRa != 0) {
                /* XRa += XRb + CR[31] */
                tcg_gen_extract_tl(t2, cr, 31, 1);
                tcg_gen_add_tl(t0, t0, t2);
                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
            }
            if (XRd != 0) {
                /* XRd += XRc + CR[30] */
                tcg_gen_extract_tl(t2, cr, 30, 1);
                tcg_gen_add_tl(t1, t1, t2);
                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
            }
        }
    } else if (unlikely(XRa == 0 && XRd == 0)) {
        /* destinations are zero register -> do nothing */
    } else {
        /*
         * D32ADD: XRa = XRb +/- XRc and XRd = XRb +/- XRc per aptn2,
         * recording the generated carry/borrow in CR[31] / CR[30].
         */
        /* FIXME ??? What if XRa == XRd ??? */
        TCGv carry = tcg_temp_new();

        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        gen_load_mxu_cr(cr);
        if (XRa != 0) {
            if (aptn2 & 2) {
                /*
                 * NOTE(review): carry is set for t0 strictly greater
                 * than t1 (equality leaves it clear) -- presumably
                 * matches hardware behavior; confirm against tests.
                 */
                tcg_gen_sub_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
            } else {
                /* unsigned overflow iff the sum wrapped below t0 */
                tcg_gen_add_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
            }
            /* replace CR bit 31 with the new carry */
            tcg_gen_andi_tl(cr, cr, 0x7fffffff);
            tcg_gen_shli_tl(carry, carry, 31);
            tcg_gen_or_tl(cr, cr, carry);
            gen_store_mxu_gpr(t2, XRa);
        }
        if (XRd != 0) {
            if (aptn2 & 1) {
                tcg_gen_sub_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
            } else {
                tcg_gen_add_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
            }
            /* replace CR bit 30 with the new carry */
            tcg_gen_andi_tl(cr, cr, 0xbfffffff);
            tcg_gen_shli_tl(carry, carry, 30);
            tcg_gen_or_tl(cr, cr, carry);
            gen_store_mxu_gpr(t2, XRd);
        }
        gen_store_mxu_cr(cr);
    }
}
3497
3498 /*
3499 * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
3500 * 32 bit pattern addition/subtraction and accumulate.
3501 */
static void gen_mxu_d32acc(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv val_b = tcg_temp_new();
    TCGv val_c = tcg_temp_new();
    TCGv res = tcg_temp_new();

    if (unlikely(XRa == 0 && XRd == 0)) {
        /* both destinations are the zero register -> nothing to do */
        return;
    }

    gen_load_mxu_gpr(val_b, XRb);
    gen_load_mxu_gpr(val_c, XRc);

    /* XRa += (XRb +/- XRc), sign of the operation from aptn2 bit 1 */
    if (XRa != 0) {
        if (aptn2 & 2) {
            tcg_gen_sub_tl(res, val_b, val_c);
        } else {
            tcg_gen_add_tl(res, val_b, val_c);
        }
        tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], res);
    }

    /* XRd += (XRb +/- XRc), sign of the operation from aptn2 bit 0 */
    if (XRd != 0) {
        if (aptn2 & 1) {
            tcg_gen_sub_tl(res, val_b, val_c);
        } else {
            tcg_gen_add_tl(res, val_b, val_c);
        }
        tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], res);
    }
}
3540
3541 /*
3542 * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
3543 * 32 bit pattern addition/subtraction and accumulate.
3544 */
static void gen_mxu_d32accm(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv val_b = tcg_temp_new();
    TCGv val_c = tcg_temp_new();
    TCGv res = tcg_temp_new();

    if (unlikely(XRa == 0 && XRd == 0)) {
        /* both destinations are the zero register -> nothing to do */
        return;
    }

    gen_load_mxu_gpr(val_b, XRb);
    gen_load_mxu_gpr(val_c, XRc);

    /* XRa accumulates the sum XRb + XRc; aptn2 bit 1 = subtract it */
    if (XRa != 0) {
        tcg_gen_add_tl(res, val_b, val_c);
        if (aptn2 & 2) {
            tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], res);
        } else {
            tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], res);
        }
    }

    /* XRd accumulates the difference XRb - XRc; aptn2 bit 0 = subtract */
    if (XRd != 0) {
        tcg_gen_sub_tl(res, val_b, val_c);
        if (aptn2 & 1) {
            tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], res);
        } else {
            tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], res);
        }
    }
}
3583
3584 /*
3585 * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
3586 * 32 bit pattern addition/subtraction.
3587 */
static void gen_mxu_d32asum(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv val_b = tcg_temp_new();
    TCGv val_c = tcg_temp_new();

    if (unlikely(XRa == 0 && XRd == 0)) {
        /* both destinations are the zero register -> nothing to do */
        return;
    }

    gen_load_mxu_gpr(val_b, XRb);
    gen_load_mxu_gpr(val_c, XRc);

    /* XRa += / -= XRb, selected by aptn2 bit 1 */
    if (XRa != 0) {
        if (aptn2 & 2) {
            tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], val_b);
        } else {
            tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], val_b);
        }
    }

    /* XRd += / -= XRc, selected by aptn2 bit 0 */
    if (XRd != 0) {
        if (aptn2 & 1) {
            tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], val_c);
        } else {
            tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], val_c);
        }
    }
}
3623
3624 /*
3625 * MXU instruction category: Miscellaneous
3626 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3627 *
3628 * S32EXTR S32LUI
3629 * S32EXTRV
3630 * Q16SAT
3631 * Q16SCOP
3632 */
3633
3634 /*
3635 * S32EXTR XRa, XRd, rs, bits5
3636 * Extract bits5 bits from 64-bit pair {XRa:XRd}
3637 * starting from rs[4:0] offset and put to the XRa.
3638 */
static void gen_mxu_s32extr(DisasContext *ctx)
{
    TCGv t0, t1, t2, t3;
    uint32_t XRa, XRd, rs, bits5;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    bits5 = extract32(ctx->opcode, 16, 5);
    rs = extract32(ctx->opcode, 21, 5);

    /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
    /* {XRa} = extract({tmp}, 0, bits5); */
    if (bits5 > 0) {
        TCGLabel *l_xra_only = gen_new_label();
        TCGLabel *l_done = gen_new_label();

        gen_load_mxu_gpr(t0, XRd);     /* low word of the 64-bit pair */
        gen_load_mxu_gpr(t1, XRa);     /* high word of the 64-bit pair */
        gen_load_gpr(t2, rs);
        /* t2 = 32 - (rs & 0x1f): bits of XRa above the offset */
        tcg_gen_andi_tl(t2, t2, 0x1f);
        tcg_gen_subfi_tl(t2, 32, t2);
        /* if the whole field fits inside XRa, skip the combine path */
        tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
        /* field straddles the pair: t2 = bits taken from XRa */
        tcg_gen_subfi_tl(t2, bits5, t2);
        tcg_gen_subfi_tl(t3, 32, t2);
        /* result = (XRd >> (32 - t2)) | (XRa << t2) */
        tcg_gen_shr_tl(t0, t0, t3);
        tcg_gen_shl_tl(t1, t1, t2);
        tcg_gen_or_tl(t0, t0, t1);
        tcg_gen_br(l_done);
        gen_set_label(l_xra_only);
        /* entirely within XRa: shift the field down */
        tcg_gen_subi_tl(t2, t2, bits5);
        tcg_gen_shr_tl(t0, t1, t2);
        gen_set_label(l_done);
        /* keep only the requested bits5 low bits */
        tcg_gen_extract_tl(t0, t0, 0, bits5);
    } else {
        /* unspecified behavior but matches tests on real hardware */
        tcg_gen_movi_tl(t0, 0);
    }
    gen_store_mxu_gpr(t0, XRa);
}
3683
3684 /*
3685 * S32EXTRV XRa, XRd, rs, rt
3686 * Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
3687 * starting from rs[4:0] offset and put to the XRa.
3688 */
static void gen_mxu_s32extrv(DisasContext *ctx)
{
    TCGv t0, t1, t2, t3, t4;
    uint32_t XRa, XRd, rs, rt;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();
    t4 = tcg_temp_new();
    TCGLabel *l_xra_only = gen_new_label();
    TCGLabel *l_done = gen_new_label();
    TCGLabel *l_zero = gen_new_label();
    TCGLabel *l_extract = gen_new_label();

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    rt = extract32(ctx->opcode, 16, 5);
    rs = extract32(ctx->opcode, 21, 5);

    /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
    gen_load_mxu_gpr(t0, XRd);     /* low word of the 64-bit pair */
    gen_load_mxu_gpr(t1, XRa);     /* high word of the 64-bit pair */
    gen_load_gpr(t2, rs);          /* runtime bit offset */
    gen_load_gpr(t4, rt);          /* runtime field width */
    /* zero-width field is a special case handled below */
    tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
    /* t2 = 32 - (rs & 0x1f): bits of XRa above the offset */
    tcg_gen_andi_tl(t2, t2, 0x1f);
    tcg_gen_subfi_tl(t2, 32, t2);
    /* if the whole field fits inside XRa, skip the combine path */
    tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
    /* field straddles the pair: t2 = bits taken from XRa */
    tcg_gen_sub_tl(t2, t4, t2);
    tcg_gen_subfi_tl(t3, 32, t2);
    /* result = (XRd >> (32 - t2)) | (XRa << t2) */
    tcg_gen_shr_tl(t0, t0, t3);
    tcg_gen_shl_tl(t1, t1, t2);
    tcg_gen_or_tl(t0, t0, t1);
    tcg_gen_br(l_extract);

    gen_set_label(l_xra_only);
    /* entirely within XRa: shift the field down */
    tcg_gen_sub_tl(t2, t2, t4);
    tcg_gen_shr_tl(t0, t1, t2);
    tcg_gen_br(l_extract);

    /* unspecified behavior but matches tests on real hardware */
    gen_set_label(l_zero);
    tcg_gen_movi_tl(t0, 0);
    tcg_gen_br(l_done);

    /* {XRa} = extract({tmp}, 0, rt) -- mask via shift up then down,
     * since the width is only known at runtime */
    gen_set_label(l_extract);
    tcg_gen_subfi_tl(t4, 32, t4);
    tcg_gen_shl_tl(t0, t0, t4);
    tcg_gen_shr_tl(t0, t0, t4);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t0, XRa);
}
3744
3745 /*
3746 * S32LUI XRa, S8, optn3
3747 * Permutate the immediate S8 value to form a word
3748 * to update XRa.
3749 */
static void gen_mxu_s32lui(DisasContext *ctx)
{
    uint32_t XRa, s8, optn3, pad;

    XRa = extract32(ctx->opcode, 6, 4);
    s8 = extract32(ctx->opcode, 10, 8);
    pad = extract32(ctx->opcode, 21, 2);
    optn3 = extract32(ctx->opcode, 23, 3);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else {
        /*
         * Every pattern is a pure function of the immediate, so the
         * word can be folded at translation time and emitted with a
         * single immediate move.
         */
        uint32_t imm = 0;
        TCGv t0 = tcg_temp_new();

        switch (optn3) {
        case 0: /* 000000SS */
            imm = s8;
            break;
        case 1: /* 0000SS00 */
            imm = s8 << 8;
            break;
        case 2: /* 00SS0000 */
            imm = s8 << 16;
            break;
        case 3: /* SS000000 */
            imm = s8 << 24;
            break;
        case 4: /* 00SS00SS */
            imm = (s8 << 16) | s8;
            break;
        case 5: /* SS00SS00 */
            imm = (s8 << 24) | (s8 << 8);
            break;
        case 6: /* sign-extend S8 to 16 bits, replicate into both halves */
            {
                uint32_t s16 = (uint16_t)(int16_t)(int8_t)s8;
                imm = (s16 << 16) | s16;
            }
            break;
        case 7: /* replicate the byte into all four lanes */
            imm = (s8 << 24) | (s8 << 16) | (s8 << 8) | s8;
            break;
        }
        tcg_gen_movi_tl(t0, imm);
        gen_store_mxu_gpr(t0, XRa);
    }
}
3797
3798 /*
3799 * Q16SAT XRa, XRb, XRc
3800 * Packs four 16-bit signed integers in XRb and XRc to
3801 * four saturated unsigned 8-bit into XRa.
3802 *
3803 */
static void gen_mxu_Q16SAT(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();   /* assembled result for XRa */

        tcg_gen_movi_tl(t2, 0);
        if (XRb != 0) {
            /*
             * Clamp both signed 16-bit halves of XRb to [0, 255];
             * XRb.H becomes result byte 3, XRb.L becomes byte 2.
             */
            TCGLabel *l_less_hi = gen_new_label();
            TCGLabel *l_less_lo = gen_new_label();
            TCGLabel *l_lo = gen_new_label();
            TCGLabel *l_greater_hi = gen_new_label();
            TCGLabel *l_greater_lo = gen_new_label();
            TCGLabel *l_done = gen_new_label();

            /* t0 = XRb.H, sign-extended by the arithmetic shift */
            tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
            tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
            tcg_gen_br(l_lo);
            gen_set_label(l_less_hi);
            tcg_gen_movi_tl(t0, 0);
            tcg_gen_br(l_lo);
            gen_set_label(l_greater_hi);
            tcg_gen_movi_tl(t0, 255);

            gen_set_label(l_lo);
            /* t1 = XRb.L, sign-extended via shift up then back down */
            tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
            tcg_gen_sari_tl(t1, t1, 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
            tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
            tcg_gen_br(l_done);
            gen_set_label(l_less_lo);
            tcg_gen_movi_tl(t1, 0);
            tcg_gen_br(l_done);
            gen_set_label(l_greater_lo);
            tcg_gen_movi_tl(t1, 255);

            gen_set_label(l_done);
            /* place clamped XRb halves into bytes 3 and 2 */
            tcg_gen_shli_tl(t2, t0, 24);
            tcg_gen_shli_tl(t1, t1, 16);
            tcg_gen_or_tl(t2, t2, t1);
        }

        if (XRc != 0) {
            /*
             * Same clamping for XRc; XRc.H becomes result byte 1,
             * XRc.L becomes byte 0.
             */
            TCGLabel *l_less_hi = gen_new_label();
            TCGLabel *l_less_lo = gen_new_label();
            TCGLabel *l_lo = gen_new_label();
            TCGLabel *l_greater_hi = gen_new_label();
            TCGLabel *l_greater_lo = gen_new_label();
            TCGLabel *l_done = gen_new_label();

            tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
            tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
            tcg_gen_br(l_lo);
            gen_set_label(l_less_hi);
            tcg_gen_movi_tl(t0, 0);
            tcg_gen_br(l_lo);
            gen_set_label(l_greater_hi);
            tcg_gen_movi_tl(t0, 255);

            gen_set_label(l_lo);
            tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
            tcg_gen_sari_tl(t1, t1, 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
            tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
            tcg_gen_br(l_done);
            gen_set_label(l_less_lo);
            tcg_gen_movi_tl(t1, 0);
            tcg_gen_br(l_done);
            gen_set_label(l_greater_lo);
            tcg_gen_movi_tl(t1, 255);

            gen_set_label(l_done);
            /* place clamped XRc halves into bytes 1 and 0 */
            tcg_gen_shli_tl(t0, t0, 8);
            tcg_gen_or_tl(t2, t2, t0);
            tcg_gen_or_tl(t2, t2, t1);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}
3898
3899 /*
3900 * Q16SCOP XRa, XRd, XRb, XRc
3901 * Determine sign of quad packed 16-bit signed values
3902 * in XRb and XRc put result in XRa and XRd respectively.
3903 */
static void gen_mxu_q16scop(DisasContext *ctx)
{
    uint32_t XRd, XRc, XRb, XRa;

    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();   /* value of XRb */
    TCGv t1 = tcg_temp_new();   /* value of XRc */
    TCGv t2 = tcg_temp_new();   /* scratch: current sign-extended lane */
    TCGv t3 = tcg_temp_new();   /* result for XRa (signs of XRb lanes) */
    TCGv t4 = tcg_temp_new();   /* result for XRd (signs of XRc lanes) */

    TCGLabel *l_b_hi_lt = gen_new_label();
    TCGLabel *l_b_hi_gt = gen_new_label();
    TCGLabel *l_b_lo = gen_new_label();
    TCGLabel *l_b_lo_lt = gen_new_label();
    TCGLabel *l_c_hi = gen_new_label();
    TCGLabel *l_c_hi_lt = gen_new_label();
    TCGLabel *l_c_hi_gt = gen_new_label();
    TCGLabel *l_c_lo = gen_new_label();
    TCGLabel *l_c_lo_lt = gen_new_label();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t1, XRc);

    /*
     * Each 16-bit lane of the result holds the signum of the
     * corresponding source lane: 0xffff (-1), 0x0000 (0) or 0x0001 (1).
     */

    /* sign of XRb's high halfword -> high halfword of t3 */
    tcg_gen_sextract_tl(t2, t0, 16, 16);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
    tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
    tcg_gen_movi_tl(t3, 0);
    tcg_gen_br(l_b_lo);
    gen_set_label(l_b_hi_lt);
    tcg_gen_movi_tl(t3, 0xffff0000);
    tcg_gen_br(l_b_lo);
    gen_set_label(l_b_hi_gt);
    tcg_gen_movi_tl(t3, 0x00010000);

    /* sign of XRb's low halfword -> low halfword of t3 */
    gen_set_label(l_b_lo);
    tcg_gen_sextract_tl(t2, t0, 0, 16);
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
    tcg_gen_ori_tl(t3, t3, 0x00000001);
    tcg_gen_br(l_c_hi);
    gen_set_label(l_b_lo_lt);
    tcg_gen_ori_tl(t3, t3, 0x0000ffff);
    tcg_gen_br(l_c_hi);

    /* sign of XRc's high halfword -> high halfword of t4 */
    gen_set_label(l_c_hi);
    tcg_gen_sextract_tl(t2, t1, 16, 16);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
    tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
    tcg_gen_movi_tl(t4, 0);
    tcg_gen_br(l_c_lo);
    gen_set_label(l_c_hi_lt);
    tcg_gen_movi_tl(t4, 0xffff0000);
    tcg_gen_br(l_c_lo);
    gen_set_label(l_c_hi_gt);
    tcg_gen_movi_tl(t4, 0x00010000);

    /* sign of XRc's low halfword -> low halfword of t4 */
    gen_set_label(l_c_lo);
    tcg_gen_sextract_tl(t2, t1, 0, 16);
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
    tcg_gen_ori_tl(t4, t4, 0x00000001);
    tcg_gen_br(l_done);
    gen_set_label(l_c_lo_lt);
    tcg_gen_ori_tl(t4, t4, 0x0000ffff);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t3, XRa);
    gen_store_mxu_gpr(t4, XRd);
}
3979
3980 /*
3981 * S32SFL XRa, XRd, XRb, XRc
3982 * Shuffle bytes according to one of four patterns.
3983 */
static void gen_mxu_s32sfl(DisasContext *ctx)
{
    uint32_t XRd, XRc, XRb, XRa, ptn2;

    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);
    ptn2 = extract32(ctx->opcode, 24, 2);

    TCGv t0 = tcg_temp_new();   /* value of XRb, shifted as bytes consumed */
    TCGv t1 = tcg_temp_new();   /* value of XRc, shifted as bytes consumed */
    TCGv t2 = tcg_temp_new();   /* result for XRa */
    TCGv t3 = tcg_temp_new();   /* result for XRd */

    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t1, XRc);

    /*
     * Writing b3..b0 for the bytes of XRb and c3..c0 for the bytes of
     * XRc, the four patterns produce (most-significant byte first):
     *
     *   ptn2 == 0: XRa = {b3, c3, b2, c2}   XRd = {b1, c1, b0, c0}
     *   ptn2 == 1: XRa = {b3, b1, c3, c1}   XRd = {b2, b0, c2, c0}
     *   ptn2 == 2: XRa = {b3, c3, b1, c1}   XRd = {b2, c2, b0, c0}
     *   ptn2 == 3: XRa = {b3, b2, c3, c2}   XRd = {b1, b0, c1, c0}
     *
     * Each case starts from a mask of the bytes already in place and
     * deposits the remaining bytes, shifting the sources right as
     * their low bytes are consumed.
     */
    switch (ptn2) {
    case 0:
        tcg_gen_andi_tl(t2, t0, 0xff000000);
        tcg_gen_andi_tl(t3, t1, 0x000000ff);
        tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
        tcg_gen_shri_tl(t0, t0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
        tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
        tcg_gen_shri_tl(t0, t0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t0, 8, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
        break;
    case 1:
        tcg_gen_andi_tl(t2, t0, 0xff000000);
        tcg_gen_andi_tl(t3, t1, 0x000000ff);
        tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
        tcg_gen_shri_tl(t0, t0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
        tcg_gen_shri_tl(t0, t0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
        tcg_gen_deposit_tl(t3, t3, t1, 8, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 8, 8);
        break;
    case 2:
        tcg_gen_andi_tl(t2, t0, 0xff00ff00);
        tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
        tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
        tcg_gen_shri_tl(t0, t0, 16);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
        tcg_gen_shri_tl(t1, t1, 16);
        tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
        break;
    case 3:
        /* halfword granularity: interleave high and low halves */
        tcg_gen_andi_tl(t2, t0, 0xffff0000);
        tcg_gen_andi_tl(t3, t1, 0x0000ffff);
        tcg_gen_shri_tl(t1, t1, 16);
        tcg_gen_deposit_tl(t2, t2, t1, 0, 16);
        tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
        break;
    }

    gen_store_mxu_gpr(t2, XRa);
    gen_store_mxu_gpr(t3, XRd);
}
4056
4057 /*
4058 * Q8SAD XRa, XRd, XRb, XRc
4059 * Typical SAD operation for motion estimation.
4060 */
static void gen_mxu_q8sad(DisasContext *ctx)
{
    uint32_t XRd, XRc, XRb, XRa;

    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv byte_b = tcg_temp_new();
    TCGv byte_c = tcg_temp_new();
    TCGv val_b = tcg_temp_new();
    TCGv val_c = tcg_temp_new();
    TCGv sad = tcg_temp_new();
    TCGv acc = tcg_temp_new();

    gen_load_mxu_gpr(val_b, XRb);
    gen_load_mxu_gpr(val_c, XRc);
    gen_load_mxu_gpr(acc, XRd);
    tcg_gen_movi_tl(sad, 0);

    /* sum of absolute byte differences between XRb and XRc */
    for (int lane = 0; lane < 4; lane++) {
        if (lane > 0) {
            /* expose the next byte lane in both sources */
            tcg_gen_shri_tl(val_b, val_b, 8);
            tcg_gen_shri_tl(val_c, val_c, 8);
        }
        tcg_gen_andi_tl(byte_b, val_b, 0xff);
        tcg_gen_andi_tl(byte_c, val_c, 0xff);
        tcg_gen_sub_tl(byte_b, byte_b, byte_c);
        tcg_gen_abs_tl(byte_b, byte_b);
        tcg_gen_add_tl(sad, sad, byte_b);
    }
    /* XRa = SAD, XRd accumulates it */
    tcg_gen_add_tl(acc, acc, sad);
    gen_store_mxu_gpr(sad, XRa);
    gen_store_mxu_gpr(acc, XRd);
}
4097
4098 /*
4099 * MXU instruction category: align
4100 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4101 *
4102 * S32ALN S32ALNI
4103 */
4104
4105 /*
4106 * S32ALNI XRc, XRb, XRa, optn3
4107 * Arrange bytes from XRb and XRc according to one of five sets of
4108 * rules determined by optn3, and place the result in XRa.
4109 */
static void gen_mxu_S32ALNI(DisasContext *ctx)
{
    uint32_t optn3, pad, XRc, XRb, XRa;

    optn3 = extract32(ctx->opcode, 23, 3);
    pad = extract32(ctx->opcode, 21, 2);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    /*
     * Conceptually XRa is a 4-byte window into the byte pair {XRb:XRc},
     * offset by optn3 bytes: PTN0 yields pure XRb, PTN4 pure XRc, and
     * PTN1..PTN3 combine the low (4 - n) bytes of XRb (shifted up) with
     * the top n bytes of XRc (shifted down).  The early branches are
     * shortcuts for operands that are the always-zero register XR0.
     */
    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 0s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == 0)) {
        /* XRb zero register -> just appropriately shift XRc into XRa */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            /* XRb contributes zeros; only XRc's top bytes move down */
            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
                             8 * (4 - optn3));
            break;
        case MXU_OPTN3_PTN4:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
            break;
        }
    } else if (unlikely(XRc == 0)) {
        /* XRc zero register -> just appropriately shift XRb into XRa */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            /*
             * XRc contributes zeros; XRb's low bytes move UP, matching
             * the general case below which computes
             * (XRb & mask) << (8 * optn3) | (XRc part).
             * This must be a left shift (shli): a right shift here would
             * disagree with the general-case result for the same optn3.
             */
            tcg_gen_shli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
            break;
        case MXU_OPTN3_PTN4:
            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
            break;
        }
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just rotation or moving from any of them */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
        case MXU_OPTN3_PTN4:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            /* {X:X} windowed at offset n is a left rotation by n bytes */
            tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
            break;
        }
    } else {
        /* the most general case */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            {
                /*                                         */
                /*         XRb                XRc          */
                /*  +---------------+                      */
                /*  | A   B   C   D |  E   F   G   H       */
                /*  +-------+-------+                      */
                /*          |                              */
                /*         XRa                             */
                /*                                         */

                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            }
            break;
        case MXU_OPTN3_PTN1:
            {
                /*                                         */
                /*         XRb                 XRc         */
                /*      +-------------------+              */
                /*   A  | B   C   D     E   |  F   G   H   */
                /*      +---------+---------+              */
                /*                |                        */
                /*               XRa                       */
                /*                                         */

                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
                tcg_gen_shli_i32(t0, t0, 8);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
                tcg_gen_shri_i32(t1, t1, 24);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN2:
            {
                /*                                         */
                /*         XRb                 XRc         */
                /*          +-------------------+          */
                /*   A   B  | C   D     E   F   |  G   H   */
                /*          +---------+---------+          */
                /*                    |                    */
                /*                   XRa                   */
                /*                                         */

                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
                tcg_gen_shli_i32(t0, t0, 16);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
                tcg_gen_shri_i32(t1, t1, 16);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN3:
            {
                /*                                         */
                /*         XRb                 XRc         */
                /*              +-------------------+      */
                /*   A   B   C  | D     E   F   G   |  H   */
                /*              +---------+---------+      */
                /*                        |                */
                /*                       XRa               */
                /*                                         */

                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
                tcg_gen_shli_i32(t0, t0, 24);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
                tcg_gen_shri_i32(t1, t1, 8);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN4:
            {
                /*                                         */
                /*         XRb                XRc          */
                /*                     +---------------+   */
                /*   A   B   C   D     | E   F   G   H |   */
                /*                     +-------+-------+   */
                /*                             |           */
                /*                            XRa          */
                /*                                         */

                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
            }
            break;
        }
    }
}
4274
4275 /*
4276 * S32ALN XRc, XRb, XRa, rs
4277 * Arrange bytes from XRb and XRc according to one of five sets of
4278 * rules determined by rs[2:0], and place the result in XRa.
4279 */
static void gen_mxu_S32ALN(DisasContext *ctx)
{
    uint32_t rs, XRc, XRb, XRa;

    rs = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 0s */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGLabel *l_exit = gen_new_label();
        TCGLabel *l_b_only = gen_new_label();
        TCGLabel *l_c_only = gen_new_label();

        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        gen_load_gpr(t2, rs);
        /* alignment select comes from rs[2:0] at runtime */
        tcg_gen_andi_tl(t2, t2, 0x07);

        /* do nothing for undefined cases */
        tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);

        /* select == 0 -> result is pure XRb; select == 4 -> pure XRc */
        tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
        tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);

        /*
         * select in 1..3:
         *   XRa = XRb << (8 * sel) | XRc >> (32 - 8 * sel)
         * Shift counts are 8/16/24 on this path, so neither shift
         * can reach the undefined shift-by-32 case.
         */
        tcg_gen_shli_tl(t2, t2, 3);
        tcg_gen_subfi_tl(t3, 32, t2);

        tcg_gen_shl_tl(t0, t0, t2);
        tcg_gen_shr_tl(t1, t1, t3);
        tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
        tcg_gen_br(l_exit);

        gen_set_label(l_b_only);
        gen_store_mxu_gpr(t0, XRa);
        tcg_gen_br(l_exit);

        gen_set_label(l_c_only);
        gen_store_mxu_gpr(t1, XRa);

        gen_set_label(l_exit);
    }
}
4333
/*
 * S32MADD XRa, XRd, rb, rc
 *   32 to 64 bit signed multiply with subsequent add;
 *   result stored in {XRa, XRd} pair, clobbering ("staining") HI/LO.
 * S32MADDU XRa, XRd, rb, rc
 *   32 to 64 bit unsigned multiply with subsequent add;
 *   result stored in {XRa, XRd} pair, clobbering ("staining") HI/LO.
 * S32MSUB XRa, XRd, rb, rc
 *   32 to 64 bit signed multiply with subsequent subtract;
 *   result stored in {XRa, XRd} pair, clobbering ("staining") HI/LO.
 * S32MSUBU XRa, XRd, rb, rc
 *   32 to 64 bit unsigned multiply with subsequent subtract;
 *   result stored in {XRa, XRd} pair, clobbering ("staining") HI/LO.
 */
static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
{
    uint32_t XRa, XRd, Rb, Rc;

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    Rb = extract32(ctx->opcode, 16, 5);
    Rc = extract32(ctx->opcode, 21, 5);

    if (unlikely(Rb == 0 || Rc == 0)) {
        /* do nothing because x + 0 * y => x */
    } else if (unlikely(XRa == 0 && XRd == 0)) {
        /* do nothing because result just dropped */
    } else {
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv_i64 t2 = tcg_temp_new_i64();
        TCGv_i64 t3 = tcg_temp_new_i64();

        gen_load_gpr(t0, Rb);
        gen_load_gpr(t1, Rc);

        /* widen both factors to 64 bit; signedness chosen by 'uns' */
        if (uns) {
            tcg_gen_extu_tl_i64(t2, t0);
            tcg_gen_extu_tl_i64(t3, t1);
        } else {
            tcg_gen_ext_tl_i64(t2, t0);
            tcg_gen_ext_tl_i64(t3, t1);
        }
        tcg_gen_mul_i64(t2, t2, t3);

        /* 64-bit accumulator is {XRa:XRd}, XRa being the high half */
        gen_load_mxu_gpr(t0, XRa);
        gen_load_mxu_gpr(t1, XRd);

        tcg_gen_concat_tl_i64(t3, t1, t0);
        if (sub) {
            tcg_gen_sub_i64(t3, t3, t2);
        } else {
            tcg_gen_add_i64(t3, t3, t2);
        }
        gen_move_low32(t1, t3);
        gen_move_high32(t0, t3);

        /* the result also "stains" (overwrites) architectural HI/LO */
        tcg_gen_mov_tl(cpu_HI[0], t0);
        tcg_gen_mov_tl(cpu_LO[0], t1);

        gen_store_mxu_gpr(t1, XRd);
        gen_store_mxu_gpr(t0, XRa);
    }
}
4398
4399 /*
4400 * Decoding engine for MXU
4401 * =======================
4402 */
4403
static void decode_opc_mxu__pool00(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 18, 3);

    /* POOL00: min/max and set-on-less-than byte operations. */
    if (op == OPC_MXU_S32MAX || op == OPC_MXU_S32MIN) {
        gen_mxu_S32MAX_S32MIN(ctx);
    } else if (op == OPC_MXU_D16MAX || op == OPC_MXU_D16MIN) {
        gen_mxu_D16MAX_D16MIN(ctx);
    } else if (op == OPC_MXU_Q8MAX || op == OPC_MXU_Q8MIN) {
        gen_mxu_Q8MAX_Q8MIN(ctx);
    } else if (op == OPC_MXU_Q8SLT) {
        gen_mxu_q8slt(ctx, false);
    } else if (op == OPC_MXU_Q8SLTU) {
        gen_mxu_q8slt(ctx, true);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4433
decode_opc_mxu_s32madd_sub(DisasContext * ctx)4434 static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
4435 {
4436 uint32_t opcode = extract32(ctx->opcode, 0, 6);
4437 uint32_t pad = extract32(ctx->opcode, 14, 2);
4438
4439 if (pad != 2) {
4440 /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
4441 return false;
4442 }
4443
4444 switch (opcode) {
4445 case OPC_MXU_S32MADD:
4446 gen_mxu_s32madd_sub(ctx, false, false);
4447 break;
4448 case OPC_MXU_S32MADDU:
4449 gen_mxu_s32madd_sub(ctx, false, true);
4450 break;
4451 case OPC_MXU_S32MSUB:
4452 gen_mxu_s32madd_sub(ctx, true, false);
4453 break;
4454 case OPC_MXU_S32MSUBU:
4455 gen_mxu_s32madd_sub(ctx, true, true);
4456 break;
4457 default:
4458 return false;
4459 }
4460 return true;
4461 }
4462
static void decode_opc_mxu__pool01(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 18, 3);

    /* POOL01: set-on-less-than, average and byte add operations. */
    if (op == OPC_MXU_S32SLT) {
        gen_mxu_S32SLT(ctx);
    } else if (op == OPC_MXU_D16SLT) {
        gen_mxu_D16SLT(ctx);
    } else if (op == OPC_MXU_D16AVG) {
        gen_mxu_d16avg(ctx, false);
    } else if (op == OPC_MXU_D16AVGR) {
        gen_mxu_d16avg(ctx, true);
    } else if (op == OPC_MXU_Q8AVG) {
        gen_mxu_q8avg(ctx, false);
    } else if (op == OPC_MXU_Q8AVGR) {
        gen_mxu_q8avg(ctx, true);
    } else if (op == OPC_MXU_Q8ADD) {
        gen_mxu_Q8ADD(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4495
static void decode_opc_mxu__pool02(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 18, 3);

    /* POOL02: sign-conditioned, absolute-difference and saturate ops. */
    if (op == OPC_MXU_S32CPS) {
        gen_mxu_S32CPS(ctx);
    } else if (op == OPC_MXU_D16CPS) {
        gen_mxu_D16CPS(ctx);
    } else if (op == OPC_MXU_Q8ABD) {
        gen_mxu_Q8ABD(ctx);
    } else if (op == OPC_MXU_Q16SAT) {
        gen_mxu_Q16SAT(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4519
static void decode_opc_mxu__pool03(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 24, 2);

    /* POOL03: fractional/even variants of D16MUL. */
    if (op == OPC_MXU_D16MULF) {
        gen_mxu_d16mul(ctx, true, true);
    } else if (op == OPC_MXU_D16MULE) {
        gen_mxu_d16mul(ctx, true, false);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4537
static void decode_opc_mxu__pool04(DisasContext *ctx)
{
    uint32_t reversed = extract32(ctx->opcode, 20, 1);

    /*
     * Bits [13:10] carry an opcode whose meaning is unknown yet,
     * so every encoding in this pool decodes as the same load.
     */
    gen_mxu_s32ldxx(ctx, reversed, false);
}
4550
static void decode_opc_mxu__pool05(DisasContext *ctx)
{
    uint32_t reversed = extract32(ctx->opcode, 20, 1);

    /*
     * Bits [13:10] carry an opcode whose meaning is unknown yet,
     * so every encoding in this pool decodes as the same store.
     */
    gen_mxu_s32stxx(ctx, reversed, false);
}
4563
static void decode_opc_mxu__pool06(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 10, 4);
    uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    /* Only the LDST/LDSTR forms with a valid stride are defined. */
    if ((op == OPC_MXU_S32LDST || op == OPC_MXU_S32LDSTR) && strd2 <= 2) {
        gen_mxu_s32ldxvx(ctx, op, false, strd2);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4583
static void decode_opc_mxu__pool07(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 10, 4);
    uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    /* Only the LDST/LDSTR forms with a valid stride are defined. */
    if ((op == OPC_MXU_S32LDST || op == OPC_MXU_S32LDSTR) && strd2 <= 2) {
        gen_mxu_s32stxvx(ctx, op, false, strd2);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4603
static void decode_opc_mxu__pool08(DisasContext *ctx)
{
    uint32_t reversed = extract32(ctx->opcode, 20, 1);

    /*
     * Bits [13:10] carry an opcode whose meaning is unknown yet,
     * so every encoding in this pool decodes as the same load.
     */
    gen_mxu_s32ldxx(ctx, reversed, true);
}
4616
static void decode_opc_mxu__pool09(DisasContext *ctx)
{
    uint32_t reversed = extract32(ctx->opcode, 20, 1);

    /*
     * Bits [13:10] carry an opcode whose meaning is unknown yet,
     * so every encoding in this pool decodes as the same store.
     */
    gen_mxu_s32stxx(ctx, reversed, true);
}
4629
static void decode_opc_mxu__pool10(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 10, 4);
    uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    /* Only the LDST/LDSTR forms with a valid stride are defined. */
    if ((op == OPC_MXU_S32LDST || op == OPC_MXU_S32LDSTR) && strd2 <= 2) {
        gen_mxu_s32ldxvx(ctx, op, true, strd2);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4649
static void decode_opc_mxu__pool11(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 10, 4);
    uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    /* Only the LDST/LDSTR forms with a valid stride are defined. */
    if ((op == OPC_MXU_S32LDST || op == OPC_MXU_S32LDSTR) && strd2 <= 2) {
        gen_mxu_s32stxvx(ctx, op, true, strd2);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4669
static void decode_opc_mxu__pool12(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 22, 2);

    /* POOL12: 32-bit accumulate variants. */
    if (op == OPC_MXU_D32ACC) {
        gen_mxu_d32acc(ctx);
    } else if (op == OPC_MXU_D32ACCM) {
        gen_mxu_d32accm(ctx);
    } else if (op == OPC_MXU_D32ASUM) {
        gen_mxu_d32asum(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4690
static void decode_opc_mxu__pool13(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 22, 2);

    /* POOL13: 16-bit accumulate variants. */
    if (op == OPC_MXU_Q16ACC) {
        gen_mxu_q16acc(ctx);
    } else if (op == OPC_MXU_Q16ACCM) {
        gen_mxu_q16accm(ctx);
    } else if (op == OPC_MXU_D16ASUM) {
        gen_mxu_d16asum(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4711
static void decode_opc_mxu__pool14(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 22, 2);

    /* POOL14: byte add-with-extend and byte-sum variants. */
    if (op == OPC_MXU_Q8ADDE) {
        gen_mxu_q8adde(ctx, false);
    } else if (op == OPC_MXU_D8SUM) {
        gen_mxu_d8sum(ctx, false);
    } else if (op == OPC_MXU_D8SUMC) {
        gen_mxu_d8sum(ctx, true);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4732
static void decode_opc_mxu__pool15(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 14, 2);

    /* POOL15: 32-bit multiply and bit-field extract variants. */
    if (op == OPC_MXU_S32MUL) {
        gen_mxu_s32mul(ctx, false);
    } else if (op == OPC_MXU_S32MULU) {
        gen_mxu_s32mul(ctx, true);
    } else if (op == OPC_MXU_S32EXTR) {
        gen_mxu_s32extr(ctx);
    } else if (op == OPC_MXU_S32EXTRV) {
        gen_mxu_s32extrv(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4756
static void decode_opc_mxu__pool16(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 18, 3);

    /* POOL16: align, load-upper-immediate and bitwise logic operations. */
    if (op == OPC_MXU_D32SARW) {
        gen_mxu_d32sarl(ctx, true);
    } else if (op == OPC_MXU_S32ALN) {
        gen_mxu_S32ALN(ctx);
    } else if (op == OPC_MXU_S32ALNI) {
        gen_mxu_S32ALNI(ctx);
    } else if (op == OPC_MXU_S32LUI) {
        gen_mxu_s32lui(ctx);
    } else if (op == OPC_MXU_S32NOR) {
        gen_mxu_S32NOR(ctx);
    } else if (op == OPC_MXU_S32AND) {
        gen_mxu_S32AND(ctx);
    } else if (op == OPC_MXU_S32OR) {
        gen_mxu_S32OR(ctx);
    } else if (op == OPC_MXU_S32XOR) {
        gen_mxu_S32XOR(ctx);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4792
static void decode_opc_mxu__pool17(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 6, 3);
    uint32_t strd2 = extract32(ctx->opcode, 9, 2);
    MemOp mo = 0;
    bool valid = strd2 <= 2;

    /* POOL17: indexed loads; the opcode picks the access width/sign. */
    if (valid) {
        if (op == OPC_MXU_LXW) {
            mo = MO_UL;
        } else if (op == OPC_MXU_LXB) {
            mo = MO_SB;
        } else if (op == OPC_MXU_LXH) {
            mo = MO_SW;
        } else if (op == OPC_MXU_LXBU) {
            mo = MO_UB;
        } else if (op == OPC_MXU_LXHU) {
            mo = MO_UW;
        } else {
            valid = false;
        }
    }

    if (!valid) {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        return;
    }

    gen_mxu_lxx(ctx, strd2, mo_endian(ctx) | mo);
}
4826
static void decode_opc_mxu__pool18(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 18, 3);

    /* POOL18: variable-amount shifts; generator flags are (right, arith). */
    if (op == OPC_MXU_D32SLLV) {
        gen_mxu_d32sxxv(ctx, false, false);
    } else if (op == OPC_MXU_D32SLRV) {
        gen_mxu_d32sxxv(ctx, true, false);
    } else if (op == OPC_MXU_D32SARV) {
        gen_mxu_d32sxxv(ctx, true, true);
    } else if (op == OPC_MXU_Q16SLLV) {
        gen_mxu_q16sxxv(ctx, false, false);
    } else if (op == OPC_MXU_Q16SLRV) {
        gen_mxu_q16sxxv(ctx, true, false);
    } else if (op == OPC_MXU_Q16SARV) {
        gen_mxu_q16sxxv(ctx, true, true);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4856
static void decode_opc_mxu__pool19(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 22, 4);

    /* POOL19: byte multiply (non-accumulating) variants. */
    if (op == OPC_MXU_Q8MUL) {
        gen_mxu_q8mul_mac(ctx, false, false);
    } else if (op == OPC_MXU_Q8MULSU) {
        gen_mxu_q8mul_mac(ctx, true, false);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4874
static void decode_opc_mxu__pool20(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 18, 3);

    /*
     * POOL20: conditional moves.  The TCG condition passed down is the
     * one used by the generator for its skip test, hence the inversion
     * (MOVZ -> NE, MOVN -> EQ).
     */
    if (op == OPC_MXU_Q8MOVZ) {
        gen_mxu_q8movzn(ctx, TCG_COND_NE);
    } else if (op == OPC_MXU_Q8MOVN) {
        gen_mxu_q8movzn(ctx, TCG_COND_EQ);
    } else if (op == OPC_MXU_D16MOVZ) {
        gen_mxu_d16movzn(ctx, TCG_COND_NE);
    } else if (op == OPC_MXU_D16MOVN) {
        gen_mxu_d16movzn(ctx, TCG_COND_EQ);
    } else if (op == OPC_MXU_S32MOVZ) {
        gen_mxu_s32movzn(ctx, TCG_COND_NE);
    } else if (op == OPC_MXU_S32MOVN) {
        gen_mxu_s32movzn(ctx, TCG_COND_EQ);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4904
static void decode_opc_mxu__pool21(DisasContext *ctx)
{
    uint32_t op = extract32(ctx->opcode, 22, 2);

    /* POOL21: byte multiply-accumulate variants. */
    if (op == OPC_MXU_Q8MAC) {
        gen_mxu_q8mul_mac(ctx, false, true);
    } else if (op == OPC_MXU_Q8MACSU) {
        gen_mxu_q8mul_mac(ctx, true, true);
    } else {
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
    }
}
4922
4923
/*
 * Top-level MXU decoder.  Returns true when the instruction word was
 * recognized as an MXU encoding (and TCG code was emitted for it),
 * false when it should be treated as reserved by the caller.
 */
bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
{
    uint32_t opcode = extract32(insn, 0, 6);

    /* S32M2I/S32I2M are decoded before the MXU enable check below. */
    if (opcode == OPC_MXU_S32M2I) {
        gen_mxu_s32m2i(ctx);
        return true;
    }

    if (opcode == OPC_MXU_S32I2M) {
        gen_mxu_s32i2m(ctx);
        return true;
    }

    {
        TCGv t_mxu_cr = tcg_temp_new();
        TCGLabel *l_exit = gen_new_label();

        /* Skip the generated operation at runtime unless MXU_CR.MXU_EN
         * is set. */
        gen_load_mxu_cr(t_mxu_cr);
        tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
        tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);

        switch (opcode) {
        case OPC_MXU_S32MADD:
        case OPC_MXU_S32MADDU:
        case OPC_MXU_S32MSUB:
        case OPC_MXU_S32MSUBU:
            /*
             * NOTE(review): this early return skips gen_set_label(l_exit)
             * even though a branch to l_exit has already been emitted
             * above -- confirm an unresolved label cannot survive in the
             * op stream on this path.
             */
            return decode_opc_mxu_s32madd_sub(ctx);
        case OPC_MXU__POOL00:
            decode_opc_mxu__pool00(ctx);
            break;
        case OPC_MXU_D16MUL:
            gen_mxu_d16mul(ctx, false, false);
            break;
        case OPC_MXU_D16MAC:
            gen_mxu_d16mac(ctx, false, false);
            break;
        case OPC_MXU_D16MACF:
            gen_mxu_d16mac(ctx, true, true);
            break;
        case OPC_MXU_D16MADL:
            gen_mxu_d16madl(ctx);
            break;
        case OPC_MXU_S16MAD:
            gen_mxu_s16mad(ctx);
            break;
        case OPC_MXU_Q16ADD:
            gen_mxu_q16add(ctx);
            break;
        case OPC_MXU_D16MACE:
            gen_mxu_d16mac(ctx, true, false);
            break;
        case OPC_MXU__POOL01:
            decode_opc_mxu__pool01(ctx);
            break;
        case OPC_MXU__POOL02:
            decode_opc_mxu__pool02(ctx);
            break;
        case OPC_MXU__POOL03:
            decode_opc_mxu__pool03(ctx);
            break;
        case OPC_MXU__POOL04:
            decode_opc_mxu__pool04(ctx);
            break;
        case OPC_MXU__POOL05:
            decode_opc_mxu__pool05(ctx);
            break;
        case OPC_MXU__POOL06:
            decode_opc_mxu__pool06(ctx);
            break;
        case OPC_MXU__POOL07:
            decode_opc_mxu__pool07(ctx);
            break;
        case OPC_MXU__POOL08:
            decode_opc_mxu__pool08(ctx);
            break;
        case OPC_MXU__POOL09:
            decode_opc_mxu__pool09(ctx);
            break;
        case OPC_MXU__POOL10:
            decode_opc_mxu__pool10(ctx);
            break;
        case OPC_MXU__POOL11:
            decode_opc_mxu__pool11(ctx);
            break;
        case OPC_MXU_D32ADD:
            gen_mxu_d32add(ctx);
            break;
        case OPC_MXU__POOL12:
            decode_opc_mxu__pool12(ctx);
            break;
        case OPC_MXU__POOL13:
            decode_opc_mxu__pool13(ctx);
            break;
        case OPC_MXU__POOL14:
            decode_opc_mxu__pool14(ctx);
            break;
        case OPC_MXU_Q8ACCE:
            gen_mxu_q8adde(ctx, true);
            break;
        case OPC_MXU_S8LDD:
            gen_mxu_s8ldd(ctx, false);
            break;
        case OPC_MXU_S8STD:
            gen_mxu_s8std(ctx, false);
            break;
        case OPC_MXU_S8LDI:
            gen_mxu_s8ldd(ctx, true);
            break;
        case OPC_MXU_S8SDI:
            gen_mxu_s8std(ctx, true);
            break;
        case OPC_MXU__POOL15:
            decode_opc_mxu__pool15(ctx);
            break;
        case OPC_MXU__POOL16:
            decode_opc_mxu__pool16(ctx);
            break;
        case OPC_MXU__POOL17:
            decode_opc_mxu__pool17(ctx);
            break;
        case OPC_MXU_S16LDD:
            gen_mxu_s16ldd(ctx, false);
            break;
        case OPC_MXU_S16STD:
            gen_mxu_s16std(ctx, false);
            break;
        case OPC_MXU_S16LDI:
            gen_mxu_s16ldd(ctx, true);
            break;
        case OPC_MXU_S16SDI:
            gen_mxu_s16std(ctx, true);
            break;
        case OPC_MXU_D32SLL:
            gen_mxu_d32sxx(ctx, false, false);
            break;
        case OPC_MXU_D32SLR:
            gen_mxu_d32sxx(ctx, true, false);
            break;
        case OPC_MXU_D32SARL:
            gen_mxu_d32sarl(ctx, false);
            break;
        case OPC_MXU_D32SAR:
            gen_mxu_d32sxx(ctx, true, true);
            break;
        case OPC_MXU_Q16SLL:
            gen_mxu_q16sxx(ctx, false, false);
            break;
        case OPC_MXU__POOL18:
            decode_opc_mxu__pool18(ctx);
            break;
        case OPC_MXU_Q16SLR:
            gen_mxu_q16sxx(ctx, true, false);
            break;
        case OPC_MXU_Q16SAR:
            gen_mxu_q16sxx(ctx, true, true);
            break;
        case OPC_MXU__POOL19:
            decode_opc_mxu__pool19(ctx);
            break;
        case OPC_MXU__POOL20:
            decode_opc_mxu__pool20(ctx);
            break;
        case OPC_MXU__POOL21:
            decode_opc_mxu__pool21(ctx);
            break;
        case OPC_MXU_Q16SCOP:
            gen_mxu_q16scop(ctx);
            break;
        case OPC_MXU_Q8MADL:
            gen_mxu_q8madl(ctx);
            break;
        case OPC_MXU_S32SFL:
            gen_mxu_s32sfl(ctx);
            break;
        case OPC_MXU_Q8SAD:
            gen_mxu_q8sad(ctx);
            break;
        default:
            return false;
        }

        gen_set_label(l_exit);
    }

    return true;
}
5111