/*
 * RISC-V translation routines for the Zfa Standard Extension.
 *
 * Copyright (c) 2023 Christoph Müllner, christoph.muellner@vrull.eu
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
18
/*
 * Guard for Zfa translators: makes the enclosing function return false
 * when the CPU configuration reached through ctx->cfg_ptr does not have
 * ext_zfa set.  Must be used inside a function returning bool.
 * The argument is parenthesized so any pointer expression may be passed.
 */
#define REQUIRE_ZFA(ctx) do {       \
    if (!(ctx)->cfg_ptr->ext_zfa) { \
        return false;               \
    }                               \
} while (0)
24
/*
 * Guard for half-precision translators: makes the enclosing function
 * return false when the CPU configuration reached through ctx->cfg_ptr
 * does not have ext_zfh set.  Must be used inside a function returning bool.
 * The argument is parenthesized so any pointer expression may be passed.
 */
#define REQUIRE_ZFH(ctx) do {       \
    if (!(ctx)->cfg_ptr->ext_zfh) { \
        return false;               \
    }                               \
} while (0)
30
31static bool trans_fli_s(DisasContext *ctx, arg_fli_s *a)
32{
33    REQUIRE_FPU;
34    REQUIRE_ZFA(ctx);
35    REQUIRE_EXT(ctx, RVF);
36
37    /* Values below are NaN-boxed to avoid a gen_nanbox_s(). */
38    static const uint64_t fli_s_table[] = {
39        0xffffffffbf800000,  /* -1.0 */
40        0xffffffff00800000,  /* minimum positive normal */
41        0xffffffff37800000,  /* 1.0 * 2^-16 */
42        0xffffffff38000000,  /* 1.0 * 2^-15 */
43        0xffffffff3b800000,  /* 1.0 * 2^-8  */
44        0xffffffff3c000000,  /* 1.0 * 2^-7  */
45        0xffffffff3d800000,  /* 1.0 * 2^-4  */
46        0xffffffff3e000000,  /* 1.0 * 2^-3  */
47        0xffffffff3e800000,  /* 0.25 */
48        0xffffffff3ea00000,  /* 0.3125 */
49        0xffffffff3ec00000,  /* 0.375 */
50        0xffffffff3ee00000,  /* 0.4375 */
51        0xffffffff3f000000,  /* 0.5 */
52        0xffffffff3f200000,  /* 0.625 */
53        0xffffffff3f400000,  /* 0.75 */
54        0xffffffff3f600000,  /* 0.875 */
55        0xffffffff3f800000,  /* 1.0 */
56        0xffffffff3fa00000,  /* 1.25 */
57        0xffffffff3fc00000,  /* 1.5 */
58        0xffffffff3fe00000,  /* 1.75 */
59        0xffffffff40000000,  /* 2.0 */
60        0xffffffff40200000,  /* 2.5 */
61        0xffffffff40400000,  /* 3 */
62        0xffffffff40800000,  /* 4 */
63        0xffffffff41000000,  /* 8 */
64        0xffffffff41800000,  /* 16 */
65        0xffffffff43000000,  /* 2^7 */
66        0xffffffff43800000,  /* 2^8 */
67        0xffffffff47000000,  /* 2^15 */
68        0xffffffff47800000,  /* 2^16 */
69        0xffffffff7f800000,  /* +inf */
70        0xffffffff7fc00000,  /* Canonical NaN */
71    };
72
73    TCGv_i64 dest = dest_fpr(ctx, a->rd);
74    tcg_gen_movi_i64(dest, fli_s_table[a->rs1]);
75    gen_set_fpr_hs(ctx, a->rd, dest);
76
77    mark_fs_dirty(ctx);
78    return true;
79}
80
81static bool trans_fli_d(DisasContext *ctx, arg_fli_d *a)
82{
83    REQUIRE_FPU;
84    REQUIRE_ZFA(ctx);
85    REQUIRE_EXT(ctx, RVD);
86
87    static const uint64_t fli_d_table[] = {
88        0xbff0000000000000,  /* -1.0 */
89        0x0010000000000000,  /* minimum positive normal */
90        0x3ef0000000000000,  /* 1.0 * 2^-16 */
91        0x3f00000000000000,  /* 1.0 * 2^-15 */
92        0x3f70000000000000,  /* 1.0 * 2^-8  */
93        0x3f80000000000000,  /* 1.0 * 2^-7  */
94        0x3fb0000000000000,  /* 1.0 * 2^-4  */
95        0x3fc0000000000000,  /* 1.0 * 2^-3  */
96        0x3fd0000000000000,  /* 0.25 */
97        0x3fd4000000000000,  /* 0.3125 */
98        0x3fd8000000000000,  /* 0.375 */
99        0x3fdc000000000000,  /* 0.4375 */
100        0x3fe0000000000000,  /* 0.5 */
101        0x3fe4000000000000,  /* 0.625 */
102        0x3fe8000000000000,  /* 0.75 */
103        0x3fec000000000000,  /* 0.875 */
104        0x3ff0000000000000,  /* 1.0 */
105        0x3ff4000000000000,  /* 1.25 */
106        0x3ff8000000000000,  /* 1.5 */
107        0x3ffc000000000000,  /* 1.75 */
108        0x4000000000000000,  /* 2.0 */
109        0x4004000000000000,  /* 2.5 */
110        0x4008000000000000,  /* 3 */
111        0x4010000000000000,  /* 4 */
112        0x4020000000000000,  /* 8 */
113        0x4030000000000000,  /* 16 */
114        0x4060000000000000,  /* 2^7 */
115        0x4070000000000000,  /* 2^8 */
116        0x40e0000000000000,  /* 2^15 */
117        0x40f0000000000000,  /* 2^16 */
118        0x7ff0000000000000,  /* +inf */
119        0x7ff8000000000000,  /* Canonical NaN */
120    };
121
122    TCGv_i64 dest = dest_fpr(ctx, a->rd);
123    tcg_gen_movi_i64(dest, fli_d_table[a->rs1]);
124    gen_set_fpr_d(ctx, a->rd, dest);
125
126    mark_fs_dirty(ctx);
127    return true;
128}
129
130static bool trans_fli_h(DisasContext *ctx, arg_fli_h *a)
131{
132    REQUIRE_FPU;
133    REQUIRE_ZFA(ctx);
134    REQUIRE_ZFH(ctx);
135
136    /* Values below are NaN-boxed to avoid a gen_nanbox_h(). */
137    static const uint64_t fli_h_table[] = {
138        0xffffffffffffbc00,  /* -1.0 */
139        0xffffffffffff0400,  /* minimum positive normal */
140        0xffffffffffff0100,  /* 1.0 * 2^-16 */
141        0xffffffffffff0200,  /* 1.0 * 2^-15 */
142        0xffffffffffff1c00,  /* 1.0 * 2^-8  */
143        0xffffffffffff2000,  /* 1.0 * 2^-7  */
144        0xffffffffffff2c00,  /* 1.0 * 2^-4  */
145        0xffffffffffff3000,  /* 1.0 * 2^-3  */
146        0xffffffffffff3400,  /* 0.25 */
147        0xffffffffffff3500,  /* 0.3125 */
148        0xffffffffffff3600,  /* 0.375 */
149        0xffffffffffff3700,  /* 0.4375 */
150        0xffffffffffff3800,  /* 0.5 */
151        0xffffffffffff3900,  /* 0.625 */
152        0xffffffffffff3a00,  /* 0.75 */
153        0xffffffffffff3b00,  /* 0.875 */
154        0xffffffffffff3c00,  /* 1.0 */
155        0xffffffffffff3d00,  /* 1.25 */
156        0xffffffffffff3e00,  /* 1.5 */
157        0xffffffffffff3f00,  /* 1.75 */
158        0xffffffffffff4000,  /* 2.0 */
159        0xffffffffffff4100,  /* 2.5 */
160        0xffffffffffff4200,  /* 3 */
161        0xffffffffffff4400,  /* 4 */
162        0xffffffffffff4800,  /* 8 */
163        0xffffffffffff4c00,  /* 16 */
164        0xffffffffffff5800,  /* 2^7 */
165        0xffffffffffff5c00,  /* 2^8 */
166        0xffffffffffff7800,  /* 2^15 */
167        0xffffffffffff7c00,  /* 2^16 */
168        0xffffffffffff7c00,  /* +inf */
169        0xffffffffffff7e00,  /* Canonical NaN */
170    };
171
172    TCGv_i64 dest = dest_fpr(ctx, a->rd);
173    tcg_gen_movi_i64(dest, fli_h_table[a->rs1]);
174    gen_set_fpr_hs(ctx, a->rd, dest);
175
176    mark_fs_dirty(ctx);
177    return true;
178}
179
180static bool trans_fminm_s(DisasContext *ctx, arg_fminm_s *a)
181{
182    REQUIRE_FPU;
183    REQUIRE_ZFA(ctx);
184    REQUIRE_EXT(ctx, RVF);
185
186    TCGv_i64 dest = dest_fpr(ctx, a->rd);
187    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
188    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
189
190    gen_helper_fminm_s(dest, tcg_env, src1, src2);
191    gen_set_fpr_hs(ctx, a->rd, dest);
192
193    mark_fs_dirty(ctx);
194    return true;
195}
196
197static bool trans_fmaxm_s(DisasContext *ctx, arg_fmaxm_s *a)
198{
199    REQUIRE_FPU;
200    REQUIRE_ZFA(ctx);
201    REQUIRE_EXT(ctx, RVF);
202
203    TCGv_i64 dest = dest_fpr(ctx, a->rd);
204    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
205    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
206
207    gen_helper_fmaxm_s(dest, tcg_env, src1, src2);
208    gen_set_fpr_hs(ctx, a->rd, dest);
209
210    mark_fs_dirty(ctx);
211    return true;
212}
213
214static bool trans_fminm_d(DisasContext *ctx, arg_fminm_d *a)
215{
216    REQUIRE_FPU;
217    REQUIRE_ZFA(ctx);
218    REQUIRE_EXT(ctx, RVD);
219
220    TCGv_i64 dest = dest_fpr(ctx, a->rd);
221    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
222    TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
223
224    gen_helper_fminm_d(dest, tcg_env, src1, src2);
225    gen_set_fpr_d(ctx, a->rd, dest);
226
227    mark_fs_dirty(ctx);
228    return true;
229}
230
231static bool trans_fmaxm_d(DisasContext *ctx, arg_fmaxm_d *a)
232{
233    REQUIRE_FPU;
234    REQUIRE_ZFA(ctx);
235    REQUIRE_EXT(ctx, RVD);
236
237    TCGv_i64 dest = dest_fpr(ctx, a->rd);
238    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
239    TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
240
241    gen_helper_fmaxm_d(dest, tcg_env, src1, src2);
242    gen_set_fpr_d(ctx, a->rd, dest);
243
244    mark_fs_dirty(ctx);
245    return true;
246}
247
248static bool trans_fminm_h(DisasContext *ctx, arg_fminm_h *a)
249{
250    REQUIRE_FPU;
251    REQUIRE_ZFA(ctx);
252    REQUIRE_ZFH(ctx);
253
254    TCGv_i64 dest = dest_fpr(ctx, a->rd);
255    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
256    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
257
258    gen_helper_fminm_h(dest, tcg_env, src1, src2);
259    gen_set_fpr_hs(ctx, a->rd, dest);
260
261    mark_fs_dirty(ctx);
262    return true;
263}
264
265static bool trans_fmaxm_h(DisasContext *ctx, arg_fmaxm_h *a)
266{
267    REQUIRE_FPU;
268    REQUIRE_ZFA(ctx);
269    REQUIRE_ZFH(ctx);
270
271    TCGv_i64 dest = dest_fpr(ctx, a->rd);
272    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
273    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
274
275    gen_helper_fmaxm_h(dest, tcg_env, src1, src2);
276    gen_set_fpr_hs(ctx, a->rd, dest);
277
278    mark_fs_dirty(ctx);
279    return true;
280}
281
282static bool trans_fround_s(DisasContext *ctx, arg_fround_s *a)
283{
284    REQUIRE_FPU;
285    REQUIRE_ZFA(ctx);
286    REQUIRE_EXT(ctx, RVF);
287
288    TCGv_i64 dest = dest_fpr(ctx, a->rd);
289    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
290
291    gen_set_rm(ctx, a->rm);
292    gen_helper_fround_s(dest, tcg_env, src1);
293    gen_set_fpr_hs(ctx, a->rd, dest);
294
295    mark_fs_dirty(ctx);
296    return true;
297}
298
299static bool trans_froundnx_s(DisasContext *ctx, arg_froundnx_s *a)
300{
301    REQUIRE_FPU;
302    REQUIRE_ZFA(ctx);
303    REQUIRE_EXT(ctx, RVF);
304
305    TCGv_i64 dest = dest_fpr(ctx, a->rd);
306    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
307
308    gen_set_rm(ctx, a->rm);
309    gen_helper_froundnx_s(dest, tcg_env, src1);
310    gen_set_fpr_hs(ctx, a->rd, dest);
311
312    mark_fs_dirty(ctx);
313    return true;
314}
315
316static bool trans_fround_d(DisasContext *ctx, arg_fround_d *a)
317{
318    REQUIRE_FPU;
319    REQUIRE_ZFA(ctx);
320    REQUIRE_EXT(ctx, RVD);
321
322    TCGv_i64 dest = dest_fpr(ctx, a->rd);
323    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
324
325    gen_set_rm(ctx, a->rm);
326    gen_helper_fround_d(dest, tcg_env, src1);
327    gen_set_fpr_hs(ctx, a->rd, dest);
328
329    mark_fs_dirty(ctx);
330    return true;
331}
332
333static bool trans_froundnx_d(DisasContext *ctx, arg_froundnx_d *a)
334{
335    REQUIRE_FPU;
336    REQUIRE_ZFA(ctx);
337    REQUIRE_EXT(ctx, RVD);
338
339    TCGv_i64 dest = dest_fpr(ctx, a->rd);
340    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
341
342    gen_set_rm(ctx, a->rm);
343    gen_helper_froundnx_d(dest, tcg_env, src1);
344    gen_set_fpr_hs(ctx, a->rd, dest);
345
346    mark_fs_dirty(ctx);
347    return true;
348}
349
350static bool trans_fround_h(DisasContext *ctx, arg_fround_h *a)
351{
352    REQUIRE_FPU;
353    REQUIRE_ZFA(ctx);
354    REQUIRE_ZFH(ctx);
355
356    TCGv_i64 dest = dest_fpr(ctx, a->rd);
357    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
358
359    gen_set_rm(ctx, a->rm);
360    gen_helper_fround_h(dest, tcg_env, src1);
361    gen_set_fpr_hs(ctx, a->rd, dest);
362
363    mark_fs_dirty(ctx);
364    return true;
365}
366
367static bool trans_froundnx_h(DisasContext *ctx, arg_froundnx_h *a)
368{
369    REQUIRE_FPU;
370    REQUIRE_ZFA(ctx);
371    REQUIRE_ZFH(ctx);
372
373    TCGv_i64 dest = dest_fpr(ctx, a->rd);
374    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
375
376    gen_set_rm(ctx, a->rm);
377    gen_helper_froundnx_h(dest, tcg_env, src1);
378    gen_set_fpr_hs(ctx, a->rd, dest);
379
380    mark_fs_dirty(ctx);
381    return true;
382}
383
384bool trans_fcvtmod_w_d(DisasContext *ctx, arg_fcvtmod_w_d *a)
385{
386    REQUIRE_FPU;
387    REQUIRE_ZFA(ctx);
388    REQUIRE_EXT(ctx, RVD);
389
390    TCGv dst = dest_gpr(ctx, a->rd);
391    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
392    TCGv_i64 t1 = tcg_temp_new_i64();
393
394    /* Rounding mode is RTZ. */
395    gen_set_rm(ctx, RISCV_FRM_RTZ);
396    gen_helper_fcvtmod_w_d(t1, tcg_env, src1);
397    tcg_gen_trunc_i64_tl(dst, t1);
398    gen_set_gpr(ctx, a->rd, dst);
399
400    return true;
401}
402
403bool trans_fmvh_x_d(DisasContext *ctx, arg_fmvh_x_d *a)
404{
405    REQUIRE_FPU;
406    REQUIRE_ZFA(ctx);
407    REQUIRE_EXT(ctx, RVD);
408    REQUIRE_32BIT(ctx);
409
410    TCGv dst = dest_gpr(ctx, a->rd);
411    TCGv_i64 t1 = tcg_temp_new_i64();
412    tcg_gen_sari_i64(t1, cpu_fpr[a->rs1], 32);
413    tcg_gen_trunc_i64_tl(dst, t1);
414    gen_set_gpr(ctx, a->rd, dst);
415    return true;
416}
417
418bool trans_fmvp_d_x(DisasContext *ctx, arg_fmvp_d_x *a)
419{
420    REQUIRE_FPU;
421    REQUIRE_ZFA(ctx);
422    REQUIRE_EXT(ctx, RVD);
423    REQUIRE_32BIT(ctx);
424
425    TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
426    TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);
427    tcg_gen_concat_tl_i64(cpu_fpr[a->rd], src1, src2);
428
429    mark_fs_dirty(ctx);
430    return true;
431}
432
433bool trans_fleq_s(DisasContext *ctx, arg_fleq_s *a)
434{
435    REQUIRE_FPU;
436    REQUIRE_ZFA(ctx);
437    REQUIRE_EXT(ctx, RVF);
438
439    TCGv dest = dest_gpr(ctx, a->rd);
440    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
441    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
442
443    gen_helper_fleq_s(dest, tcg_env, src1, src2);
444    gen_set_gpr(ctx, a->rd, dest);
445    return true;
446}
447
448bool trans_fltq_s(DisasContext *ctx, arg_fltq_s *a)
449{
450    REQUIRE_FPU;
451    REQUIRE_ZFA(ctx);
452    REQUIRE_EXT(ctx, RVF);
453
454    TCGv dest = dest_gpr(ctx, a->rd);
455    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
456    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
457
458    gen_helper_fltq_s(dest, tcg_env, src1, src2);
459    gen_set_gpr(ctx, a->rd, dest);
460    return true;
461}
462
463bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a)
464{
465    REQUIRE_FPU;
466    REQUIRE_ZFA(ctx);
467    REQUIRE_EXT(ctx, RVD);
468
469    TCGv dest = dest_gpr(ctx, a->rd);
470    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
471    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
472
473    gen_helper_fleq_d(dest, tcg_env, src1, src2);
474    gen_set_gpr(ctx, a->rd, dest);
475    return true;
476}
477
478bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a)
479{
480    REQUIRE_FPU;
481    REQUIRE_ZFA(ctx);
482    REQUIRE_EXT(ctx, RVD);
483
484    TCGv dest = dest_gpr(ctx, a->rd);
485    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
486    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
487
488    gen_helper_fltq_d(dest, tcg_env, src1, src2);
489    gen_set_gpr(ctx, a->rd, dest);
490    return true;
491}
492
493bool trans_fleq_h(DisasContext *ctx, arg_fleq_h *a)
494{
495    REQUIRE_FPU;
496    REQUIRE_ZFA(ctx);
497    REQUIRE_ZFH(ctx);
498
499    TCGv dest = dest_gpr(ctx, a->rd);
500    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
501    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
502
503    gen_helper_fleq_h(dest, tcg_env, src1, src2);
504    gen_set_gpr(ctx, a->rd, dest);
505    return true;
506}
507
508bool trans_fltq_h(DisasContext *ctx, arg_fltq_h *a)
509{
510    REQUIRE_FPU;
511    REQUIRE_ZFA(ctx);
512    REQUIRE_ZFH(ctx);
513
514    TCGv dest = dest_gpr(ctx, a->rd);
515    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
516    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
517
518    gen_helper_fltq_h(dest, tcg_env, src1, src2);
519    gen_set_gpr(ctx, a->rd, dest);
520    return true;
521}
522