1/*
2 * RISC-V translation routines for the BF16 Standard Extensions.
3 *
4 * Copyright (c) 2020-2023 PLCT Lab
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18
/*
 * Leave the enclosing function with "false" when
 * ctx->cfg_ptr->ext_zfbfmin is not set.
 *
 * The expansion contains a "return false;" statement, so the macro may only
 * be used inside a function returning bool.  The argument is parenthesized
 * so that any pointer-valued expression (e.g. "*pp") expands correctly, not
 * just a plain identifier.
 */
#define REQUIRE_ZFBFMIN(ctx) do { \
    if (!(ctx)->cfg_ptr->ext_zfbfmin) { \
        return false; \
    } \
} while (0)
24
/*
 * Leave the enclosing function with "false" when
 * ctx->cfg_ptr->ext_zvfbfmin is not set.
 *
 * The expansion contains a "return false;" statement, so the macro may only
 * be used inside a function returning bool.  The argument is parenthesized
 * so that any pointer-valued expression (e.g. "*pp") expands correctly, not
 * just a plain identifier.
 */
#define REQUIRE_ZVFBFMIN(ctx) do { \
    if (!(ctx)->cfg_ptr->ext_zvfbfmin) { \
        return false; \
    } \
} while (0)
30
/*
 * Leave the enclosing function with "false" when
 * ctx->cfg_ptr->ext_zvfbfwma is not set.
 *
 * The expansion contains a "return false;" statement, so the macro may only
 * be used inside a function returning bool.  The argument is parenthesized
 * so that any pointer-valued expression (e.g. "*pp") expands correctly, not
 * just a plain identifier.
 */
#define REQUIRE_ZVFBFWMA(ctx) do { \
    if (!(ctx)->cfg_ptr->ext_zvfbfwma) { \
        return false; \
    } \
} while (0)
36
37static bool trans_fcvt_bf16_s(DisasContext *ctx, arg_fcvt_bf16_s *a)
38{
39    REQUIRE_FPU;
40    REQUIRE_ZFBFMIN(ctx);
41
42    TCGv_i64 dest = dest_fpr(ctx, a->rd);
43    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
44
45    gen_set_rm(ctx, a->rm);
46    gen_helper_fcvt_bf16_s(dest, tcg_env, src1);
47    gen_set_fpr_hs(ctx, a->rd, dest);
48    mark_fs_dirty(ctx);
49    return true;
50}
51
52static bool trans_fcvt_s_bf16(DisasContext *ctx, arg_fcvt_s_bf16 *a)
53{
54    REQUIRE_FPU;
55    REQUIRE_ZFBFMIN(ctx);
56
57    TCGv_i64 dest = dest_fpr(ctx, a->rd);
58    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
59
60    gen_set_rm(ctx, a->rm);
61    gen_helper_fcvt_s_bf16(dest, tcg_env, src1);
62    gen_set_fpr_hs(ctx, a->rd, dest);
63    mark_fs_dirty(ctx);
64    return true;
65}
66
67static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, arg_vfncvtbf16_f_f_w *a)
68{
69    REQUIRE_FPU;
70    REQUIRE_ZVFBFMIN(ctx);
71
72    if (opfv_narrow_check(ctx, a) && (ctx->sew == MO_16)) {
73        uint32_t data = 0;
74        TCGLabel *over = gen_new_label();
75
76        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
77        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
78        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
79
80        data = FIELD_DP32(data, VDATA, VM, a->vm);
81        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
82        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
83        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
84        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
85                           vreg_ofs(ctx, a->rs2), tcg_env,
86                           ctx->cfg_ptr->vlen / 8,
87                           ctx->cfg_ptr->vlen / 8, data,
88                           gen_helper_vfncvtbf16_f_f_w);
89        mark_vs_dirty(ctx);
90        gen_set_label(over);
91        return true;
92    }
93    return false;
94}
95
96static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
97{
98    REQUIRE_FPU;
99    REQUIRE_ZVFBFMIN(ctx);
100
101    if (opfv_widen_check(ctx, a) && (ctx->sew == MO_16)) {
102        uint32_t data = 0;
103        TCGLabel *over = gen_new_label();
104
105        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
106        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
107        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
108
109        data = FIELD_DP32(data, VDATA, VM, a->vm);
110        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
111        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
112        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
113        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
114                           vreg_ofs(ctx, a->rs2), tcg_env,
115                           ctx->cfg_ptr->vlen / 8,
116                           ctx->cfg_ptr->vlen / 8, data,
117                           gen_helper_vfwcvtbf16_f_f_v);
118        mark_vs_dirty(ctx);
119        gen_set_label(over);
120        return true;
121    }
122    return false;
123}
124
125static bool trans_vfwmaccbf16_vv(DisasContext *ctx, arg_vfwmaccbf16_vv *a)
126{
127    REQUIRE_FPU;
128    REQUIRE_ZVFBFWMA(ctx);
129
130    if (require_rvv(ctx) && vext_check_isa_ill(ctx) && (ctx->sew == MO_16) &&
131        vext_check_dss(ctx, a->rd, a->rs1, a->rs2, a->vm)) {
132        uint32_t data = 0;
133        TCGLabel *over = gen_new_label();
134
135        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
136        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
137        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
138
139        data = FIELD_DP32(data, VDATA, VM, a->vm);
140        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
141        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
142        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
143        tcg_gen_gvec_4_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
144                           vreg_ofs(ctx, a->rs1),
145                           vreg_ofs(ctx, a->rs2), tcg_env,
146                           ctx->cfg_ptr->vlen / 8,
147                           ctx->cfg_ptr->vlen / 8, data,
148                           gen_helper_vfwmaccbf16_vv);
149        mark_vs_dirty(ctx);
150        gen_set_label(over);
151        return true;
152    }
153    return false;
154}
155
156static bool trans_vfwmaccbf16_vf(DisasContext *ctx, arg_vfwmaccbf16_vf *a)
157{
158    REQUIRE_FPU;
159    REQUIRE_ZVFBFWMA(ctx);
160
161    if (require_rvv(ctx) && (ctx->sew == MO_16) && vext_check_isa_ill(ctx) &&
162        vext_check_ds(ctx, a->rd, a->rs2, a->vm)) {
163        uint32_t data = 0;
164
165        gen_set_rm(ctx, RISCV_FRM_DYN);
166        data = FIELD_DP32(data, VDATA, VM, a->vm);
167        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
168        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
169        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
170        return opfvf_trans(a->rd, a->rs1, a->rs2, data,
171                           gen_helper_vfwmaccbf16_vf, ctx);
172    }
173
174    return false;
175}
176