/*
 * RISC-V translation routines for the RV64M Standard Extension.
 *
 * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
 * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
 *                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

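/*
 * The multiply instructions are gated on either the full M extension or
 * its Zmmul subset (multiplication without division); the divide and
 * remainder instructions still require M proper.
 */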
#define REQUIRE_M_OR_ZMMUL(ctx) do {                      \
    if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
        return false;                                     \
    }                                                     \
} while (0)

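/*
 * Return the upper 128 bits of the unsigned 256-bit product of the
 * 128-bit values (ah:al) and (bh:bl) in (r3:r2).  The four 64x64->128
 * partial products are combined schoolbook-style; the carry out of the
 * middle additions is recovered with an LTU comparison before the final
 * ah*bh term is added in.
 */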
static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh)
{
    TCGv tmpl = tcg_temp_new();
    TCGv tmph = tcg_temp_new();
    TCGv r0 = tcg_temp_new();
    TCGv r1 = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);

    tcg_gen_mulu2_tl(r0, r1, al, bl);

    tcg_gen_mulu2_tl(tmpl, tmph, al, bh);
    tcg_gen_add2_tl(r1, r2, r1, zero, tmpl, tmph);
    tcg_gen_mulu2_tl(tmpl, tmph, ah, bl);
    tcg_gen_add2_tl(r1, tmph, r1, r2, tmpl, tmph);
    /* Overflow detection into r3 */
    tcg_gen_setcond_tl(TCG_COND_LTU, r3, tmph, r2);

    tcg_gen_mov_tl(r2, tmph);

    tcg_gen_mulu2_tl(tmpl, tmph, ah, bh);
    tcg_gen_add2_tl(r2, r3, r2, r3, tmpl, tmph);

    tcg_temp_free(tmpl);
    tcg_temp_free(tmph);
}

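/*
 * Return the low 128 bits of the product of (rs1h:rs1l) and (rs2h:rs2l)
 * in (rh:rl).  The rs1h*rs2h partial product only affects bits above
 * 128 and so is not computed.
 */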
static void gen_mul_i128(TCGv rl, TCGv rh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv tmpl = tcg_temp_new();
    TCGv tmph = tcg_temp_new();
    TCGv tmpx = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);

    tcg_gen_mulu2_tl(rl, rh, rs1l, rs2l);
    tcg_gen_mulu2_tl(tmpl, tmph, rs1l, rs2h);
    tcg_gen_add2_tl(rh, tmpx, rh, zero, tmpl, tmph);
    tcg_gen_mulu2_tl(tmpl, tmph, rs1h, rs2l);
    tcg_gen_add2_tl(rh, tmph, rh, tmpx, tmpl, tmph);

    tcg_temp_free(tmpl);
    tcg_temp_free(tmph);
    tcg_temp_free(tmpx);
}

static bool trans_mul(DisasContext *ctx, arg_mul *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
}

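/*
 * Signed 128-bit MULH, derived from the unsigned high product:
 * mulh(a, b) = mulhu(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0).
 * The sign masks are built with an arithmetic shift of the high words.
 */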
static void gen_mulh_i128(TCGv rl, TCGv rh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv t0l = tcg_temp_new();
    TCGv t0h = tcg_temp_new();
    TCGv t1l = tcg_temp_new();
    TCGv t1h = tcg_temp_new();

    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_sari_tl(t0h, rs1h, 63);
    tcg_gen_and_tl(t0l, t0h, rs2l);
    tcg_gen_and_tl(t0h, t0h, rs2h);
    tcg_gen_sari_tl(t1h, rs2h, 63);
    tcg_gen_and_tl(t1l, t1h, rs1l);
    tcg_gen_and_tl(t1h, t1h, rs1h);
    tcg_gen_sub2_tl(t0l, t0h, rl, rh, t0l, t0h);
    tcg_gen_sub2_tl(rl, rh, t0l, t0h, t1l, t1h);

    tcg_temp_free(t0l);
    tcg_temp_free(t0h);
    tcg_temp_free(t1l);
    tcg_temp_free(t1h);
}

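/* XLEN-wide MULH: take the high half of a full signed multiply. */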
static void gen_mulh(TCGv ret, TCGv s1, TCGv s2)
{
    TCGv discard = tcg_temp_new();

    tcg_gen_muls2_tl(discard, ret, s1, s2);
    tcg_temp_free(discard);
}

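/*
 * 32-bit operand-length MULH/MULHU: the sources have already been
 * sign- or zero-extended, so the product is exact in 64 bits and an
 * arithmetic shift by 32 leaves the high word.
 */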
static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
{
    tcg_gen_mul_tl(ret, s1, s2);
    tcg_gen_sari_tl(ret, ret, 32);
}

static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
                            gen_mulh_i128);
}

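/*
 * Signed x unsigned 128-bit MULHSU:
 * mulhsu(a, b) = mulhu(a, b) - (a < 0 ? b : 0).
 */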
static void gen_mulhsu_i128(TCGv rl, TCGv rh,
                            TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv t0l = tcg_temp_new();
    TCGv t0h = tcg_temp_new();

    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_sari_tl(t0h, rs1h, 63);
    tcg_gen_and_tl(t0l, t0h, rs2l);
    tcg_gen_and_tl(t0h, t0h, rs2h);
    tcg_gen_sub2_tl(rl, rh, rl, rh, t0l, t0h);

    tcg_temp_free(t0l);
    tcg_temp_free(t0h);
}

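/* XLEN-wide MULHSU, using the same mulhu-based identity as above. */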
static void gen_mulhsu(TCGv ret, TCGv arg1, TCGv arg2)
{
    TCGv rl = tcg_temp_new();
    TCGv rh = tcg_temp_new();

    tcg_gen_mulu2_tl(rl, rh, arg1, arg2);
    /* fix up for one negative */
    tcg_gen_sari_tl(rl, arg1, TARGET_LONG_BITS - 1);
    tcg_gen_and_tl(rl, rl, arg2);
    tcg_gen_sub_tl(ret, rh, rl);

    tcg_temp_free(rl);
    tcg_temp_free(rh);
}

static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
{
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();

    tcg_gen_ext32s_tl(t1, arg1);
    tcg_gen_ext32u_tl(t2, arg2);
    tcg_gen_mul_tl(ret, t1, t2);
    tcg_temp_free(t1);
    tcg_temp_free(t2);
    tcg_gen_sari_tl(ret, ret, 32);
}

static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
                            gen_mulhsu_i128);
}

static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
{
    TCGv discard = tcg_temp_new();

    tcg_gen_mulu2_tl(discard, ret, s1, s2);
    tcg_temp_free(discard);
}

static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    /* gen_mulh_w works for either sign as input. */
    return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
                            gen_mulhu_i128);
}

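/*
 * The 128-bit divide/remainder operations are implemented in helpers;
 * each returns the low half of the result directly and the high half
 * through env->retxh.
 */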
static void gen_div_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divs_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}

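/*
 * Signed division never traps on RISC-V: dividing by zero returns -1
 * and the overflow case (MIN / -1) returns MIN.  The operands are
 * patched with movcond so that the host division itself produces those
 * results and cannot fault.
 */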
static void gen_div(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv temp1, temp2, zero, one, mone, min;

    temp1 = tcg_temp_new();
    temp2 = tcg_temp_new();
    zero = tcg_constant_tl(0);
    one = tcg_constant_tl(1);
    mone = tcg_constant_tl(-1);
    min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));

    /*
     * If overflow, set temp2 to 1, else source2.
     * This produces the required result of min.
     */
    tcg_gen_setcond_tl(TCG_COND_EQ, temp1, source1, min);
    tcg_gen_setcond_tl(TCG_COND_EQ, temp2, source2, mone);
    tcg_gen_and_tl(temp1, temp1, temp2);
    tcg_gen_movcond_tl(TCG_COND_NE, temp2, temp1, zero, one, source2);

    /*
     * If div by zero, set temp1 to -1 and temp2 to 1 to
     * produce the required result of -1.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, temp1, source2, zero, mone, source1);
    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, temp2);

    tcg_gen_div_tl(ret, temp1, temp2);

    tcg_temp_free(temp1);
    tcg_temp_free(temp2);
}

static bool trans_div(DisasContext *ctx, arg_div *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_SIGN, gen_div, gen_div_i128);
}

static void gen_divu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}

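/*
 * Unsigned division by zero must return all bits set; substitute a
 * dividend of ~0 and a divisor of 1 for that case.
 */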
static void gen_divu(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv temp1, temp2, zero, one, max;

    temp1 = tcg_temp_new();
    temp2 = tcg_temp_new();
    zero = tcg_constant_tl(0);
    one = tcg_constant_tl(1);
    max = tcg_constant_tl(~0);

    /*
     * If div by zero, set temp1 to max and temp2 to 1 to
     * produce the required result of max.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, temp1, source2, zero, max, source1);
    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, source2);
    tcg_gen_divu_tl(ret, temp1, temp2);

    tcg_temp_free(temp1);
    tcg_temp_free(temp2);
}

static bool trans_divu(DisasContext *ctx, arg_divu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, gen_divu_i128);
}

static void gen_rem_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_rems_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}

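/*
 * Signed remainder: the overflow case (MIN % -1) returns 0 and
 * remainder by zero returns the dividend.  The operands are patched to
 * keep the host operation from trapping, and the by-zero result is
 * fixed up afterwards.
 */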
static void gen_rem(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv temp1, temp2, zero, one, mone, min;

    temp1 = tcg_temp_new();
    temp2 = tcg_temp_new();
    zero = tcg_constant_tl(0);
    one = tcg_constant_tl(1);
    mone = tcg_constant_tl(-1);
    min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));

    /*
     * If overflow, set temp1 to 0, else source1.
     * This avoids a possible host trap, and produces the required result of 0.
     */
    tcg_gen_setcond_tl(TCG_COND_EQ, temp1, source1, min);
    tcg_gen_setcond_tl(TCG_COND_EQ, temp2, source2, mone);
    tcg_gen_and_tl(temp1, temp1, temp2);
    tcg_gen_movcond_tl(TCG_COND_NE, temp1, temp1, zero, zero, source1);

    /*
     * If div by zero, set temp2 to 1, else source2.
     * This avoids a possible host trap, but produces an incorrect result.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, source2);

    tcg_gen_rem_tl(temp1, temp1, temp2);

    /* If div by zero, the required result is the original dividend. */
    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, temp1);

    tcg_temp_free(temp1);
    tcg_temp_free(temp2);
}

static bool trans_rem(DisasContext *ctx, arg_rem *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, gen_rem_i128);
}

static void gen_remu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_remu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}

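/* Unsigned remainder by zero returns the original dividend. */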
static void gen_remu(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv temp, zero, one;

    temp = tcg_temp_new();
    zero = tcg_constant_tl(0);
    one = tcg_constant_tl(1);

    /*
     * If div by zero, set temp to 1, else source2.
     * This avoids a possible host trap, but produces an incorrect result.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, temp, source2, zero, one, source2);

    tcg_gen_remu_tl(temp, source1, temp);

    /* If div by zero, the required result is the original dividend. */
    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, temp);

    tcg_temp_free(temp);
}

static bool trans_remu(DisasContext *ctx, arg_remu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, gen_remu_i128);
}

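/*
 * The *W instructions (RV64/RV128 only) operate on 32-bit values;
 * narrowing ctx->ol to MXL_RV32 makes the shared gen_arith path extend
 * the sources and write back a sign-extended 32-bit result.
 */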
static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_M_OR_ZMMUL(ctx);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
}

static bool trans_divw(DisasContext *ctx, arg_divw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}

static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}

static bool trans_remw(DisasContext *ctx, arg_remw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}

static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}

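/*
 * The *D instructions exist only on RV128 and operate on 64-bit values,
 * selected by narrowing ctx->ol to MXL_RV64.
 */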
static bool trans_muld(DisasContext *ctx, arg_muld *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_M_OR_ZMMUL(ctx);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
}

static bool trans_divd(DisasContext *ctx, arg_divd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}

static bool trans_divud(DisasContext *ctx, arg_divud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}

static bool trans_remd(DisasContext *ctx, arg_remd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}

static bool trans_remud(DisasContext *ctx, arg_remud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}