1/*
2 * RISC-V translation routines for the RV64M Standard Extension.
3 *
4 * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
5 * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
6 *                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2 or later, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20
21static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh)
22{
23    TCGv tmpl = tcg_temp_new();
24    TCGv tmph = tcg_temp_new();
25    TCGv r0 = tcg_temp_new();
26    TCGv r1 = tcg_temp_new();
27    TCGv zero = tcg_constant_tl(0);
28
29    tcg_gen_mulu2_tl(r0, r1, al, bl);
30
31    tcg_gen_mulu2_tl(tmpl, tmph, al, bh);
32    tcg_gen_add2_tl(r1, r2, r1, zero, tmpl, tmph);
33    tcg_gen_mulu2_tl(tmpl, tmph, ah, bl);
34    tcg_gen_add2_tl(r1, tmph, r1, r2, tmpl, tmph);
35    /* Overflow detection into r3 */
36    tcg_gen_setcond_tl(TCG_COND_LTU, r3, tmph, r2);
37
38    tcg_gen_mov_tl(r2, tmph);
39
40    tcg_gen_mulu2_tl(tmpl, tmph, ah, bh);
41    tcg_gen_add2_tl(r2, r3, r2, r3, tmpl, tmph);
42
43    tcg_temp_free(tmpl);
44    tcg_temp_free(tmph);
45}
46
47static void gen_mul_i128(TCGv rl, TCGv rh,
48                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
49{
50    TCGv tmpl = tcg_temp_new();
51    TCGv tmph = tcg_temp_new();
52    TCGv tmpx = tcg_temp_new();
53    TCGv zero = tcg_constant_tl(0);
54
55    tcg_gen_mulu2_tl(rl, rh, rs1l, rs2l);
56    tcg_gen_mulu2_tl(tmpl, tmph, rs1l, rs2h);
57    tcg_gen_add2_tl(rh, tmpx, rh, zero, tmpl, tmph);
58    tcg_gen_mulu2_tl(tmpl, tmph, rs1h, rs2l);
59    tcg_gen_add2_tl(rh, tmph, rh, tmpx, tmpl, tmph);
60
61    tcg_temp_free(tmpl);
62    tcg_temp_free(tmph);
63    tcg_temp_free(tmpx);
64}
65
/*
 * Translate MUL: after the REQUIRE_EXT(ctx, RVM) check, hand off to
 * gen_arith with no operand extension, tcg_gen_mul_tl as the word-sized
 * generator and gen_mul_i128 as the 128-bit generator.
 *
 * NOTE(review): REQUIRE_EXT and gen_arith are defined elsewhere;
 * presumably REQUIRE_EXT rejects the instruction when the M extension is
 * not enabled -- confirm.
 */
static bool trans_mul(DisasContext *ctx, arg_mul *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
}
71
72static void gen_mulh_i128(TCGv rl, TCGv rh,
73                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
74{
75    TCGv t0l = tcg_temp_new();
76    TCGv t0h = tcg_temp_new();
77    TCGv t1l = tcg_temp_new();
78    TCGv t1h = tcg_temp_new();
79
80    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);
81    tcg_gen_sari_tl(t0h, rs1h, 63);
82    tcg_gen_and_tl(t0l, t0h, rs2l);
83    tcg_gen_and_tl(t0h, t0h, rs2h);
84    tcg_gen_sari_tl(t1h, rs2h, 63);
85    tcg_gen_and_tl(t1l, t1h, rs1l);
86    tcg_gen_and_tl(t1h, t1h, rs1h);
87    tcg_gen_sub2_tl(t0l, t0h, rl, rh, t0l, t0h);
88    tcg_gen_sub2_tl(rl, rh, t0l, t0h, t1l, t1h);
89
90    tcg_temp_free(t0l);
91    tcg_temp_free(t0h);
92    tcg_temp_free(t1l);
93    tcg_temp_free(t1h);
94}
95
96static void gen_mulh(TCGv ret, TCGv s1, TCGv s2)
97{
98    TCGv discard = tcg_temp_new();
99
100    tcg_gen_muls2_tl(discard, ret, s1, s2);
101    tcg_temp_free(discard);
102}
103
/*
 * Emit a plain word multiply of s1 and s2 followed by an arithmetic right
 * shift by 32, leaving bits [63:32] of the product (sign-extended) in ret.
 *
 * NOTE(review): presumably the operands have already been extended from
 * 32 bits by the caller, so that the full product fits in the target
 * word -- confirm.
 */
static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
{
    tcg_gen_mul_tl(ret, s1, s2);
    tcg_gen_sari_tl(ret, ret, 32);
}
109
/*
 * Translate MULH: after the REQUIRE_EXT(ctx, RVM) check, dispatch through
 * gen_arith_per_ol with sign-extended operands, supplying gen_mulh,
 * gen_mulh_w and gen_mulh_i128 as the generators.
 *
 * NOTE(review): gen_arith_per_ol is defined elsewhere; presumably it
 * selects among the three generators by operation length -- confirm.
 */
static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
                            gen_mulh_i128);
}
116
117static void gen_mulhsu_i128(TCGv rl, TCGv rh,
118                            TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
119{
120
121    TCGv t0l = tcg_temp_new();
122    TCGv t0h = tcg_temp_new();
123
124    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);
125    tcg_gen_sari_tl(t0h, rs1h, 63);
126    tcg_gen_and_tl(t0l, t0h, rs2l);
127    tcg_gen_and_tl(t0h, t0h, rs2h);
128    tcg_gen_sub2_tl(rl, rh, rl, rh, t0l, t0h);
129
130    tcg_temp_free(t0l);
131    tcg_temp_free(t0h);
132}
133
134static void gen_mulhsu(TCGv ret, TCGv arg1, TCGv arg2)
135{
136    TCGv rl = tcg_temp_new();
137    TCGv rh = tcg_temp_new();
138
139    tcg_gen_mulu2_tl(rl, rh, arg1, arg2);
140    /* fix up for one negative */
141    tcg_gen_sari_tl(rl, arg1, TARGET_LONG_BITS - 1);
142    tcg_gen_and_tl(rl, rl, arg2);
143    tcg_gen_sub_tl(ret, rh, rl);
144
145    tcg_temp_free(rl);
146    tcg_temp_free(rh);
147}
148
149static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
150{
151    TCGv t1 = tcg_temp_new();
152    TCGv t2 = tcg_temp_new();
153
154    tcg_gen_ext32s_tl(t1, arg1);
155    tcg_gen_ext32u_tl(t2, arg2);
156    tcg_gen_mul_tl(ret, t1, t2);
157    tcg_temp_free(t1);
158    tcg_temp_free(t2);
159    tcg_gen_sari_tl(ret, ret, 32);
160}
161
/*
 * Translate MULHSU: after the REQUIRE_EXT(ctx, RVM) check, dispatch
 * through gen_arith_per_ol with no operand extension, supplying
 * gen_mulhsu, gen_mulhsu_w and gen_mulhsu_i128 as the generators.
 * gen_mulhsu_w performs its own 32-bit sign/zero extension, which is why
 * EXT_NONE is sufficient for that path.
 */
static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
                            gen_mulhsu_i128);
}
168
169static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
170{
171    TCGv discard = tcg_temp_new();
172
173    tcg_gen_mulu2_tl(discard, ret, s1, s2);
174    tcg_temp_free(discard);
175}
176
/*
 * Translate MULHU: after the REQUIRE_EXT(ctx, RVM) check, dispatch
 * through gen_arith_per_ol with zero-extended operands, supplying
 * gen_mulhu, gen_mulh_w and gen_mulhu_i128 as the generators.
 */
static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
{
    REQUIRE_EXT(ctx, RVM);
    /* gen_mulh_w works for either sign as input. */
    return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
                            gen_mulhu_i128);
}
184
/*
 * Emit a 128-bit signed division by calling the divs_i128 helper with
 * both halves of each operand.  The helper's return value becomes rdl;
 * rdh is then loaded from the retxh field of CPURISCVState.
 *
 * NOTE(review): presumably the helper stores the upper half of its result
 * in env->retxh before returning -- confirm against the helper.
 */
static void gen_div_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divs_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
191
192static void gen_div(TCGv ret, TCGv source1, TCGv source2)
193{
194    TCGv temp1, temp2, zero, one, mone, min;
195
196    temp1 = tcg_temp_new();
197    temp2 = tcg_temp_new();
198    zero = tcg_constant_tl(0);
199    one = tcg_constant_tl(1);
200    mone = tcg_constant_tl(-1);
201    min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));
202
203    /*
204     * If overflow, set temp2 to 1, else source2.
205     * This produces the required result of min.
206     */
207    tcg_gen_setcond_tl(TCG_COND_EQ, temp1, source1, min);
208    tcg_gen_setcond_tl(TCG_COND_EQ, temp2, source2, mone);
209    tcg_gen_and_tl(temp1, temp1, temp2);
210    tcg_gen_movcond_tl(TCG_COND_NE, temp2, temp1, zero, one, source2);
211
212    /*
213     * If div by zero, set temp1 to -1 and temp2 to 1 to
214     * produce the required result of -1.
215     */
216    tcg_gen_movcond_tl(TCG_COND_EQ, temp1, source2, zero, mone, source1);
217    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, temp2);
218
219    tcg_gen_div_tl(ret, temp1, temp2);
220
221    tcg_temp_free(temp1);
222    tcg_temp_free(temp2);
223}
224
/*
 * Translate DIV: after the REQUIRE_EXT(ctx, RVM) check, hand off to
 * gen_arith with sign-extended operands, gen_div as the word-sized
 * generator and gen_div_i128 as the 128-bit generator.
 */
static bool trans_div(DisasContext *ctx, arg_div *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_SIGN, gen_div, gen_div_i128);
}
230
/*
 * Emit a 128-bit unsigned division by calling the divu_i128 helper with
 * both halves of each operand.  The helper's return value becomes rdl;
 * rdh is then loaded from the retxh field of CPURISCVState.
 *
 * NOTE(review): presumably the helper stores the upper half of its result
 * in env->retxh before returning -- confirm against the helper.
 */
static void gen_divu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
237
238static void gen_divu(TCGv ret, TCGv source1, TCGv source2)
239{
240    TCGv temp1, temp2, zero, one, max;
241
242    temp1 = tcg_temp_new();
243    temp2 = tcg_temp_new();
244    zero = tcg_constant_tl(0);
245    one = tcg_constant_tl(1);
246    max = tcg_constant_tl(~0);
247
248    /*
249     * If div by zero, set temp1 to max and temp2 to 1 to
250     * produce the required result of max.
251     */
252    tcg_gen_movcond_tl(TCG_COND_EQ, temp1, source2, zero, max, source1);
253    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, source2);
254    tcg_gen_divu_tl(ret, temp1, temp2);
255
256    tcg_temp_free(temp1);
257    tcg_temp_free(temp2);
258}
259
/*
 * Translate DIVU: after the REQUIRE_EXT(ctx, RVM) check, hand off to
 * gen_arith with zero-extended operands, gen_divu as the word-sized
 * generator and gen_divu_i128 as the 128-bit generator.
 */
static bool trans_divu(DisasContext *ctx, arg_divu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, gen_divu_i128);
}
265
/*
 * Emit a 128-bit signed remainder by calling the rems_i128 helper with
 * both halves of each operand.  The helper's return value becomes rdl;
 * rdh is then loaded from the retxh field of CPURISCVState.
 *
 * NOTE(review): presumably the helper stores the upper half of its result
 * in env->retxh before returning -- confirm against the helper.
 */
static void gen_rem_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_rems_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
272
273static void gen_rem(TCGv ret, TCGv source1, TCGv source2)
274{
275    TCGv temp1, temp2, zero, one, mone, min;
276
277    temp1 = tcg_temp_new();
278    temp2 = tcg_temp_new();
279    zero = tcg_constant_tl(0);
280    one = tcg_constant_tl(1);
281    mone = tcg_constant_tl(-1);
282    min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));
283
284    /*
285     * If overflow, set temp1 to 0, else source1.
286     * This avoids a possible host trap, and produces the required result of 0.
287     */
288    tcg_gen_setcond_tl(TCG_COND_EQ, temp1, source1, min);
289    tcg_gen_setcond_tl(TCG_COND_EQ, temp2, source2, mone);
290    tcg_gen_and_tl(temp1, temp1, temp2);
291    tcg_gen_movcond_tl(TCG_COND_NE, temp1, temp1, zero, zero, source1);
292
293    /*
294     * If div by zero, set temp2 to 1, else source2.
295     * This avoids a possible host trap, but produces an incorrect result.
296     */
297    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, source2);
298
299    tcg_gen_rem_tl(temp1, temp1, temp2);
300
301    /* If div by zero, the required result is the original dividend. */
302    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, temp1);
303
304    tcg_temp_free(temp1);
305    tcg_temp_free(temp2);
306}
307
/*
 * Translate REM: after the REQUIRE_EXT(ctx, RVM) check, hand off to
 * gen_arith with sign-extended operands, gen_rem as the word-sized
 * generator and gen_rem_i128 as the 128-bit generator.
 */
static bool trans_rem(DisasContext *ctx, arg_rem *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, gen_rem_i128);
}
313
/*
 * Emit a 128-bit unsigned remainder by calling the remu_i128 helper with
 * both halves of each operand.  The helper's return value becomes rdl;
 * rdh is then loaded from the retxh field of CPURISCVState.
 *
 * NOTE(review): presumably the helper stores the upper half of its result
 * in env->retxh before returning -- confirm against the helper.
 */
static void gen_remu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_remu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
320
321static void gen_remu(TCGv ret, TCGv source1, TCGv source2)
322{
323    TCGv temp, zero, one;
324
325    temp = tcg_temp_new();
326    zero = tcg_constant_tl(0);
327    one = tcg_constant_tl(1);
328
329    /*
330     * If div by zero, set temp to 1, else source2.
331     * This avoids a possible host trap, but produces an incorrect result.
332     */
333    tcg_gen_movcond_tl(TCG_COND_EQ, temp, source2, zero, one, source2);
334
335    tcg_gen_remu_tl(temp, source1, temp);
336
337    /* If div by zero, the required result is the original dividend. */
338    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, temp);
339
340    tcg_temp_free(temp);
341}
342
/*
 * Translate REMU: after the REQUIRE_EXT(ctx, RVM) check, hand off to
 * gen_arith with zero-extended operands, gen_remu as the word-sized
 * generator and gen_remu_i128 as the 128-bit generator.
 */
static bool trans_remu(DisasContext *ctx, arg_remu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, gen_remu_i128);
}
348
/*
 * Translate MULW: guarded by REQUIRE_64_OR_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV32 and then calls gen_arith with no operand
 * extension and tcg_gen_mul_tl; no 128-bit generator is supplied.
 *
 * NOTE(review): presumably ctx->ol selects a 32-bit operation length for
 * this instruction inside gen_arith -- confirm.
 */
static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
}
356
/*
 * Translate DIVW: guarded by REQUIRE_64_OR_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV32 and then calls gen_arith with sign-extended
 * operands and gen_div; no 128-bit generator is supplied.
 */
static bool trans_divw(DisasContext *ctx, arg_divw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}
364
/*
 * Translate DIVUW: guarded by REQUIRE_64_OR_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV32 and then calls gen_arith with zero-extended
 * operands and gen_divu; no 128-bit generator is supplied.
 */
static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}
372
/*
 * Translate REMW: guarded by REQUIRE_64_OR_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV32 and then calls gen_arith with sign-extended
 * operands and gen_rem; no 128-bit generator is supplied.
 */
static bool trans_remw(DisasContext *ctx, arg_remw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}
380
/*
 * Translate REMUW: guarded by REQUIRE_64_OR_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV32 and then calls gen_arith with zero-extended
 * operands and gen_remu; no 128-bit generator is supplied.
 */
static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}
388
/*
 * Translate MULD: guarded by REQUIRE_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV64 and then calls gen_arith with sign-extended
 * operands and tcg_gen_mul_tl; no 128-bit generator is supplied.
 *
 * NOTE(review): presumably ctx->ol selects a 64-bit operation length for
 * this instruction inside gen_arith -- confirm.
 */
static bool trans_muld(DisasContext *ctx, arg_muld *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
}
396
/*
 * Translate DIVD: guarded by REQUIRE_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV64 and then calls gen_arith with sign-extended
 * operands and gen_div; no 128-bit generator is supplied.
 */
static bool trans_divd(DisasContext *ctx, arg_divd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}
404
/*
 * Translate DIVUD: guarded by REQUIRE_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV64 and then calls gen_arith with zero-extended
 * operands and gen_divu; no 128-bit generator is supplied.
 */
static bool trans_divud(DisasContext *ctx, arg_divud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}
412
/*
 * Translate REMD: guarded by REQUIRE_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV64 and then calls gen_arith with sign-extended
 * operands and gen_rem; no 128-bit generator is supplied.
 */
static bool trans_remd(DisasContext *ctx, arg_remd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}
420
/*
 * Translate REMUD: guarded by REQUIRE_128BIT and REQUIRE_EXT(RVM).
 * Sets ctx->ol to MXL_RV64 and then calls gen_arith with zero-extended
 * operands and gen_remu; no 128-bit generator is supplied.
 */
static bool trans_remud(DisasContext *ctx, arg_remud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}
428