xref: /qemu/target/hexagon/translate.c (revision b2a3cbb8)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #define QEMU_GENERATE
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/cpu_ldst.h"
24 #include "exec/log.h"
25 #include "internal.h"
26 #include "attribs.h"
27 #include "insn.h"
28 #include "decode.h"
29 #include "translate.h"
30 #include "printinsn.h"
31 
32 TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
33 TCGv hex_pred[NUM_PREGS];
34 TCGv hex_next_PC;
35 TCGv hex_this_PC;
36 TCGv hex_slot_cancelled;
37 TCGv hex_branch_taken;
38 TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
39 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
40 TCGv hex_new_pred_value[NUM_PREGS];
41 TCGv hex_pred_written;
42 TCGv hex_store_addr[STORES_MAX];
43 TCGv hex_store_width[STORES_MAX];
44 TCGv hex_store_val32[STORES_MAX];
45 TCGv_i64 hex_store_val64[STORES_MAX];
46 TCGv hex_pkt_has_store_s1;
47 TCGv hex_dczero_addr;
48 TCGv hex_llsc_addr;
49 TCGv hex_llsc_val;
50 TCGv_i64 hex_llsc_val_i64;
51 TCGv hex_VRegs_updated;
52 TCGv hex_QRegs_updated;
53 TCGv hex_vstore_addr[VSTORES_MAX];
54 TCGv hex_vstore_size[VSTORES_MAX];
55 TCGv hex_vstore_pending[VSTORES_MAX];
56 
/* Names given to the predicate register TCG globals, indexed by number */
static const char * const hexagon_prednames[] = {
    "p0",
    "p1",
    "p2",
    "p3",
};
60 
61 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
62                           int num, bool alloc_ok)
63 {
64     intptr_t offset;
65 
66     /* See if it is already allocated */
67     for (int i = 0; i < ctx->future_vregs_idx; i++) {
68         if (ctx->future_vregs_num[i] == regnum) {
69             return offsetof(CPUHexagonState, future_VRegs[i]);
70         }
71     }
72 
73     g_assert(alloc_ok);
74     offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
75     for (int i = 0; i < num; i++) {
76         ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
77     }
78     ctx->future_vregs_idx += num;
79     g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
80     return offset;
81 }
82 
83 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
84                           int num, bool alloc_ok)
85 {
86     intptr_t offset;
87 
88     /* See if it is already allocated */
89     for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
90         if (ctx->tmp_vregs_num[i] == regnum) {
91             return offsetof(CPUHexagonState, tmp_VRegs[i]);
92         }
93     }
94 
95     g_assert(alloc_ok);
96     offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
97     for (int i = 0; i < num; i++) {
98         ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
99     }
100     ctx->tmp_vregs_idx += num;
101     g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
102     return offset;
103 }
104 
105 static void gen_exception_raw(int excp)
106 {
107     gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
108 }
109 
110 static void gen_exec_counters(DisasContext *ctx)
111 {
112     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
113                     hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
114     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
115                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
116     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
117                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
118 }
119 
120 static void gen_end_tb(DisasContext *ctx)
121 {
122     gen_exec_counters(ctx);
123     tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
124     tcg_gen_exit_tb(NULL, 0);
125     ctx->base.is_jmp = DISAS_NORETURN;
126 }
127 
128 static void gen_exception_end_tb(DisasContext *ctx, int excp)
129 {
130     gen_exec_counters(ctx);
131     tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
132     gen_exception_raw(excp);
133     ctx->base.is_jmp = DISAS_NORETURN;
134 
135 }
136 
137 #define PACKET_BUFFER_LEN              1028
138 static void print_pkt(Packet *pkt)
139 {
140     GString *buf = g_string_sized_new(PACKET_BUFFER_LEN);
141     snprint_a_pkt_debug(buf, pkt);
142     HEX_DEBUG_LOG("%s", buf->str);
143     g_string_free(buf, true);
144 }
145 #define HEX_DEBUG_PRINT_PKT(pkt) \
146     do { \
147         if (HEX_DEBUG) { \
148             print_pkt(pkt); \
149         } \
150     } while (0)
151 
152 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
153                              uint32_t words[])
154 {
155     bool found_end = false;
156     int nwords, max_words;
157 
158     memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
159     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
160         words[nwords] =
161             translator_ldl(env, &ctx->base,
162                            ctx->base.pc_next + nwords * sizeof(uint32_t));
163         found_end = is_packet_end(words[nwords]);
164     }
165     if (!found_end) {
166         /* Read too many words without finding the end */
167         return 0;
168     }
169 
170     /* Check for page boundary crossing */
171     max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
172     if (nwords > max_words) {
173         /* We can only cross a page boundary at the beginning of a TB */
174         g_assert(ctx->base.num_insns == 1);
175     }
176 
177     HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next);
178     HEX_DEBUG_LOG("    words = { ");
179     for (int i = 0; i < nwords; i++) {
180         HEX_DEBUG_LOG("0x%x, ", words[i]);
181     }
182     HEX_DEBUG_LOG("}\n");
183 
184     return nwords;
185 }
186 
187 static bool check_for_attrib(Packet *pkt, int attrib)
188 {
189     for (int i = 0; i < pkt->num_insns; i++) {
190         if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
191             return true;
192         }
193     }
194     return false;
195 }
196 
197 static bool need_pc(Packet *pkt)
198 {
199     return check_for_attrib(pkt, A_IMPLICIT_READS_PC);
200 }
201 
202 static bool need_slot_cancelled(Packet *pkt)
203 {
204     return check_for_attrib(pkt, A_CONDEXEC);
205 }
206 
207 static bool need_pred_written(Packet *pkt)
208 {
209     return check_for_attrib(pkt, A_WRITES_PRED_REG);
210 }
211 
212 static void gen_start_packet(DisasContext *ctx, Packet *pkt)
213 {
214     target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
215     int i;
216 
217     /* Clear out the disassembly context */
218     ctx->reg_log_idx = 0;
219     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
220     ctx->preg_log_idx = 0;
221     bitmap_zero(ctx->pregs_written, NUM_PREGS);
222     ctx->future_vregs_idx = 0;
223     ctx->tmp_vregs_idx = 0;
224     ctx->vreg_log_idx = 0;
225     bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
226     bitmap_zero(ctx->vregs_updated, NUM_VREGS);
227     bitmap_zero(ctx->vregs_select, NUM_VREGS);
228     ctx->qreg_log_idx = 0;
229     for (i = 0; i < STORES_MAX; i++) {
230         ctx->store_width[i] = 0;
231     }
232     tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
233     ctx->s1_store_processed = false;
234     ctx->pre_commit = true;
235 
236     if (HEX_DEBUG) {
237         /* Handy place to set a breakpoint before the packet executes */
238         gen_helper_debug_start_packet(cpu_env);
239         tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
240     }
241 
242     /* Initialize the runtime state for packet semantics */
243     if (need_pc(pkt)) {
244         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
245     }
246     if (need_slot_cancelled(pkt)) {
247         tcg_gen_movi_tl(hex_slot_cancelled, 0);
248     }
249     if (pkt->pkt_has_cof) {
250         tcg_gen_movi_tl(hex_branch_taken, 0);
251         tcg_gen_movi_tl(hex_next_PC, next_PC);
252     }
253     if (need_pred_written(pkt)) {
254         tcg_gen_movi_tl(hex_pred_written, 0);
255     }
256 
257     if (pkt->pkt_has_hvx) {
258         tcg_gen_movi_tl(hex_VRegs_updated, 0);
259         tcg_gen_movi_tl(hex_QRegs_updated, 0);
260     }
261 }
262 
263 bool is_gather_store_insn(Insn *insn, Packet *pkt)
264 {
265     if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
266         insn->new_value_producer_slot == 1) {
267         /* Look for gather instruction */
268         for (int i = 0; i < pkt->num_insns; i++) {
269             Insn *in = &pkt->insn[i];
270             if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
271                 return true;
272             }
273         }
274     }
275     return false;
276 }
277 
278 /*
279  * The LOG_*_WRITE macros mark most of the writes in a packet
280  * However, there are some implicit writes marked as attributes
281  * of the applicable instructions.
282  */
283 static void mark_implicit_reg_write(DisasContext *ctx, Insn *insn,
284                                     int attrib, int rnum)
285 {
286     if (GET_ATTRIB(insn->opcode, attrib)) {
287         /*
288          * USR is used to set overflow and FP exceptions,
289          * so treat it as conditional
290          */
291         bool is_predicated = GET_ATTRIB(insn->opcode, A_CONDEXEC) ||
292                              rnum == HEX_REG_USR;
293         if (is_predicated && !is_preloaded(ctx, rnum)) {
294             tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]);
295         }
296 
297         ctx_log_reg_write(ctx, rnum);
298     }
299 }
300 
301 static void mark_implicit_pred_write(DisasContext *ctx, Insn *insn,
302                                      int attrib, int pnum)
303 {
304     if (GET_ATTRIB(insn->opcode, attrib)) {
305         ctx_log_pred_write(ctx, pnum);
306     }
307 }
308 
309 static void mark_implicit_reg_writes(DisasContext *ctx, Insn *insn)
310 {
311     mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
312     mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
313     mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
314     mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
315     mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
316     mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
317     mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
318     mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
319     mark_implicit_reg_write(ctx, insn, A_FPOP, HEX_REG_USR);
320 }
321 
322 static void mark_implicit_pred_writes(DisasContext *ctx, Insn *insn)
323 {
324     mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P0, 0);
325     mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P1, 1);
326     mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P2, 2);
327     mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P3, 3);
328 }
329 
330 static void mark_store_width(DisasContext *ctx, Insn *insn)
331 {
332     uint16_t opcode = insn->opcode;
333     uint32_t slot = insn->slot;
334     uint8_t width = 0;
335 
336     if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
337         if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
338             width |= 1;
339         }
340         if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
341             width |= 2;
342         }
343         if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
344             width |= 4;
345         }
346         if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
347             width |= 8;
348         }
349         tcg_debug_assert(is_power_of_2(width));
350         ctx->store_width[slot] = width;
351     }
352 }
353 
354 static void gen_insn(CPUHexagonState *env, DisasContext *ctx,
355                      Insn *insn, Packet *pkt)
356 {
357     if (insn->generate) {
358         mark_implicit_reg_writes(ctx, insn);
359         insn->generate(env, ctx, insn, pkt);
360         mark_implicit_pred_writes(ctx, insn);
361         mark_store_width(ctx, insn);
362     } else {
363         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
364     }
365 }
366 
367 /*
368  * Helpers for generating the packet commit
369  */
370 static void gen_reg_writes(DisasContext *ctx)
371 {
372     int i;
373 
374     for (i = 0; i < ctx->reg_log_idx; i++) {
375         int reg_num = ctx->reg_log[i];
376 
377         tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);
378     }
379 }
380 
381 static void gen_pred_writes(DisasContext *ctx, Packet *pkt)
382 {
383     int i;
384 
385     /* Early exit if the log is empty */
386     if (!ctx->preg_log_idx) {
387         return;
388     }
389 
390     /*
391      * Only endloop instructions will conditionally
392      * write a predicate.  If there are no endloop
393      * instructions, we can use the non-conditional
394      * write of the predicates.
395      */
396     if (pkt->pkt_has_endloop) {
397         TCGv zero = tcg_constant_tl(0);
398         TCGv pred_written = tcg_temp_new();
399         for (i = 0; i < ctx->preg_log_idx; i++) {
400             int pred_num = ctx->preg_log[i];
401 
402             tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num);
403             tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num],
404                                pred_written, zero,
405                                hex_new_pred_value[pred_num],
406                                hex_pred[pred_num]);
407         }
408         tcg_temp_free(pred_written);
409     } else {
410         for (i = 0; i < ctx->preg_log_idx; i++) {
411             int pred_num = ctx->preg_log[i];
412             tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]);
413             if (HEX_DEBUG) {
414                 /* Do this so HELPER(debug_commit_end) will know */
415                 tcg_gen_ori_tl(hex_pred_written, hex_pred_written,
416                                1 << pred_num);
417             }
418         }
419     }
420 }
421 
422 static void gen_check_store_width(DisasContext *ctx, int slot_num)
423 {
424     if (HEX_DEBUG) {
425         TCGv slot = tcg_constant_tl(slot_num);
426         TCGv check = tcg_constant_tl(ctx->store_width[slot_num]);
427         gen_helper_debug_check_store_width(cpu_env, slot, check);
428     }
429 }
430 
431 static bool slot_is_predicated(Packet *pkt, int slot_num)
432 {
433     for (int i = 0; i < pkt->num_insns; i++) {
434         if (pkt->insn[i].slot == slot_num) {
435             return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
436         }
437     }
438     /* If we get to here, we didn't find an instruction in the requested slot */
439     g_assert_not_reached();
440 }
441 
442 void process_store(DisasContext *ctx, Packet *pkt, int slot_num)
443 {
444     bool is_predicated = slot_is_predicated(pkt, slot_num);
445     TCGLabel *label_end = NULL;
446 
447     /*
448      * We may have already processed this store
449      * See CHECK_NOSHUF in macros.h
450      */
451     if (slot_num == 1 && ctx->s1_store_processed) {
452         return;
453     }
454     ctx->s1_store_processed = true;
455 
456     if (is_predicated) {
457         TCGv cancelled = tcg_temp_new();
458         label_end = gen_new_label();
459 
460         /* Don't do anything if the slot was cancelled */
461         tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
462         tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
463         tcg_temp_free(cancelled);
464     }
465     {
466         TCGv address = tcg_temp_local_new();
467         tcg_gen_mov_tl(address, hex_store_addr[slot_num]);
468 
469         /*
470          * If we know the width from the DisasContext, we can
471          * generate much cleaner code.
472          * Unfortunately, not all instructions execute the fSTORE
473          * macro during code generation.  Anything that uses the
474          * generic helper will have this problem.  Instructions
475          * that use fWRAP to generate proper TCG code will be OK.
476          */
477         switch (ctx->store_width[slot_num]) {
478         case 1:
479             gen_check_store_width(ctx, slot_num);
480             tcg_gen_qemu_st8(hex_store_val32[slot_num],
481                              hex_store_addr[slot_num],
482                              ctx->mem_idx);
483             break;
484         case 2:
485             gen_check_store_width(ctx, slot_num);
486             tcg_gen_qemu_st16(hex_store_val32[slot_num],
487                               hex_store_addr[slot_num],
488                               ctx->mem_idx);
489             break;
490         case 4:
491             gen_check_store_width(ctx, slot_num);
492             tcg_gen_qemu_st32(hex_store_val32[slot_num],
493                               hex_store_addr[slot_num],
494                               ctx->mem_idx);
495             break;
496         case 8:
497             gen_check_store_width(ctx, slot_num);
498             tcg_gen_qemu_st64(hex_store_val64[slot_num],
499                               hex_store_addr[slot_num],
500                               ctx->mem_idx);
501             break;
502         default:
503             {
504                 /*
505                  * If we get to here, we don't know the width at
506                  * TCG generation time, we'll use a helper to
507                  * avoid branching based on the width at runtime.
508                  */
509                 TCGv slot = tcg_constant_tl(slot_num);
510                 gen_helper_commit_store(cpu_env, slot);
511             }
512         }
513         tcg_temp_free(address);
514     }
515     if (is_predicated) {
516         gen_set_label(label_end);
517     }
518 }
519 
520 static void process_store_log(DisasContext *ctx, Packet *pkt)
521 {
522     /*
523      *  When a packet has two stores, the hardware processes
524      *  slot 1 and then slot 0.  This will be important when
525      *  the memory accesses overlap.
526      */
527     if (pkt->pkt_has_store_s1) {
528         g_assert(!pkt->pkt_has_dczeroa);
529         process_store(ctx, pkt, 1);
530     }
531     if (pkt->pkt_has_store_s0) {
532         g_assert(!pkt->pkt_has_dczeroa);
533         process_store(ctx, pkt, 0);
534     }
535 }
536 
537 /* Zero out a 32-bit cache line */
538 static void process_dczeroa(DisasContext *ctx, Packet *pkt)
539 {
540     if (pkt->pkt_has_dczeroa) {
541         /* Store 32 bytes of zero starting at (addr & ~0x1f) */
542         TCGv addr = tcg_temp_new();
543         TCGv_i64 zero = tcg_constant_i64(0);
544 
545         tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f);
546         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
547         tcg_gen_addi_tl(addr, addr, 8);
548         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
549         tcg_gen_addi_tl(addr, addr, 8);
550         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
551         tcg_gen_addi_tl(addr, addr, 8);
552         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
553 
554         tcg_temp_free(addr);
555     }
556 }
557 
558 static bool pkt_has_hvx_store(Packet *pkt)
559 {
560     int i;
561     for (i = 0; i < pkt->num_insns; i++) {
562         int opcode = pkt->insn[i].opcode;
563         if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
564             return true;
565         }
566     }
567     return false;
568 }
569 
570 static void gen_commit_hvx(DisasContext *ctx, Packet *pkt)
571 {
572     int i;
573 
574     /*
575      *    for (i = 0; i < ctx->vreg_log_idx; i++) {
576      *        int rnum = ctx->vreg_log[i];
577      *        if (ctx->vreg_is_predicated[i]) {
578      *            if (env->VRegs_updated & (1 << rnum)) {
579      *                env->VRegs[rnum] = env->future_VRegs[rnum];
580      *            }
581      *        } else {
582      *            env->VRegs[rnum] = env->future_VRegs[rnum];
583      *        }
584      *    }
585      */
586     for (i = 0; i < ctx->vreg_log_idx; i++) {
587         int rnum = ctx->vreg_log[i];
588         bool is_predicated = ctx->vreg_is_predicated[i];
589         intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
590         intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
591         size_t size = sizeof(MMVector);
592 
593         if (is_predicated) {
594             TCGv cmp = tcg_temp_new();
595             TCGLabel *label_skip = gen_new_label();
596 
597             tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum);
598             tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
599             tcg_temp_free(cmp);
600             tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
601             gen_set_label(label_skip);
602         } else {
603             tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
604         }
605     }
606 
607     /*
608      *    for (i = 0; i < ctx->qreg_log_idx; i++) {
609      *        int rnum = ctx->qreg_log[i];
610      *        if (ctx->qreg_is_predicated[i]) {
611      *            if (env->QRegs_updated) & (1 << rnum)) {
612      *                env->QRegs[rnum] = env->future_QRegs[rnum];
613      *            }
614      *        } else {
615      *            env->QRegs[rnum] = env->future_QRegs[rnum];
616      *        }
617      *    }
618      */
619     for (i = 0; i < ctx->qreg_log_idx; i++) {
620         int rnum = ctx->qreg_log[i];
621         bool is_predicated = ctx->qreg_is_predicated[i];
622         intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
623         intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
624         size_t size = sizeof(MMQReg);
625 
626         if (is_predicated) {
627             TCGv cmp = tcg_temp_new();
628             TCGLabel *label_skip = gen_new_label();
629 
630             tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum);
631             tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
632             tcg_temp_free(cmp);
633             tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
634             gen_set_label(label_skip);
635         } else {
636             tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
637         }
638     }
639 
640     if (pkt_has_hvx_store(pkt)) {
641         gen_helper_commit_hvx_stores(cpu_env);
642     }
643 }
644 
645 static void update_exec_counters(DisasContext *ctx, Packet *pkt)
646 {
647     int num_insns = pkt->num_insns;
648     int num_real_insns = 0;
649     int num_hvx_insns = 0;
650 
651     for (int i = 0; i < num_insns; i++) {
652         if (!pkt->insn[i].is_endloop &&
653             !pkt->insn[i].part1 &&
654             !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
655             num_real_insns++;
656         }
657         if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
658             num_hvx_insns++;
659         }
660     }
661 
662     ctx->num_packets++;
663     ctx->num_insns += num_real_insns;
664     ctx->num_hvx_insns += num_hvx_insns;
665 }
666 
667 static void gen_commit_packet(CPUHexagonState *env, DisasContext *ctx,
668                               Packet *pkt)
669 {
670     /*
671      * If there is more than one store in a packet, make sure they are all OK
672      * before proceeding with the rest of the packet commit.
673      *
674      * dczeroa has to be the only store operation in the packet, so we go
675      * ahead and process that first.
676      *
677      * When there is an HVX store, there can also be a scalar store in either
678      * slot 0 or slot1, so we create a mask for the helper to indicate what
679      * work to do.
680      *
681      * When there are two scalar stores, we probe the one in slot 0.
682      *
683      * Note that we don't call the probe helper for packets with only one
684      * store.  Therefore, we call process_store_log before anything else
685      * involved in committing the packet.
686      */
687     bool has_store_s0 = pkt->pkt_has_store_s0;
688     bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
689     bool has_hvx_store = pkt_has_hvx_store(pkt);
690     if (pkt->pkt_has_dczeroa) {
691         /*
692          * The dczeroa will be the store in slot 0, check that we don't have
693          * a store in slot 1 or an HVX store.
694          */
695         g_assert(!has_store_s1 && !has_hvx_store);
696         process_dczeroa(ctx, pkt);
697     } else if (has_hvx_store) {
698         TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
699 
700         if (!has_store_s0 && !has_store_s1) {
701             gen_helper_probe_hvx_stores(cpu_env, mem_idx);
702         } else {
703             int mask = 0;
704             TCGv mask_tcgv;
705 
706             if (has_store_s0) {
707                 mask |= (1 << 0);
708             }
709             if (has_store_s1) {
710                 mask |= (1 << 1);
711             }
712             if (has_hvx_store) {
713                 mask |= (1 << 2);
714             }
715             mask_tcgv = tcg_constant_tl(mask);
716             gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx);
717         }
718     } else if (has_store_s0 && has_store_s1) {
719         /*
720          * process_store_log will execute the slot 1 store first,
721          * so we only have to probe the store in slot 0
722          */
723         TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
724         gen_helper_probe_pkt_scalar_store_s0(cpu_env, mem_idx);
725     }
726 
727     process_store_log(ctx, pkt);
728 
729     gen_reg_writes(ctx);
730     gen_pred_writes(ctx, pkt);
731     if (pkt->pkt_has_hvx) {
732         gen_commit_hvx(ctx, pkt);
733     }
734     update_exec_counters(ctx, pkt);
735     if (HEX_DEBUG) {
736         TCGv has_st0 =
737             tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa);
738         TCGv has_st1 =
739             tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa);
740 
741         /* Handy place to set a breakpoint at the end of execution */
742         gen_helper_debug_commit_end(cpu_env, has_st0, has_st1);
743     }
744 
745     if (pkt->vhist_insn != NULL) {
746         ctx->pre_commit = false;
747         pkt->vhist_insn->generate(env, ctx, pkt->vhist_insn, pkt);
748     }
749 
750     if (pkt->pkt_has_cof) {
751         gen_end_tb(ctx);
752     }
753 }
754 
755 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
756 {
757     uint32_t words[PACKET_WORDS_MAX];
758     int nwords;
759     Packet pkt;
760     int i;
761 
762     nwords = read_packet_words(env, ctx, words);
763     if (!nwords) {
764         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
765         return;
766     }
767 
768     if (decode_packet(nwords, words, &pkt, false) > 0) {
769         HEX_DEBUG_PRINT_PKT(&pkt);
770         gen_start_packet(ctx, &pkt);
771         for (i = 0; i < pkt.num_insns; i++) {
772             gen_insn(env, ctx, &pkt.insn[i], &pkt);
773         }
774         gen_commit_packet(env, ctx, &pkt);
775         ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
776     } else {
777         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
778     }
779 }
780 
781 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
782                                           CPUState *cs)
783 {
784     DisasContext *ctx = container_of(dcbase, DisasContext, base);
785 
786     ctx->mem_idx = MMU_USER_IDX;
787     ctx->num_packets = 0;
788     ctx->num_insns = 0;
789     ctx->num_hvx_insns = 0;
790 }
791 
792 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
793 {
794 }
795 
796 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
797 {
798     DisasContext *ctx = container_of(dcbase, DisasContext, base);
799 
800     tcg_gen_insn_start(ctx->base.pc_next);
801 }
802 
803 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
804 {
805     target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
806     bool found_end = false;
807     int nwords;
808 
809     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
810         uint32_t word = cpu_ldl_code(env,
811                             ctx->base.pc_next + nwords * sizeof(uint32_t));
812         found_end = is_packet_end(word);
813     }
814     uint32_t next_ptr =  ctx->base.pc_next + nwords * sizeof(uint32_t);
815     return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
816 }
817 
818 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
819 {
820     DisasContext *ctx = container_of(dcbase, DisasContext, base);
821     CPUHexagonState *env = cpu->env_ptr;
822 
823     decode_and_translate_packet(env, ctx);
824 
825     if (ctx->base.is_jmp == DISAS_NEXT) {
826         target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
827         target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);
828 
829         if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
830             (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
831              pkt_crosses_page(env, ctx))) {
832             ctx->base.is_jmp = DISAS_TOO_MANY;
833         }
834 
835         /*
836          * The CPU log is used to compare against LLDB single stepping,
837          * so end the TLB after every packet.
838          */
839         HexagonCPU *hex_cpu = env_archcpu(env);
840         if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
841             ctx->base.is_jmp = DISAS_TOO_MANY;
842         }
843     }
844 }
845 
846 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
847 {
848     DisasContext *ctx = container_of(dcbase, DisasContext, base);
849 
850     switch (ctx->base.is_jmp) {
851     case DISAS_TOO_MANY:
852         gen_exec_counters(ctx);
853         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
854         tcg_gen_exit_tb(NULL, 0);
855         break;
856     case DISAS_NORETURN:
857         break;
858     default:
859         g_assert_not_reached();
860     }
861 }
862 
863 static void hexagon_tr_disas_log(const DisasContextBase *dcbase,
864                                  CPUState *cpu, FILE *logfile)
865 {
866     fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
867     target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
868 }
869 
870 
871 static const TranslatorOps hexagon_tr_ops = {
872     .init_disas_context = hexagon_tr_init_disas_context,
873     .tb_start           = hexagon_tr_tb_start,
874     .insn_start         = hexagon_tr_insn_start,
875     .translate_insn     = hexagon_tr_translate_packet,
876     .tb_stop            = hexagon_tr_tb_stop,
877     .disas_log          = hexagon_tr_disas_log,
878 };
879 
880 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
881                            target_ulong pc, void *host_pc)
882 {
883     DisasContext ctx;
884 
885     translator_loop(cs, tb, max_insns, pc, host_pc,
886                     &hexagon_tr_ops, &ctx.base);
887 }
888 
889 #define NAME_LEN               64
890 static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
891 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
892 static char new_pred_value_names[NUM_PREGS][NAME_LEN];
893 static char store_addr_names[STORES_MAX][NAME_LEN];
894 static char store_width_names[STORES_MAX][NAME_LEN];
895 static char store_val32_names[STORES_MAX][NAME_LEN];
896 static char store_val64_names[STORES_MAX][NAME_LEN];
897 static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
898 static char vstore_size_names[VSTORES_MAX][NAME_LEN];
899 static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
900 
901 void hexagon_translate_init(void)
902 {
903     int i;
904 
905     opcode_init();
906 
907     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
908         hex_gpr[i] = tcg_global_mem_new(cpu_env,
909             offsetof(CPUHexagonState, gpr[i]),
910             hexagon_regnames[i]);
911 
912         snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
913         hex_new_value[i] = tcg_global_mem_new(cpu_env,
914             offsetof(CPUHexagonState, new_value[i]),
915             new_value_names[i]);
916 
917         if (HEX_DEBUG) {
918             snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
919                      hexagon_regnames[i]);
920             hex_reg_written[i] = tcg_global_mem_new(cpu_env,
921                 offsetof(CPUHexagonState, reg_written[i]),
922                 reg_written_names[i]);
923         }
924     }
925     for (i = 0; i < NUM_PREGS; i++) {
926         hex_pred[i] = tcg_global_mem_new(cpu_env,
927             offsetof(CPUHexagonState, pred[i]),
928             hexagon_prednames[i]);
929 
930         snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s",
931                  hexagon_prednames[i]);
932         hex_new_pred_value[i] = tcg_global_mem_new(cpu_env,
933             offsetof(CPUHexagonState, new_pred_value[i]),
934             new_pred_value_names[i]);
935     }
936     hex_pred_written = tcg_global_mem_new(cpu_env,
937         offsetof(CPUHexagonState, pred_written), "pred_written");
938     hex_next_PC = tcg_global_mem_new(cpu_env,
939         offsetof(CPUHexagonState, next_PC), "next_PC");
940     hex_this_PC = tcg_global_mem_new(cpu_env,
941         offsetof(CPUHexagonState, this_PC), "this_PC");
942     hex_slot_cancelled = tcg_global_mem_new(cpu_env,
943         offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
944     hex_branch_taken = tcg_global_mem_new(cpu_env,
945         offsetof(CPUHexagonState, branch_taken), "branch_taken");
946     hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
947         offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
948     hex_dczero_addr = tcg_global_mem_new(cpu_env,
949         offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
950     hex_llsc_addr = tcg_global_mem_new(cpu_env,
951         offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
952     hex_llsc_val = tcg_global_mem_new(cpu_env,
953         offsetof(CPUHexagonState, llsc_val), "llsc_val");
954     hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
955         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
956     hex_VRegs_updated = tcg_global_mem_new(cpu_env,
957         offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated");
958     hex_QRegs_updated = tcg_global_mem_new(cpu_env,
959         offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated");
960     for (i = 0; i < STORES_MAX; i++) {
961         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
962         hex_store_addr[i] = tcg_global_mem_new(cpu_env,
963             offsetof(CPUHexagonState, mem_log_stores[i].va),
964             store_addr_names[i]);
965 
966         snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
967         hex_store_width[i] = tcg_global_mem_new(cpu_env,
968             offsetof(CPUHexagonState, mem_log_stores[i].width),
969             store_width_names[i]);
970 
971         snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
972         hex_store_val32[i] = tcg_global_mem_new(cpu_env,
973             offsetof(CPUHexagonState, mem_log_stores[i].data32),
974             store_val32_names[i]);
975 
976         snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
977         hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env,
978             offsetof(CPUHexagonState, mem_log_stores[i].data64),
979             store_val64_names[i]);
980     }
981     for (int i = 0; i < VSTORES_MAX; i++) {
982         snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
983         hex_vstore_addr[i] = tcg_global_mem_new(cpu_env,
984             offsetof(CPUHexagonState, vstore[i].va),
985             vstore_addr_names[i]);
986 
987         snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
988         hex_vstore_size[i] = tcg_global_mem_new(cpu_env,
989             offsetof(CPUHexagonState, vstore[i].size),
990             vstore_size_names[i]);
991 
992         snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
993         hex_vstore_pending[i] = tcg_global_mem_new(cpu_env,
994             offsetof(CPUHexagonState, vstore_pending[i]),
995             vstore_pending_names[i]);
996     }
997 }
998