xref: /qemu/target/hexagon/translate.c (revision cc37d98b)
1 /*
2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #define QEMU_GENERATE
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/cpu_ldst.h"
24 #include "exec/log.h"
25 #include "internal.h"
26 #include "attribs.h"
27 #include "insn.h"
28 #include "decode.h"
29 #include "translate.h"
30 #include "printinsn.h"
31 
32 #include "analyze_funcs_generated.c.inc"
33 
/*
 * Signature of a per-opcode analysis routine.  analyze_packet() calls it
 * with ctx->insn pointing at the instruction being analyzed.
 */
typedef void (*AnalyzeInsn)(DisasContext *ctx);
/*
 * Dispatch table indexed by opcode.  Each OPCODE(X) entry pulled in from
 * opcodes_def_generated.h.inc expands to a designated initializer that
 * binds slot X to analyze_X.  analyze_packet() skips slots that are NULL.
 */
static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
#define OPCODE(X)    [X] = analyze_##X
#include "opcodes_def_generated.h.inc"
#undef OPCODE
};
40 
/*
 * TCG value handles used by the translator in this file.
 * NOTE(review): presumably bound to CPUHexagonState fields when the
 * translator is initialized; that code is not visible here - confirm.
 */
TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
TCGv hex_pred[NUM_PREGS];
TCGv hex_this_PC;
/* process_store() tests bit slot_num to see whether a slot was cancelled */
TCGv hex_slot_cancelled;
TCGv hex_branch_taken;
/* Pending register values; gen_reg_writes() copies them into hex_gpr */
TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
/* Pending predicate values; gen_pred_writes() copies them into hex_pred */
TCGv hex_new_pred_value[NUM_PREGS];
/* gen_pred_writes() treats this as a mask with bit (1 << pred_num) */
TCGv hex_pred_written;
/* Per-slot scalar store state consumed by process_store() */
TCGv hex_store_addr[STORES_MAX];
TCGv hex_store_width[STORES_MAX];
TCGv hex_store_val32[STORES_MAX];
TCGv_i64 hex_store_val64[STORES_MAX];
TCGv hex_pkt_has_store_s1;
/* Address read by process_dczeroa() */
TCGv hex_dczero_addr;
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64;
TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX];
62 
/*
 * Printable names of the predicate registers p0-p3.
 * NOTE(review): presumably indexed by predicate number; the user of this
 * table is not visible in this part of the file.
 */
static const char * const hexagon_prednames[] = {
  "p0", "p1", "p2", "p3"
};
66 
67 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
68                           int num, bool alloc_ok)
69 {
70     intptr_t offset;
71 
72     /* See if it is already allocated */
73     for (int i = 0; i < ctx->future_vregs_idx; i++) {
74         if (ctx->future_vregs_num[i] == regnum) {
75             return offsetof(CPUHexagonState, future_VRegs[i]);
76         }
77     }
78 
79     g_assert(alloc_ok);
80     offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
81     for (int i = 0; i < num; i++) {
82         ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
83     }
84     ctx->future_vregs_idx += num;
85     g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
86     return offset;
87 }
88 
89 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
90                           int num, bool alloc_ok)
91 {
92     intptr_t offset;
93 
94     /* See if it is already allocated */
95     for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
96         if (ctx->tmp_vregs_num[i] == regnum) {
97             return offsetof(CPUHexagonState, tmp_VRegs[i]);
98         }
99     }
100 
101     g_assert(alloc_ok);
102     offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
103     for (int i = 0; i < num; i++) {
104         ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
105     }
106     ctx->tmp_vregs_idx += num;
107     g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
108     return offset;
109 }
110 
111 static void gen_exception_raw(int excp)
112 {
113     gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
114 }
115 
116 static void gen_exec_counters(DisasContext *ctx)
117 {
118     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
119                     hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
120     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
121                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
122     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
123                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
124 }
125 
126 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
127 {
128     return translator_use_goto_tb(&ctx->base, dest);
129 }
130 
131 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest)
132 {
133     if (use_goto_tb(ctx, dest)) {
134         tcg_gen_goto_tb(idx);
135         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
136         tcg_gen_exit_tb(ctx->base.tb, idx);
137     } else {
138         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
139         tcg_gen_lookup_and_goto_ptr();
140     }
141 }
142 
143 static void gen_end_tb(DisasContext *ctx)
144 {
145     Packet *pkt = ctx->pkt;
146 
147     gen_exec_counters(ctx);
148 
149     if (ctx->branch_cond != TCG_COND_NEVER) {
150         if (ctx->branch_cond != TCG_COND_ALWAYS) {
151             TCGLabel *skip = gen_new_label();
152             tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip);
153             gen_goto_tb(ctx, 0, ctx->branch_dest);
154             gen_set_label(skip);
155             gen_goto_tb(ctx, 1, ctx->next_PC);
156         } else {
157             gen_goto_tb(ctx, 0, ctx->branch_dest);
158         }
159     } else if (ctx->is_tight_loop &&
160                pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
161         /*
162          * When we're in a tight loop, we defer the endloop0 processing
163          * to take advantage of direct block chaining
164          */
165         TCGLabel *skip = gen_new_label();
166         tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
167         tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
168         gen_goto_tb(ctx, 0, ctx->base.tb->pc);
169         gen_set_label(skip);
170         gen_goto_tb(ctx, 1, ctx->next_PC);
171     } else {
172         tcg_gen_lookup_and_goto_ptr();
173     }
174 
175     ctx->base.is_jmp = DISAS_NORETURN;
176 }
177 
178 static void gen_exception_end_tb(DisasContext *ctx, int excp)
179 {
180     gen_exec_counters(ctx);
181     tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
182     gen_exception_raw(excp);
183     ctx->base.is_jmp = DISAS_NORETURN;
184 
185 }
186 
187 #define PACKET_BUFFER_LEN              1028
188 static void print_pkt(Packet *pkt)
189 {
190     GString *buf = g_string_sized_new(PACKET_BUFFER_LEN);
191     snprint_a_pkt_debug(buf, pkt);
192     HEX_DEBUG_LOG("%s", buf->str);
193     g_string_free(buf, true);
194 }
195 #define HEX_DEBUG_PRINT_PKT(pkt) \
196     do { \
197         if (HEX_DEBUG) { \
198             print_pkt(pkt); \
199         } \
200     } while (0)
201 
202 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
203                              uint32_t words[])
204 {
205     bool found_end = false;
206     int nwords, max_words;
207 
208     memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
209     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
210         words[nwords] =
211             translator_ldl(env, &ctx->base,
212                            ctx->base.pc_next + nwords * sizeof(uint32_t));
213         found_end = is_packet_end(words[nwords]);
214     }
215     if (!found_end) {
216         /* Read too many words without finding the end */
217         return 0;
218     }
219 
220     /* Check for page boundary crossing */
221     max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
222     if (nwords > max_words) {
223         /* We can only cross a page boundary at the beginning of a TB */
224         g_assert(ctx->base.num_insns == 1);
225     }
226 
227     HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next);
228     HEX_DEBUG_LOG("    words = { ");
229     for (int i = 0; i < nwords; i++) {
230         HEX_DEBUG_LOG("0x%x, ", words[i]);
231     }
232     HEX_DEBUG_LOG("}\n");
233 
234     return nwords;
235 }
236 
237 static bool check_for_attrib(Packet *pkt, int attrib)
238 {
239     for (int i = 0; i < pkt->num_insns; i++) {
240         if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
241             return true;
242         }
243     }
244     return false;
245 }
246 
247 static bool need_slot_cancelled(Packet *pkt)
248 {
249     /* We only need slot_cancelled for conditional store instructions */
250     for (int i = 0; i < pkt->num_insns; i++) {
251         uint16_t opcode = pkt->insn[i].opcode;
252         if (GET_ATTRIB(opcode, A_CONDEXEC) &&
253             GET_ATTRIB(opcode, A_SCALAR_STORE)) {
254             return true;
255         }
256     }
257     return false;
258 }
259 
260 static bool need_pred_written(Packet *pkt)
261 {
262     return check_for_attrib(pkt, A_WRITES_PRED_REG);
263 }
264 
265 static bool need_next_PC(DisasContext *ctx)
266 {
267     Packet *pkt = ctx->pkt;
268 
269     /* Check for conditional control flow or HW loop end */
270     for (int i = 0; i < pkt->num_insns; i++) {
271         uint16_t opcode = pkt->insn[i].opcode;
272         if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
273             return true;
274         }
275         if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
276             GET_ATTRIB(opcode, A_HWLOOP1_END)) {
277             return true;
278         }
279     }
280     return false;
281 }
282 
283 /*
284  * The opcode_analyze functions mark most of the writes in a packet
285  * However, there are some implicit writes marked as attributes
286  * of the applicable instructions.
287  */
288 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
289 {
290     uint16_t opcode = ctx->insn->opcode;
291     if (GET_ATTRIB(opcode, attrib)) {
292         /*
293          * USR is used to set overflow and FP exceptions,
294          * so treat it as conditional
295          */
296         bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
297                              rnum == HEX_REG_USR;
298 
299         /* LC0/LC1 is conditionally written by endloop instructions */
300         if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
301             (opcode == J2_endloop0 ||
302              opcode == J2_endloop1 ||
303              opcode == J2_endloop01)) {
304             is_predicated = true;
305         }
306 
307         ctx_log_reg_write(ctx, rnum, is_predicated);
308     }
309 }
310 
311 static void mark_implicit_reg_writes(DisasContext *ctx)
312 {
313     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
314     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
315     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
316     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
317     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
318     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
319     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
320     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
321     mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
322 }
323 
324 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
325 {
326     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
327         ctx_log_pred_write(ctx, pnum);
328     }
329 }
330 
331 static void mark_implicit_pred_writes(DisasContext *ctx)
332 {
333     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
334     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
335     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
336     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
337 }
338 
339 static void analyze_packet(DisasContext *ctx)
340 {
341     Packet *pkt = ctx->pkt;
342     ctx->need_pkt_has_store_s1 = false;
343     for (int i = 0; i < pkt->num_insns; i++) {
344         Insn *insn = &pkt->insn[i];
345         ctx->insn = insn;
346         if (opcode_analyze[insn->opcode]) {
347             opcode_analyze[insn->opcode](ctx);
348         }
349         mark_implicit_reg_writes(ctx);
350         mark_implicit_pred_writes(ctx);
351     }
352 }
353 
354 static void gen_start_packet(DisasContext *ctx)
355 {
356     Packet *pkt = ctx->pkt;
357     target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
358     int i;
359 
360     /* Clear out the disassembly context */
361     ctx->next_PC = next_PC;
362     ctx->reg_log_idx = 0;
363     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
364     bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
365     ctx->preg_log_idx = 0;
366     bitmap_zero(ctx->pregs_written, NUM_PREGS);
367     ctx->future_vregs_idx = 0;
368     ctx->tmp_vregs_idx = 0;
369     ctx->vreg_log_idx = 0;
370     bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
371     bitmap_zero(ctx->vregs_updated, NUM_VREGS);
372     bitmap_zero(ctx->vregs_select, NUM_VREGS);
373     bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
374     bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
375     ctx->qreg_log_idx = 0;
376     for (i = 0; i < STORES_MAX; i++) {
377         ctx->store_width[i] = 0;
378     }
379     ctx->s1_store_processed = false;
380     ctx->pre_commit = true;
381 
382     analyze_packet(ctx);
383 
384     if (ctx->need_pkt_has_store_s1) {
385         tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
386     }
387 
388     /*
389      * pregs_written is used both in the analyze phase as well as the code
390      * gen phase, so clear it again.
391      */
392     bitmap_zero(ctx->pregs_written, NUM_PREGS);
393 
394     if (HEX_DEBUG) {
395         /* Handy place to set a breakpoint before the packet executes */
396         gen_helper_debug_start_packet(cpu_env);
397         tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
398     }
399 
400     /* Initialize the runtime state for packet semantics */
401     if (need_slot_cancelled(pkt)) {
402         tcg_gen_movi_tl(hex_slot_cancelled, 0);
403     }
404     if (pkt->pkt_has_cof) {
405         if (pkt->pkt_has_multi_cof) {
406             tcg_gen_movi_tl(hex_branch_taken, 0);
407         }
408         if (need_next_PC(ctx)) {
409             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
410         }
411     }
412     if (need_pred_written(pkt)) {
413         tcg_gen_movi_tl(hex_pred_written, 0);
414     }
415 
416     /* Preload the predicated registers into hex_new_value[i] */
417     if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
418         int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
419         while (i < TOTAL_PER_THREAD_REGS) {
420             tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
421             i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
422                               i + 1);
423         }
424     }
425 
426     /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
427     if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
428         int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
429         while (i < NUM_VREGS) {
430             const intptr_t VdV_off =
431                 ctx_future_vreg_off(ctx, i, 1, true);
432             intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
433             tcg_gen_gvec_mov(MO_64, VdV_off,
434                              src_off,
435                              sizeof(MMVector),
436                              sizeof(MMVector));
437             i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
438         }
439     }
440     if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
441         int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
442         while (i < NUM_VREGS) {
443             const intptr_t VdV_off =
444                 ctx_tmp_vreg_off(ctx, i, 1, true);
445             intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
446             tcg_gen_gvec_mov(MO_64, VdV_off,
447                              src_off,
448                              sizeof(MMVector),
449                              sizeof(MMVector));
450             i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
451         }
452     }
453 }
454 
455 bool is_gather_store_insn(DisasContext *ctx)
456 {
457     Packet *pkt = ctx->pkt;
458     Insn *insn = ctx->insn;
459     if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
460         insn->new_value_producer_slot == 1) {
461         /* Look for gather instruction */
462         for (int i = 0; i < pkt->num_insns; i++) {
463             Insn *in = &pkt->insn[i];
464             if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
465                 return true;
466             }
467         }
468     }
469     return false;
470 }
471 
472 static void mark_store_width(DisasContext *ctx)
473 {
474     uint16_t opcode = ctx->insn->opcode;
475     uint32_t slot = ctx->insn->slot;
476     uint8_t width = 0;
477 
478     if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
479         if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
480             width |= 1;
481         }
482         if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
483             width |= 2;
484         }
485         if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
486             width |= 4;
487         }
488         if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
489             width |= 8;
490         }
491         tcg_debug_assert(is_power_of_2(width));
492         ctx->store_width[slot] = width;
493     }
494 }
495 
496 static void gen_insn(DisasContext *ctx)
497 {
498     if (ctx->insn->generate) {
499         ctx->insn->generate(ctx);
500         mark_store_width(ctx);
501     } else {
502         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
503     }
504 }
505 
506 /*
507  * Helpers for generating the packet commit
508  */
509 static void gen_reg_writes(DisasContext *ctx)
510 {
511     int i;
512 
513     for (i = 0; i < ctx->reg_log_idx; i++) {
514         int reg_num = ctx->reg_log[i];
515 
516         tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);
517 
518         /*
519          * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
520          * If we write to SA0, we have to turn off tight loop handling.
521          */
522         if (reg_num == HEX_REG_SA0) {
523             ctx->is_tight_loop = false;
524         }
525     }
526 }
527 
528 static void gen_pred_writes(DisasContext *ctx)
529 {
530     int i;
531 
532     /* Early exit if the log is empty */
533     if (!ctx->preg_log_idx) {
534         return;
535     }
536 
537     /*
538      * Only endloop instructions will conditionally
539      * write a predicate.  If there are no endloop
540      * instructions, we can use the non-conditional
541      * write of the predicates.
542      */
543     if (ctx->pkt->pkt_has_endloop) {
544         TCGv zero = tcg_constant_tl(0);
545         TCGv pred_written = tcg_temp_new();
546         for (i = 0; i < ctx->preg_log_idx; i++) {
547             int pred_num = ctx->preg_log[i];
548 
549             tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num);
550             tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num],
551                                pred_written, zero,
552                                hex_new_pred_value[pred_num],
553                                hex_pred[pred_num]);
554         }
555     } else {
556         for (i = 0; i < ctx->preg_log_idx; i++) {
557             int pred_num = ctx->preg_log[i];
558             tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]);
559             if (HEX_DEBUG) {
560                 /* Do this so HELPER(debug_commit_end) will know */
561                 tcg_gen_ori_tl(hex_pred_written, hex_pred_written,
562                                1 << pred_num);
563             }
564         }
565     }
566 }
567 
568 static void gen_check_store_width(DisasContext *ctx, int slot_num)
569 {
570     if (HEX_DEBUG) {
571         TCGv slot = tcg_constant_tl(slot_num);
572         TCGv check = tcg_constant_tl(ctx->store_width[slot_num]);
573         gen_helper_debug_check_store_width(cpu_env, slot, check);
574     }
575 }
576 
577 static bool slot_is_predicated(Packet *pkt, int slot_num)
578 {
579     for (int i = 0; i < pkt->num_insns; i++) {
580         if (pkt->insn[i].slot == slot_num) {
581             return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
582         }
583     }
584     /* If we get to here, we didn't find an instruction in the requested slot */
585     g_assert_not_reached();
586 }
587 
588 void process_store(DisasContext *ctx, int slot_num)
589 {
590     bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
591     TCGLabel *label_end = NULL;
592 
593     /*
594      * We may have already processed this store
595      * See CHECK_NOSHUF in macros.h
596      */
597     if (slot_num == 1 && ctx->s1_store_processed) {
598         return;
599     }
600     ctx->s1_store_processed = true;
601 
602     if (is_predicated) {
603         TCGv cancelled = tcg_temp_new();
604         label_end = gen_new_label();
605 
606         /* Don't do anything if the slot was cancelled */
607         tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
608         tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
609     }
610     {
611         TCGv address = tcg_temp_new();
612         tcg_gen_mov_tl(address, hex_store_addr[slot_num]);
613 
614         /*
615          * If we know the width from the DisasContext, we can
616          * generate much cleaner code.
617          * Unfortunately, not all instructions execute the fSTORE
618          * macro during code generation.  Anything that uses the
619          * generic helper will have this problem.  Instructions
620          * that use fWRAP to generate proper TCG code will be OK.
621          */
622         switch (ctx->store_width[slot_num]) {
623         case 1:
624             gen_check_store_width(ctx, slot_num);
625             tcg_gen_qemu_st8(hex_store_val32[slot_num],
626                              hex_store_addr[slot_num],
627                              ctx->mem_idx);
628             break;
629         case 2:
630             gen_check_store_width(ctx, slot_num);
631             tcg_gen_qemu_st16(hex_store_val32[slot_num],
632                               hex_store_addr[slot_num],
633                               ctx->mem_idx);
634             break;
635         case 4:
636             gen_check_store_width(ctx, slot_num);
637             tcg_gen_qemu_st32(hex_store_val32[slot_num],
638                               hex_store_addr[slot_num],
639                               ctx->mem_idx);
640             break;
641         case 8:
642             gen_check_store_width(ctx, slot_num);
643             tcg_gen_qemu_st64(hex_store_val64[slot_num],
644                               hex_store_addr[slot_num],
645                               ctx->mem_idx);
646             break;
647         default:
648             {
649                 /*
650                  * If we get to here, we don't know the width at
651                  * TCG generation time, we'll use a helper to
652                  * avoid branching based on the width at runtime.
653                  */
654                 TCGv slot = tcg_constant_tl(slot_num);
655                 gen_helper_commit_store(cpu_env, slot);
656             }
657         }
658     }
659     if (is_predicated) {
660         gen_set_label(label_end);
661     }
662 }
663 
664 static void process_store_log(DisasContext *ctx)
665 {
666     /*
667      *  When a packet has two stores, the hardware processes
668      *  slot 1 and then slot 0.  This will be important when
669      *  the memory accesses overlap.
670      */
671     Packet *pkt = ctx->pkt;
672     if (pkt->pkt_has_store_s1) {
673         g_assert(!pkt->pkt_has_dczeroa);
674         process_store(ctx, 1);
675     }
676     if (pkt->pkt_has_store_s0) {
677         g_assert(!pkt->pkt_has_dczeroa);
678         process_store(ctx, 0);
679     }
680 }
681 
682 /* Zero out a 32-bit cache line */
683 static void process_dczeroa(DisasContext *ctx)
684 {
685     if (ctx->pkt->pkt_has_dczeroa) {
686         /* Store 32 bytes of zero starting at (addr & ~0x1f) */
687         TCGv addr = tcg_temp_new();
688         TCGv_i64 zero = tcg_constant_i64(0);
689 
690         tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f);
691         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
692         tcg_gen_addi_tl(addr, addr, 8);
693         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
694         tcg_gen_addi_tl(addr, addr, 8);
695         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
696         tcg_gen_addi_tl(addr, addr, 8);
697         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
698     }
699 }
700 
701 static bool pkt_has_hvx_store(Packet *pkt)
702 {
703     int i;
704     for (i = 0; i < pkt->num_insns; i++) {
705         int opcode = pkt->insn[i].opcode;
706         if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
707             return true;
708         }
709     }
710     return false;
711 }
712 
713 static void gen_commit_hvx(DisasContext *ctx)
714 {
715     int i;
716 
717     /*
718      *    for (i = 0; i < ctx->vreg_log_idx; i++) {
719      *        int rnum = ctx->vreg_log[i];
720      *        env->VRegs[rnum] = env->future_VRegs[rnum];
721      *    }
722      */
723     for (i = 0; i < ctx->vreg_log_idx; i++) {
724         int rnum = ctx->vreg_log[i];
725         intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
726         intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
727         size_t size = sizeof(MMVector);
728 
729         tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
730     }
731 
732     /*
733      *    for (i = 0; i < ctx->qreg_log_idx; i++) {
734      *        int rnum = ctx->qreg_log[i];
735      *        env->QRegs[rnum] = env->future_QRegs[rnum];
736      *    }
737      */
738     for (i = 0; i < ctx->qreg_log_idx; i++) {
739         int rnum = ctx->qreg_log[i];
740         intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
741         intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
742         size_t size = sizeof(MMQReg);
743 
744         tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
745     }
746 
747     if (pkt_has_hvx_store(ctx->pkt)) {
748         gen_helper_commit_hvx_stores(cpu_env);
749     }
750 }
751 
752 static void update_exec_counters(DisasContext *ctx)
753 {
754     Packet *pkt = ctx->pkt;
755     int num_insns = pkt->num_insns;
756     int num_real_insns = 0;
757     int num_hvx_insns = 0;
758 
759     for (int i = 0; i < num_insns; i++) {
760         if (!pkt->insn[i].is_endloop &&
761             !pkt->insn[i].part1 &&
762             !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
763             num_real_insns++;
764         }
765         if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
766             num_hvx_insns++;
767         }
768     }
769 
770     ctx->num_packets++;
771     ctx->num_insns += num_real_insns;
772     ctx->num_hvx_insns += num_hvx_insns;
773 }
774 
775 static void gen_commit_packet(DisasContext *ctx)
776 {
777     /*
778      * If there is more than one store in a packet, make sure they are all OK
779      * before proceeding with the rest of the packet commit.
780      *
781      * dczeroa has to be the only store operation in the packet, so we go
782      * ahead and process that first.
783      *
784      * When there is an HVX store, there can also be a scalar store in either
785      * slot 0 or slot1, so we create a mask for the helper to indicate what
786      * work to do.
787      *
788      * When there are two scalar stores, we probe the one in slot 0.
789      *
790      * Note that we don't call the probe helper for packets with only one
791      * store.  Therefore, we call process_store_log before anything else
792      * involved in committing the packet.
793      */
794     Packet *pkt = ctx->pkt;
795     bool has_store_s0 = pkt->pkt_has_store_s0;
796     bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
797     bool has_hvx_store = pkt_has_hvx_store(pkt);
798     if (pkt->pkt_has_dczeroa) {
799         /*
800          * The dczeroa will be the store in slot 0, check that we don't have
801          * a store in slot 1 or an HVX store.
802          */
803         g_assert(!has_store_s1 && !has_hvx_store);
804         process_dczeroa(ctx);
805     } else if (has_hvx_store) {
806         TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
807 
808         if (!has_store_s0 && !has_store_s1) {
809             gen_helper_probe_hvx_stores(cpu_env, mem_idx);
810         } else {
811             int mask = 0;
812             TCGv mask_tcgv;
813 
814             if (has_store_s0) {
815                 mask =
816                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
817             }
818             if (has_store_s1) {
819                 mask =
820                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
821             }
822             if (has_hvx_store) {
823                 mask =
824                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
825                                HAS_HVX_STORES, 1);
826             }
827             if (has_store_s0 && slot_is_predicated(pkt, 0)) {
828                 mask =
829                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
830                                S0_IS_PRED, 1);
831             }
832             if (has_store_s1 && slot_is_predicated(pkt, 1)) {
833                 mask =
834                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
835                                S1_IS_PRED, 1);
836             }
837             mask_tcgv = tcg_constant_tl(mask);
838             gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx);
839         }
840     } else if (has_store_s0 && has_store_s1) {
841         /*
842          * process_store_log will execute the slot 1 store first,
843          * so we only have to probe the store in slot 0
844          */
845         int args = 0;
846         args =
847             FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
848         if (slot_is_predicated(pkt, 0)) {
849             args =
850                 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
851         }
852         TCGv args_tcgv = tcg_constant_tl(args);
853         gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv);
854     }
855 
856     process_store_log(ctx);
857 
858     gen_reg_writes(ctx);
859     gen_pred_writes(ctx);
860     if (pkt->pkt_has_hvx) {
861         gen_commit_hvx(ctx);
862     }
863     update_exec_counters(ctx);
864     if (HEX_DEBUG) {
865         TCGv has_st0 =
866             tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa);
867         TCGv has_st1 =
868             tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa);
869 
870         /* Handy place to set a breakpoint at the end of execution */
871         gen_helper_debug_commit_end(cpu_env, has_st0, has_st1);
872     }
873 
874     if (pkt->vhist_insn != NULL) {
875         ctx->pre_commit = false;
876         ctx->insn = pkt->vhist_insn;
877         pkt->vhist_insn->generate(ctx);
878     }
879 
880     if (pkt->pkt_has_cof) {
881         gen_end_tb(ctx);
882     }
883 }
884 
885 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
886 {
887     uint32_t words[PACKET_WORDS_MAX];
888     int nwords;
889     Packet pkt;
890     int i;
891 
892     nwords = read_packet_words(env, ctx, words);
893     if (!nwords) {
894         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
895         return;
896     }
897 
898     if (decode_packet(nwords, words, &pkt, false) > 0) {
899         pkt.pc = ctx->base.pc_next;
900         HEX_DEBUG_PRINT_PKT(&pkt);
901         ctx->pkt = &pkt;
902         gen_start_packet(ctx);
903         for (i = 0; i < pkt.num_insns; i++) {
904             ctx->insn = &pkt.insn[i];
905             gen_insn(ctx);
906         }
907         gen_commit_packet(ctx);
908         ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
909     } else {
910         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
911     }
912 }
913 
914 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
915                                           CPUState *cs)
916 {
917     DisasContext *ctx = container_of(dcbase, DisasContext, base);
918     uint32_t hex_flags = dcbase->tb->flags;
919 
920     ctx->mem_idx = MMU_USER_IDX;
921     ctx->num_packets = 0;
922     ctx->num_insns = 0;
923     ctx->num_hvx_insns = 0;
924     ctx->branch_cond = TCG_COND_NEVER;
925     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
926 }
927 
928 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
929 {
930 }
931 
932 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
933 {
934     DisasContext *ctx = container_of(dcbase, DisasContext, base);
935 
936     tcg_gen_insn_start(ctx->base.pc_next);
937 }
938 
939 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
940 {
941     target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
942     bool found_end = false;
943     int nwords;
944 
945     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
946         uint32_t word = cpu_ldl_code(env,
947                             ctx->base.pc_next + nwords * sizeof(uint32_t));
948         found_end = is_packet_end(word);
949     }
950     uint32_t next_ptr =  ctx->base.pc_next + nwords * sizeof(uint32_t);
951     return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
952 }
953 
954 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
955 {
956     DisasContext *ctx = container_of(dcbase, DisasContext, base);
957     CPUHexagonState *env = cpu->env_ptr;
958 
959     decode_and_translate_packet(env, ctx);
960 
961     if (ctx->base.is_jmp == DISAS_NEXT) {
962         target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
963         target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);
964 
965         if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
966             (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
967              pkt_crosses_page(env, ctx))) {
968             ctx->base.is_jmp = DISAS_TOO_MANY;
969         }
970 
971         /*
972          * The CPU log is used to compare against LLDB single stepping,
973          * so end the TLB after every packet.
974          */
975         HexagonCPU *hex_cpu = env_archcpu(env);
976         if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
977             ctx->base.is_jmp = DISAS_TOO_MANY;
978         }
979     }
980 }
981 
982 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
983 {
984     DisasContext *ctx = container_of(dcbase, DisasContext, base);
985 
986     switch (ctx->base.is_jmp) {
987     case DISAS_TOO_MANY:
988         gen_exec_counters(ctx);
989         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
990         tcg_gen_exit_tb(NULL, 0);
991         break;
992     case DISAS_NORETURN:
993         break;
994     default:
995         g_assert_not_reached();
996     }
997 }
998 
999 static void hexagon_tr_disas_log(const DisasContextBase *dcbase,
1000                                  CPUState *cpu, FILE *logfile)
1001 {
1002     fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
1003     target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
1004 }
1005 
1006 
1007 static const TranslatorOps hexagon_tr_ops = {
1008     .init_disas_context = hexagon_tr_init_disas_context,
1009     .tb_start           = hexagon_tr_tb_start,
1010     .insn_start         = hexagon_tr_insn_start,
1011     .translate_insn     = hexagon_tr_translate_packet,
1012     .tb_stop            = hexagon_tr_tb_stop,
1013     .disas_log          = hexagon_tr_disas_log,
1014 };
1015 
1016 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
1017                            target_ulong pc, void *host_pc)
1018 {
1019     DisasContext ctx;
1020 
1021     translator_loop(cs, tb, max_insns, pc, host_pc,
1022                     &hexagon_tr_ops, &ctx.base);
1023 }
1024 
1025 #define NAME_LEN               64
1026 static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
1027 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
1028 static char new_pred_value_names[NUM_PREGS][NAME_LEN];
1029 static char store_addr_names[STORES_MAX][NAME_LEN];
1030 static char store_width_names[STORES_MAX][NAME_LEN];
1031 static char store_val32_names[STORES_MAX][NAME_LEN];
1032 static char store_val64_names[STORES_MAX][NAME_LEN];
1033 static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
1034 static char vstore_size_names[VSTORES_MAX][NAME_LEN];
1035 static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
1036 
1037 void hexagon_translate_init(void)
1038 {
1039     int i;
1040 
1041     opcode_init();
1042 
1043     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
1044         hex_gpr[i] = tcg_global_mem_new(cpu_env,
1045             offsetof(CPUHexagonState, gpr[i]),
1046             hexagon_regnames[i]);
1047 
1048         snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
1049         hex_new_value[i] = tcg_global_mem_new(cpu_env,
1050             offsetof(CPUHexagonState, new_value[i]),
1051             new_value_names[i]);
1052 
1053         if (HEX_DEBUG) {
1054             snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
1055                      hexagon_regnames[i]);
1056             hex_reg_written[i] = tcg_global_mem_new(cpu_env,
1057                 offsetof(CPUHexagonState, reg_written[i]),
1058                 reg_written_names[i]);
1059         }
1060     }
1061     for (i = 0; i < NUM_PREGS; i++) {
1062         hex_pred[i] = tcg_global_mem_new(cpu_env,
1063             offsetof(CPUHexagonState, pred[i]),
1064             hexagon_prednames[i]);
1065 
1066         snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s",
1067                  hexagon_prednames[i]);
1068         hex_new_pred_value[i] = tcg_global_mem_new(cpu_env,
1069             offsetof(CPUHexagonState, new_pred_value[i]),
1070             new_pred_value_names[i]);
1071     }
1072     hex_pred_written = tcg_global_mem_new(cpu_env,
1073         offsetof(CPUHexagonState, pred_written), "pred_written");
1074     hex_this_PC = tcg_global_mem_new(cpu_env,
1075         offsetof(CPUHexagonState, this_PC), "this_PC");
1076     hex_slot_cancelled = tcg_global_mem_new(cpu_env,
1077         offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
1078     hex_branch_taken = tcg_global_mem_new(cpu_env,
1079         offsetof(CPUHexagonState, branch_taken), "branch_taken");
1080     hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
1081         offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
1082     hex_dczero_addr = tcg_global_mem_new(cpu_env,
1083         offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
1084     hex_llsc_addr = tcg_global_mem_new(cpu_env,
1085         offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
1086     hex_llsc_val = tcg_global_mem_new(cpu_env,
1087         offsetof(CPUHexagonState, llsc_val), "llsc_val");
1088     hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
1089         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
1090     for (i = 0; i < STORES_MAX; i++) {
1091         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
1092         hex_store_addr[i] = tcg_global_mem_new(cpu_env,
1093             offsetof(CPUHexagonState, mem_log_stores[i].va),
1094             store_addr_names[i]);
1095 
1096         snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
1097         hex_store_width[i] = tcg_global_mem_new(cpu_env,
1098             offsetof(CPUHexagonState, mem_log_stores[i].width),
1099             store_width_names[i]);
1100 
1101         snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
1102         hex_store_val32[i] = tcg_global_mem_new(cpu_env,
1103             offsetof(CPUHexagonState, mem_log_stores[i].data32),
1104             store_val32_names[i]);
1105 
1106         snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
1107         hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env,
1108             offsetof(CPUHexagonState, mem_log_stores[i].data64),
1109             store_val64_names[i]);
1110     }
1111     for (int i = 0; i < VSTORES_MAX; i++) {
1112         snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
1113         hex_vstore_addr[i] = tcg_global_mem_new(cpu_env,
1114             offsetof(CPUHexagonState, vstore[i].va),
1115             vstore_addr_names[i]);
1116 
1117         snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
1118         hex_vstore_size[i] = tcg_global_mem_new(cpu_env,
1119             offsetof(CPUHexagonState, vstore[i].size),
1120             vstore_size_names[i]);
1121 
1122         snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
1123         hex_vstore_pending[i] = tcg_global_mem_new(cpu_env,
1124             offsetof(CPUHexagonState, vstore_pending[i]),
1125             vstore_pending_names[i]);
1126     }
1127 }
1128