/* Auxiliary functions for expand movmem, setmem, cmpmem, load_multiple
   and store_multiple pattern of Andes NDS32 cpu for GNU compiler
   Copyright (C) 2012-2019 Free Software Foundation, Inc.
   Contributed by Andes Technology Corporation.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* ------------------------------------------------------------------------ */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "explow.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "nds32-protos.h"

/* ------------------------------------------------------------------------ */

/* Auxiliary static function definitions.  */

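/* Emit a load or store of MODE at constant byte OFFSET from the address
   in MEM: load into REG when LOAD_P, otherwise store REG.  */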
static void
nds32_emit_load_store (rtx reg, rtx mem,
                       enum machine_mode mode,
                       int offset, bool load_p)
{
  rtx new_mem;
  new_mem = adjust_address (mem, mode, offset);
  if (load_p)
    emit_move_insn (reg, new_mem);
  else
    emit_move_insn (new_mem, reg);
}

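/* Emit a load or store of MODE through BASE_REG, followed by an explicit
   increment of BASE_REG by the mode size, so that the auto_inc_dec pass
   can later fuse the pair into a post-increment access.  */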
static void
nds32_emit_post_inc_load_store (rtx reg, rtx base_reg,
                                enum machine_mode mode,
                                bool load_p)
{
  gcc_assert (GET_MODE (reg) == mode);
  gcc_assert (GET_MODE (base_reg) == Pmode);

  /* Do not generate (set (reg) (mem (post_inc (reg)))) directly here;
     it may not be recognized by gcc, so let the auto_inc_dec pass
     combine it into a post-increment access.  */
  if (load_p)
    emit_move_insn (reg, gen_rtx_MEM (mode, base_reg));
  else
    emit_move_insn (gen_rtx_MEM (mode, base_reg), reg);

  emit_move_insn (base_reg,
                  plus_constant (Pmode, base_reg, GET_MODE_SIZE (mode)));
}

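/* Copy one unit of MODE at ADDR_OFFSET from SRC to DST (both MEMs)
   through a fresh temporary register.  */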
static void
nds32_emit_mem_move (rtx src, rtx dst,
                     enum machine_mode mode,
                     int addr_offset)
{
  gcc_assert (MEM_P (src) && MEM_P (dst));
  rtx tmp_reg = gen_reg_rtx (mode);
  nds32_emit_load_store (tmp_reg, src, mode,
                         addr_offset, /* load_p */ true);
  nds32_emit_load_store (tmp_reg, dst, mode,
                         addr_offset, /* load_p */ false);
}

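/* Move COUNT words from *SRC_MEM to *DST_MEM with a load-multiple
   followed by a store-multiple using registers BASE_REGNO onward.
   When UPDATE_BASE_REG_P, rewrite the base registers and MEMs to
   point just past the moved block.  */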
static void
nds32_emit_mem_move_block (int base_regno, int count,
                           rtx *dst_base_reg, rtx *dst_mem,
                           rtx *src_base_reg, rtx *src_mem,
                           bool update_base_reg_p)
{
  rtx new_base_reg;

  emit_insn (nds32_expand_load_multiple (base_regno, count,
                                         *src_base_reg, *src_mem,
                                         update_base_reg_p, &new_base_reg));
  if (update_base_reg_p)
    {
      *src_base_reg = new_base_reg;
      *src_mem = gen_rtx_MEM (SImode, *src_base_reg);
    }

  emit_insn (nds32_expand_store_multiple (base_regno, count,
                                          *dst_base_reg, *dst_mem,
                                          update_base_reg_p, &new_base_reg));

  if (update_base_reg_p)
    {
      *dst_base_reg = new_base_reg;
      *dst_mem = gen_rtx_MEM (SImode, *dst_base_reg);
    }
}

/* ------------------------------------------------------------------------ */

/* Auxiliary function for expand movmem pattern.  */

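/* Expand movmem with a run-time SIZE: move double words (8 bytes per
   iteration, via a 2-register lmw/smw pair) as long as possible, then
   finish the remainder with a byte-copy loop.  Return false on v3m when
   the shared alignment is not 4 bytes, since lmw/smw cannot make
   unaligned accesses there.  */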
static bool
nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem,
                                         rtx size,
                                         rtx alignment)
{
  /* Emit loop version of movmem.

     andi $size_least_3_bit, $size, #~7
     add $dst_end, $dst, $size
     move $dst_itr, $dst
     move $src_itr, $src
     beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough.
     add $double_word_end, $dst, $size_least_3_bit

   .Ldouble_word_mode_loop:
     lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
     smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
     ! moves will be deleted after register allocation
     move $src_itr, $src_itr'
     move $dst_itr, $dst_itr'
     ! Not reached upper bound.  Loop.
     bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop

   .Lbyte_mode_entry:
     beq $dst_itr, $dst_end, .Lend_label
   .Lbyte_mode_loop:
     lbi.bi $tmp, [$src_itr], #1
     sbi.bi $tmp, [$dst_itr], #1
     ! Not reached upper bound.  Loop.
     bne $dst_itr, $dst_end, .Lbyte_mode_loop
   .Lend_label:
  */
  rtx dst_base_reg, src_base_reg;
  rtx dst_itr, src_itr;
  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
  rtx dst_end;
  rtx size_least_3_bit;
  rtx double_word_end;
  rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label;
  rtx tmp;
  rtx mask_least_3_bit;
  int start_regno;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;

  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return false;

  if (TARGET_REDUCED_REGS)
    start_regno = 2;
  else
    start_regno = 16;

  dst_itr = gen_reg_rtx (Pmode);
  src_itr = gen_reg_rtx (Pmode);
  dst_end = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (QImode);
  mask_least_3_bit = GEN_INT (~7);

  double_word_mode_loop = gen_label_rtx ();
  byte_mode_entry = gen_label_rtx ();
  byte_mode_loop = gen_label_rtx ();
  end_label = gen_label_rtx ();

  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
  /* andi $size_least_3_bit, $size, #~7 */
  size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit,
                                   NULL_RTX, 0, OPTAB_WIDEN);
  /* add $dst_end, $dst, $size */
  dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                          NULL_RTX, 0, OPTAB_WIDEN);

  /* move $dst_itr, $dst
     move $src_itr, $src */
  emit_move_insn (dst_itr, dst_base_reg);
  emit_move_insn (src_itr, src_base_reg);

  /* beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. */
  emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL,
                           SImode, 1, byte_mode_entry);
  /* add $double_word_end, $dst, $size_least_3_bit */
  double_word_end = expand_binop (Pmode, add_optab,
                                  dst_base_reg, size_least_3_bit,
                                  NULL_RTX, 0, OPTAB_WIDEN);

  /* .Ldouble_word_mode_loop: */
  emit_label (double_word_mode_loop);
  /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
     smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
  src_itr_m = src_itr;
  dst_itr_m = dst_itr;
  srcmem_m = srcmem;
  dstmem_m = dstmem;
  nds32_emit_mem_move_block (start_regno, 2,
                             &dst_itr_m, &dstmem_m,
                             &src_itr_m, &srcmem_m,
                             true);
  /* move $src_itr, $src_itr'
     move $dst_itr, $dst_itr' */
  emit_move_insn (dst_itr, dst_itr_m);
  emit_move_insn (src_itr, src_itr_m);

  /* ! Not reached upper bound.  Loop.
     bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
  emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL,
                           Pmode, 1, double_word_mode_loop);
  /* .Lbyte_mode_entry: */
  emit_label (byte_mode_entry);

  /* beq $dst_itr, $dst_end, .Lend_label */
  emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL,
                           Pmode, 1, end_label);
  /* .Lbyte_mode_loop: */
  emit_label (byte_mode_loop);

  /* lbi.bi $tmp, [$src_itr], #1 */
  nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);

  /* sbi.bi $tmp, [$dst_itr], #1 */
  nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
  /* ! Not reached upper bound.  Loop.
     bne $dst_itr, $dst_end, .Lbyte_mode_loop */
  emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
                           SImode, 1, byte_mode_loop);

  /* .Lend_label: */
  emit_label (end_label);

  return true;
}

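/* Expand movmem with a compile-time constant SIZE: a byte loop when
   SIZE < 8, a double-word loop when SIZE is a multiple of 8, and the
   unknown-size expansion otherwise.  */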
static bool
nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
                                       rtx size, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx dst_itr, src_itr;
  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
  rtx dst_end;
  rtx double_word_mode_loop, byte_mode_loop;
  rtx tmp;
  int start_regno;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);

  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return false;

  if (TARGET_REDUCED_REGS)
    start_regno = 2;
  else
    start_regno = 16;

  dst_itr = gen_reg_rtx (Pmode);
  src_itr = gen_reg_rtx (Pmode);
  dst_end = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (QImode);

  double_word_mode_loop = gen_label_rtx ();
  byte_mode_loop = gen_label_rtx ();

  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));

  if (total_bytes < 8)
    {
      /* Emit the byte-loop version of movmem for total_bytes < 8.
         add $dst_end, $dst, $size
         move $dst_itr, $dst
       .Lbyte_mode_loop:
         lbi.bi $tmp, [$src_itr], #1
         sbi.bi $tmp, [$dst_itr], #1
         ! Not reached upper bound.  Loop.
         bne $dst_itr, $dst_end, .Lbyte_mode_loop */

      /* add $dst_end, $dst, $size */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                              NULL_RTX, 0, OPTAB_WIDEN);
      /* move $dst_itr, $dst
         move $src_itr, $src */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Lbyte_mode_loop: */
      emit_label (byte_mode_loop);

      /* lbi.bi $tmp, [$src_itr], #1 */
      nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);

      /* sbi.bi $tmp, [$dst_itr], #1 */
      nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
      /* ! Not reached upper bound.  Loop.
         bne $dst_itr, $dst_end, .Lbyte_mode_loop */
      emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
                               SImode, 1, byte_mode_loop);
      return true;
    }
  else if (total_bytes % 8 == 0)
    {
      /* Emit the double-word loop version of movmem when total_bytes
         is a multiple of 8.

         add $dst_end, $dst, $size
         move $dst_itr, $dst
         move $src_itr, $src

       .Ldouble_word_mode_loop:
         lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
         smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
         ! moves will be deleted after register allocation
         move $src_itr, $src_itr'
         move $dst_itr, $dst_itr'
         ! Not reached upper bound.  Loop.
         bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */

      /* add $dst_end, $dst, $size */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                              NULL_RTX, 0, OPTAB_WIDEN);

      /* move $dst_itr, $dst
         move $src_itr, $src */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Ldouble_word_mode_loop: */
      emit_label (double_word_mode_loop);
      /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
         smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
      src_itr_m = src_itr;
      dst_itr_m = dst_itr;
      srcmem_m = srcmem;
      dstmem_m = dstmem;
      nds32_emit_mem_move_block (start_regno, 2,
                                 &dst_itr_m, &dstmem_m,
                                 &src_itr_m, &srcmem_m,
                                 true);
      /* move $src_itr, $src_itr'
         move $dst_itr, $dst_itr' */
      emit_move_insn (dst_itr, dst_itr_m);
      emit_move_insn (src_itr, src_itr_m);

      /* ! Not reached upper bound.  Loop.
         bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */
      emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
                               Pmode, 1, double_word_mode_loop);
    }
  else
    {
      /* Handle sizes greater than 8 that are not a multiple of 8.  */
      return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
                                                      size, alignment);
    }

  return true;
}

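/* Dispatch between the known-size and unknown-size loop expansions
   depending on whether SIZE is a compile-time constant.  */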
static bool
nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem,
                            rtx size, rtx alignment)
{
  if (CONST_INT_P (size))
    return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem,
                                                  size, alignment);
  else
    return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
                                                    size, alignment);
}

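/* Expand movmem by fully unrolling into lmw/smw pairs plus trailing
   word/half-word/byte moves.  Only applies when TOTAL_BYTES and
   ALIGNMENT are constants, the block is small enough, and neither MEM
   is volatile; return false otherwise so the caller can fall back to
   a loop.  */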
static bool
nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem,
                              rtx total_bytes, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx tmp_reg;
  int maximum_bytes;
  int maximum_bytes_per_inst;
  int maximum_regs;
  int start_regno;
  int i, inst_num;
  HOST_WIDE_INT remain_bytes, remain_words;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0;

  /* Because the reduced register set has few registers
     (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31'
     cannot be used for register allocation),
     using 8 registers (32 bytes) for moving a memory block
     may easily consume all of them.
     That makes register allocation/spilling hard to work.
     So we only allow a maximum of 4 registers (16 bytes) for
     moving a memory block under reduced-set registers.  */
  if (TARGET_REDUCED_REGS)
    {
      maximum_regs = 4;
      maximum_bytes = 64;
      start_regno = 2;
    }
  else
    {
      /* $r25 is $tp so we use up to 8 registers.  */
      maximum_regs = 8;
      maximum_bytes = 160;
      start_regno = 16;
    }
  maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD;

  /* 1. Total_bytes is integer for sure.
     2. Alignment is integer for sure.
     3. Maximum 4 or 8 registers and up to 4 or 5 instructions:
        4 regs * 4 bytes * 4 insns = 64 bytes,
        8 regs * 4 bytes * 5 insns = 160 bytes.
     4. The dstmem cannot be volatile memory access.
     5. The srcmem cannot be volatile memory access.
     6. Shared alignment known not to be 4 bytes is rejected on v3m,
        since lmw/smw do *NOT* support unaligned access in the v3m
        configuration.  */
  if (GET_CODE (total_bytes) != CONST_INT
      || GET_CODE (alignment) != CONST_INT
      || INTVAL (total_bytes) > maximum_bytes
      || MEM_VOLATILE_P (dstmem)
      || MEM_VOLATILE_P (srcmem)
      || (TARGET_ISA_V3M && !align_to_4_bytes))
    return false;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0));
  remain_bytes = INTVAL (total_bytes);

  /* Do not update the base address for the last lmw/smw pair.  */
  inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1))
              / maximum_bytes_per_inst) - 1;

  for (i = 0; i < inst_num; i++)
    {
      nds32_emit_mem_move_block (start_regno, maximum_regs,
                                 &dst_base_reg, &dstmem,
                                 &src_base_reg, &srcmem,
                                 true);
    }
  remain_bytes -= maximum_bytes_per_inst * inst_num;

  remain_words = remain_bytes / UNITS_PER_WORD;
  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_words != 0)
    {
      if (remain_bytes != 0)
        nds32_emit_mem_move_block (start_regno, remain_words,
                                   &dst_base_reg, &dstmem,
                                   &src_base_reg, &srcmem,
                                   true);
      else
        {
          /* Do not update the address if there are no further bytes
             to move.  */
          if (remain_words == 1)
            {
              /* Emit a single move instruction if we are aligned to
                 4 bytes and only one word remains.  */
              if (align_to_4_bytes)
                nds32_emit_mem_move (srcmem, dstmem, SImode, 0);
              else
                {
                  tmp_reg = gen_reg_rtx (SImode);
                  emit_insn (
                    gen_unaligned_load_w (tmp_reg,
                                          gen_rtx_MEM (SImode,
                                                       src_base_reg)));
                  emit_insn (
                    gen_unaligned_store_w (gen_rtx_MEM (SImode,
                                                        dst_base_reg),
                                           tmp_reg));
                }
            }
          else
            nds32_emit_mem_move_block (start_regno, remain_words,
                                       &dst_base_reg, &dstmem,
                                       &src_base_reg, &srcmem,
                                       false);
        }
    }

  switch (remain_bytes)
    {
    case 3:
    case 2:
      {
        if (align_to_2_bytes)
          nds32_emit_mem_move (srcmem, dstmem, HImode, 0);
        else
          {
            nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
            nds32_emit_mem_move (srcmem, dstmem, QImode, 1);
          }

        if (remain_bytes == 3)
          nds32_emit_mem_move (srcmem, dstmem, QImode, 2);
        break;
      }
    case 1:
      nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
      break;
    case 0:
      break;
    default:
      gcc_unreachable ();
    }

  /* Successfully created patterns, return true.  */
  return true;
}

/* Function to move block memory content by
   using load_multiple and store_multiple.
   This is an auxiliary extern function to help create rtx templates.
   Check the nds32-multiple.md file for the patterns.  */
bool
nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
{
  if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment))
    return true;

  if (!optimize_size && optimize > 2)
    return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment);

  return false;
}

/* ------------------------------------------------------------------------ */

/* Auxiliary function for expand setmem pattern.  */

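/* Broadcast the byte VALUE into all four bytes of the SImode register
   VALUE4WORD (e.g. 0xab -> 0xabababab): computed at compile time for a
   constant, otherwise with a DSP insb/pkbb16 pair or a shift-or
   sequence for a register value.  */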
static rtx
nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
{
  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK (QImode);
      rtx new_val = gen_int_mode (val | (val << 8)
                                  | (val << 16) | (val << 24), SImode);
      /* Just calculate it here if it's a constant value.  */
      emit_move_insn (value4word, new_val);
    }
  else
    {
      if (NDS32_EXT_DSP_P ())
        {
          /* ! prepare word
             insb $tmp, $value, 1            ! $tmp <- 0x0000abab
             pkbb16 $value4word, $tmp, $tmp  ! $value4word <- 0xabababab */
          rtx tmp = gen_reg_rtx (SImode);

          convert_move (tmp, value, true);

          emit_insn (
            gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp));

          emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp));
        }
      else
        {
          /* ! prepare word
             andi $tmp1, $value, 0xff       ! $tmp1 <- 0x000000ab
             slli $tmp2, $tmp1, 8           ! $tmp2 <- 0x0000ab00
             or   $tmp3, $tmp1, $tmp2       ! $tmp3 <- 0x0000abab
             slli $tmp4, $tmp3, 16          ! $tmp4 <- 0xabab0000
             or   $val4word, $tmp3, $tmp4   ! $value4word <- 0xabababab */

          rtx tmp1, tmp2, tmp3, tmp4;
          tmp1 = expand_binop (SImode, and_optab, value,
                               gen_int_mode (0xff, SImode),
                               NULL_RTX, 0, OPTAB_WIDEN);
          tmp2 = expand_binop (SImode, ashl_optab, tmp1,
                               gen_int_mode (8, SImode),
                               NULL_RTX, 0, OPTAB_WIDEN);
          tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
                               NULL_RTX, 0, OPTAB_WIDEN);
          tmp4 = expand_binop (SImode, ashl_optab, tmp3,
                               gen_int_mode (16, SImode),
                               NULL_RTX, 0, OPTAB_WIDEN);

          emit_insn (gen_iorsi3 (value4word, tmp3, tmp4));
        }
    }

  return value4word;
}

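/* Return a fresh SImode register holding VALUE duplicated into all
   four byte positions.  */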
static rtx
nds32_gen_dup_4_byte_to_word_value (rtx value)
{
  rtx value4word = gen_reg_rtx (SImode);
  nds32_gen_dup_4_byte_to_word_value_aux (value, value4word);

  return value4word;
}

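/* Return a fresh DImode register whose low and high words both hold
   VALUE duplicated into every byte position.  */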
static rtx
nds32_gen_dup_8_byte_to_double_word_value (rtx value)
{
  rtx value4doubleword = gen_reg_rtx (DImode);

  nds32_gen_dup_4_byte_to_word_value_aux (
    value, nds32_di_low_part_subreg (value4doubleword));

  emit_move_insn (nds32_di_high_part_subreg (value4doubleword),
                  nds32_di_low_part_subreg (value4doubleword));
  return value4doubleword;
}

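/* Emit a loop that stores the DImode VALUE through ITR, 8 bytes per
   iteration, for the portion of SIZE that is a multiple of 8.  Return
   a register holding the remaining byte count (SIZE & 7).  */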
static rtx
emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
{
  rtx word_mode_label = gen_label_rtx ();
  rtx word_mode_end_label = gen_label_rtx ();
  rtx byte_mode_size = gen_reg_rtx (SImode);
  rtx byte_mode_size_tmp = gen_reg_rtx (SImode);
  rtx word_mode_end = gen_reg_rtx (SImode);
  rtx size_for_word = gen_reg_rtx (SImode);

  /* and $size_for_word, $size, #~0x7 */
  size_for_word = expand_binop (SImode, and_optab, size,
                                gen_int_mode (~0x7, SImode),
                                NULL_RTX, 0, OPTAB_WIDEN);

  emit_move_insn (byte_mode_size, size);

  /* beqz $size_for_word, .Lword_mode_end */
  emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL,
                           SImode, 1, word_mode_end_label);
  /* add $word_mode_end, $dst, $size_for_word */
  word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
                                NULL_RTX, 0, OPTAB_WIDEN);

  /* andi $byte_mode_size, $size, 0x7 */
  byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
                                     NULL_RTX, 0, OPTAB_WIDEN);

  emit_move_insn (byte_mode_size, byte_mode_size_tmp);

  /* .Lword_mode: */
  emit_label (word_mode_label);
  /* ! word-mode set loop
     smw.bim $value4word, [$dst_itr], $value4word, 0
     bne $word_mode_end, $dst_itr, .Lword_mode */
  emit_insn (gen_unaligned_store_update_base_dw (itr,
                                                 itr,
                                                 value));
  emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
                           Pmode, 1, word_mode_label);

  emit_label (word_mode_end_label);

  return byte_mode_size;
}

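/* Emit a loop that stores the QImode VALUE through ITR one byte at a
   time, SIZE times.  When NEED_END, compute the end address before the
   zero-size check and return it; otherwise return NULL_RTX.  */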
static rtx
emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
{
  rtx end = gen_reg_rtx (Pmode);
  rtx byte_mode_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();

  value = force_reg (QImode, value);

  if (need_end)
    end = expand_binop (Pmode, add_optab, itr, size,
                        NULL_RTX, 0, OPTAB_WIDEN);
  /* beqz $byte_mode_size, .Lend
     add $byte_mode_end, $dst_itr, $byte_mode_size */
  emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL,
                           SImode, 1, end_label);

  if (!need_end)
    end = expand_binop (Pmode, add_optab, itr, size,
                        NULL_RTX, 0, OPTAB_WIDEN);

  /* .Lbyte_mode: */
  emit_label (byte_mode_label);

  /* ! byte-mode set loop
     sbi.bi $value, [$dst_itr], #1
     bne $byte_mode_end, $dst_itr, .Lbyte_mode */
  nds32_emit_post_inc_load_store (value, itr, QImode, false);

  emit_cmp_and_jump_insns (end, itr, NE, NULL,
                           Pmode, 1, byte_mode_label);
  /* .Lend: */
  emit_label (end_label);

  if (need_end)
    return end;
  else
    return NULL_RTX;
}

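/* Expand setmem as a double-word store loop followed by a byte loop
   for the remainder.  */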
static bool
nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
{
  rtx value4doubleword;
  rtx value4byte;
  rtx dst;
  rtx byte_mode_size;

  /* Emit loop version of setmem.
     memset:
       ! prepare word
       andi $tmp1, $val, 0xff        ! $tmp1 <- 0x000000ab
       slli $tmp2, $tmp1, 8          ! $tmp2 <- 0x0000ab00
       or   $tmp3, $val, $tmp2       ! $tmp3 <- 0x0000abab
       slli $tmp4, $tmp3, 16         ! $tmp4 <- 0xabab0000
       or   $val4word, $tmp3, $tmp4  ! $value4word <- 0xabababab

       and $size_for_word, $size, #~7
       beqz $size_for_word, .Lword_mode_end

       add $word_mode_end, $dst, $size_for_word
       andi $byte_mode_size, $size, 7

     .Lword_mode:
       ! word-mode set loop
       smw.bim $value4word, [$dst], $value4word, 0
       bne $word_mode_end, $dst, .Lword_mode

     .Lword_mode_end:
       beqz $byte_mode_size, .Lend
       add $byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
       ! byte-mode set loop
       sbi.bi $value4word, [$dst], #1
       bne $byte_mode_end, $dst, .Lbyte_mode
     .Lend: */

  dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  /* ! prepare word
     andi $tmp1, $value, 0xff      ! $tmp1 <- 0x000000ab
     slli $tmp2, $tmp1, 8          ! $tmp2 <- 0x0000ab00
     or   $tmp3, $tmp1, $tmp2      ! $tmp3 <- 0x0000abab
     slli $tmp4, $tmp3, 16         ! $tmp4 <- 0xabab0000
     or   $val4word, $tmp3, $tmp4  ! $value4word <- 0xabababab */
  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);

  /* and $size_for_word, $size, #~7
     beqz $size_for_word, .Lword_mode_end

     add $word_mode_end, $dst, $size_for_word
     andi $byte_mode_size, $size, 7

     .Lword_mode:
       ! word-mode set loop
       smw.bim $value4word, [$dst], $value4word, 0
       bne $word_mode_end, $dst, .Lword_mode
     .Lword_mode_end: */
  byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);

  /* beqz $byte_mode_size, .Lend
     add $byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
       ! byte-mode set loop
       sbi.bi $value, [$dst], #1
       bne $byte_mode_end, $dst, .Lbyte_mode
     .Lend: */

  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
                                    subreg_lowpart_offset (QImode, DImode));

  emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);

  return true;
}

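/* Expand setmem for v3m targets that cannot make unaligned accesses:
   set leading bytes until the base is 4-byte aligned, use the
   double-word loop for the middle, and finish the tail byte by byte.
   Blocks of 16 bytes or less go straight to the byte loop.  */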
static bool
nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
{
  rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  rtx need_align_bytes = gen_reg_rtx (SImode);
  rtx last_2_bit = gen_reg_rtx (SImode);
  rtx byte_loop_base = gen_reg_rtx (SImode);
  rtx byte_loop_size = gen_reg_rtx (SImode);
  rtx remain_size = gen_reg_rtx (SImode);
  rtx new_base_reg;
  rtx value4byte, value4doubleword;
  rtx byte_mode_size;
  rtx last_byte_loop_label = gen_label_rtx ();

  size = force_reg (SImode, size);

  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
                                    subreg_lowpart_offset (QImode, DImode));

  emit_move_insn (byte_loop_size, size);
  emit_move_insn (byte_loop_base, base_reg);

  /* Jump to the last byte loop if the size is 16 or less.  */
  emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL,
                           SImode, 1, last_byte_loop_label);

  /* Make sure we are aligned to 4 bytes first, since v3m cannot make
     unaligned accesses.  */
  emit_insn (gen_andsi3 (last_2_bit,
                         base_reg,
                         gen_int_mode (0x3, SImode)));

  emit_insn (gen_subsi3 (need_align_bytes,
                         gen_int_mode (4, SImode),
                         last_2_bit));

  /* Align to 4 bytes.  */
  new_base_reg = emit_setmem_byte_loop (base_reg,
                                        need_align_bytes,
                                        value4byte,
                                        true);

  /* Calculate the remaining size.  */
  emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));

  /* Set memory double word by double word.  */
  byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
                                                remain_size,
                                                value4doubleword);

  emit_move_insn (byte_loop_base, new_base_reg);
  emit_move_insn (byte_loop_size, byte_mode_size);

  emit_label (last_byte_loop_label);

  /* And set memory for the remaining bytes.  */
  emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false);
  return true;
}

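/* Expand setmem by fully unrolling into store-multiple instructions:
   broadcast VALUE into a run of registers starting at START_REGNO,
   emit smw instructions, and finish any leftover bytes with
   single-byte stores.  Only applies when SIZE is a small enough
   compile-time constant.  */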
static bool
nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
                            rtx align ATTRIBUTE_UNUSED,
                            rtx expected_align ATTRIBUTE_UNUSED,
                            rtx expected_size ATTRIBUTE_UNUSED)
{
  unsigned maximum_regs, maximum_bytes, start_regno, regno;
  rtx value4word;
  rtx dst_base_reg, new_base_reg;
  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs;
  unsigned HOST_WIDE_INT fill_per_smw;
  unsigned HOST_WIDE_INT real_size;

  if (TARGET_REDUCED_REGS)
    {
      maximum_regs = 4;
      maximum_bytes = 64;
      start_regno = 2;
    }
  else
    {
      maximum_regs = 8;
      maximum_bytes = 128;
      start_regno = 16;
    }

  /* SIZE must be a compile-time constant before its value is read.  */
  if (!CONST_INT_P (size))
    return false;

  real_size = UINTVAL (size) & GET_MODE_MASK (SImode);

  if (real_size > maximum_bytes)
    return false;

  remain_bytes = real_size;

  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  value4word = nds32_gen_dup_4_byte_to_word_value (value);

  prepare_regs = remain_bytes / UNITS_PER_WORD;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  fill_per_smw = prepare_regs * UNITS_PER_WORD;

  regno = start_regno;
  switch (prepare_regs)
    {
    case 2:
    default:
      {
        rtx reg0 = gen_rtx_REG (SImode, regno);
        rtx reg1 = gen_rtx_REG (SImode, regno + 1);
        unsigned last_regno = start_regno + prepare_regs - 1;

        emit_move_insn (reg0, value4word);
        emit_move_insn (reg1, value4word);
        rtx regd = gen_rtx_REG (DImode, regno);
        regno += 2;

        /* Try to utilize movd44!  */
        while (regno <= last_regno)
          {
            if ((regno + 1) <= last_regno)
              {
                rtx reg = gen_rtx_REG (DImode, regno);
                emit_move_insn (reg, regd);
                regno += 2;
              }
            else
              {
                rtx reg = gen_rtx_REG (SImode, regno);
                emit_move_insn (reg, reg0);
                regno += 1;
              }
          }
        break;
      }
    case 1:
      {
        rtx reg = gen_rtx_REG (SImode, regno++);
        emit_move_insn (reg, value4word);
      }
      break;
    case 0:
      break;
    }

  if (fill_per_smw)
    for (; remain_bytes >= fill_per_smw; remain_bytes -= fill_per_smw)
      {
        emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
                                                dst_base_reg, dstmem,
                                                true, &new_base_reg));
        dst_base_reg = new_base_reg;
        dstmem = gen_rtx_MEM (SImode, dst_base_reg);
      }

  remain_words = remain_bytes / UNITS_PER_WORD;

  if (remain_words)
    {
      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
                                              dst_base_reg, dstmem,
                                              true, &new_base_reg));
      dst_base_reg = new_base_reg;
      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
    }

  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_bytes)
    {
      value = simplify_gen_subreg (QImode, value4word, SImode,
                                   subreg_lowpart_offset (QImode, SImode));
      int offset = 0;
      for (; remain_bytes; --remain_bytes, ++offset)
        nds32_emit_load_store (value, dstmem, QImode, offset, false);
    }

  return true;
}

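/* Main entry for the setmem pattern.  Try the v3m-safe loop, then the
   unrolled form, then the generic loop.  Only expands at -O3.  */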
bool
nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align,
                     rtx expected_align,
                     rtx expected_size)
{
  bool align_to_4_bytes = (INTVAL (align) & 3) == 0;

  /* Only expand at -O3.  */
  if (optimize_size || optimize < 3)
    return false;

  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return nds32_expand_setmem_loop_v3m (dstmem, size, value);

  if (nds32_expand_setmem_unroll (dstmem, size, value,
                                  align, expected_align, expected_size))
    return true;

  return nds32_expand_setmem_loop (dstmem, size, value);
}

/* ------------------------------------------------------------------------ */

/* Auxiliary function for expand strlen pattern.  */

bool
nds32_expand_strlen (rtx result, rtx str,
                     rtx target_char, rtx align ATTRIBUTE_UNUSED)
{
  rtx base_reg, backup_base_reg;
  rtx ffb_result;
  rtx target_char_ptr, length;
  rtx loop_label, tmp;

  if (optimize_size || optimize < 3)
    return false;

  gcc_assert (MEM_P (str));
  gcc_assert (CONST_INT_P (target_char) || REG_P (target_char));

  base_reg = copy_to_mode_reg (SImode, XEXP (str, 0));
  loop_label = gen_label_rtx ();

  ffb_result = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (SImode);
  backup_base_reg = gen_reg_rtx (SImode);

  /* Emit loop version of strlen.
       move $backup_base, $base
     .Lloop:
       lmw.bim $tmp, [$base], $tmp, 0
       ffb $ffb_result, $tmp, $target_char  ! is there $target_char?
       beqz $ffb_result, .Lloop
       add $target_char_ptr, $base, $ffb_result
       sub $length, $target_char_ptr, $backup_base */

  /* move $backup_base, $base */
  emit_move_insn (backup_base_reg, base_reg);

  /* .Lloop: */
  emit_label (loop_label);
  /* lmw.bim $tmp, [$base], $tmp, 0 */
  emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg));

  /* ffb $ffb_result, $tmp, $target_char  ! is there $target_char? */
  emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char));

  /* beqz $ffb_result, .Lloop */
  emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL,
                           SImode, 1, loop_label);

  /* add $target_char_ptr, $base, $ffb_result */
  target_char_ptr = expand_binop (Pmode, add_optab, base_reg,
                                  ffb_result, NULL_RTX, 0, OPTAB_WIDEN);

  /* sub $length, $target_char_ptr, $backup_base */
  length = expand_binop (Pmode, sub_optab, target_char_ptr,
                         backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN);

  emit_move_insn (result, length);

  return true;
}

/* ------------------------------------------------------------------------ */

/* Functions to expand load_multiple and store_multiple.
   They are auxiliary extern functions to help create rtx templates.
   Check the nds32-multiple.md file for the patterns.  */
rtx
nds32_expand_load_multiple (int base_regno, int count,
                            rtx base_addr, rtx basemem,
                            bool update_base_reg_p,
                            rtx *update_base_reg)
{
  int par_index;
  int offset;
  int start_idx;
  rtx result;
  rtx new_addr, mem, reg;

  /* Generate an unaligned load to prevent the load instruction from being
     pulled out of the parallel; otherwise it would be turned into lwi and
     the unaligned access would be lost.  */
  if (count == 1)
    {
      reg = gen_rtx_REG (SImode, base_regno);
      if (update_base_reg_p)
        {
          *update_base_reg = gen_reg_rtx (SImode);
          return gen_unaligned_load_update_base_w (*update_base_reg,
                                                   reg, base_addr);
        }
      else
        return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr));
    }

  /* Create the pattern that is presented in nds32-multiple.md.  */
  if (update_base_reg_p)
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
      start_idx = 1;
    }
  else
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
      start_idx = 0;
    }

  if (update_base_reg_p)
    {
      offset = count * 4;
      new_addr = plus_constant (Pmode, base_addr, offset);
      *update_base_reg = gen_reg_rtx (SImode);

      XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
    }

  for (par_index = 0; par_index < count; par_index++)
    {
      offset = par_index * 4;
      /* 4 bytes for loading data to each register.  */
      new_addr = plus_constant (Pmode, base_addr, offset);
      mem = adjust_automodify_address_nv (basemem, SImode,
                                          new_addr, offset);
      reg = gen_rtx_REG (SImode, base_regno + par_index);

      XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (reg, mem);
    }

  return result;
}

rtx
nds32_expand_store_multiple (int base_regno, int count,
                             rtx base_addr, rtx basemem,
                             bool update_base_reg_p,
                             rtx *update_base_reg)
{
  int par_index;
  int offset;
  int start_idx;
  rtx result;
  rtx new_addr, mem, reg;

  if (count == 1)
    {
      reg = gen_rtx_REG (SImode, base_regno);
      if (update_base_reg_p)
        {
          *update_base_reg = gen_reg_rtx (SImode);
          return gen_unaligned_store_update_base_w (*update_base_reg,
                                                    base_addr, reg);
        }
      else
        return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg);
    }

  /* Create the pattern that is presented in nds32-multiple.md.  */

  if (update_base_reg_p)
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
      start_idx = 1;
    }
  else
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
      start_idx = 0;
    }

  if (update_base_reg_p)
    {
      offset = count * 4;
      new_addr = plus_constant (Pmode, base_addr, offset);
      *update_base_reg = gen_reg_rtx (SImode);

      XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
    }

  for (par_index = 0; par_index < count; par_index++)
    {
      offset = par_index * 4;
      /* 4 bytes for storing data to memory.  */
      new_addr = plus_constant (Pmode, base_addr, offset);
      mem = adjust_automodify_address_nv (basemem, SImode,
                                          new_addr, offset);
      reg = gen_rtx_REG (SImode, base_regno + par_index);

      XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (mem, reg);
    }

  return result;
}

/* ------------------------------------------------------------------------ */