/* Auxiliary functions for expand movmem, setmem, cmpmem, load_multiple
   and store_multiple pattern of Andes NDS32 cpu for GNU compiler
   Copyright (C) 2012-2019 Free Software Foundation, Inc.
   Contributed by Andes Technology Corporation.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* ------------------------------------------------------------------------ */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "explow.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "nds32-protos.h"

/* ------------------------------------------------------------------------ */

/* Auxiliary static function definitions.  */

static void
nds32_emit_load_store (rtx reg, rtx mem,
                       enum machine_mode mode,
                       int offset, bool load_p)
{
  rtx new_mem;
  new_mem = adjust_address (mem, mode, offset);
  if (load_p)
    emit_move_insn (reg, new_mem);
  else
    emit_move_insn (new_mem, reg);
}

static void
nds32_emit_post_inc_load_store (rtx reg, rtx base_reg,
                                enum machine_mode mode,
                                bool load_p)
{
  gcc_assert (GET_MODE (reg) == mode);
  gcc_assert (GET_MODE (base_reg) == Pmode);

  /* Do not generate (set (reg) (mem (post_inc (reg)))) directly here, since
     it may not be recognized by gcc; let gcc combine it in the auto_inc_dec
     pass instead.  */
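  /* Sketch of the intended combination: we emit the plain pair
       (set (reg) (mem (base_reg)))
       (set (base_reg) (plus (base_reg) (const_int size)))
     and the auto_inc_dec pass may later fuse them into
       (set (reg) (mem (post_inc (base_reg)))).  */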
  if (load_p)
    emit_move_insn (reg,
                    gen_rtx_MEM (mode,
                                 base_reg));
  else
    emit_move_insn (gen_rtx_MEM (mode,
                                 base_reg),
                    reg);

  emit_move_insn (base_reg,
                  plus_constant (Pmode, base_reg, GET_MODE_SIZE (mode)));
}

static void
nds32_emit_mem_move (rtx src, rtx dst,
                     enum machine_mode mode,
                     int addr_offset)
{
  gcc_assert (MEM_P (src) && MEM_P (dst));
  rtx tmp_reg = gen_reg_rtx (mode);
  nds32_emit_load_store (tmp_reg, src, mode,
                         addr_offset, /* load_p */ true);
  nds32_emit_load_store (tmp_reg, dst, mode,
                         addr_offset, /* load_p */ false);
}

static void
nds32_emit_mem_move_block (int base_regno, int count,
                           rtx *dst_base_reg, rtx *dst_mem,
                           rtx *src_base_reg, rtx *src_mem,
                           bool update_base_reg_p)
{
  rtx new_base_reg;

  emit_insn (nds32_expand_load_multiple (base_regno, count,
                                         *src_base_reg, *src_mem,
                                         update_base_reg_p, &new_base_reg));
  if (update_base_reg_p)
    {
      *src_base_reg = new_base_reg;
      *src_mem = gen_rtx_MEM (SImode, *src_base_reg);
    }

  emit_insn (nds32_expand_store_multiple (base_regno, count,
                                          *dst_base_reg, *dst_mem,
                                          update_base_reg_p, &new_base_reg));

  if (update_base_reg_p)
    {
      *dst_base_reg = new_base_reg;
      *dst_mem = gen_rtx_MEM (SImode, *dst_base_reg);
    }
}

/* ------------------------------------------------------------------------ */

/* Auxiliary function for expand movmem pattern.  */

static bool
nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem,
                                         rtx size,
                                         rtx alignment)
{
  /* Emit loop version of movmem.

       andi    $size_least_3_bit, $size, #~7
       add     $dst_end, $dst, $size
       move    $dst_itr, $dst
       move    $src_itr, $src
       beqz    $size_least_3_bit, .Lbyte_mode_entry ! Not large enough.
       add     $double_word_end, $dst, $size_least_3_bit

     .Ldouble_word_mode_loop:
       lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
       smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
       ! The moves will be deleted after register allocation.
       move    $src_itr, $src_itr'
       move    $dst_itr, $dst_itr'
       ! Upper bound not reached yet.  Loop.
       bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop

     .Lbyte_mode_entry:
       beq     $dst_itr, $dst_end, .Lend_label
     .Lbyte_mode_loop:
       lbi.bi  $tmp, [$src_itr], #1
       sbi.bi  $tmp, [$dst_itr], #1
       ! Upper bound not reached yet.  Loop.
       bne     $dst_itr, $dst_end, .Lbyte_mode_loop
     .Lend_label:
  */
  rtx dst_base_reg, src_base_reg;
  rtx dst_itr, src_itr;
  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
  rtx dst_end;
  rtx size_least_3_bit;
  rtx double_word_end;
  rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label;
  rtx tmp;
  rtx mask_least_3_bit;
  int start_regno;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;

  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return false;

  if (TARGET_REDUCED_REGS)
    start_regno = 2;
  else
    start_regno = 16;

  dst_itr = gen_reg_rtx (Pmode);
  src_itr = gen_reg_rtx (Pmode);
  dst_end = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (QImode);
  mask_least_3_bit = GEN_INT (~7);
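  /* ANDing with ~7 clears the low three bits, so the masked size computed
     below is the size rounded down to a multiple of 8, i.e. the number of
     bytes the double-word loop will copy.  */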

  double_word_mode_loop = gen_label_rtx ();
  byte_mode_entry = gen_label_rtx ();
  byte_mode_loop = gen_label_rtx ();
  end_label = gen_label_rtx ();

  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
  /* andi   $size_least_3_bit, $size, #~7 */
  size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit,
                                   NULL_RTX, 0, OPTAB_WIDEN);
  /* add     $dst_end, $dst, $size */
  dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                          NULL_RTX, 0, OPTAB_WIDEN);

  /* move    $dst_itr, $dst
     move    $src_itr, $src */
  emit_move_insn (dst_itr, dst_base_reg);
  emit_move_insn (src_itr, src_base_reg);

  /* beqz    $size_least_3_bit, .Lbyte_mode_entry ! Not large enough.  */
  emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL,
                           SImode, 1, byte_mode_entry);
  /* add     $double_word_end, $dst, $size_least_3_bit */
  double_word_end = expand_binop (Pmode, add_optab,
                                  dst_base_reg, size_least_3_bit,
                                  NULL_RTX, 0, OPTAB_WIDEN);

  /* .Ldouble_word_mode_loop: */
  emit_label (double_word_mode_loop);
  /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
     smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
  src_itr_m = src_itr;
  dst_itr_m = dst_itr;
  srcmem_m = srcmem;
  dstmem_m = dstmem;
  nds32_emit_mem_move_block (start_regno, 2,
                             &dst_itr_m, &dstmem_m,
                             &src_itr_m, &srcmem_m,
                             true);
  /* move    $src_itr, $src_itr'
     move    $dst_itr, $dst_itr' */
  emit_move_insn (dst_itr, dst_itr_m);
  emit_move_insn (src_itr, src_itr_m);

  /* ! Upper bound not reached yet.  Loop.
     bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
  emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL,
                           Pmode, 1, double_word_mode_loop);
  /* .Lbyte_mode_entry: */
  emit_label (byte_mode_entry);

  /* beq     $dst_itr, $dst_end, .Lend_label */
  emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL,
                           Pmode, 1, end_label);
  /* .Lbyte_mode_loop: */
  emit_label (byte_mode_loop);

  /* lbi.bi  $tmp, [$src_itr], #1 */
  nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);

  /* sbi.bi  $tmp, [$dst_itr], #1 */
  nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
  /* ! Upper bound not reached yet.  Loop.
     bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
  emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
                           SImode, 1, byte_mode_loop);

  /* .Lend_label: */
  emit_label (end_label);

  return true;
}

static bool
nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
                                       rtx size, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx dst_itr, src_itr;
  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
  rtx dst_end;
  rtx double_word_mode_loop, byte_mode_loop;
  rtx tmp;
  int start_regno;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);

  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return false;

  if (TARGET_REDUCED_REGS)
    start_regno = 2;
  else
    start_regno = 16;

  dst_itr = gen_reg_rtx (Pmode);
  src_itr = gen_reg_rtx (Pmode);
  dst_end = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (QImode);

  double_word_mode_loop = gen_label_rtx ();
  byte_mode_loop = gen_label_rtx ();

  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));

  if (total_bytes < 8)
    {
      /* Emit the loop version of movmem for total_bytes less than 8.
	add     $dst_end, $dst, $size
	move    $dst_itr, $dst
	.Lbyte_mode_loop:
	lbi.bi  $tmp, [$src_itr], #1
	sbi.bi  $tmp, [$dst_itr], #1
	! Upper bound not reached yet.  Loop.
	bne     $dst_itr, $dst_end, .Lbyte_mode_loop */

      /* add     $dst_end, $dst, $size */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                              NULL_RTX, 0, OPTAB_WIDEN);
      /* move    $dst_itr, $dst
	 move    $src_itr, $src */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Lbyte_mode_loop: */
      emit_label (byte_mode_loop);

      /* lbi.bi  $tmp, [$src_itr], #1 */
      nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);

      /* sbi.bi  $tmp, [$dst_itr], #1 */
      nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
      /* ! Upper bound not reached yet.  Loop.
	 bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
      emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
                               SImode, 1, byte_mode_loop);
      return true;
    }
  else if (total_bytes % 8 == 0)
    {
      /* Emit the loop version of movmem for a multiple of 8 bytes.

	 add     $dst_end, $dst, $size
	 move    $dst_itr, $dst
	 move    $src_itr, $src

	.Ldouble_word_mode_loop:
	lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
	smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
	! The moves will be deleted after register allocation.
	move    $src_itr, $src_itr'
	move    $dst_itr, $dst_itr'
	! Upper bound not reached yet.  Loop.
	bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */

      /* add     $dst_end, $dst, $size */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                              NULL_RTX, 0, OPTAB_WIDEN);

      /* move    $dst_itr, $dst
	 move    $src_itr, $src */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Ldouble_word_mode_loop: */
      emit_label (double_word_mode_loop);
      /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
	 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
      src_itr_m = src_itr;
      dst_itr_m = dst_itr;
      srcmem_m = srcmem;
      dstmem_m = dstmem;
      nds32_emit_mem_move_block (start_regno, 2,
                                 &dst_itr_m, &dstmem_m,
                                 &src_itr_m, &srcmem_m,
                                 true);
      /* move    $src_itr, $src_itr'
	 move    $dst_itr, $dst_itr' */
      emit_move_insn (dst_itr, dst_itr_m);
      emit_move_insn (src_itr, src_itr_m);

      /* ! Upper bound not reached yet.  Loop.
	 bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
      emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
                               Pmode, 1, double_word_mode_loop);
    }
  else
    {
      /* Handle size greater than 8 and not a multiple of 8.  */
      return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
                                                      size, alignment);
    }

  return true;
}

static bool
nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem,
                            rtx size, rtx alignment)
{
  if (CONST_INT_P (size))
    return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem,
                                                  size, alignment);
  else
    return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
                                                    size, alignment);
}

static bool
nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem,
                              rtx total_bytes, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx tmp_reg;
  int maximum_bytes;
  int maximum_bytes_per_inst;
  int maximum_regs;
  int start_regno;
  int i, inst_num;
  HOST_WIDE_INT remain_bytes, remain_words;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0;

  /* Because the reduced register set has few registers
     (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31'
      cannot be used for register allocation),
     using 8 registers (32 bytes) for moving a memory block
     may easily consume all of them.
     That makes register allocation/spilling hard to work.
     So we only allow a maximum of 4 registers (16 bytes) for
     moving a memory block under the reduced register set.  */
  if (TARGET_REDUCED_REGS)
    {
      maximum_regs  = 4;
      maximum_bytes = 64;
      start_regno   = 2;
    }
  else
    {
      /* $r25 is $tp so we use up to 8 registers.  */
      maximum_regs  = 8;
      maximum_bytes = 160;
      start_regno   = 16;
    }
  maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD;

  /* 1. Total_bytes is integer for sure.
     2. Alignment is integer for sure.
     3. Maximum 4 or 8 registers and up to 4 or 5 instructions:
	4 * 4 * 4 = 64 bytes, 8 * 4 * 5 = 160 bytes.
     4. The dstmem cannot be volatile memory access.
     5. The srcmem cannot be volatile memory access.
     6. Bail out on V3M when the known shared alignment is not 4-byte
	aligned, since lmw/smw do *NOT* support unaligned access in the
	V3M configuration.  */
  if (GET_CODE (total_bytes) != CONST_INT
      || GET_CODE (alignment) != CONST_INT
      || INTVAL (total_bytes) > maximum_bytes
      || MEM_VOLATILE_P (dstmem)
      || MEM_VOLATILE_P (srcmem)
      || (TARGET_ISA_V3M && !align_to_4_bytes))
    return false;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0));
  remain_bytes = INTVAL (total_bytes);

  /* Do not update the base address for the last lmw/smw pair.  */
  inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1))
              / maximum_bytes_per_inst) - 1;
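  /* For example, with total_bytes = 100 and maximum_bytes_per_inst = 32
     (8 registers * 4 bytes), inst_num = ceil (100 / 32) - 1 = 3: three
     full lmw/smw pairs with base-address update are emitted below, and the
     remaining 4 bytes are left to the remain_words/remain_bytes code that
     follows.  */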

  for (i = 0; i < inst_num; i++)
    {
      nds32_emit_mem_move_block (start_regno, maximum_regs,
                                 &dst_base_reg, &dstmem,
                                 &src_base_reg, &srcmem,
                                 true);
    }
  remain_bytes -= maximum_bytes_per_inst * inst_num;

  remain_words = remain_bytes / UNITS_PER_WORD;
  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_words != 0)
    {
      if (remain_bytes != 0)
	nds32_emit_mem_move_block (start_regno, remain_words,
                                   &dst_base_reg, &dstmem,
                                   &src_base_reg, &srcmem,
                                   true);
      else
	{
	  /* Do not update the address if there is no further byte to
	     move.  */
	  if (remain_words == 1)
	    {
	      /* Emit a single move instruction if we are aligned to
		 4 bytes and only one word is left to move.  */
	      if (align_to_4_bytes)
		nds32_emit_mem_move (srcmem, dstmem, SImode, 0);
	      else
		{
		  tmp_reg = gen_reg_rtx (SImode);
		  emit_insn (
		    gen_unaligned_load_w (tmp_reg,
					  gen_rtx_MEM (SImode, src_base_reg)));
		  emit_insn (
		    gen_unaligned_store_w (gen_rtx_MEM (SImode, dst_base_reg),
					   tmp_reg));
		}
	    }
	  else
	    nds32_emit_mem_move_block (start_regno, remain_words,
                                       &dst_base_reg, &dstmem,
                                       &src_base_reg, &srcmem,
                                       false);
	}
    }

  switch (remain_bytes)
    {
    case 3:
    case 2:
      {
	if (align_to_2_bytes)
	  nds32_emit_mem_move (srcmem, dstmem, HImode, 0);
	else
	  {
	    nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
	    nds32_emit_mem_move (srcmem, dstmem, QImode, 1);
	  }

	if (remain_bytes == 3)
	  nds32_emit_mem_move (srcmem, dstmem, QImode, 2);
	break;
      }
    case 1:
      nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
      break;
    case 0:
      break;
    default:
      gcc_unreachable ();
    }

  /* Successfully created patterns, return true.  */
  return true;
}

/* Function to move block memory content by
   using load_multiple and store_multiple.
   This is an auxiliary extern function to help create rtx templates.
   Check nds32-multiple.md file for the patterns.  */
bool
nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
{
  if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment))
    return true;

  if (!optimize_size && optimize > 2)
    return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment);

  return false;
}

/* ------------------------------------------------------------------------ */

/* Auxiliary function for expand setmem pattern.  */

static rtx
nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
{
  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK (QImode);
      rtx new_val = gen_int_mode (val | (val << 8)
				  | (val << 16) | (val << 24), SImode);
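      /* For example, val = 0xab gives
	 0xab | 0xab00 | 0xab0000 | 0xab000000 = 0xabababab.  */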
      /* Since the value is constant, just compute the result here.  */
      emit_move_insn (value4word, new_val);
    }
  else
    {
      if (NDS32_EXT_DSP_P ())
	{
	  /* ! prepare word
	     insb    $tmp, $value, 1          ! $tmp  <- 0x0000abab
	     pkbb16  $value4word, $tmp, $tmp  ! $value4word  <- 0xabababab */
	  rtx tmp = gen_reg_rtx (SImode);

	  convert_move (tmp, value, true);

	  emit_insn (
	    gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp));

	  emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp));
	}
      else
	{
	  /* ! prepare word
	     andi    $tmp1, $value, 0xff       ! $tmp1  <- 0x000000ab
	     slli    $tmp2, $tmp1, 8           ! $tmp2  <- 0x0000ab00
	     or      $tmp3, $tmp1, $tmp2       ! $tmp3  <- 0x0000abab
	     slli    $tmp4, $tmp3, 16          ! $tmp4  <- 0xabab0000
	     or      $val4word, $tmp3, $tmp4   ! $value4word  <- 0xabababab  */

	  rtx tmp1, tmp2, tmp3, tmp4;
	  tmp1 = expand_binop (SImode, and_optab, value,
			       gen_int_mode (0xff, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp2 = expand_binop (SImode, ashl_optab, tmp1,
			       gen_int_mode (8, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp4 = expand_binop (SImode, ashl_optab, tmp3,
			       gen_int_mode (16, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_iorsi3 (value4word, tmp3, tmp4));
	}
    }

  return value4word;
}

static rtx
nds32_gen_dup_4_byte_to_word_value (rtx value)
{
  rtx value4word = gen_reg_rtx (SImode);
  nds32_gen_dup_4_byte_to_word_value_aux (value, value4word);

  return value4word;
}

static rtx
nds32_gen_dup_8_byte_to_double_word_value (rtx value)
{
  rtx value4doubleword = gen_reg_rtx (DImode);

  nds32_gen_dup_4_byte_to_word_value_aux (
    value, nds32_di_low_part_subreg (value4doubleword));

  emit_move_insn (nds32_di_high_part_subreg (value4doubleword),
		  nds32_di_low_part_subreg (value4doubleword));
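  /* Both halves of the DImode register now hold the replicated byte;
     e.g. for value 0xab the double word is 0xabababab_abababab.  */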
  return value4doubleword;
}


static rtx
emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
{
  rtx word_mode_label = gen_label_rtx ();
  rtx word_mode_end_label = gen_label_rtx ();
  rtx byte_mode_size = gen_reg_rtx (SImode);
  rtx byte_mode_size_tmp = gen_reg_rtx (SImode);
  rtx word_mode_end = gen_reg_rtx (SImode);
  rtx size_for_word = gen_reg_rtx (SImode);

  /* and     $size_for_word, $size, #~0x7  */
  size_for_word = expand_binop (SImode, and_optab, size,
				gen_int_mode (~0x7, SImode),
				NULL_RTX, 0, OPTAB_WIDEN);
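  /* size_for_word is now the size rounded down to a multiple of 8; the low
     three bits (size & 0x7) are returned as byte_mode_size for the byte
     loop emitted by the caller.  */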

  emit_move_insn (byte_mode_size, size);

  /* beqz    $size_for_word, .Lword_mode_end  */
  emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL,
			   SImode, 1, word_mode_end_label);
  /* add     $word_mode_end, $dst, $size_for_word  */
  word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
				NULL_RTX, 0, OPTAB_WIDEN);

  /* andi    $byte_mode_size, $size, 0x7  */
  byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
				     NULL_RTX, 0, OPTAB_WIDEN);

  emit_move_insn (byte_mode_size, byte_mode_size_tmp);

  /* .Lword_mode:  */
  emit_label (word_mode_label);
  /*   ! word-mode set loop
       smw.bim $value4word, [$dst_itr], $value4word, 0
       bne     $word_mode_end, $dst_itr, .Lword_mode  */
  emit_insn (gen_unaligned_store_update_base_dw (itr,
						 itr,
						 value));
  emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
			   Pmode, 1, word_mode_label);

  emit_label (word_mode_end_label);

  return byte_mode_size;
}

static rtx
emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
{
  rtx end = gen_reg_rtx (Pmode);
  rtx byte_mode_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();

  value = force_reg (QImode, value);

  if (need_end)
    end = expand_binop (Pmode, add_optab, itr, size,
			NULL_RTX, 0, OPTAB_WIDEN);
  /*   beqz    $byte_mode_size, .Lend
       add     $byte_mode_end, $dst_itr, $byte_mode_size  */
  emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL,
			   SImode, 1, end_label);

  if (!need_end)
    end = expand_binop (Pmode, add_optab, itr, size,
			NULL_RTX, 0, OPTAB_WIDEN);

  /* .Lbyte_mode:  */
  emit_label (byte_mode_label);

  /*   ! byte-mode set loop
       sbi.bi  $value, [$dst_itr], 1
       bne     $byte_mode_end, $dst_itr, .Lbyte_mode */
  nds32_emit_post_inc_load_store (value, itr, QImode, false);

  emit_cmp_and_jump_insns (end, itr, NE, NULL,
			   Pmode, 1, byte_mode_label);
  /* .Lend: */
  emit_label (end_label);

  if (need_end)
    return end;
  else
    return NULL_RTX;
}

static bool
nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
{
  rtx value4doubleword;
  rtx value4byte;
  rtx dst;
  rtx byte_mode_size;

  /* Emit loop version of setmem.
     memset:
       ! prepare word
       andi    $tmp1, $val, 0xff               ! $tmp1  <- 0x000000ab
       slli    $tmp2, $tmp1, 8                 ! $tmp2  <- 0x0000ab00
       or      $tmp3, $val, $tmp2              ! $tmp3  <- 0x0000abab
       slli    $tmp4, $tmp3, 16                ! $tmp4  <- 0xabab0000
       or      $val4word, $tmp3, $tmp4         ! $value4word  <- 0xabababab

       and     $size_for_word, $size, #-4
       beqz    $size_for_word, .Lword_mode_end

       add     $word_mode_end, $dst, $size_for_word
       andi    $byte_mode_size, $size, 3

     .Lword_mode:
       ! word-mode set loop
       smw.bim $value4word, [$dst], $value4word, 0
       bne     $word_mode_end, $dst, .Lword_mode

     .Lword_mode_end:
       beqz    $byte_mode_size, .Lend
       add     $byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
       ! byte-mode set loop
       sbi.bi  $value4word, [$dst], 1
       bne     $byte_mode_end, $dst, .Lbyte_mode
     .Lend: */

  dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  /* ! prepare word
     andi    $tmp1, $value, 0xff             ! $tmp1  <- 0x000000ab
     slli    $tmp2, $tmp1, 8                 ! $tmp2  <- 0x0000ab00
     or      $tmp3, $tmp1, $tmp2             ! $tmp3  <- 0x0000abab
     slli    $tmp4, $tmp3, 16                ! $tmp4  <- 0xabab0000
     or      $val4word, $tmp3, $tmp4         ! $value4word  <- 0xabababab  */
  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);

  /*   and     $size_for_word, $size, #-4
       beqz    $size_for_word, .Lword_mode_end

       add     $word_mode_end, $dst, $size_for_word
       andi    $byte_mode_size, $size, 3

     .Lword_mode:
       ! word-mode set loop
       smw.bim $value4word, [$dst], $value4word, 0
       bne     $word_mode_end, $dst, .Lword_mode
     .Lword_mode_end:  */
  byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);

  /*   beqz    $byte_mode_size, .Lend
       add     $byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
       ! byte-mode set loop
       sbi.bi  $value, [$dst], 1
       bne     $byte_mode_end, $dst, .Lbyte_mode
     .Lend: */

  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
				    subreg_lowpart_offset (QImode, DImode));

  emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);

  return true;
}

static bool
nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
{
  rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  rtx need_align_bytes = gen_reg_rtx (SImode);
  rtx last_2_bit = gen_reg_rtx (SImode);
  rtx byte_loop_base = gen_reg_rtx (SImode);
  rtx byte_loop_size = gen_reg_rtx (SImode);
  rtx remain_size = gen_reg_rtx (SImode);
  rtx new_base_reg;
  rtx value4byte, value4doubleword;
  rtx byte_mode_size;
  rtx last_byte_loop_label = gen_label_rtx ();

  size = force_reg (SImode, size);

  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
				    subreg_lowpart_offset (QImode, DImode));

  emit_move_insn (byte_loop_size, size);
  emit_move_insn (byte_loop_base, base_reg);

  /* Jump to the last byte loop if size is less than or equal to 16.  */
  emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL,
			   SImode, 1, last_byte_loop_label);

  /* Make sure we align to 4 bytes first, since V3M cannot do unaligned
     access.  */
  emit_insn (gen_andsi3 (last_2_bit,
			 base_reg,
			 gen_int_mode (0x3, SImode)));

  emit_insn (gen_subsi3 (need_align_bytes,
			 gen_int_mode (4, SImode),
			 last_2_bit));

  /* Align to 4 bytes.  */
  new_base_reg = emit_setmem_byte_loop (base_reg,
					need_align_bytes,
					value4byte,
					true);

  /* Calculate the remaining size.  */
  emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));

  /* Set memory double word by double word.  */
  byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
						remain_size,
						value4doubleword);

  emit_move_insn (byte_loop_base, new_base_reg);
  emit_move_insn (byte_loop_size, byte_mode_size);

  emit_label (last_byte_loop_label);

  /* Finally set memory for the remaining bytes.  */
  emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false);
  return true;
}

static bool
nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
			    rtx align ATTRIBUTE_UNUSED,
			    rtx expected_align ATTRIBUTE_UNUSED,
			    rtx expected_size ATTRIBUTE_UNUSED)
{
  unsigned maximum_regs, maximum_bytes, start_regno, regno;
  rtx value4word;
  rtx dst_base_reg, new_base_reg;
  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
  unsigned HOST_WIDE_INT real_size;

  if (TARGET_REDUCED_REGS)
    {
      maximum_regs  = 4;
      maximum_bytes = 64;
      start_regno   = 2;
    }
  else
    {
      maximum_regs  = 8;
      maximum_bytes = 128;
      start_regno   = 16;
    }

  real_size = UINTVAL (size) & GET_MODE_MASK (SImode);

  if (!(CONST_INT_P (size) && real_size <= maximum_bytes))
    return false;

  remain_bytes = real_size;

  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  value4word = nds32_gen_dup_4_byte_to_word_value (value);

  prepare_regs = remain_bytes / UNITS_PER_WORD;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  fill_per_smw = prepare_regs * UNITS_PER_WORD;
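  /* fill_per_smw is the number of bytes one smw stores per iteration;
     e.g. 8 prepared registers with 4-byte words give 32 bytes per smw.  */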

  regno = start_regno;
  switch (prepare_regs)
    {
    case 2:
    default:
      {
	rtx reg0 = gen_rtx_REG (SImode, regno);
	rtx reg1 = gen_rtx_REG (SImode, regno + 1);
	unsigned last_regno = start_regno + prepare_regs - 1;

	emit_move_insn (reg0, value4word);
	emit_move_insn (reg1, value4word);
	rtx regd = gen_rtx_REG (DImode, regno);
	regno += 2;

	/* Try to utilize movd44!  */
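	/* A DImode move of an even/odd register pair is expected to be
	   emitted as a single movd44 double-word move, halving the number
	   of moves needed to broadcast value4word across the register
	   block.  */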
	while (regno <= last_regno)
	  {
	    if ((regno + 1) <= last_regno)
	      {
		rtx reg = gen_rtx_REG (DImode, regno);
		emit_move_insn (reg, regd);
		regno += 2;
	      }
	    else
	      {
		rtx reg = gen_rtx_REG (SImode, regno);
		emit_move_insn (reg, reg0);
		regno += 1;
	      }
	  }
	break;
      }
    case 1:
      {
	rtx reg = gen_rtx_REG (SImode, regno++);
	emit_move_insn (reg, value4word);
      }
      break;
    case 0:
      break;
    }

  if (fill_per_smw)
    for (; remain_bytes >= fill_per_smw; remain_bytes -= fill_per_smw)
      {
	emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
						dst_base_reg, dstmem,
						true, &new_base_reg));
	dst_base_reg = new_base_reg;
	dstmem = gen_rtx_MEM (SImode, dst_base_reg);
      }

  remain_words = remain_bytes / UNITS_PER_WORD;

  if (remain_words)
    {
      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
					      dst_base_reg, dstmem,
					      true, &new_base_reg));
      dst_base_reg = new_base_reg;
      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
    }

  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_bytes)
    {
      value = simplify_gen_subreg (QImode, value4word, SImode,
				   subreg_lowpart_offset (QImode, SImode));
      int offset = 0;
      for (; remain_bytes; --remain_bytes, ++offset)
	{
	  nds32_emit_load_store (value, dstmem, QImode, offset, false);
	}
    }

  return true;
}

bool
nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align,
		     rtx expected_align,
		     rtx expected_size)
{
  bool align_to_4_bytes = (INTVAL (align) & 3) == 0;

  /* Only expand at -O3.  */
  if (optimize_size || optimize < 3)
    return false;

  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return nds32_expand_setmem_loop_v3m (dstmem, size, value);

  if (nds32_expand_setmem_unroll (dstmem, size, value,
				  align, expected_align, expected_size))
    return true;

  return nds32_expand_setmem_loop (dstmem, size, value);
}

/* ------------------------------------------------------------------------ */

/* Auxiliary function for expand strlen pattern.  */

bool
nds32_expand_strlen (rtx result, rtx str,
		     rtx target_char, rtx align ATTRIBUTE_UNUSED)
{
  rtx base_reg, backup_base_reg;
  rtx ffb_result;
  rtx target_char_ptr, length;
  rtx loop_label, tmp;

  if (optimize_size || optimize < 3)
    return false;

  gcc_assert (MEM_P (str));
  gcc_assert (CONST_INT_P (target_char) || REG_P (target_char));

  base_reg = copy_to_mode_reg (SImode, XEXP (str, 0));
  loop_label = gen_label_rtx ();

  ffb_result = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (SImode);
  backup_base_reg = gen_reg_rtx (SImode);

  /* Emit loop version of strlen.
       move  $backup_base, $base
     .Lloop:
       lmw.bim $tmp, [$base], $tmp, 0
       ffb   $ffb_result, $tmp, $target_char   ! is there $target_char?
       beqz  $ffb_result, .Lloop
       add   $target_char_ptr, $base, $ffb_result
       sub   $length, $target_char_ptr, $backup_base  */

  /* move  $backup_base, $base  */
  emit_move_insn (backup_base_reg, base_reg);

  /* .Lloop:  */
  emit_label (loop_label);
  /* lmw.bim $tmp, [$base], $tmp, 0  */
  emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg));

  /* ffb   $ffb_result, $tmp, $target_char   ! is there $target_char?  */
  emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char));

  /* beqz  $ffb_result, .Lloop  */
  emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL,
			   SImode, 1, loop_label);

  /* add   $target_char_ptr, $base, $ffb_result  */
  target_char_ptr = expand_binop (Pmode, add_optab, base_reg,
				  ffb_result, NULL_RTX, 0, OPTAB_WIDEN);

  /* sub   $length, $target_char_ptr, $backup_base  */
  length = expand_binop (Pmode, sub_optab, target_char_ptr,
			 backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN);

  emit_move_insn (result, length);

  return true;
}

/* ------------------------------------------------------------------------ */

/* Functions to expand load_multiple and store_multiple.
   They are auxiliary extern functions to help create rtx templates.
   Check nds32-multiple.md file for the patterns.  */
rtx
nds32_expand_load_multiple (int base_regno, int count,
			    rtx base_addr, rtx basemem,
			    bool update_base_reg_p,
			    rtx *update_base_reg)
{
  int par_index;
  int offset;
  int start_idx;
  rtx result;
  rtx new_addr, mem, reg;

  /* Generate an unaligned load, to prevent the load instruction from being
     pulled out of the parallel; otherwise it would be turned into lwi and
     the unaligned access would be lost.  */
  if (count == 1)
    {
      reg = gen_rtx_REG (SImode, base_regno);
      if (update_base_reg_p)
	{
	  *update_base_reg = gen_reg_rtx (SImode);
	  return gen_unaligned_load_update_base_w (*update_base_reg, reg,
						   base_addr);
	}
      else
	return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr));
    }

  /* Create the pattern that is presented in nds32-multiple.md.  */
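  /* For example, with base_regno = 16, count = 2 and a base-register
     update, the PARALLEL built below looks like:
       (parallel [(set (reg new_base) (plus (reg base_addr) (const_int 8)))
		  (set (reg 16) (mem (reg base_addr)))
		  (set (reg 17) (mem (plus (reg base_addr)
					   (const_int 4))))])  */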
  if (update_base_reg_p)
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
      start_idx = 1;
    }
  else
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
      start_idx = 0;
    }

  if (update_base_reg_p)
    {
      offset           = count * 4;
      new_addr         = plus_constant (Pmode, base_addr, offset);
      *update_base_reg = gen_reg_rtx (SImode);

      XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
    }

  for (par_index = 0; par_index < count; par_index++)
    {
      offset   = par_index * 4;
      /* 4-byte for loading data to each register.  */
      new_addr = plus_constant (Pmode, base_addr, offset);
      mem      = adjust_automodify_address_nv (basemem, SImode,
					       new_addr, offset);
      reg      = gen_rtx_REG (SImode, base_regno + par_index);

      XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (reg, mem);
    }

  return result;
}

rtx
nds32_expand_store_multiple (int base_regno, int count,
			     rtx base_addr, rtx basemem,
			     bool update_base_reg_p,
			     rtx *update_base_reg)
{
  int par_index;
  int offset;
  int start_idx;
  rtx result;
  rtx new_addr, mem, reg;

  if (count == 1)
    {
      reg = gen_rtx_REG (SImode, base_regno);
      if (update_base_reg_p)
	{
	  *update_base_reg = gen_reg_rtx (SImode);
	  return gen_unaligned_store_update_base_w (*update_base_reg,
						    base_addr, reg);
	}
      else
	return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg);
    }

  /* Create the pattern that is presented in nds32-multiple.md.  */

  if (update_base_reg_p)
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
      start_idx = 1;
    }
  else
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
      start_idx = 0;
    }

  if (update_base_reg_p)
    {
      offset           = count * 4;
      new_addr         = plus_constant (Pmode, base_addr, offset);
      *update_base_reg = gen_reg_rtx (SImode);

      XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
    }

  for (par_index = 0; par_index < count; par_index++)
    {
      offset   = par_index * 4;
      /* 4-byte for storing data to memory.  */
      new_addr = plus_constant (Pmode, base_addr, offset);
      mem      = adjust_automodify_address_nv (basemem, SImode,
					       new_addr, offset);
      reg      = gen_rtx_REG (SImode, base_regno + par_index);

      XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (mem, reg);
    }

  return result;
}

/* ------------------------------------------------------------------------ */