/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "tm_p.h"
#include "emit-rtl.h"
#include "explow.h"
#include "expr.h"

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, RETURN_BEGIN);

      return true;
    }

  /* If the block doesn't have 4 byte alignment, or if its size isn't
     a multiple of 4 bytes, then fail.  (A non-constant size was
     already rejected above.)  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
						    ? "__movmem_i4_odd"
						    : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a
     different, less common function name, so this will occasionally
     use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

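      /* Checking the 72 byte example against the formulas below:
	 72 bytes = 18 words, so final_switch = 16 - (18 % 16) = 14 and
	 while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 14; the one
	 loop iteration subtracts 16, leaving the -2 switch index.  */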
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}

static const int prob_unlikely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 10)
    .to_reg_br_prob_note ();
static const int prob_likely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 4)
    .to_reg_br_prob_note ();

/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

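  /* cmp/str sets the T bit if any of the four byte pairs in its two
     operands are equal.  It is used here to look for a zero byte in
     tmp3, the AND of the two string words: a zero byte in either word
     forces a zero byte in the AND.  Two non-zero bytes whose AND
     happens to be zero only cause a harmless early drop into the
     exact byte-by-byte recheck at L_end_loop_long.  */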
  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fall thru, subtract the words.  */
  if (TARGET_LITTLE_ENDIAN)
    {
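      /* Byte-swap both words so the subtraction below compares in
	 string (memory) order: rotating the low half by 8, the whole
	 word by 16, then the low half by 8 again reverses all four
	 bytes.  Big-endian words are already in string order.  */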
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
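  /* With delayed branches, the zero extension of tmp2 is emitted
     inside the loop, presumably so it can be scheduled into the
     branch delay slot; otherwise it is done once at L_end_loop_byte
     instead.  */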
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}

/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);
  HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Loop on a register count.  */
  if (constp && bytes >= 0 && bytes < 32)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int witers = bytes / 4;

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* Word count.  Do we have iterations?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte?  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

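	  /* dt (decrement and test, SH-2 and later) decrements the
	     register and sets T when it reaches zero; older cores
	     need the explicit add/tst pair.  */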
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte by byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
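      /* Runtime or large length: fall through to the byte loop below.
	 A zero length must compare equal without touching memory, so
	 set the result to 0 and test the count up front.  */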
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}

/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the character to search for (always zero for strlen).
   OPERANDS[3] is the alignment.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx_insn *jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  */
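  /* The long loop exited because cmp/str matched the zero search
     character somewhere in this word, so one of the four branches
     below is always taken; the barrier after the loop records that
     execution cannot fall through.  */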
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

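  /* current_addr now points one past the terminating zero byte, so
     bump start_addr by one to make the subtraction yield the length
     without the terminator.  */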
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}

/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size.
   OPERANDS[2] is the value to fill with.
   OPERANDS[3] is the alignment.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_insn *jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = copy_to_mode_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = copy_to_mode_reg (SImode, operands[1]);

  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

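  /* The word-at-a-time path below is only usable when the fill value
     is 0 or -1: for those values all four bytes of the SImode VAL
     register already equal the QImode fill byte, so no byte
     replication is needed.  */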
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* Word count.  Do we have iterations?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* start loop.  */
      emit_label (L_loop_word);

      if (TARGET_SH2)
	emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));

      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* start loop.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
					    GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}