1;; AArch64 ldp/stp peephole optimizations.
2;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Pair up two consecutive loads of the same GPI mode into registers.
;; When aarch64_operands_ok_for_ldpstp accepts the four operands, the two
;; sets are re-emitted as a single parallel.
(define_peephole2
  [(set (match_operand:GPI 0 "register_operand")
	(match_operand:GPI 1 "memory_operand"))
   (set (match_operand:GPI 2 "register_operand")
	(match_operand:GPI 3 "memory_operand"))]
  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 2) (match_dup 3))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated.  The load peepholes in this
       file pass true as the second argument.  */
    aarch64_swap_ldrstr_operands (operands, true);
  }
)
32
;; Pair up two consecutive stores of the same GPI mode.  Each stored value
;; must satisfy aarch64_reg_or_zero.  When aarch64_operands_ok_for_ldpstp
;; accepts the operands, the two sets are re-emitted as a single parallel.
(define_peephole2
  [(set (match_operand:GPI 0 "memory_operand")
	(match_operand:GPI 1 "aarch64_reg_or_zero"))
   (set (match_operand:GPI 2 "memory_operand")
	(match_operand:GPI 3 "aarch64_reg_or_zero"))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 2) (match_dup 3))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated.  The store peepholes in this
       file pass false as the second argument.  */
    aarch64_swap_ldrstr_operands (operands, false);
  }
)
44
;; Pair up two consecutive loads of the same GPF mode into registers.
;; When aarch64_operands_ok_for_ldpstp accepts the four operands, the two
;; sets are re-emitted as a single parallel.
(define_peephole2
  [(set (match_operand:GPF 0 "register_operand")
	(match_operand:GPF 1 "memory_operand"))
   (set (match_operand:GPF 2 "register_operand")
	(match_operand:GPF 3 "memory_operand"))]
  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 2) (match_dup 3))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated (true is passed for loads).  */
    aarch64_swap_ldrstr_operands (operands, true);
  }
)
56
;; Pair up two consecutive stores of the same GPF mode.  Each stored value
;; must satisfy aarch64_reg_or_fp_zero.  When aarch64_operands_ok_for_ldpstp
;; accepts the operands, the two sets are re-emitted as a single parallel.
(define_peephole2
  [(set (match_operand:GPF 0 "memory_operand")
	(match_operand:GPF 1 "aarch64_reg_or_fp_zero"))
   (set (match_operand:GPF 2 "memory_operand")
	(match_operand:GPF 3 "aarch64_reg_or_fp_zero"))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 2) (match_dup 3))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated (false is passed for stores).  */
    aarch64_swap_ldrstr_operands (operands, false);
  }
)
68
;; Pair up two consecutive loads into registers where the first access has
;; a DREG mode and the second a (possibly different) DREG2 mode.  The
;; suitability check is made using the mode of the first access.
;;
;; The condition is gated on TARGET_SIMD so that it agrees with the
;; matching DREG/DREG2 store peephole and with the VQ/VQ2 peepholes in this
;; file, all of which require TARGET_SIMD.
;; NOTE(review): presumably the insn pattern that recognizes the resulting
;; parallel is itself conditional on TARGET_SIMD -- confirm against the
;; load-pair pattern definitions.
(define_peephole2
  [(set (match_operand:DREG 0 "register_operand" "")
	(match_operand:DREG 1 "memory_operand" ""))
   (set (match_operand:DREG2 2 "register_operand" "")
	(match_operand:DREG2 3 "memory_operand" ""))]
  "TARGET_SIMD
   && aarch64_operands_ok_for_ldpstp (operands, true, <DREG:MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
	      (set (match_dup 2) (match_dup 3))])]
{
  /* Let the backend helper reorder the operands[] entries before the
     replacement template is instantiated (true is passed for loads).  */
  aarch64_swap_ldrstr_operands (operands, true);
})
80
;; Pair up two consecutive register stores where the first access has a
;; DREG mode and the second a (possibly different) DREG2 mode.  Requires
;; TARGET_SIMD; the suitability check is made using the mode of the first
;; access.
(define_peephole2
  [(set (match_operand:DREG 0 "memory_operand")
	(match_operand:DREG 1 "register_operand"))
   (set (match_operand:DREG2 2 "memory_operand")
	(match_operand:DREG2 3 "register_operand"))]
  "TARGET_SIMD
   && aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 2) (match_dup 3))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated (false is passed for stores).  */
    aarch64_swap_ldrstr_operands (operands, false);
  }
)
93
;; Pair up two consecutive loads into registers where the first access has
;; a VQ mode and the second a (possibly different) VQ2 mode.  Requires
;; TARGET_SIMD, and is disabled when the current tuning sets
;; AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS.
(define_peephole2
  [(set (match_operand:VQ 0 "register_operand")
	(match_operand:VQ 1 "memory_operand"))
   (set (match_operand:VQ2 2 "register_operand")
	(match_operand:VQ2 3 "memory_operand"))]
  "TARGET_SIMD
   && aarch64_operands_ok_for_ldpstp (operands, true, <VQ:MODE>mode)
   && !(aarch64_tune_params.extra_tuning_flags
	& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 2) (match_dup 3))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated (true is passed for loads).  */
    aarch64_swap_ldrstr_operands (operands, true);
  }
)
108
;; Pair up two consecutive register stores where the first access has a VQ
;; mode and the second a (possibly different) VQ2 mode.  Requires
;; TARGET_SIMD, and is disabled when the current tuning sets
;; AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS.
(define_peephole2
  [(set (match_operand:VQ 0 "memory_operand")
	(match_operand:VQ 1 "register_operand"))
   (set (match_operand:VQ2 2 "memory_operand")
	(match_operand:VQ2 3 "register_operand"))]
  "TARGET_SIMD
   && aarch64_operands_ok_for_ldpstp (operands, false, <VQ:MODE>mode)
   && !(aarch64_tune_params.extra_tuning_flags
	& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 2) (match_dup 3))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated (false is passed for stores).  */
    aarch64_swap_ldrstr_operands (operands, false);
  }
)
123
124
;; Handle consecutive sign/zero-extending loads (SImode memory extended
;; into DImode registers).
126
;; Pair up two consecutive loads that each sign-extend an SImode memory
;; operand into a DImode register.  The suitability check is made in
;; SImode, i.e. the mode of the memory accesses.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
	(sign_extend:DI (match_operand:SI 1 "memory_operand")))
   (set (match_operand:DI 2 "register_operand")
	(sign_extend:DI (match_operand:SI 3 "memory_operand")))]
  "aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
  [(parallel
     [(set (match_dup 0) (sign_extend:DI (match_dup 1)))
      (set (match_dup 2) (sign_extend:DI (match_dup 3)))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated (true is passed for loads).  */
    aarch64_swap_ldrstr_operands (operands, true);
  }
)
138
;; Pair up two consecutive loads that each zero-extend an SImode memory
;; operand into a DImode register.  The suitability check is made in
;; SImode, i.e. the mode of the memory accesses.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI (match_operand:SI 1 "memory_operand")))
   (set (match_operand:DI 2 "register_operand")
	(zero_extend:DI (match_operand:SI 3 "memory_operand")))]
  "aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
  [(parallel
     [(set (match_dup 0) (zero_extend:DI (match_dup 1)))
      (set (match_dup 2) (zero_extend:DI (match_dup 3)))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated (true is passed for loads).  */
    aarch64_swap_ldrstr_operands (operands, true);
  }
)
150
151;; Handle storing of a floating point zero with integer data.
152;; This handles cases like:
153;;   struct pair { int a; float b; }
154;;
155;;   p->a = 1;
156;;   p->b = 0.0;
157;;
158;; We can match modes that won't work for a stp instruction
159;; as aarch64_operands_ok_for_ldpstp checks that the modes are
160;; compatible.
;; The first store is in a DSX mode and the second in the <FCVT_TARGET>
;; mode derived from it; each stored value must satisfy
;; aarch64_reg_zero_or_fp_zero.  The suitability check is made in the
;; <V_INT_EQUIV> mode.
(define_peephole2
  [(set (match_operand:DSX 0 "memory_operand")
	(match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero"))
   (set (match_operand:<FCVT_TARGET> 2 "memory_operand")
	(match_operand:<FCVT_TARGET> 3 "aarch64_reg_zero_or_fp_zero"))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <V_INT_EQUIV>mode)"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 2) (match_dup 3))])]
  {
    /* Let the backend helper reorder the operands[] entries before the
       replacement template is instantiated (false is passed for stores).  */
    aarch64_swap_ldrstr_operands (operands, false);
  }
)
172
;; Handle consecutive loads/stores whose offsets are out of the range
;; supported by ldp/ldpsw/stp.  We first adjust the offset in a scratch
;; register, then merge the accesses into ldp/ldpsw/stp using the
;; adjusted offset.
177
;; Four consecutive GPI-mode loads into registers, plus a DImode scratch
;; register (operand 8).  The scratch is requested before the first insn
;; and referenced again after the last one.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPI 0 "register_operand")
	(match_operand:GPI 1 "memory_operand"))
   (set (match_operand:GPI 2 "register_operand")
	(match_operand:GPI 3 "memory_operand"))
   (set (match_operand:GPI 4 "register_operand")
	(match_operand:GPI 5 "memory_operand"))
   (set (match_operand:GPI 6 "register_operand")
	(match_operand:GPI 7 "memory_operand"))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
  ;; Placeholder template: the preparation code below always leaves via
  ;; DONE or FAIL, so this is never instantiated.
  [(const_int 0)]
  {
    /* The helper emits the replacement sequence itself; give up on the
       peephole if it reports failure.  */
    if (!aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
      FAIL;
    DONE;
  }
)
197
;; Four consecutive GPF-mode loads into registers, plus a DImode scratch
;; register (operand 8).  The scratch is requested before the first insn
;; and referenced again after the last one.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPF 0 "register_operand")
	(match_operand:GPF 1 "memory_operand"))
   (set (match_operand:GPF 2 "register_operand")
	(match_operand:GPF 3 "memory_operand"))
   (set (match_operand:GPF 4 "register_operand")
	(match_operand:GPF 5 "memory_operand"))
   (set (match_operand:GPF 6 "register_operand")
	(match_operand:GPF 7 "memory_operand"))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
  ;; Placeholder template: the preparation code below always leaves via
  ;; DONE or FAIL, so this is never instantiated.
  [(const_int 0)]
  {
    /* The helper emits the replacement sequence itself; give up on the
       peephole if it reports failure.  */
    if (!aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
      FAIL;
    DONE;
  }
)
217
;; Four consecutive loads that each sign-extend an SImode memory operand
;; into a DImode register, plus a DImode scratch register (operand 8).
;; The scratch is requested before the first insn and referenced again
;; after the last one.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:DI 0 "register_operand")
	(sign_extend:DI (match_operand:SI 1 "memory_operand")))
   (set (match_operand:DI 2 "register_operand")
	(sign_extend:DI (match_operand:SI 3 "memory_operand")))
   (set (match_operand:DI 4 "register_operand")
	(sign_extend:DI (match_operand:SI 5 "memory_operand")))
   (set (match_operand:DI 6 "register_operand")
	(sign_extend:DI (match_operand:SI 7 "memory_operand")))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
  ;; Placeholder template: the preparation code below always leaves via
  ;; DONE or FAIL, so this is never instantiated.
  [(const_int 0)]
  {
    /* The helper emits the replacement sequence itself and is told the
       loads are SIGN_EXTEND; give up on the peephole if it reports
       failure.  */
    if (!aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND))
      FAIL;
    DONE;
  }
)
237
;; Four consecutive loads that each zero-extend an SImode memory operand
;; into a DImode register, plus a DImode scratch register (operand 8).
;; The scratch is requested before the first insn and referenced again
;; after the last one.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:DI 0 "register_operand")
	(zero_extend:DI (match_operand:SI 1 "memory_operand")))
   (set (match_operand:DI 2 "register_operand")
	(zero_extend:DI (match_operand:SI 3 "memory_operand")))
   (set (match_operand:DI 4 "register_operand")
	(zero_extend:DI (match_operand:SI 5 "memory_operand")))
   (set (match_operand:DI 6 "register_operand")
	(zero_extend:DI (match_operand:SI 7 "memory_operand")))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
  ;; Placeholder template: the preparation code below always leaves via
  ;; DONE or FAIL, so this is never instantiated.
  [(const_int 0)]
  {
    /* The helper emits the replacement sequence itself and is told the
       loads are ZERO_EXTEND; give up on the peephole if it reports
       failure.  */
    if (!aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND))
      FAIL;
    DONE;
  }
)
257
;; Four consecutive GPI-mode stores whose values satisfy
;; aarch64_reg_or_zero, plus a DImode scratch register (operand 8).  The
;; scratch is requested before the first insn and referenced again after
;; the last one.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPI 0 "memory_operand")
	(match_operand:GPI 1 "aarch64_reg_or_zero"))
   (set (match_operand:GPI 2 "memory_operand")
	(match_operand:GPI 3 "aarch64_reg_or_zero"))
   (set (match_operand:GPI 4 "memory_operand")
	(match_operand:GPI 5 "aarch64_reg_or_zero"))
   (set (match_operand:GPI 6 "memory_operand")
	(match_operand:GPI 7 "aarch64_reg_or_zero"))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
  ;; Placeholder template: the preparation code below always leaves via
  ;; DONE or FAIL, so this is never instantiated.
  [(const_int 0)]
  {
    /* The helper emits the replacement sequence itself; give up on the
       peephole if it reports failure.  */
    if (!aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
      FAIL;
    DONE;
  }
)
277
;; Four consecutive GPF-mode stores whose values satisfy
;; aarch64_reg_or_fp_zero, plus a DImode scratch register (operand 8).
;; The scratch is requested before the first insn and referenced again
;; after the last one.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPF 0 "memory_operand")
	(match_operand:GPF 1 "aarch64_reg_or_fp_zero"))
   (set (match_operand:GPF 2 "memory_operand")
	(match_operand:GPF 3 "aarch64_reg_or_fp_zero"))
   (set (match_operand:GPF 4 "memory_operand")
	(match_operand:GPF 5 "aarch64_reg_or_fp_zero"))
   (set (match_operand:GPF 6 "memory_operand")
	(match_operand:GPF 7 "aarch64_reg_or_fp_zero"))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
  ;; Placeholder template: the preparation code below always leaves via
  ;; DONE or FAIL, so this is never instantiated.
  [(const_int 0)]
  {
    /* The helper emits the replacement sequence itself; give up on the
       peephole if it reports failure.  */
    if (!aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
      FAIL;
    DONE;
  }
)
297