1;; AArch64 ldp/stp peephole optimizations.
2;; Copyright (C) 2014-2018 Free Software Foundation, Inc.
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Fuse two GPI register loads into one two-set parallel whenever
;; aarch64_operands_ok_for_ldpstp accepts the operands.  The preparation
;; code orders the pair so that the load with the smaller address offset
;; comes first in the parallel.
(define_peephole2
  [(set (match_operand:GPI 0 "register_operand" "")
	(match_operand:GPI 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:GPI 2 "register_operand" "")
	(match_operand:GPI 3 "memory_operand" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
	      (set (match_dup 2) (match_dup 3))])]
{
  rtx addr_base, first_off, second_off;

  /* Operands 1 and 3 are the two memory sources.  */
  extract_base_offset_in_addr (operands[1], &addr_base, &first_off);
  extract_base_offset_in_addr (operands[3], &addr_base, &second_off);

  /* When the second load has the smaller offset, exchange both the
     memory references and their destination registers.  */
  if (INTVAL (second_off) < INTVAL (first_off))
    {
      std::swap (operands[1], operands[3]);
      std::swap (operands[0], operands[2]);
    }
})
40
;; Fuse two GPI stores (each source a register or zero) into one two-set
;; parallel whenever aarch64_operands_ok_for_ldpstp accepts the operands.
;; The store with the smaller address offset is placed first.
(define_peephole2
  [(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "")
	(match_operand:GPI 1 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 2 "memory_operand" "")
	(match_operand:GPI 3 "aarch64_reg_or_zero" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
	      (set (match_dup 2) (match_dup 3))])]
{
  rtx mem_base, off_a, off_b;

  /* Operands 0 and 2 are the two memory destinations.  */
  extract_base_offset_in_addr (operands[0], &mem_base, &off_a);
  extract_base_offset_in_addr (operands[2], &mem_base, &off_b);

  /* Exchange the two stores if the second one has the smaller offset.  */
  if (INTVAL (off_b) < INTVAL (off_a))
    {
      std::swap (operands[1], operands[3]);
      std::swap (operands[0], operands[2]);
    }
})
60
;; GPF counterpart of the integer load-pair peephole: two register loads
;; accepted by aarch64_operands_ok_for_ldpstp are rewritten as a single
;; parallel, ordered by increasing address offset.
(define_peephole2
  [(set (match_operand:GPF 0 "register_operand" "")
	(match_operand:GPF 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:GPF 2 "register_operand" "")
	(match_operand:GPF 3 "memory_operand" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
	      (set (match_dup 2) (match_dup 3))])]
{
  rtx base_reg, src_off_0, src_off_1;

  /* Fetch the offsets of the two memory sources (operands 1 and 3).  */
  extract_base_offset_in_addr (operands[1], &base_reg, &src_off_0);
  extract_base_offset_in_addr (operands[3], &base_reg, &src_off_1);

  /* Keep the load with the smaller offset in the first slot.  */
  if (INTVAL (src_off_1) < INTVAL (src_off_0))
    {
      std::swap (operands[1], operands[3]);
      std::swap (operands[0], operands[2]);
    }
})
80
;; GPF store pair: two stores whose sources satisfy aarch64_reg_or_fp_zero
;; are rewritten as a single parallel when aarch64_operands_ok_for_ldpstp
;; accepts them.  The store with the smaller address offset goes first.
(define_peephole2
  [(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "")
	(match_operand:GPF 1 "aarch64_reg_or_fp_zero" ""))
   (set (match_operand:GPF 2 "memory_operand" "")
	(match_operand:GPF 3 "aarch64_reg_or_fp_zero" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
	      (set (match_dup 2) (match_dup 3))])]
{
  rtx dest_base, dest_off_0, dest_off_1;

  /* Fetch the offsets of the two memory destinations (operands 0, 2).  */
  extract_base_offset_in_addr (operands[0], &dest_base, &dest_off_0);
  extract_base_offset_in_addr (operands[2], &dest_base, &dest_off_1);

  /* Exchange destinations and sources together so each value still
     lands in its original location.  */
  if (INTVAL (dest_off_1) < INTVAL (dest_off_0))
    {
      std::swap (operands[1], operands[3]);
      std::swap (operands[0], operands[2]);
    }
})
100
;; Fuse two VD register loads into one two-set parallel.
;;
;; The condition is guarded with TARGET_SIMD, as the VD store-pair
;; peephole in this file already is, so that the load and store variants
;; agree on when VD-mode pairs may be formed.
;; NOTE(review): the insn that recognizes the resulting parallel is
;; defined outside this file; confirm it carries the same guard.
;;
;; Operands 0 and 2 are the destination registers, operands 1 and 3 the
;; memory sources.  The preparation code puts the load with the smaller
;; address offset first.
(define_peephole2
  [(set (match_operand:VD 0 "register_operand" "")
	(match_operand:VD 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:VD 2 "register_operand" "")
	(match_operand:VD 3 "memory_operand" ""))]
  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
	      (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2;

  /* Fetch the address offsets of the two memory sources.  */
  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);

  /* If the first load has the larger offset, exchange the two loads
     (destination registers and memory references alike).  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})
120
;; Fuse two VD register stores into one two-set parallel.  This only
;; applies when TARGET_SIMD holds and aarch64_operands_ok_for_ldpstp
;; accepts the operands; the store with the smaller address offset is
;; placed first.
(define_peephole2
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
	(match_operand:VD 1 "register_operand" ""))
   (set (match_operand:VD 2 "memory_operand" "")
	(match_operand:VD 3 "register_operand" ""))]
  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
	      (set (match_dup 2) (match_dup 3))])]
{
  rtx vec_base, vec_off_0, vec_off_1;

  /* Operands 0 and 2 are the two memory destinations.  */
  extract_base_offset_in_addr (operands[0], &vec_base, &vec_off_0);
  extract_base_offset_in_addr (operands[2], &vec_base, &vec_off_1);

  /* Exchange the stores if the second one has the smaller offset.  */
  if (INTVAL (vec_off_1) < INTVAL (vec_off_0))
    {
      std::swap (operands[1], operands[3]);
      std::swap (operands[0], operands[2]);
    }
})
140
141
142;; Handle sign/zero extended consecutive load/store.
143
;; Two SImode loads sign-extended into DImode registers are rewritten as
;; a single parallel of both sign-extending loads.  The operand check is
;; made in SImode, the mode of the memory accesses.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand" "")
	(sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
	(sign_extend:DI (match_operand:SI 3 "memory_operand" "")))]
  "aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
  [(parallel [(set (match_dup 0) (sign_extend:DI (match_dup 1)))
	      (set (match_dup 2) (sign_extend:DI (match_dup 3)))])]
{
  rtx sext_base, sext_off_0, sext_off_1;

  /* Operands 1 and 3 are the two SImode memory sources.  */
  extract_base_offset_in_addr (operands[1], &sext_base, &sext_off_0);
  extract_base_offset_in_addr (operands[3], &sext_base, &sext_off_1);

  /* Place the load with the smaller offset first.  */
  if (INTVAL (sext_off_1) < INTVAL (sext_off_0))
    {
      std::swap (operands[1], operands[3]);
      std::swap (operands[0], operands[2]);
    }
})
163
;; Two SImode loads zero-extended into DImode registers are rewritten as
;; a single parallel of both zero-extending loads.  The operand check is
;; made in SImode, the mode of the memory accesses.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand" "")
	(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
	(zero_extend:DI (match_operand:SI 3 "memory_operand" "")))]
  "aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
  [(parallel [(set (match_dup 0) (zero_extend:DI (match_dup 1)))
	      (set (match_dup 2) (zero_extend:DI (match_dup 3)))])]
{
  rtx zext_base, zext_off_0, zext_off_1;

  /* Operands 1 and 3 are the two SImode memory sources.  */
  extract_base_offset_in_addr (operands[1], &zext_base, &zext_off_0);
  extract_base_offset_in_addr (operands[3], &zext_base, &zext_off_1);

  /* Place the load with the smaller offset first.  */
  if (INTVAL (zext_off_1) < INTVAL (zext_off_0))
    {
      std::swap (operands[1], operands[3]);
      std::swap (operands[0], operands[2]);
    }
})
183
;; Handle consecutive loads/stores whose offsets are out of the range
;; supported by ldp/ldpsw/stp.  We first adjust the offset in a scratch
;; register, then merge the accesses into ldp/ldpsw/stp using the
;; adjusted offset.
188
;; Four GPI register loads plus a DImode scratch register (operand 8).
;; When aarch64_operands_adjust_ok_for_ldpstp accepts the operands, all
;; code generation is delegated to aarch64_gen_adjusted_ldpstp; the
;; replacement template itself is only a placeholder.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPI 0 "register_operand" "")
	(match_operand:GPI 1 "memory_operand" ""))
   (set (match_operand:GPI 2 "register_operand" "")
	(match_operand:GPI 3 "memory_operand" ""))
   (set (match_operand:GPI 4 "register_operand" "")
	(match_operand:GPI 5 "memory_operand" ""))
   (set (match_operand:GPI 6 "register_operand" "")
	(match_operand:GPI 7 "memory_operand" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(const_int 0)]
{
  rtx addr_base, first_off, second_off;

  /* Only the offsets of the first two loads are compared.  */
  extract_base_offset_in_addr (operands[1], &addr_base, &first_off);
  extract_base_offset_in_addr (operands[3], &addr_base, &second_off);

  /* If the first load has the larger offset, reverse the order of all
     four loads: 1st with 4th and 2nd with 3rd.  */
  if (INTVAL (second_off) < INTVAL (first_off))
    {
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
    }

  /* Give up on this peephole if the helper reports failure.  */
  if (!aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
    FAIL;
  DONE;
})
220
;; Four GPF register loads plus a DImode scratch register (operand 8).
;; When aarch64_operands_adjust_ok_for_ldpstp accepts the operands, all
;; code generation is delegated to aarch64_gen_adjusted_ldpstp.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPF 0 "register_operand" "")
	(match_operand:GPF 1 "memory_operand" ""))
   (set (match_operand:GPF 2 "register_operand" "")
	(match_operand:GPF 3 "memory_operand" ""))
   (set (match_operand:GPF 4 "register_operand" "")
	(match_operand:GPF 5 "memory_operand" ""))
   (set (match_operand:GPF 6 "register_operand" "")
	(match_operand:GPF 7 "memory_operand" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(const_int 0)]
{
  rtx base_reg, src_off_0, src_off_1;

  /* Only the offsets of the first two loads are compared.  */
  extract_base_offset_in_addr (operands[1], &base_reg, &src_off_0);
  extract_base_offset_in_addr (operands[3], &base_reg, &src_off_1);

  /* Reverse the order of all four loads when the first has the larger
     offset.  */
  if (INTVAL (src_off_1) < INTVAL (src_off_0))
    {
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
    }

  /* Give up on this peephole if the helper reports failure.  */
  if (!aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
    FAIL;
  DONE;
})
252
;; Four SImode loads sign-extended into DImode registers, plus a DImode
;; scratch register (operand 8).  Code generation is delegated to
;; aarch64_gen_adjusted_ldpstp, which is passed SImode and SIGN_EXTEND.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:DI 0 "register_operand" "")
	(sign_extend:DI (match_operand:SI 1 "memory_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
	(sign_extend:DI (match_operand:SI 3 "memory_operand" "")))
   (set (match_operand:DI 4 "register_operand" "")
	(sign_extend:DI (match_operand:SI 5 "memory_operand" "")))
   (set (match_operand:DI 6 "register_operand" "")
	(sign_extend:DI (match_operand:SI 7 "memory_operand" "")))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
  [(const_int 0)]
{
  rtx sext_base, sext_off_0, sext_off_1;

  /* Only the offsets of the first two loads are compared.  */
  extract_base_offset_in_addr (operands[1], &sext_base, &sext_off_0);
  extract_base_offset_in_addr (operands[3], &sext_base, &sext_off_1);

  /* Reverse the order of all four loads when the first has the larger
     offset.  */
  if (INTVAL (sext_off_1) < INTVAL (sext_off_0))
    {
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
    }

  /* Give up on this peephole if the helper reports failure.  */
  if (!aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND))
    FAIL;
  DONE;
})
284
;; Four SImode loads zero-extended into DImode registers, plus a DImode
;; scratch register (operand 8).  Code generation is delegated to
;; aarch64_gen_adjusted_ldpstp, which is passed SImode and ZERO_EXTEND.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:DI 0 "register_operand" "")
	(zero_extend:DI (match_operand:SI 1 "memory_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
	(zero_extend:DI (match_operand:SI 3 "memory_operand" "")))
   (set (match_operand:DI 4 "register_operand" "")
	(zero_extend:DI (match_operand:SI 5 "memory_operand" "")))
   (set (match_operand:DI 6 "register_operand" "")
	(zero_extend:DI (match_operand:SI 7 "memory_operand" "")))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
  [(const_int 0)]
{
  rtx zext_base, zext_off_0, zext_off_1;

  /* Only the offsets of the first two loads are compared.  */
  extract_base_offset_in_addr (operands[1], &zext_base, &zext_off_0);
  extract_base_offset_in_addr (operands[3], &zext_base, &zext_off_1);

  /* Reverse the order of all four loads when the first has the larger
     offset.  */
  if (INTVAL (zext_off_1) < INTVAL (zext_off_0))
    {
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
    }

  /* Give up on this peephole if the helper reports failure.  */
  if (!aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND))
    FAIL;
  DONE;
})
316
;; Four GPI stores (each source a register or zero) plus a DImode scratch
;; register (operand 8).  When aarch64_operands_adjust_ok_for_ldpstp
;; accepts the operands, all code generation is delegated to
;; aarch64_gen_adjusted_ldpstp.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPI 0 "memory_operand" "")
	(match_operand:GPI 1 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 2 "memory_operand" "")
	(match_operand:GPI 3 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 4 "memory_operand" "")
	(match_operand:GPI 5 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 6 "memory_operand" "")
	(match_operand:GPI 7 "aarch64_reg_or_zero" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(const_int 0)]
{
  rtx mem_base, off_a, off_b;

  /* Only the offsets of the first two stores are compared; their
     memory destinations are operands 0 and 2.  */
  extract_base_offset_in_addr (operands[0], &mem_base, &off_a);
  extract_base_offset_in_addr (operands[2], &mem_base, &off_b);

  /* Reverse the order of all four stores when the first has the larger
     offset.  */
  if (INTVAL (off_b) < INTVAL (off_a))
    {
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
    }

  /* Give up on this peephole if the helper reports failure.  */
  if (!aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
    FAIL;
  DONE;
})
348
;; Four GPF stores (each source satisfying aarch64_reg_or_fp_zero) plus a
;; DImode scratch register (operand 8).  When
;; aarch64_operands_adjust_ok_for_ldpstp accepts the operands, all code
;; generation is delegated to aarch64_gen_adjusted_ldpstp.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPF 0 "memory_operand" "")
	(match_operand:GPF 1 "aarch64_reg_or_fp_zero" ""))
   (set (match_operand:GPF 2 "memory_operand" "")
	(match_operand:GPF 3 "aarch64_reg_or_fp_zero" ""))
   (set (match_operand:GPF 4 "memory_operand" "")
	(match_operand:GPF 5 "aarch64_reg_or_fp_zero" ""))
   (set (match_operand:GPF 6 "memory_operand" "")
	(match_operand:GPF 7 "aarch64_reg_or_fp_zero" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(const_int 0)]
{
  rtx dest_base, dest_off_0, dest_off_1;

  /* Only the offsets of the first two stores are compared; their
     memory destinations are operands 0 and 2.  */
  extract_base_offset_in_addr (operands[0], &dest_base, &dest_off_0);
  extract_base_offset_in_addr (operands[2], &dest_base, &dest_off_1);

  /* Reverse the order of all four stores when the first has the larger
     offset.  */
  if (INTVAL (dest_off_1) < INTVAL (dest_off_0))
    {
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
    }

  /* Give up on this peephole if the helper reports failure.  */
  if (!aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
    FAIL;
  DONE;
})
380