;; AArch64 ldp/stp peephole optimizations.
;; Copyright (C) 2014-2016 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Each two-instruction peephole below rewrites a pair of single
;; loads or stores as one PARALLEL of the same two sets.  The
;; preparation statements read the constant offset of both memory
;; operands and, when the first offset is greater than the second,
;; exchange the two sets so that the access with the lower offset
;; comes first in the PARALLEL.

;; Two GPI-mode loads.
(define_peephole2
  [(set (match_operand:GPI 0 "register_operand" "")
        (match_operand:GPI 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:GPI 2 "register_operand" "")
        (match_operand:GPI 3 "memory_operand" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Put the access with the lower offset first.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})

;; Two GPI-mode stores; each source is a register or zero.
(define_peephole2
  [(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "")
        (match_operand:GPI 1 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 2 "memory_operand" "")
        (match_operand:GPI 3 "aarch64_reg_or_zero" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);
  /* Put the access with the lower offset first.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})

;; Two GPF-mode loads.
(define_peephole2
  [(set (match_operand:GPF 0 "register_operand" "")
        (match_operand:GPF 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:GPF 2 "register_operand" "")
        (match_operand:GPF 3 "memory_operand" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Put the access with the lower offset first.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})

;; Two GPF-mode stores; each source is a register or floating-point zero.
(define_peephole2
  [(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "")
        (match_operand:GPF 1 "aarch64_reg_or_fp_zero" ""))
   (set (match_operand:GPF 2 "memory_operand" "")
        (match_operand:GPF 3 "aarch64_reg_or_fp_zero" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);
  /* Put the access with the lower offset first.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})

;; Two VD-mode loads.  Guarded by TARGET_SIMD, in the same way as the
;; VD-mode store peephole that follows.
(define_peephole2
  [(set (match_operand:VD 0 "register_operand" "")
        (match_operand:VD 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:VD 2 "register_operand" "")
        (match_operand:VD 3 "memory_operand" ""))]
  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Put the access with the lower offset first.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})

;; Two VD-mode stores from registers.
(define_peephole2
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
        (match_operand:VD 1 "register_operand" ""))
   (set (match_operand:VD 2 "memory_operand" "")
        (match_operand:VD 3 "register_operand" ""))]
  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);
  /* Put the access with the lower offset first.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})


;; Handle sign/zero extended consecutive load/store.

;; Two SImode loads, each sign-extended to DImode.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
        (sign_extend:DI (match_operand:SI 3 "memory_operand" "")))]
  "aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
  [(parallel [(set (match_dup 0) (sign_extend:DI (match_dup 1)))
              (set (match_dup 2) (sign_extend:DI (match_dup 3)))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Put the access with the lower offset first.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})

;; Two SImode loads, each zero-extended to DImode.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
        (zero_extend:DI (match_operand:SI 3 "memory_operand" "")))]
  "aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
  [(parallel [(set (match_dup 0) (zero_extend:DI (match_dup 1)))
              (set (match_dup 2) (zero_extend:DI (match_dup 3)))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Put the access with the lower offset first.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})

;; Handle consecutive loads/stores whose offset is out of the range
;; supported by ldp/ldpsw/stp.  We first adjust the offset into a
;; scratch register, then merge the accesses into ldp/ldpsw/stp using
;; the adjusted offset.
;;
;; Each peephole below matches four accesses plus a DImode scratch
;; register (operand 8).  When the offset of the first access is
;; greater than that of the second, the preparation statements reverse
;; the order of all four accesses.  The replacement sequence is then
;; produced by aarch64_gen_adjusted_ldpstp; the peephole fails if that
;; function returns false.
;; NOTE(review): only the first two offsets are compared; this
;; presumably relies on aarch64_operands_adjust_ok_for_ldpstp accepting
;; only consistently ordered accesses -- confirm against its definition.

;; Four GPI-mode loads.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPI 0 "register_operand" "")
        (match_operand:GPI 1 "memory_operand" ""))
   (set (match_operand:GPI 2 "register_operand" "")
        (match_operand:GPI 3 "memory_operand" ""))
   (set (match_operand:GPI 4 "register_operand" "")
        (match_operand:GPI 5 "memory_operand" ""))
   (set (match_operand:GPI 6 "register_operand" "")
        (match_operand:GPI 7 "memory_operand" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Reverse the four accesses when the first offset is the larger.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
    }

  if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
    DONE;
  else
    FAIL;
})

;; Four GPF-mode loads.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPF 0 "register_operand" "")
        (match_operand:GPF 1 "memory_operand" ""))
   (set (match_operand:GPF 2 "register_operand" "")
        (match_operand:GPF 3 "memory_operand" ""))
   (set (match_operand:GPF 4 "register_operand" "")
        (match_operand:GPF 5 "memory_operand" ""))
   (set (match_operand:GPF 6 "register_operand" "")
        (match_operand:GPF 7 "memory_operand" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Reverse the four accesses when the first offset is the larger.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
    }

  if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
    DONE;
  else
    FAIL;
})

;; Four SImode loads, each sign-extended to DImode.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:DI 0 "register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "memory_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
        (sign_extend:DI (match_operand:SI 3 "memory_operand" "")))
   (set (match_operand:DI 4 "register_operand" "")
        (sign_extend:DI (match_operand:SI 5 "memory_operand" "")))
   (set (match_operand:DI 6 "register_operand" "")
        (sign_extend:DI (match_operand:SI 7 "memory_operand" "")))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Reverse the four accesses when the first offset is the larger.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
    }

  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND))
    DONE;
  else
    FAIL;
})

;; Four SImode loads, each zero-extended to DImode.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:DI 0 "register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "memory_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
        (zero_extend:DI (match_operand:SI 3 "memory_operand" "")))
   (set (match_operand:DI 4 "register_operand" "")
        (zero_extend:DI (match_operand:SI 5 "memory_operand" "")))
   (set (match_operand:DI 6 "register_operand" "")
        (zero_extend:DI (match_operand:SI 7 "memory_operand" "")))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* Reverse the four accesses when the first offset is the larger.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
    }

  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND))
    DONE;
  else
    FAIL;
})

;; Four GPI-mode stores; each source is a register or zero.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPI 0 "memory_operand" "")
        (match_operand:GPI 1 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 2 "memory_operand" "")
        (match_operand:GPI 3 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 4 "memory_operand" "")
        (match_operand:GPI 5 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 6 "memory_operand" "")
        (match_operand:GPI 7 "aarch64_reg_or_zero" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);
  /* Reverse the four accesses when the first offset is the larger.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
    }

  if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
    DONE;
  else
    FAIL;
})

;; Four GPF-mode stores; each source is a register or floating-point zero.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPF 0 "memory_operand" "")
        (match_operand:GPF 1 "aarch64_reg_or_fp_zero" ""))
   (set (match_operand:GPF 2 "memory_operand" "")
        (match_operand:GPF 3 "aarch64_reg_or_fp_zero" ""))
   (set (match_operand:GPF 4 "memory_operand" "")
        (match_operand:GPF 5 "aarch64_reg_or_fp_zero" ""))
   (set (match_operand:GPF 6 "memory_operand" "")
        (match_operand:GPF 7 "aarch64_reg_or_fp_zero" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);
  /* Reverse the four accesses when the first offset is the larger.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[6]);
      std::swap (operands[1], operands[7]);
      std::swap (operands[2], operands[4]);
      std::swap (operands[3], operands[5]);
    }

  if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
    DONE;
  else
    FAIL;
})