/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap, this can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap, this can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in the store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and
   the following one storing a value loaded from memory, or if the loaded
   memory objects are not adjacent, a new merged_store_group is created as
   well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bit positions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bit positions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little endian we emit:
   [p]      (32-bit) := 0x56781234; //  val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "dbgcnt.h"
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64

namespace {

struct bswap_stat
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;
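
/* As an illustrative example (not part of the pass itself), the bswap pass
   is meant to catch hand-written byte swaps such as the following 32-bit
   variant, assuming 8-bit bytes, and replace them with a single
   __builtin_bswap32 when the target supports it:

     uint32_t
     swap32 (uint32_t x)
     {
       return ((x & 0x000000ff) << 24)
	      | ((x & 0x0000ff00) << 8)
	      | ((x & 0x00ff0000) >> 8)
	      | ((x & 0xff000000) >> 24);
     }

   Similarly, shift/OR sequences that reassemble bytes loaded from memory in
   target endianness are detected as "nops" and turned into plain wider
   loads.  */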

/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (eg. due to sign extension)
   1..size - marker value is the 1-based byte index in the source (1 for
	     the lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - number of expressions N_OPS bitwise ored together to represent
     approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */

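/* For illustration only (assuming BITS_PER_UNIT == 8 and BITS_PER_MARKER
   == 8): a plain 32-bit load of a little-endian value yields the symbolic
   number 0x04030201 -- byte 0 of the result comes from source byte 1 (the
   lsb), byte 1 from source byte 2, and so on.  A full byte swap of that
   value instead yields 0x01020304, and e.g. (x >> 16) & 0xff, selecting the
   third byte, yields 0x00000003.  */
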
struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
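
/* For example (illustrative only), on a host with 64-bit int64_t CMPNOP is
   0x0807060504030201 and CMPXCHG is 0x0102030405060708.  For a 2-byte
   symbolic number only 2 * BITS_PER_MARKER bits are compared, so a 16-bit
   nop must match CMPNOP masked to 0x0201 and a 16-bit byte swap must match
   CMPXCHG shifted right by 6 markers, i.e. 0x0102.  */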

/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}
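
/* A small worked example (illustrative): for a 4-byte unsigned value with
   the identity symbolic number 0x04030201, a left shift by 8 bits shifts
   the markers up by one position, giving 0x03020100 -- the low byte is now
   known to be zero.  A right shift by 8 on a signed type with a nonzero
   head marker would instead fill the top marker with MARKER_BYTE_UNKNOWN.  */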

/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE
      && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}

/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1
     and the byte size of SRC.  The highest order byte is set to that size
     and the lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}
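
/* For instance (illustrative), init_symbolic_number on a 2-byte source
   leaves n->n == 0x0201 and n->range == 2: each byte of the value is
   tagged with its own 1-based index, which later operations permute,
   zero out or mark unknown.  */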

/* Check if STMT might be a byte swap or a nop from a memory source and
   return the answer.  If so, REF is that memory source and the base of the
   memory area accessed and the offset of the access from that base are
   recorded in N.  */

bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &reversep, &volatilep);

  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    /* Do not rewrite TARGET_MEM_REF.  */
    return false;
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      poly_offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
	{
	  poly_offset_int boff = mem_ref_offset (base_addr);
	  boff <<= LOG2_BITS_PER_UNIT;
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (maybe_lt (bit_offset, 0))
	{
	  tree byte_offset = wide_int_to_tree
	    (sizetype, bits_to_bytes_round_down (bit_offset));
	  bit_offset = num_trailing_bits (bit_offset);
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
	  else
	    offset = byte_offset;
	}

      bitpos += bit_offset.force_shwi ();
    }
  else
    base_addr = build_fold_addr_expr (base_addr);

  if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return false;
  if (!multiple_p (bitsize, BITS_PER_UNIT))
    return false;
  if (reversep)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bytepos;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}

/* Compute the symbolic number N representing the result of a bitwise OR on
   the two symbolic numbers N1 and N2 whose source statements are
   respectively SOURCE_STMT1 and SOURCE_STMT2.  */

gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
			gimple *source_stmt2, struct symbolic_number *n2,
			struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    rhs1 = TREE_OPERAND (rhs1, 0);
  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
  if (TREE_CODE (rhs2) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
    rhs2 = TREE_OPERAND (rhs2, 0);

  /* Sources are different, cancel bswap if they are not memory locations
     with the same base (array, structure, ...).  */
  if (rhs1 != rhs2)
    {
      uint64_t inc;
      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;
      basic_block bb1, bb2;

      if (!n1->base_addr || !n2->base_addr
	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
	return NULL;

      if (!n1->offset != !n2->offset
	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
	return NULL;

      start1 = 0;
      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
	return NULL;

      if (start1 < start2)
	{
	  n_start = n1;
	  start_sub = start2 - start1;
	}
      else
	{
	  n_start = n2;
	  start_sub = start1 - start2;
	}

      bb1 = gimple_bb (source_stmt1);
      bb2 = gimple_bb (source_stmt2);
      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
	source_stmt = source_stmt1;
      else
	source_stmt = source_stmt2;

      /* Find the highest address at which a load is performed and
	 compute related info.  */
      end1 = start1 + (n1->range - 1);
      end2 = start2 + (n2->range - 1);
      if (end1 < end2)
	{
	  end = end2;
	  end_sub = end2 - end1;
	}
      else
	{
	  end = end1;
	  end_sub = end1 - end2;
	}
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
	toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
	toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - MIN (start1, start2) + 1;

      /* Check that the range of memory covered can be represented by
	 a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
	return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
	 bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
	{
	  unsigned marker
	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
	  if (marker && marker != MARKER_BYTE_UNKNOWN)
	    toinc_n_ptr->n += inc;
	}
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->src = n_start->src;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      if (masked1 && masked2 && masked1 != masked2)
	return NULL;
    }
  n->n = n1->n | n2->n;
  n->n_ops = n1->n_ops + n2->n_ops;

  return source_stmt;
}
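
/* An illustrative merge (not taken from the sources): on little-endian,
   OR-ing a load of a[0..1] (n->n == 0x0201, bytepos 0) with a load of
   a[2..3] shifted left by 16 (n->n == 0x02010000, bytepos 2) rebases the
   second number's nonzero markers by start_sub == 2, giving 0x04030000,
   so the merged number is 0x04030201 with a range of 4 -- an identity
   load of four consecutive bytes.  */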

/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand, and NULL
   otherwise.  */

gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  /* Handle BIT_FIELD_REF.  */
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    {
      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
      if (bitpos % BITS_PER_UNIT == 0
	  && bitsize % BITS_PER_UNIT == 0
	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
	{
	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
	  if (BYTES_BIG_ENDIAN)
	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;

	  /* Shift.  */
	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
	    return NULL;

	  /* Mask.  */
	  uint64_t mask = 0;
	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
	       i++, tmp <<= BITS_PER_UNIT)
	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
	  n->n &= mask;

	  /* Convert.  */
	  n->type = TREE_TYPE (rhs1);
	  if (!n->base_addr)
	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
	}

      return NULL;
    }

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && !CONVERT_EXPR_CODE_P (code))
	return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
	 we have to initialize the symbolic number.  */
      if (!source_stmt1)
	{
	  if (gimple_assign_load_p (stmt)
	      || !init_symbolic_number (n, rhs1))
	    return NULL;
	  source_stmt1 = stmt;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    uint64_t val = int_cst_value (rhs2), mask = 0;
	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
	      if ((val & tmp) != 0 && (val & tmp) != tmp)
		return NULL;
	      else if (val & tmp)
		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

	    n->n &= mask;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
	    return NULL;
	  break;
	CASE_CONVERT:
	  {
	    int i, type_size, old_type_size;
	    tree type;

	    type = gimple_expr_type (stmt);
	    type_size = TYPE_PRECISION (type);
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL;
	    type_size /= BITS_PER_UNIT;
	    if (type_size > 64 / BITS_PER_MARKER)
	      return NULL;

	    /* Sign extension: result is dependent on the value.  */
	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
		&& HEAD_MARKER (n->n, old_type_size))
	      for (i = 0; i < type_size - old_type_size; i++)
		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
			<< ((type_size - 1 - i) * BITS_PER_MARKER);

	    if (type_size < 64 / BITS_PER_MARKER)
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
	      }
	    n->type = type;
	    if (!n->base_addr)
	      n->range = type_size;
	  }
	  break;
	default:
	  return NULL;
	};
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (code != BIT_IOR_EXPR)
	return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
	return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_stmt1)
	    return NULL;

	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

	  if (!source_stmt2)
	    return NULL;

	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
	    return NULL;

	  if (n1.vuse != n2.vuse)
	    return NULL;

	  source_stmt
	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);

	  if (!source_stmt)
	    return NULL;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL;

	  break;
	default:
	  return NULL;
	}
      return source_stmt;
    }
  return NULL;
}

/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
			    uint64_t *cmpnop)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in original gimple
     expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg &= mask;
	  *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
	}
      else
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
	  *cmpnop &= mask;
	}
      n->range = rsize;
    }

  n->range *= BITS_PER_UNIT;
}
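
/* Worked example (illustrative): for a 4-byte symbolic number this leaves
   *cmpnop == 0x04030201 and *cmpxchg == 0x0102030405060708 >> 32
   == 0x01020304, i.e. the identity and full-byte-swap patterns a 32-bit
   expression must match; n->range is then scaled from bytes to bits.  */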

/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  At last, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
{
  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by 2 * (log2(n) + 1) here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!ins_stmt)
    return NULL;

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}

const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap

/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr
   (non-null if a load is needed) and BSWAP.  The type, VUSE and alias set of
   the load to perform are also given in N while the builtin bswap invocation
   is given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one
   of the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */

tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
	       tree bswap_type, tree load_type, struct symbolic_number *n,
	       bool bswap)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    tgt = gimple_assign_lhs (cur_stmt);

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
	gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
	{
	  basic_block ins_bb = gimple_bb (ins_stmt);
	  basic_block cur_bb = gimple_bb (cur_stmt);
	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
	    return NULL_TREE;

	  /* Move cur_stmt just before one of the loads of the original
	     to ensure it has the same VUSE.  See PR61517 for what could
	     go wrong.  */
	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
	  gsi_move_before (&gsi, &gsi_ins);
	  gsi = gsi_for_stmt (cur_stmt);
	}
      else
	gsi = gsi_ins;

      /* Compute address to load from and cast according to the size
	 of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
	addr_tmp = unshare_expr (addr_expr);
      else
	{
	  addr_tmp = unshare_expr (n->base_addr);
	  if (!is_gimple_mem_ref_addr (addr_tmp))
	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
						   is_gimple_mem_ref_addr,
						   NULL_TREE, true,
						   GSI_SAME_STMT);
	  load_offset = n->bytepos;
	  if (n->offset)
	    {
	      tree off
		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
					    true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      gimple *stmt
		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
				       POINTER_PLUS_EXPR, addr_tmp, off);
	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      addr_tmp = gimple_assign_lhs (stmt);
	    }
	}

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
	aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
			      load_offset_ptr);

      if (!bswap)
	{
	  if (n->range == 16)
	    nop_stats.found_16bit++;
	  else if (n->range == 32)
	    nop_stats.found_32bit++;
	  else
	    {
	      gcc_assert (n->range == 64);
	      nop_stats.found_64bit++;
	    }

	  /* Convert the result of load if necessary.  */
	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
	    {
	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
					    "load_dst");
	      load_stmt = gimple_build_assign (val_tmp, val_expr);
	      gimple_set_vuse (load_stmt, n->vuse);
	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
	      update_stmt (cur_stmt);
	    }
	  else if (cur_stmt)
	    {
	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      update_stmt (cur_stmt);
	    }
	  else
	    {
	      tgt = make_ssa_name (load_type);
	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "%d bit load in target endianness found at: ",
		       (int) n->range);
	      print_gimple_stmt (dump_file, cur_stmt, 0);
	    }
	  return tgt;
	}
      else
	{
	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
	  load_stmt = gimple_build_assign (val_tmp, val_expr);
	  gimple_set_vuse (load_stmt, n->vuse);
	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	}
      src = val_tmp;
    }
  else if (!bswap)
    {
      gimple *g = NULL;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
	{
	  if (!is_gimple_val (src))
	    return NULL_TREE;
	  g = gimple_build_assign (tgt, NOP_EXPR, src);
	}
      else if (cur_stmt)
	g = gimple_build_assign (tgt, src);
      else
	tgt = src;
      if (n->range == 16)
	nop_stats.found_16bit++;
      else if (n->range == 32)
	nop_stats.found_32bit++;
      else
	{
	  gcc_assert (n->range == 64);
	  nop_stats.found_64bit++;
	}
      if (dump_file)
	{
	  fprintf (dump_file,
		   "%d bit reshuffle in target endianness found at: ",
		   (int) n->range);
	  if (cur_stmt)
	    print_gimple_stmt (dump_file, cur_stmt, 0);
	  else
	    {
	      print_generic_expr (dump_file, tgt, TDF_NONE);
	      fprintf (dump_file, "\n");
	    }
	}
      if (cur_stmt)
	gsi_replace (&gsi, g, true);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16-bit
     values are considered, since rotating a 2N-bit value by N bits is
     generally not equivalent to a bswap.  Consider for instance
     0x01020304 r>> 16 which gives 0x03040102 while a bswap for that value
     is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
	       (int) n->range);
      if (cur_stmt)
	print_gimple_stmt (dump_file, cur_stmt, 0);
      else
	{
	  print_generic_expr (dump_file, tgt, TDF_NONE);
	  fprintf (dump_file, "\n");
	}
    }

  if (cur_stmt)
    {
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
  return tgt;
}

/* Find manual byte swap implementations as well as load in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or
   simple load according to the target endianness.  */

unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
	 widest match.  As bswap pattern matching doesn't handle previously
	 inserted smaller bswap replacements as sub-patterns, the wider
	 variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
	  enum tree_code code;
	  struct symbolic_number n;
	  bool bswap;

	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Moving the gsi_prev
	     here makes sure that gsi points to the statement previous to
	     cur_stmt while still making sure that all statements are
	     considered in this basic block.  */
	  gsi_prev (&gsi);

	  if (!is_gimple_assign (cur_stmt))
	    continue;

	  code = gimple_assign_rhs_code (cur_stmt);
	  switch (code)
	    {
	    case LROTATE_EXPR:
	    case RROTATE_EXPR:
	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
		     % BITS_PER_UNIT)
		continue;
	      /* Fall through.  */
	    case BIT_IOR_EXPR:
	      break;
	    default:
	      continue;
	    }

	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

	  if (!ins_stmt)
	    continue;

	  switch (n.range)
	    {
	    case 16:
	      /* Already in canonical form, nothing to do.  */
	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
		continue;
	      load_type = bswap_type = uint16_type_node;
	      break;
	    case 32:
	      load_type = uint32_type_node;
	      if (bswap32_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      load_type = uint64_type_node;
	      if (bswap64_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (bswap && !fndecl && n.range != 16)
	    continue;

	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
			     bswap_type, load_type, &n, bswap))
	    changed = true;
	}
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
			    nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
			    nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
			    nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
			    bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
			    bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
			    bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}

namespace {

/* Struct recording one operand for the store, which is either a constant,
   in which case VAL represents the constant and all the other fields are
   zero, or a memory load, in which case VAL represents the reference,
   BASE_ADDR is non-NULL and the other fields also reflect the memory load,
   or an SSA name, in which case VAL represents the SSA name and all the
   other fields are zero.  */

class store_operand_info
{
public:
  tree val;
  tree base_addr;
  poly_uint64 bitsize;
  poly_uint64 bitpos;
  poly_uint64 bitregion_start;
  poly_uint64 bitregion_end;
  gimple *stmt;
  bool bit_not_p;
  store_operand_info ();
};

store_operand_info::store_operand_info ()
  : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
{
}

/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

class store_immediate_info
{
public:
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  unsigned HOST_WIDE_INT bitregion_start;
  /* This is one past the last bit of the bit region.  */
  unsigned HOST_WIDE_INT bitregion_end;
  gimple *stmt;
  unsigned int order;
  /* INTEGER_CST for constant stores, MEM_REF for memory copy,
     BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
     for bit insertion.
     LROTATE_EXPR if it can be only bswap optimized and
     ops are not really meaningful.
     NOP_EXPR if bswap optimization detected identity, ops
     are not meaningful.  */
  enum tree_code rhs_code;
  /* Two fields for bswap optimization purposes.  */
  struct symbolic_number n;
  gimple *ins_stmt;
  /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
  bool bit_not_p;
  /* True if ops have been swapped and thus ops[1] represents
     rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
  bool ops_swapped_p;
  /* The index number of the landing pad, or 0 if there is none.  */
  int lp_nr;
  /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
     just the first one.  */
  store_operand_info ops[2];
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			gimple *, unsigned int, enum tree_code,
			struct symbolic_number &, gimple *, bool, int,
			const store_operand_info &,
			const store_operand_info &);
};

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp,
					    unsigned HOST_WIDE_INT brs,
					    unsigned HOST_WIDE_INT bre,
					    gimple *st,
					    unsigned int ord,
					    enum tree_code rhscode,
					    struct symbolic_number &nr,
					    gimple *ins_stmtp,
					    bool bitnotp,
					    int nr2,
					    const store_operand_info &op0r,
					    const store_operand_info &op1r)
  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    stmt (st), order (ord), rhs_code (rhscode), n (nr),
    ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
    lp_nr (nr2)
#if __cplusplus >= 201103L
    , ops { op0r, op1r }
{
}
#else
{
  ops[0] = op0r;
  ops[1] = op1r;
}
#endif

/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

class merged_store_group
{
public:
  unsigned HOST_WIDE_INT start;
  unsigned HOST_WIDE_INT width;
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  /* The size of the allocated memory for val and mask.  */
  unsigned HOST_WIDE_INT buf_size;
  unsigned HOST_WIDE_INT align_base;
  poly_uint64 load_align_base[2];

  unsigned int align;
  unsigned int load_align[2];
  unsigned int first_order;
  unsigned int last_order;
  bool bit_insertion;
  bool only_constants;
  unsigned int first_nonmergeable_order;
  int lp_nr;

  auto_vec<store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  unsigned char *val;
  unsigned char *mask;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  bool can_be_merged_into (store_immediate_info *);
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
private:
  void do_merge (store_immediate_info *);
};

/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%02x ", ptr[i]);
  fprintf (fd, "\n");
}

/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      memset (ptr, 0, nbytes);
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
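
/* For example (illustrative, 8-bit bytes): clear_bit_region_be (ptr, 5, 3)
   on a byte of all ones clears bits 5, 4 and 3 (counting from the least
   significant bit), leaving 0xc7: the mask 0b111 is shifted left by
   start + 1 - len == 3 and inverted.  */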

/* In the byte array PTR clear the bit region starting at bit
   START and LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */

static void
clear_bit_region (unsigned char *ptr, unsigned int start,
		  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but we clear whole bytes, so a simple
	 memset will do.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
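
/* For example (illustrative, 8-bit bytes): clear_bit_region (ptr, 6, 4)
   spans two bytes; the first recursion clears the top 2 bits of ptr[0]
   (leaving ptr[0] & 0x3f) and the second clears the low 2 bits of ptr[1]
   (leaving ptr[1] & ~0x03).  */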
1560
1561 /* Write BITLEN bits of EXPR to the byte array PTR at
1562 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1563 Return true if the operation succeeded. */
1564
1565 static bool
encode_tree_to_bitpos(tree expr,unsigned char * ptr,int bitlen,int bitpos,unsigned int total_bytes)1566 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1567 unsigned int total_bytes)
1568 {
1569 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1570 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1571 || (bitpos % BITS_PER_UNIT)
1572 || !int_mode_for_size (bitlen, 0).exists ());
1573 bool empty_ctor_p
1574 = (TREE_CODE (expr) == CONSTRUCTOR
1575 && CONSTRUCTOR_NELTS (expr) == 0
1576 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1577 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1578
1579 if (!sub_byte_op_p)
1580 {
1581 if (first_byte >= total_bytes)
1582 return false;
1583 total_bytes -= first_byte;
1584 if (empty_ctor_p)
1585 {
1586 unsigned HOST_WIDE_INT rhs_bytes
1587 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1588 if (rhs_bytes > total_bytes)
1589 return false;
1590 memset (ptr + first_byte, '\0', rhs_bytes);
1591 return true;
1592 }
1593 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1594 }
1595
1596 /* LITTLE-ENDIAN
1597 We are writing a non byte-sized quantity or at a position that is not
1598 at a byte boundary.
1599 |--------|--------|--------| ptr + first_byte
1600 ^ ^
1601 xxx xxxxxxxx xxx< bp>
1602 |______EXPR____|
1603
1604 First native_encode_expr EXPR into a temporary buffer and shift each
1605 byte in the buffer by 'bp' (carrying the bits over as necessary).
1606 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1607 <------bitlen---->< bp>
1608 Then we clear the destination bits:
1609 |---00000|00000000|000-----| ptr + first_byte
1610 <-------bitlen--->< bp>
1611
1612 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1613 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1614
1615 BIG-ENDIAN
1616 We are writing a non byte-sized quantity or at a position that is not
1617 at a byte boundary.
1618 ptr + first_byte |--------|--------|--------|
1619 ^ ^
1620 <bp >xxx xxxxxxxx xxx
1621 |_____EXPR_____|
1622
1623 First native_encode_expr EXPR into a temporary buffer and shift each
1624 byte in the buffer to the right by (carrying the bits over as necessary).
1625 We shift by as much as needed to align the most significant bit of EXPR
1626 with bitpos:
1627 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1628 <---bitlen----> <bp ><-----bitlen----->
1629 Then we clear the destination bits:
1630 ptr + first_byte |-----000||00000000||00000---|
1631 <bp ><-------bitlen----->
1632
1633 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1634 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1635 The awkwardness comes from the fact that bitpos is counted from the
1636 most significant bit of a byte. */
1637
1638 /* We must be dealing with fixed-size data at this point, since the
1639 total size is also fixed. */
1640 unsigned int byte_size;
1641 if (empty_ctor_p)
1642 {
1643 unsigned HOST_WIDE_INT rhs_bytes
1644 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1645 if (rhs_bytes > total_bytes)
1646 return false;
1647 byte_size = rhs_bytes;
1648 }
1649 else
1650 {
1651 fixed_size_mode mode
1652 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1653 byte_size = GET_MODE_SIZE (mode);
1654 }
1655 /* Allocate an extra byte so that we have space to shift into. */
1656 byte_size++;
1657 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1658 memset (tmpbuf, '\0', byte_size);
1659 /* The store detection code should only have allowed constants that are
1660 accepted by native_encode_expr or empty ctors. */
1661 if (!empty_ctor_p
1662 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1663 gcc_unreachable ();
1664
1665 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1666 bytes to write. This means it can write more than
1667 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1668 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1669 bitlen and zero out the bits that are not relevant as well (that may
1670 contain a sign bit due to sign-extension). */
1671 unsigned int padding
1672 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1673 /* On big-endian the padding is at the 'front' so just skip the initial
1674 bytes. */
1675 if (BYTES_BIG_ENDIAN)
1676 tmpbuf += padding;
1677
1678 byte_size -= padding;
1679
1680 if (bitlen % BITS_PER_UNIT != 0)
1681 {
1682 if (BYTES_BIG_ENDIAN)
1683 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1684 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1685 else
1686 clear_bit_region (tmpbuf, bitlen,
1687 byte_size * BITS_PER_UNIT - bitlen);
1688 }
1689 /* Left shifting relies on the last byte being clear if bitlen is
1690 a multiple of BITS_PER_UNIT, which might not be clear if
1691 there are padding bytes. */
1692 else if (!BYTES_BIG_ENDIAN)
1693 tmpbuf[byte_size - 1] = '\0';
1694
1695 /* Clear the bit region in PTR where the bits from TMPBUF will be
1696 inserted into. */
1697 if (BYTES_BIG_ENDIAN)
1698 clear_bit_region_be (ptr + first_byte,
1699 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1700 else
1701 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1702
1703 int shift_amnt;
1704 int bitlen_mod = bitlen % BITS_PER_UNIT;
1705 int bitpos_mod = bitpos % BITS_PER_UNIT;
1706
1707 bool skip_byte = false;
1708 if (BYTES_BIG_ENDIAN)
1709 {
1710 /* BITPOS and BITLEN are exactly aligned and no shifting
1711 is necessary. */
1712 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1713 || (bitpos_mod == 0 && bitlen_mod == 0))
1714 shift_amnt = 0;
1715 /* |. . . . . . . .|
1716 <bp > <blen >.
1717 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1718 of the value until it aligns with 'bp' in the next byte over. */
1719 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1720 {
1721 shift_amnt = bitlen_mod + bitpos_mod;
1722 skip_byte = bitlen_mod != 0;
1723 }
1724 /* |. . . . . . . .|
1725 <----bp--->
1726 <---blen---->.
1727 Shift the value right within the same byte so it aligns with 'bp'. */
1728 else
1729 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1730 }
1731 else
1732 shift_amnt = bitpos % BITS_PER_UNIT;
1733
1734 /* Create the shifted version of EXPR. */
1735 if (!BYTES_BIG_ENDIAN)
1736 {
1737 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
1738 if (shift_amnt == 0)
1739 byte_size--;
1740 }
1741 else
1742 {
1743 gcc_assert (BYTES_BIG_ENDIAN);
1744 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1745 /* If shifting right forced us to move into the next byte skip the now
1746 empty byte. */
1747 if (skip_byte)
1748 {
1749 tmpbuf++;
1750 byte_size--;
1751 }
1752 }
1753
1754 /* Insert the bits from TMPBUF. */
1755 for (unsigned int i = 0; i < byte_size; i++)
1756 ptr[first_byte + i] |= tmpbuf[i];
1757
1758 return true;
1759 }
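
/* For reference, a minimal standalone sketch of the whole-array left shift
   that the little-endian path above relies on (an illustration only,
   assuming 8-bit bytes; the pass actually calls the shared helper
   shift_bytes_in_array_left):

     static void
     example_shift_bytes_left (unsigned char *buf, unsigned int sz,
                               unsigned int amnt)
     {
       if (amnt == 0)
         return;
       unsigned char carry = 0;
       for (unsigned int i = 0; i < sz; i++)
         {
           // The high AMNT bits of this byte spill into the next one.
           unsigned char next_carry = buf[i] >> (8 - amnt);
           buf[i] = (buf[i] << amnt) | carry;
           carry = next_carry;
         }
     }
*/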
1760
1761 /* Sorting function for store_immediate_info objects.
1762 Sorts them by bitposition. */
1763
1764 static int
1765 sort_by_bitpos (const void *x, const void *y)
1766 {
1767 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1768 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1769
1770 if ((*tmp)->bitpos < (*tmp2)->bitpos)
1771 return -1;
1772 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1773 return 1;
1774 else
1775 /* If they are the same, use the order, which is guaranteed to
1776 be different. */
1777 return (*tmp)->order - (*tmp2)->order;
1778 }
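
/* For example, three recorded stores at bitpos 32, 0 and 8 sort to 0, 8, 32;
   two stores with equal bitpos (which overlapping constant stores can
   produce) keep their relative program order via the order field.  */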
1779
1780 /* Sorting function for store_immediate_info objects.
1781 Sorts them by the order field. */
1782
1783 static int
1784 sort_by_order (const void *x, const void *y)
1785 {
1786 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1787 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1788
1789 if ((*tmp)->order < (*tmp2)->order)
1790 return -1;
1791 else if ((*tmp)->order > (*tmp2)->order)
1792 return 1;
1793
1794 gcc_unreachable ();
1795 }
1796
1797 /* Initialize a merged_store_group object from a store_immediate_info
1798 object. */
1799
1800 merged_store_group::merged_store_group (store_immediate_info *info)
1801 {
1802 start = info->bitpos;
1803 width = info->bitsize;
1804 bitregion_start = info->bitregion_start;
1805 bitregion_end = info->bitregion_end;
1806 /* VAL has memory allocated for it in apply_stores once the group
1807 width has been finalized. */
1808 val = NULL;
1809 mask = NULL;
1810 bit_insertion = false;
1811 only_constants = info->rhs_code == INTEGER_CST;
1812 first_nonmergeable_order = ~0U;
1813 lp_nr = info->lp_nr;
1814 unsigned HOST_WIDE_INT align_bitpos = 0;
1815 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1816 &align, &align_bitpos);
1817 align_base = start - align_bitpos;
1818 for (int i = 0; i < 2; ++i)
1819 {
1820 store_operand_info &op = info->ops[i];
1821 if (op.base_addr == NULL_TREE)
1822 {
1823 load_align[i] = 0;
1824 load_align_base[i] = 0;
1825 }
1826 else
1827 {
1828 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1829 load_align_base[i] = op.bitpos - align_bitpos;
1830 }
1831 }
1832 stores.create (1);
1833 stores.safe_push (info);
1834 last_stmt = info->stmt;
1835 last_order = info->order;
1836 first_stmt = last_stmt;
1837 first_order = last_order;
1838 buf_size = 0;
1839 }
1840
1841 merged_store_group::~merged_store_group ()
1842 {
1843 if (val)
1844 XDELETEVEC (val);
1845 }
1846
1847 /* Return true if the store described by INFO can be merged into the group. */
1848
1849 bool
1850 merged_store_group::can_be_merged_into (store_immediate_info *info)
1851 {
1852 /* Do not merge bswap patterns. */
1853 if (info->rhs_code == LROTATE_EXPR)
1854 return false;
1855
1856 if (info->lp_nr != lp_nr)
1857 return false;
1858
1859 /* The canonical case. */
1860 if (info->rhs_code == stores[0]->rhs_code)
1861 return true;
1862
1863 /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
1864 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1865 return true;
1866
1867 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1868 return true;
1869
1870 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
1871 if (info->rhs_code == MEM_REF
1872 && (stores[0]->rhs_code == INTEGER_CST
1873 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1874 && info->bitregion_start == stores[0]->bitregion_start
1875 && info->bitregion_end == stores[0]->bitregion_end)
1876 return true;
1877
1878 if (stores[0]->rhs_code == MEM_REF
1879 && (info->rhs_code == INTEGER_CST
1880 || info->rhs_code == BIT_INSERT_EXPR)
1881 && info->bitregion_start == stores[0]->bitregion_start
1882 && info->bitregion_end == stores[0]->bitregion_end)
1883 return true;
1884
1885 return false;
1886 }
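
/* Example: an INTEGER_CST store such as s.f1 = 5 and a BIT_INSERT_EXPR
   store can always share a group, while a bit-field copy such as
   s.f2 = t.f2 (a MEM_REF) may only join them when both stores cover
   exactly the same bit region, since coalesce_immediate_stores must then
   rewrite the MEM_REF store as a BIT_INSERT_EXPR.  */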
1887
1888 /* Helper method for merge_into and merge_overlapping to do
1889 the common part. */
1890
1891 void
1892 merged_store_group::do_merge (store_immediate_info *info)
1893 {
1894 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1895 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1896
1897 unsigned int this_align;
1898 unsigned HOST_WIDE_INT align_bitpos = 0;
1899 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1900 &this_align, &align_bitpos);
1901 if (this_align > align)
1902 {
1903 align = this_align;
1904 align_base = info->bitpos - align_bitpos;
1905 }
1906 for (int i = 0; i < 2; ++i)
1907 {
1908 store_operand_info &op = info->ops[i];
1909 if (!op.base_addr)
1910 continue;
1911
1912 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1913 if (this_align > load_align[i])
1914 {
1915 load_align[i] = this_align;
1916 load_align_base[i] = op.bitpos - align_bitpos;
1917 }
1918 }
1919
1920 gimple *stmt = info->stmt;
1921 stores.safe_push (info);
1922 if (info->order > last_order)
1923 {
1924 last_order = info->order;
1925 last_stmt = stmt;
1926 }
1927 else if (info->order < first_order)
1928 {
1929 first_order = info->order;
1930 first_stmt = stmt;
1931 }
1932 if (info->rhs_code != INTEGER_CST)
1933 only_constants = false;
1934 }
1935
1936 /* Merge a store recorded by INFO into this merged store.
1937 The store is not overlapping with the existing recorded
1938 stores. */
1939
1940 void
1941 merged_store_group::merge_into (store_immediate_info *info)
1942 {
1943 /* Make sure we're inserting in the position we think we're inserting. */
1944 gcc_assert (info->bitpos >= start + width
1945 && info->bitregion_start <= bitregion_end);
1946
1947 width = info->bitpos + info->bitsize - start;
1948 do_merge (info);
1949 }
1950
1951 /* Merge a store described by INFO into this merged store.
1952 INFO overlaps in some way with the current store (i.e. it's not contiguous
1953 which is handled by merged_store_group::merge_into). */
1954
1955 void
1956 merged_store_group::merge_overlapping (store_immediate_info *info)
1957 {
1958 /* If the store extends the size of the group, extend the width. */
1959 if (info->bitpos + info->bitsize > start + width)
1960 width = info->bitpos + info->bitsize - start;
1961
1962 do_merge (info);
1963 }
1964
1965 /* Go through all the recorded stores in this group in program order and
1966 apply their values to the VAL byte array to create the final merged
1967 value. Return true if the operation succeeded. */
1968
1969 bool
1970 merged_store_group::apply_stores ()
1971 {
1972 /* Make sure we have more than one store in the group; otherwise we cannot
1973 merge anything. */
1974 if (bitregion_start % BITS_PER_UNIT != 0
1975 || bitregion_end % BITS_PER_UNIT != 0
1976 || stores.length () == 1)
1977 return false;
1978
1979 stores.qsort (sort_by_order);
1980 store_immediate_info *info;
1981 unsigned int i;
1982 /* Create a power-of-2-sized buffer for native_encode_expr. */
1983 buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
1984 val = XNEWVEC (unsigned char, 2 * buf_size);
1985 mask = val + buf_size;
1986 memset (val, 0, buf_size);
1987 memset (mask, ~0U, buf_size);
1988
1989 FOR_EACH_VEC_ELT (stores, i, info)
1990 {
1991 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
1992 tree cst;
1993 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
1994 cst = info->ops[0].val;
1995 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
1996 cst = info->ops[1].val;
1997 else
1998 cst = NULL_TREE;
1999 bool ret = true;
2000 if (cst)
2001 {
2002 if (info->rhs_code == BIT_INSERT_EXPR)
2003 bit_insertion = true;
2004 else
2005 ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2006 pos_in_buffer, buf_size);
2007 }
2008 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2009 if (BYTES_BIG_ENDIAN)
2010 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2011 - (pos_in_buffer % BITS_PER_UNIT)),
2012 info->bitsize);
2013 else
2014 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2015 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2016 {
2017 if (ret)
2018 {
2019 fputs ("After writing ", dump_file);
2020 print_generic_expr (dump_file, cst, TDF_NONE);
2021 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2022 " at position %d\n", info->bitsize, pos_in_buffer);
2023 fputs (" the merged value contains ", dump_file);
2024 dump_char_array (dump_file, val, buf_size);
2025 fputs (" the merged mask contains ", dump_file);
2026 dump_char_array (dump_file, mask, buf_size);
2027 if (bit_insertion)
2028 fputs (" bit insertion is required\n", dump_file);
2029 }
2030 else
2031 fprintf (dump_file, "Failed to merge stores\n");
2032 }
2033 if (!ret)
2034 return false;
2035 }
2036 stores.qsort (sort_by_bitpos);
2037 return true;
2038 }
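
/* Illustration with made-up values: merging the two byte stores p[0] = 0x12
   and p[1] = 0x34 inside a 4-byte bit region gives buf_size = 4,
   val = { 0x12, 0x34, 0x00, 0x00 } and mask = { 0x00, 0x00, 0xff, 0xff };
   the 0xff mask bytes mark padding the group never writes, which
   split_group later skips when sizing the widened stores.  */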
2039
2040 /* Structure describing the store chain. */
2041
2042 class imm_store_chain_info
2043 {
2044 public:
2045 /* Doubly-linked list that imposes an order on chain processing.
2046 PNXP (prev's next pointer) points to the head of a list, or to
2047 the next field in the previous chain in the list.
2048 See pass_store_merging::m_stores_head for more rationale. */
2049 imm_store_chain_info *next, **pnxp;
2050 tree base_addr;
2051 auto_vec<store_immediate_info *> m_store_info;
2052 auto_vec<merged_store_group *> m_merged_store_groups;
2053
2054 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2055 : next (inspt), pnxp (&inspt), base_addr (b_a)
2056 {
2057 inspt = this;
2058 if (next)
2059 {
2060 gcc_checking_assert (pnxp == next->pnxp);
2061 next->pnxp = &next;
2062 }
2063 }
2064 ~imm_store_chain_info ()
2065 {
2066 *pnxp = next;
2067 if (next)
2068 {
2069 gcc_checking_assert (&next == next->pnxp);
2070 next->pnxp = pnxp;
2071 }
2072 }
2073 bool terminate_and_process_chain ();
2074 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2075 unsigned int);
2076 bool coalesce_immediate_stores ();
2077 bool output_merged_store (merged_store_group *);
2078 bool output_merged_stores ();
2079 };
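
/* A short worked example of the PNXP links above: with m_stores_head -> A -> B
   we have A.pnxp == &m_stores_head and B.pnxp == &A.next.  Destroying A
   executes *A.pnxp = A.next (the head now points to B) followed by
   B.pnxp = A.pnxp, so unlinking is the same O(1) pointer update whether the
   chain sits at the head or in the middle, without a full prev pointer.  */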
2080
2081 const pass_data pass_data_tree_store_merging = {
2082 GIMPLE_PASS, /* type */
2083 "store-merging", /* name */
2084 OPTGROUP_NONE, /* optinfo_flags */
2085 TV_GIMPLE_STORE_MERGING, /* tv_id */
2086 PROP_ssa, /* properties_required */
2087 0, /* properties_provided */
2088 0, /* properties_destroyed */
2089 0, /* todo_flags_start */
2090 TODO_update_ssa, /* todo_flags_finish */
2091 };
2092
2093 class pass_store_merging : public gimple_opt_pass
2094 {
2095 public:
2096 pass_store_merging (gcc::context *ctxt)
2097 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2098 {
2099 }
2100
2101 /* Pass not supported for PDP-endian, nor for insane hosts or
2102 target character sizes where native_{encode,interpret}_expr
2103 doesn't work properly. */
2104 virtual bool
2105 gate (function *)
2106 {
2107 return flag_store_merging
2108 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2109 && CHAR_BIT == 8
2110 && BITS_PER_UNIT == 8;
2111 }
2112
2113 virtual unsigned int execute (function *);
2114
2115 private:
2116 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2117
2118 /* Form a doubly-linked stack of the elements of m_stores, so that
2119 we can iterate over them in a predictable way. Using this order
2120 avoids extraneous differences in the compiler output just because
2121 of tree pointer variations (e.g. different chains end up in
2122 different positions of m_stores, so they are handled in different
2123 orders, so they allocate or release SSA names in different
2124 orders, and when they get reused, subsequent passes end up
2125 getting different SSA names, which may ultimately change
2126 decisions when going out of SSA). */
2127 imm_store_chain_info *m_stores_head;
2128
2129 bool process_store (gimple *);
2130 bool terminate_and_process_chain (imm_store_chain_info *);
2131 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2132 bool terminate_and_process_all_chains ();
2133 }; // class pass_store_merging
2134
2135 /* Terminate and process all recorded chains. Return true if any changes
2136 were made. */
2137
2138 bool
2139 pass_store_merging::terminate_and_process_all_chains ()
2140 {
2141 bool ret = false;
2142 while (m_stores_head)
2143 ret |= terminate_and_process_chain (m_stores_head);
2144 gcc_assert (m_stores.is_empty ());
2145 return ret;
2146 }
2147
2148 /* Terminate all chains that are affected by the statement STMT.
2149 CHAIN_INFO is the chain we should ignore from the checks if
2150 non-NULL. Return true if any changes were made. */
2151
2152 bool
2153 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2154 **chain_info,
2155 gimple *stmt)
2156 {
2157 bool ret = false;
2158
2159 /* If the statement doesn't touch memory it can't alias. */
2160 if (!gimple_vuse (stmt))
2161 return false;
2162
2163 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2164 ao_ref store_lhs_ref;
2165 ao_ref_init (&store_lhs_ref, store_lhs);
2166 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2167 {
2168 next = cur->next;
2169
2170 /* We already checked all the stores in chain_info and terminated the
2171 chain if necessary. Skip it here. */
2172 if (chain_info && *chain_info == cur)
2173 continue;
2174
2175 store_immediate_info *info;
2176 unsigned int i;
2177 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2178 {
2179 tree lhs = gimple_assign_lhs (info->stmt);
2180 ao_ref lhs_ref;
2181 ao_ref_init (&lhs_ref, lhs);
2182 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2183 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2184 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2185 &lhs_ref, false)))
2186 {
2187 if (dump_file && (dump_flags & TDF_DETAILS))
2188 {
2189 fprintf (dump_file, "stmt causes chain termination:\n");
2190 print_gimple_stmt (dump_file, stmt, 0);
2191 }
2192 ret |= terminate_and_process_chain (cur);
2193 break;
2194 }
2195 }
2196 }
2197
2198 return ret;
2199 }
2200
2201 /* Helper function. Terminate the chain recorded in CHAIN_INFO, which
2202 stores to its base object. Return true if the merging and output was
2203 successful. The m_stores entry is removed after the processing in any case. */
2204
2205 bool
2206 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2207 {
2208 bool ret = chain_info->terminate_and_process_chain ();
2209 m_stores.remove (chain_info->base_addr);
2210 delete chain_info;
2211 return ret;
2212 }
2213
2214 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2215 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2216 be able to sink a load of REF across stores between FIRST and LAST, up
2217 to right before LAST. */
2218
2219 bool
2220 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2221 {
2222 ao_ref r;
2223 ao_ref_init (&r, ref);
2224 unsigned int count = 0;
2225 tree vop = gimple_vdef (last);
2226 gimple *stmt;
2227
2228 /* Return true conservatively if the basic blocks are different. */
2229 if (gimple_bb (first) != gimple_bb (last))
2230 return true;
2231
2232 do
2233 {
2234 stmt = SSA_NAME_DEF_STMT (vop);
2235 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2236 return true;
2237 if (gimple_store_p (stmt)
2238 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2239 return true;
2240 /* Avoid quadratic compile time by bounding the number of checks
2241 we perform. */
2242 if (++count > MAX_STORE_ALIAS_CHECKS)
2243 return true;
2244 vop = gimple_vuse (stmt);
2245 }
2246 while (stmt != first);
2247
2248 return false;
2249 }
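
/* As a sketch, for the virtual SSA chain
     FIRST: s.a = 1;    // .MEM_2 = VDEF <.MEM_1>
            s.b = _7;   // .MEM_3 = VDEF <.MEM_2>
     LAST:  s.c = 2;    // .MEM_4 = VDEF <.MEM_3>
   the loop above starts from gimple_vdef (LAST) and follows the gimple_vuse
   links backwards until it has processed FIRST, conservatively deciding
   whether a load of REF may be placed just before LAST.  */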
2250
2251 /* Return true if INFO->ops[IDX] is mergeable with the
2252 corresponding loads already in MERGED_STORE group.
2253 BASE_ADDR is the base address of the whole store group. */
2254
2255 bool
2256 compatible_load_p (merged_store_group *merged_store,
2257 store_immediate_info *info,
2258 tree base_addr, int idx)
2259 {
2260 store_immediate_info *infof = merged_store->stores[0];
2261 if (!info->ops[idx].base_addr
2262 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2263 info->bitpos - infof->bitpos)
2264 || !operand_equal_p (info->ops[idx].base_addr,
2265 infof->ops[idx].base_addr, 0))
2266 return false;
2267
2268 store_immediate_info *infol = merged_store->stores.last ();
2269 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2270 /* In this case all vuses should be the same, e.g.
2271 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2272 or
2273 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2274 and we can emit the coalesced load next to any of those loads. */
2275 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2276 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2277 return true;
2278
2279 /* Otherwise, at least for now require that the load has the same
2280 vuse as the store. See following examples. */
2281 if (gimple_vuse (info->stmt) != load_vuse)
2282 return false;
2283
2284 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2285 || (infof != infol
2286 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2287 return false;
2288
2289 /* If the load is from the same location as the store, already
2290 the construction of the immediate chain info guarantees no intervening
2291 stores, so no further checks are needed. Example:
2292 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2293 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2294 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2295 return true;
2296
2297 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2298 of the stores in the group, or any other stores in between those.
2299 Previous calls to compatible_load_p ensured that for all the
2300 merged_store->stores IDX loads, no stmts starting with
2301 merged_store->first_stmt and ending right before merged_store->last_stmt
2302 clobbers those loads. */
2303 gimple *first = merged_store->first_stmt;
2304 gimple *last = merged_store->last_stmt;
2305 unsigned int i;
2306 store_immediate_info *infoc;
2307 /* The stores are sorted by increasing store bitpos, so if the info->stmt
2308 store comes before the earliest load so far, we'll be changing
2309 merged_store->first_stmt. In that case we need to give up if
2310 any of the earlier processed loads could be clobbered by stmts in the
2311 new range. */
2312 if (info->order < merged_store->first_order)
2313 {
2314 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2315 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2316 return false;
2317 first = info->stmt;
2318 }
2319 /* Similarly, we could change merged_store->last_stmt, so ensure
2320 in that case no stmts in the new range clobber any of the earlier
2321 processed loads. */
2322 else if (info->order > merged_store->last_order)
2323 {
2324 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2325 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2326 return false;
2327 last = info->stmt;
2328 }
2329 /* And finally, we'd be adding a new load to the set, ensure it isn't
2330 clobbered in the new range. */
2331 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2332 return false;
2333
2334 /* Otherwise, we are looking for:
2335 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2336 or
2337 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2338 return true;
2339 }
2340
2341 /* Add all refs loaded to compute VAL to REFS vector. */
2342
2343 void
2344 gather_bswap_load_refs (vec<tree> *refs, tree val)
2345 {
2346 if (TREE_CODE (val) != SSA_NAME)
2347 return;
2348
2349 gimple *stmt = SSA_NAME_DEF_STMT (val);
2350 if (!is_gimple_assign (stmt))
2351 return;
2352
2353 if (gimple_assign_load_p (stmt))
2354 {
2355 refs->safe_push (gimple_assign_rhs1 (stmt));
2356 return;
2357 }
2358
2359 switch (gimple_assign_rhs_class (stmt))
2360 {
2361 case GIMPLE_BINARY_RHS:
2362 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2363 /* FALLTHRU */
2364 case GIMPLE_UNARY_RHS:
2365 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2366 break;
2367 default:
2368 gcc_unreachable ();
2369 }
2370 }
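
/* For instance, if VAL is _3 defined by _3 = _1 | _2, where _1 and _2 load
   from a.x and a.y, the recursion pushes both a.x and a.y into REFS so that
   try_coalesce_bswap can verify the loads are safe to sink to the point
   where the merged store is emitted.  */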
2371
2372 /* Check if there are any stores in M_STORE_INFO after index I
2373 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2374 a potential group ending with END that have their order
2375 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2376 all the stores already merged and the one under consideration
2377 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2378 Consider:
2379 MEM[(long long int *)p_28] = 0;
2380 MEM[(long long int *)p_28 + 8B] = 0;
2381 MEM[(long long int *)p_28 + 16B] = 0;
2382 MEM[(long long int *)p_28 + 24B] = 0;
2383 _129 = (int) _130;
2384 MEM[(int *)p_28 + 8B] = _129;
2385 MEM[(int *)p_28].a = -1;
2386 We already have
2387 MEM[(long long int *)p_28] = 0;
2388 MEM[(int *)p_28].a = -1;
2389 stmts in the current group and need to consider if it is safe to
2390 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2391 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2392 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2393 into the group and merging of those 3 stores is successful, merged
2394 stmts will be emitted at the latest store from that group, i.e.
2395 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2396 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2397 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2398 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2399 into the group. That way it will be its own store group and will
2400 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2401 INTEGER_CST stores, those are mergeable using merge_overlapping,
2402 so don't return false for those.
2403
2404 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2405 (exclusive), whether they don't overlap the bitrange START to END
2406 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2407 prevent merging in cases like:
2408 MEM <char[12]> [&b + 8B] = {};
2409 MEM[(short *) &b] = 5;
2410 _5 = *x_4(D);
2411 MEM <long long unsigned int> [&b + 2B] = _5;
2412 MEM[(char *)&b + 16B] = 88;
2413 MEM[(int *)&b + 20B] = 1;
2414 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2415 be merged with it, because the = _5 store overlaps these and is in between
2416 them in sort_by_order ordering. If it was merged, the merged store would
2417 go after the = _5 store and thus change behavior. */
2418
2419 static bool
2420 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2421 bool all_integer_cst_p, unsigned int first_order,
2422 unsigned int last_order, unsigned HOST_WIDE_INT start,
2423 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2424 unsigned end_earlier)
2425 {
2426 unsigned int len = m_store_info.length ();
2427 for (unsigned int j = first_earlier; j < end_earlier; j++)
2428 {
2429 store_immediate_info *info = m_store_info[j];
2430 if (info->order > first_order
2431 && info->order < last_order
2432 && info->bitpos + info->bitsize > start)
2433 return false;
2434 }
2435 for (++i; i < len; ++i)
2436 {
2437 store_immediate_info *info = m_store_info[i];
2438 if (info->bitpos >= end)
2439 break;
2440 if (info->order < last_order
2441 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2442 return false;
2443 }
2444 return true;
2445 }
2446
2447 /* Return true if m_store_info[first] and at least one following store
2448 form a group that stores a try_size-bit value which is byte swapped
2449 from a memory load or from some value, or is the identity of some value.
2450 This uses the bswap pass APIs. */
2451
2452 bool
2453 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2454 unsigned int first,
2455 unsigned int try_size,
2456 unsigned int first_earlier)
2457 {
2458 unsigned int len = m_store_info.length (), last = first;
2459 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2460 if (width >= try_size)
2461 return false;
2462 for (unsigned int i = first + 1; i < len; ++i)
2463 {
2464 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2465 || m_store_info[i]->lp_nr != merged_store->lp_nr
2466 || m_store_info[i]->ins_stmt == NULL)
2467 return false;
2468 width += m_store_info[i]->bitsize;
2469 if (width >= try_size)
2470 {
2471 last = i;
2472 break;
2473 }
2474 }
2475 if (width != try_size)
2476 return false;
2477
2478 bool allow_unaligned
2479 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2480 /* Punt if the combined store would not be aligned and we need alignment. */
2481 if (!allow_unaligned)
2482 {
2483 unsigned int align = merged_store->align;
2484 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2485 for (unsigned int i = first + 1; i <= last; ++i)
2486 {
2487 unsigned int this_align;
2488 unsigned HOST_WIDE_INT align_bitpos = 0;
2489 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2490 &this_align, &align_bitpos);
2491 if (this_align > align)
2492 {
2493 align = this_align;
2494 align_base = m_store_info[i]->bitpos - align_bitpos;
2495 }
2496 }
2497 unsigned HOST_WIDE_INT align_bitpos
2498 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2499 if (align_bitpos)
2500 align = least_bit_hwi (align_bitpos);
2501 if (align < try_size)
2502 return false;
2503 }
2504
2505 tree type;
2506 switch (try_size)
2507 {
2508 case 16: type = uint16_type_node; break;
2509 case 32: type = uint32_type_node; break;
2510 case 64: type = uint64_type_node; break;
2511 default: gcc_unreachable ();
2512 }
2513 struct symbolic_number n;
2514 gimple *ins_stmt = NULL;
2515 int vuse_store = -1;
2516 unsigned int first_order = merged_store->first_order;
2517 unsigned int last_order = merged_store->last_order;
2518 gimple *first_stmt = merged_store->first_stmt;
2519 gimple *last_stmt = merged_store->last_stmt;
2520 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2521 store_immediate_info *infof = m_store_info[first];
2522
2523 for (unsigned int i = first; i <= last; ++i)
2524 {
2525 store_immediate_info *info = m_store_info[i];
2526 struct symbolic_number this_n = info->n;
2527 this_n.type = type;
2528 if (!this_n.base_addr)
2529 this_n.range = try_size / BITS_PER_UNIT;
2530 else
2531 /* Update vuse in case it has been changed by output_merged_stores. */
2532 this_n.vuse = gimple_vuse (info->ins_stmt);
2533 unsigned int bitpos = info->bitpos - infof->bitpos;
2534 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2535 BYTES_BIG_ENDIAN
2536 ? try_size - info->bitsize - bitpos
2537 : bitpos))
2538 return false;
2539 if (this_n.base_addr && vuse_store)
2540 {
2541 unsigned int j;
2542 for (j = first; j <= last; ++j)
2543 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2544 break;
2545 if (j > last)
2546 {
2547 if (vuse_store == 1)
2548 return false;
2549 vuse_store = 0;
2550 }
2551 }
2552 if (i == first)
2553 {
2554 n = this_n;
2555 ins_stmt = info->ins_stmt;
2556 }
2557 else
2558 {
2559 if (n.base_addr && n.vuse != this_n.vuse)
2560 {
2561 if (vuse_store == 0)
2562 return false;
2563 vuse_store = 1;
2564 }
2565 if (info->order > last_order)
2566 {
2567 last_order = info->order;
2568 last_stmt = info->stmt;
2569 }
2570 else if (info->order < first_order)
2571 {
2572 first_order = info->order;
2573 first_stmt = info->stmt;
2574 }
2575 end = MAX (end, info->bitpos + info->bitsize);
2576
2577 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2578 &this_n, &n);
2579 if (ins_stmt == NULL)
2580 return false;
2581 }
2582 }
2583
2584 uint64_t cmpxchg, cmpnop;
2585 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2586
2587 /* A complete byte swap should make the symbolic number start with
2588 the largest digit in the highest order byte. An unchanged symbolic
2589 number indicates a read with the same endianness as the target. */
2590 if (n.n != cmpnop && n.n != cmpxchg)
2591 return false;
2592
2593 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2594 return false;
2595
2596 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2597 merged_store->start, end, first_earlier, first))
2598 return false;
2599
2600 /* Don't handle memory copy this way if normal non-bswap processing
2601 would handle it too. */
2602 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2603 {
2604 unsigned int i;
2605 for (i = first; i <= last; ++i)
2606 if (m_store_info[i]->rhs_code != MEM_REF)
2607 break;
2608 if (i == last + 1)
2609 return false;
2610 }
2611
2612 if (n.n == cmpxchg)
2613 switch (try_size)
2614 {
2615 case 16:
2616 /* Will emit LROTATE_EXPR. */
2617 break;
2618 case 32:
2619 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2620 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2621 break;
2622 return false;
2623 case 64:
2624 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2625 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2626 break;
2627 return false;
2628 default:
2629 gcc_unreachable ();
2630 }
2631
2632 if (!allow_unaligned && n.base_addr)
2633 {
2634 unsigned int align = get_object_alignment (n.src);
2635 if (align < try_size)
2636 return false;
2637 }
2638
2639 /* If each load has the vuse of the corresponding store, we need to
2640 verify that the loads can be sunk right before the last store. */
2641 if (vuse_store == 1)
2642 {
2643 auto_vec<tree, 64> refs;
2644 for (unsigned int i = first; i <= last; ++i)
2645 gather_bswap_load_refs (&refs,
2646 gimple_assign_rhs1 (m_store_info[i]->stmt));
2647
2648 unsigned int i;
2649 tree ref;
2650 FOR_EACH_VEC_ELT (refs, i, ref)
2651 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2652 return false;
2653 n.vuse = NULL_TREE;
2654 }
2655
2656 infof->n = n;
2657 infof->ins_stmt = ins_stmt;
2658 for (unsigned int i = first; i <= last; ++i)
2659 {
2660 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2661 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2662 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2663 if (i != first)
2664 merged_store->merge_into (m_store_info[i]);
2665 }
2666
2667 return true;
2668 }
2669
2670 /* Go through the candidate stores recorded in m_store_info and merge them
2671 into merged_store_group objects recorded into m_merged_store_groups
2672 representing the widened stores. Return true if coalescing was successful
2673 and the number of widened stores is fewer than the original number
2674 of stores. */
2675
2676 bool
2677 imm_store_chain_info::coalesce_immediate_stores ()
2678 {
2679 /* Anything less can't be processed. */
2680 if (m_store_info.length () < 2)
2681 return false;
2682
2683 if (dump_file && (dump_flags & TDF_DETAILS))
2684 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2685 m_store_info.length ());
2686
2687 store_immediate_info *info;
2688 unsigned int i, ignore = 0;
2689 unsigned int first_earlier = 0;
2690 unsigned int end_earlier = 0;
2691
2692 /* Order the stores by the bitposition they write to. */
2693 m_store_info.qsort (sort_by_bitpos);
2694
2695 info = m_store_info[0];
2696 merged_store_group *merged_store = new merged_store_group (info);
2697 if (dump_file && (dump_flags & TDF_DETAILS))
2698 fputs ("New store group\n", dump_file);
2699
2700 FOR_EACH_VEC_ELT (m_store_info, i, info)
2701 {
2702 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2703
2704 if (i <= ignore)
2705 goto done;
2706
2707 while (first_earlier < end_earlier
2708 && (m_store_info[first_earlier]->bitpos
2709 + m_store_info[first_earlier]->bitsize
2710 <= merged_store->start))
2711 first_earlier++;
2712
2713 /* First try to handle group of stores like:
2714 p[0] = data >> 24;
2715 p[1] = data >> 16;
2716 p[2] = data >> 8;
2717 p[3] = data;
2718 using the bswap framework. */
2719 if (info->bitpos == merged_store->start + merged_store->width
2720 && merged_store->stores.length () == 1
2721 && merged_store->stores[0]->ins_stmt != NULL
2722 && info->lp_nr == merged_store->lp_nr
2723 && info->ins_stmt != NULL)
2724 {
2725 unsigned int try_size;
2726 for (try_size = 64; try_size >= 16; try_size >>= 1)
2727 if (try_coalesce_bswap (merged_store, i - 1, try_size,
2728 first_earlier))
2729 break;
2730
2731 if (try_size >= 16)
2732 {
2733 ignore = i + merged_store->stores.length () - 1;
2734 m_merged_store_groups.safe_push (merged_store);
2735 if (ignore < m_store_info.length ())
2736 {
2737 merged_store = new merged_store_group (m_store_info[ignore]);
2738 end_earlier = ignore;
2739 }
2740 else
2741 merged_store = NULL;
2742 goto done;
2743 }
2744 }
2745
2746 new_bitregion_start
2747 = MIN (merged_store->bitregion_start, info->bitregion_start);
2748 new_bitregion_end
2749 = MAX (merged_store->bitregion_end, info->bitregion_end);
2750
2751 if (info->order >= merged_store->first_nonmergeable_order
2752 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2753 > (unsigned) param_store_merging_max_size))
2754 ;
2755
2756 /* |---store 1---|
2757 |---store 2---|
2758 Overlapping stores. */
2759 else if (IN_RANGE (info->bitpos, merged_store->start,
2760 merged_store->start + merged_store->width - 1)
2761 /* |---store 1---||---store 2---|
2762 Handle also the consecutive INTEGER_CST stores case here,
2763 as we have here the code to deal with overlaps. */
2764 || (info->bitregion_start <= merged_store->bitregion_end
2765 && info->rhs_code == INTEGER_CST
2766 && merged_store->only_constants
2767 && merged_store->can_be_merged_into (info)))
2768 {
2769 /* Only allow overlapping stores of constants. */
2770 if (info->rhs_code == INTEGER_CST
2771 && merged_store->only_constants
2772 && info->lp_nr == merged_store->lp_nr)
2773 {
2774 unsigned int first_order
2775 = MIN (merged_store->first_order, info->order);
2776 unsigned int last_order
2777 = MAX (merged_store->last_order, info->order);
2778 unsigned HOST_WIDE_INT end
2779 = MAX (merged_store->start + merged_store->width,
2780 info->bitpos + info->bitsize);
2781 if (check_no_overlap (m_store_info, i, true, first_order,
2782 last_order, merged_store->start, end,
2783 first_earlier, end_earlier))
2784 {
2785 /* check_no_overlap call above made sure there are no
2786 overlapping stores with non-INTEGER_CST rhs_code
2787 in between the first and last of the stores we've
2788 just merged. If there are any INTEGER_CST rhs_code
2789 stores in between, we need to merge_overlapping them
2790 even if in the sort_by_bitpos order there are other
2791 overlapping stores in between. Keep those stores as is.
2792 Example:
2793 MEM[(int *)p_28] = 0;
2794 MEM[(char *)p_28 + 3B] = 1;
2795 MEM[(char *)p_28 + 1B] = 2;
2796 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2797 We can't merge the zero store with the store of two and
2798 not merge anything else, because the store of one is
2799 in the original order in between those two, but in
2800 sort_by_bitpos order it comes after the last store that
2801 we can't merge with them. We can merge the first 3 stores
2802 and keep the last store as is though. */
2803 unsigned int len = m_store_info.length ();
2804 unsigned int try_order = last_order;
2805 unsigned int first_nonmergeable_order;
2806 unsigned int k;
2807 bool last_iter = false;
2808 int attempts = 0;
2809 do
2810 {
2811 unsigned int max_order = 0;
2812 unsigned int min_order = first_order;
2813 unsigned first_nonmergeable_int_order = ~0U;
2814 unsigned HOST_WIDE_INT this_end = end;
2815 k = i;
2816 first_nonmergeable_order = ~0U;
2817 for (unsigned int j = i + 1; j < len; ++j)
2818 {
2819 store_immediate_info *info2 = m_store_info[j];
2820 if (info2->bitpos >= this_end)
2821 break;
2822 if (info2->order < try_order)
2823 {
2824 if (info2->rhs_code != INTEGER_CST
2825 || info2->lp_nr != merged_store->lp_nr)
2826 {
2827 /* Normally check_no_overlap makes sure this
2828 doesn't happen, but if end grows below,
2829 then we need to process more stores than
2830 check_no_overlap verified. Example:
2831 MEM[(int *)p_5] = 0;
2832 MEM[(short *)p_5 + 3B] = 1;
2833 MEM[(char *)p_5 + 4B] = _9;
2834 MEM[(char *)p_5 + 2B] = 2; */
2835 k = 0;
2836 break;
2837 }
2838 k = j;
2839 min_order = MIN (min_order, info2->order);
2840 this_end = MAX (this_end,
2841 info2->bitpos + info2->bitsize);
2842 }
2843 else if (info2->rhs_code == INTEGER_CST
2844 && info2->lp_nr == merged_store->lp_nr
2845 && !last_iter)
2846 {
2847 max_order = MAX (max_order, info2->order + 1);
2848 first_nonmergeable_int_order
2849 = MIN (first_nonmergeable_int_order,
2850 info2->order);
2851 }
2852 else
2853 first_nonmergeable_order
2854 = MIN (first_nonmergeable_order, info2->order);
2855 }
2856 if (k > i
2857 && !check_no_overlap (m_store_info, len - 1, true,
2858 min_order, try_order,
2859 merged_store->start, this_end,
2860 first_earlier, end_earlier))
2861 k = 0;
2862 if (k == 0)
2863 {
2864 if (last_order == try_order)
2865 break;
2866 /* If this failed, but only because we grew
2867 try_order, retry with the last working one,
2868 so that we merge at least something. */
2869 try_order = last_order;
2870 last_iter = true;
2871 continue;
2872 }
2873 last_order = try_order;
2874 /* Retry with a larger try_order to see if we could
2875 merge some further INTEGER_CST stores. */
2876 if (max_order
2877 && (first_nonmergeable_int_order
2878 < first_nonmergeable_order))
2879 {
2880 try_order = MIN (max_order,
2881 first_nonmergeable_order);
2882 try_order
2883 = MIN (try_order,
2884 merged_store->first_nonmergeable_order);
2885 if (try_order > last_order && ++attempts < 16)
2886 continue;
2887 }
2888 first_nonmergeable_order
2889 = MIN (first_nonmergeable_order,
2890 first_nonmergeable_int_order);
2891 end = this_end;
2892 break;
2893 }
2894 while (1);
2895
2896 if (k != 0)
2897 {
2898 merged_store->merge_overlapping (info);
2899
2900 merged_store->first_nonmergeable_order
2901 = MIN (merged_store->first_nonmergeable_order,
2902 first_nonmergeable_order);
2903
2904 for (unsigned int j = i + 1; j <= k; j++)
2905 {
2906 store_immediate_info *info2 = m_store_info[j];
2907 gcc_assert (info2->bitpos < end);
2908 if (info2->order < last_order)
2909 {
2910 gcc_assert (info2->rhs_code == INTEGER_CST);
2911 if (info != info2)
2912 merged_store->merge_overlapping (info2);
2913 }
2914 /* Other stores are kept and not merged in any
2915 way. */
2916 }
2917 ignore = k;
2918 goto done;
2919 }
2920 }
2921 }
2922 }
2923 /* |---store 1---||---store 2---|
2924 This store is consecutive to the previous one.
2925 Merge it into the current store group. There can be gaps in between
2926 the stores, but there can't be gaps in between bitregions. */
2927 else if (info->bitregion_start <= merged_store->bitregion_end
2928 && merged_store->can_be_merged_into (info))
2929 {
2930 store_immediate_info *infof = merged_store->stores[0];
2931
2932 /* All the rhs_code ops that take 2 operands are commutative;
2933 swap the operands if that could make them compatible. */
2934 if (infof->ops[0].base_addr
2935 && infof->ops[1].base_addr
2936 && info->ops[0].base_addr
2937 && info->ops[1].base_addr
2938 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2939 info->bitpos - infof->bitpos)
2940 && operand_equal_p (info->ops[1].base_addr,
2941 infof->ops[0].base_addr, 0))
2942 {
2943 std::swap (info->ops[0], info->ops[1]);
2944 info->ops_swapped_p = true;
2945 }
2946 if (check_no_overlap (m_store_info, i, false,
2947 MIN (merged_store->first_order, info->order),
2948 MAX (merged_store->last_order, info->order),
2949 merged_store->start,
2950 MAX (merged_store->start + merged_store->width,
2951 info->bitpos + info->bitsize),
2952 first_earlier, end_earlier))
2953 {
2954 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2955 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2956 {
2957 info->rhs_code = BIT_INSERT_EXPR;
2958 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2959 info->ops[0].base_addr = NULL_TREE;
2960 }
2961 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2962 {
2963 store_immediate_info *infoj;
2964 unsigned int j;
2965 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2966 {
2967 infoj->rhs_code = BIT_INSERT_EXPR;
2968 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2969 infoj->ops[0].base_addr = NULL_TREE;
2970 }
2971 }
2972 if ((infof->ops[0].base_addr
2973 ? compatible_load_p (merged_store, info, base_addr, 0)
2974 : !info->ops[0].base_addr)
2975 && (infof->ops[1].base_addr
2976 ? compatible_load_p (merged_store, info, base_addr, 1)
2977 : !info->ops[1].base_addr))
2978 {
2979 merged_store->merge_into (info);
2980 goto done;
2981 }
2982 }
2983 }
2984
2985 /* |---store 1---| <gap> |---store 2---|.
2986 Gap between stores or the rhs not compatible. Start a new group. */
2987
2988 /* Try to apply all the stores recorded for the group to determine
2989 the bitpattern they write and discard it if that fails.
2990 This will also reject single-store groups. */
2991 if (merged_store->apply_stores ())
2992 m_merged_store_groups.safe_push (merged_store);
2993 else
2994 delete merged_store;
2995
2996 merged_store = new merged_store_group (info);
2997 end_earlier = i;
2998 if (dump_file && (dump_flags & TDF_DETAILS))
2999 fputs ("New store group\n", dump_file);
3000
3001 done:
3002 if (dump_file && (dump_flags & TDF_DETAILS))
3003 {
3004 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3005 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3006 i, info->bitsize, info->bitpos);
3007 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3008 fputc ('\n', dump_file);
3009 }
3010 }
3011
3012 /* Record or discard the last store group. */
3013 if (merged_store)
3014 {
3015 if (merged_store->apply_stores ())
3016 m_merged_store_groups.safe_push (merged_store);
3017 else
3018 delete merged_store;
3019 }
3020
3021 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3022
3023 bool success
3024 = !m_merged_store_groups.is_empty ()
3025 && m_merged_store_groups.length () < m_store_info.length ();
3026
3027 if (success && dump_file)
3028 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3029 m_merged_store_groups.length ());
3030
3031 return success;
3032 }
3033
3034 /* Return the type to use for the merged stores or loads described by STMTS.
3035 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3036 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3037 of the MEM_REFs if any. */
3038
3039 static tree
3040 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3041 unsigned short *cliquep, unsigned short *basep)
3042 {
3043 gimple *stmt;
3044 unsigned int i;
3045 tree type = NULL_TREE;
3046 tree ret = NULL_TREE;
3047 *cliquep = 0;
3048 *basep = 0;
3049
3050 FOR_EACH_VEC_ELT (stmts, i, stmt)
3051 {
3052 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3053 : gimple_assign_lhs (stmt);
3054 tree type1 = reference_alias_ptr_type (ref);
3055 tree base = get_base_address (ref);
3056
3057 if (i == 0)
3058 {
3059 if (TREE_CODE (base) == MEM_REF)
3060 {
3061 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3062 *basep = MR_DEPENDENCE_BASE (base);
3063 }
3064 ret = type = type1;
3065 continue;
3066 }
3067 if (!alias_ptr_types_compatible_p (type, type1))
3068 ret = ptr_type_node;
3069 if (TREE_CODE (base) != MEM_REF
3070 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3071 || *basep != MR_DEPENDENCE_BASE (base))
3072 {
3073 *cliquep = 0;
3074 *basep = 0;
3075 }
3076 }
3077 return ret;
3078 }
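
/* For example, if the group merges p->f1 = 0 with p->f2 = 0 where f1 is int
   and f2 is float, the two alias pointer types are incompatible and the
   function falls back to ptr_type_node, keeping the widened access
   conservatively correct at the cost of less precise alias information.  */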
3079
3080 /* Return the location_t information we can find among the statements
3081 in STMTS. */
3082
3083 static location_t
3084 get_location_for_stmts (vec<gimple *> &stmts)
3085 {
3086 gimple *stmt;
3087 unsigned int i;
3088
3089 FOR_EACH_VEC_ELT (stmts, i, stmt)
3090 if (gimple_has_location (stmt))
3091 return gimple_location (stmt);
3092
3093 return UNKNOWN_LOCATION;
3094 }
3095
3096 /* Used to describe a store resulting from splitting a wide store into
3097 smaller regularly-sized stores in split_group. */
3098
3099 class split_store
3100 {
3101 public:
3102 unsigned HOST_WIDE_INT bytepos;
3103 unsigned HOST_WIDE_INT size;
3104 unsigned HOST_WIDE_INT align;
3105 auto_vec<store_immediate_info *> orig_stores;
3106 /* True if there is a single orig stmt covering the whole split store. */
3107 bool orig;
3108 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3109 unsigned HOST_WIDE_INT);
3110 };
3111
3112 /* Simple constructor. */
3113
3114 split_store::split_store (unsigned HOST_WIDE_INT bp,
3115 unsigned HOST_WIDE_INT sz,
3116 unsigned HOST_WIDE_INT al)
3117 : bytepos (bp), size (sz), align (al), orig (false)
3118 {
3119 orig_stores.create (0);
3120 }
3121
3122 /* Record all stores in GROUP that write to the region that starts at BITPOS
3123 and is of size BITSIZE. Record infos for such statements in STORES if
3124 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3125 if there is exactly one original store in the range (in that case ignore
3126 clobber stmts, unless there are only clobber stmts). */
3127
3128 static store_immediate_info *
3129 find_constituent_stores (class merged_store_group *group,
3130 vec<store_immediate_info *> *stores,
3131 unsigned int *first,
3132 unsigned HOST_WIDE_INT bitpos,
3133 unsigned HOST_WIDE_INT bitsize)
3134 {
3135 store_immediate_info *info, *ret = NULL;
3136 unsigned int i;
3137 bool second = false;
3138 bool update_first = true;
3139 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3140 for (i = *first; group->stores.iterate (i, &info); ++i)
3141 {
3142 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3143 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3144 if (stmt_end <= bitpos)
3145 {
3146 /* BITPOS passed to this function never decreases from within the
3147 same split_group call, so optimize and don't scan info records
3148 which are known to end before or at BITPOS next time.
3149 Only do it if all stores before this one also pass this. */
3150 if (update_first)
3151 *first = i + 1;
3152 continue;
3153 }
3154 else
3155 update_first = false;
3156
3157 /* The stores in GROUP are ordered by bitposition so if we're past
3158 the region for this group return early. */
3159 if (stmt_start >= end)
3160 return ret;
3161
3162 if (gimple_clobber_p (info->stmt))
3163 {
3164 if (stores)
3165 stores->safe_push (info);
3166 if (ret == NULL)
3167 ret = info;
3168 continue;
3169 }
3170 if (stores)
3171 {
3172 stores->safe_push (info);
3173 if (ret && !gimple_clobber_p (ret->stmt))
3174 {
3175 ret = NULL;
3176 second = true;
3177 }
3178 }
3179 else if (ret && !gimple_clobber_p (ret->stmt))
3180 return NULL;
3181 if (!second)
3182 ret = info;
3183 }
3184 return ret;
3185 }
3186
3187 /* Return how many SSA_NAMEs used to compute the value to store in the INFO
3188 store have multiple uses. If any SSA_NAME has multiple uses, also
3189 count statements needed to compute it. */
3190
3191 static unsigned
3192 count_multiple_uses (store_immediate_info *info)
3193 {
3194 gimple *stmt = info->stmt;
3195 unsigned ret = 0;
3196 switch (info->rhs_code)
3197 {
3198 case INTEGER_CST:
3199 return 0;
3200 case BIT_AND_EXPR:
3201 case BIT_IOR_EXPR:
3202 case BIT_XOR_EXPR:
3203 if (info->bit_not_p)
3204 {
3205 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3206 ret = 1; /* Fall through below to return
3207 the BIT_NOT_EXPR stmt and then
3208 BIT_{AND,IOR,XOR}_EXPR and anything it
3209 uses. */
3210 else
3211 /* After this, stmt is the BIT_NOT_EXPR. */
3212 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3213 }
3214 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3215 {
3216 ret += 1 + info->ops[0].bit_not_p;
3217 if (info->ops[1].base_addr)
3218 ret += 1 + info->ops[1].bit_not_p;
3219 return ret + 1;
3220 }
3221 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3222 /* stmt is now the BIT_*_EXPR. */
3223 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3224 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3225 else if (info->ops[info->ops_swapped_p].bit_not_p)
3226 {
3227 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3228 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3229 ++ret;
3230 }
3231 if (info->ops[1].base_addr == NULL_TREE)
3232 {
3233 gcc_checking_assert (!info->ops_swapped_p);
3234 return ret;
3235 }
3236 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3237 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3238 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3239 {
3240 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3241 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3242 ++ret;
3243 }
3244 return ret;
3245 case MEM_REF:
3246 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3247 return 1 + info->ops[0].bit_not_p;
3248 else if (info->ops[0].bit_not_p)
3249 {
3250 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3251 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3252 return 1;
3253 }
3254 return 0;
3255 case BIT_INSERT_EXPR:
3256 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3257 default:
3258 gcc_unreachable ();
3259 }
3260 }
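
/* A small worked example: for t.a = _2 with _2 = _1 ^ 15 and _1 = s.a, if
   _1 has a second use elsewhere the XOR statement feeding the store has to
   be retained anyway, so 1 is returned; split_group adds these counts into
   total_new when weighing the old vs. new statement totals.  */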
3261
3262 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3263 vector (if non-NULL) with split_store structs describing the byte offset
3264 (from the base), the bit size and alignment of each store as well as the
3265 original statements involved in each such split group.
3266 This is to separate the splitting strategy from the statement
3267 building/emission/linking done in output_merged_store.
3268 Return number of new stores.
3269 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3270 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3271 BZERO_FIRST may be true only when the first store covers the whole group
3272 and clears it; if BZERO_FIRST is true, keep that first store in the set
3273 unmodified and emit further stores for the overrides only.
3274 If SPLIT_STORES is NULL, it is just a dry run to count number of
3275 new stores. */
3276
3277 static unsigned int
3278 split_group (merged_store_group *group, bool allow_unaligned_store,
3279 bool allow_unaligned_load, bool bzero_first,
3280 vec<split_store *> *split_stores,
3281 unsigned *total_orig,
3282 unsigned *total_new)
3283 {
3284 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3285 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3286 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3287 unsigned HOST_WIDE_INT group_align = group->align;
3288 unsigned HOST_WIDE_INT align_base = group->align_base;
3289 unsigned HOST_WIDE_INT group_load_align = group_align;
3290 bool any_orig = false;
3291
3292 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3293
3294 if (group->stores[0]->rhs_code == LROTATE_EXPR
3295 || group->stores[0]->rhs_code == NOP_EXPR)
3296 {
3297 gcc_assert (!bzero_first);
3298 /* For the bswap framework using sets of stores, all the checking
3299 has been done earlier in try_coalesce_bswap and the group needs
3300 to be emitted as a single store. */
3301 if (total_orig)
3302 {
3303 /* Avoid the old/new stmt count heuristics. It should
3304 always be beneficial. */
3305 total_new[0] = 1;
3306 total_orig[0] = 2;
3307 }
3308
3309 if (split_stores)
3310 {
3311 unsigned HOST_WIDE_INT align_bitpos
3312 = (group->start - align_base) & (group_align - 1);
3313 unsigned HOST_WIDE_INT align = group_align;
3314 if (align_bitpos)
3315 align = least_bit_hwi (align_bitpos);
3316 bytepos = group->start / BITS_PER_UNIT;
3317 split_store *store
3318 = new split_store (bytepos, group->width, align);
3319 unsigned int first = 0;
3320 find_constituent_stores (group, &store->orig_stores,
3321 &first, group->start, group->width);
3322 split_stores->safe_push (store);
3323 }
3324
3325 return 1;
3326 }
3327
3328 unsigned int ret = 0, first = 0;
3329 unsigned HOST_WIDE_INT try_pos = bytepos;
3330
3331 if (total_orig)
3332 {
3333 unsigned int i;
3334 store_immediate_info *info = group->stores[0];
3335
3336 total_new[0] = 0;
3337 total_orig[0] = 1; /* The orig store. */
3338 info = group->stores[0];
3339 if (info->ops[0].base_addr)
3340 total_orig[0]++;
3341 if (info->ops[1].base_addr)
3342 total_orig[0]++;
3343 switch (info->rhs_code)
3344 {
3345 case BIT_AND_EXPR:
3346 case BIT_IOR_EXPR:
3347 case BIT_XOR_EXPR:
3348 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3349 break;
3350 default:
3351 break;
3352 }
3353 total_orig[0] *= group->stores.length ();
3354
3355 FOR_EACH_VEC_ELT (group->stores, i, info)
3356 {
3357 total_new[0] += count_multiple_uses (info);
3358 total_orig[0] += (info->bit_not_p
3359 + info->ops[0].bit_not_p
3360 + info->ops[1].bit_not_p);
3361 }
3362 }
3363
3364 if (!allow_unaligned_load)
3365 for (int i = 0; i < 2; ++i)
3366 if (group->load_align[i])
3367 group_load_align = MIN (group_load_align, group->load_align[i]);
3368
3369 if (bzero_first)
3370 {
3371 store_immediate_info *gstore;
3372 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3373 if (!gimple_clobber_p (gstore->stmt))
3374 break;
3375 ++first;
3376 ret = 1;
3377 if (split_stores)
3378 {
3379 split_store *store
3380 = new split_store (bytepos, gstore->bitsize, align_base);
3381 store->orig_stores.safe_push (gstore);
3382 store->orig = true;
3383 any_orig = true;
3384 split_stores->safe_push (store);
3385 }
3386 }
3387
3388 while (size > 0)
3389 {
3390 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3391 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3392 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3393 {
3394 /* Skip padding bytes. */
3395 ++try_pos;
3396 size -= BITS_PER_UNIT;
3397 continue;
3398 }
3399
3400 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3401 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3402 unsigned HOST_WIDE_INT align_bitpos
3403 = (try_bitpos - align_base) & (group_align - 1);
3404 unsigned HOST_WIDE_INT align = group_align;
3405 bool found_orig = false;
3406 if (align_bitpos)
3407 align = least_bit_hwi (align_bitpos);
3408 if (!allow_unaligned_store)
3409 try_size = MIN (try_size, align);
3410 if (!allow_unaligned_load)
3411 {
3412 /* If we can't do or don't want to do unaligned stores
3413 as well as loads, we need to take the loads into account
3414 as well. */
3415 unsigned HOST_WIDE_INT load_align = group_load_align;
3416 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3417 if (align_bitpos)
3418 load_align = least_bit_hwi (align_bitpos);
3419 for (int i = 0; i < 2; ++i)
3420 if (group->load_align[i])
3421 {
3422 align_bitpos
3423 = known_alignment (try_bitpos
3424 - group->stores[0]->bitpos
3425 + group->stores[0]->ops[i].bitpos
3426 - group->load_align_base[i]);
3427 if (align_bitpos & (group_load_align - 1))
3428 {
3429 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3430 load_align = MIN (load_align, a);
3431 }
3432 }
3433 try_size = MIN (try_size, load_align);
3434 }
3435 store_immediate_info *info
3436 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3437 if (info && !gimple_clobber_p (info->stmt))
3438 {
3439 /* If there is just one original statement for the range, see if
3440 we can just reuse the original store which could be even larger
3441 than try_size. */
3442 unsigned HOST_WIDE_INT stmt_end
3443 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3444 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3445 stmt_end - try_bitpos);
3446 if (info && info->bitpos >= try_bitpos)
3447 {
3448 store_immediate_info *info2 = NULL;
3449 unsigned int first_copy = first;
3450 if (info->bitpos > try_bitpos
3451 && stmt_end - try_bitpos <= try_size)
3452 {
3453 info2 = find_constituent_stores (group, NULL, &first_copy,
3454 try_bitpos,
3455 info->bitpos - try_bitpos);
3456 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3457 }
3458 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3459 {
3460 info2 = find_constituent_stores (group, NULL, &first_copy,
3461 stmt_end,
3462 (try_bitpos + try_size)
3463 - stmt_end);
3464 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3465 }
3466 if (info2 == NULL)
3467 {
3468 try_size = stmt_end - try_bitpos;
3469 found_orig = true;
3470 goto found;
3471 }
3472 }
3473 }
3474
3475 /* Approximate store bitsize for the case when there are no padding
3476 bits. */
3477 while (try_size > size)
3478 try_size /= 2;
3479 /* Now look for whole padding bytes at the end of that bitsize. */
3480 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3481 if (group->mask[try_pos - bytepos + nonmasked - 1]
3482 != (unsigned char) ~0U
3483 && (!bzero_first
3484 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3485 break;
3486 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3487 {
3488 /* If entire try_size range is padding, skip it. */
3489 try_pos += try_size / BITS_PER_UNIT;
3490 size -= try_size;
3491 continue;
3492 }
3493       /* Otherwise try to decrease try_size if the second half, the last
3494 	 3 quarters etc. are padding.  */
3495 nonmasked *= BITS_PER_UNIT;
3496 while (nonmasked <= try_size / 2)
3497 try_size /= 2;
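      /* E.g. if try_size was 32 but only the lowest byte is non-padding,
	 nonmasked == 8 here and two halvings leave try_size == 8.  */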
3498 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3499 {
3500 /* Now look for whole padding bytes at the start of that bitsize. */
3501 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3502 for (masked = 0; masked < try_bytesize; ++masked)
3503 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3504 && (!bzero_first
3505 || group->val[try_pos - bytepos + masked] != 0))
3506 break;
3507 masked *= BITS_PER_UNIT;
3508 gcc_assert (masked < try_size);
3509 if (masked >= try_size / 2)
3510 {
3511 while (masked >= try_size / 2)
3512 {
3513 try_size /= 2;
3514 try_pos += try_size / BITS_PER_UNIT;
3515 size -= try_size;
3516 masked -= try_size;
3517 }
3518 /* Need to recompute the alignment, so just retry at the new
3519 position. */
3520 continue;
3521 }
3522 }
3523
3524 found:
3525 ++ret;
3526
3527 if (split_stores)
3528 {
3529 split_store *store
3530 = new split_store (try_pos, try_size, align);
3531 info = find_constituent_stores (group, &store->orig_stores,
3532 &first, try_bitpos, try_size);
3533 if (info
3534 && !gimple_clobber_p (info->stmt)
3535 && info->bitpos >= try_bitpos
3536 && info->bitpos + info->bitsize <= try_bitpos + try_size
3537 && (store->orig_stores.length () == 1
3538 || found_orig
3539 || (info->bitpos == try_bitpos
3540 && (info->bitpos + info->bitsize
3541 == try_bitpos + try_size))))
3542 {
3543 store->orig = true;
3544 any_orig = true;
3545 }
3546 split_stores->safe_push (store);
3547 }
3548
3549 try_pos += try_size / BITS_PER_UNIT;
3550 size -= try_size;
3551 }
3552
3553 if (total_orig)
3554 {
3555 unsigned int i;
3556 split_store *store;
3557 /* If we are reusing some original stores and any of the
3558 original SSA_NAMEs had multiple uses, we need to subtract
3559 those now before we add the new ones. */
3560 if (total_new[0] && any_orig)
3561 {
3562 FOR_EACH_VEC_ELT (*split_stores, i, store)
3563 if (store->orig)
3564 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3565 }
3566 total_new[0] += ret; /* The new store. */
3567 store_immediate_info *info = group->stores[0];
3568 if (info->ops[0].base_addr)
3569 total_new[0] += ret;
3570 if (info->ops[1].base_addr)
3571 total_new[0] += ret;
3572 switch (info->rhs_code)
3573 {
3574 case BIT_AND_EXPR:
3575 case BIT_IOR_EXPR:
3576 case BIT_XOR_EXPR:
3577 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3578 break;
3579 default:
3580 break;
3581 }
3582 FOR_EACH_VEC_ELT (*split_stores, i, store)
3583 {
3584 unsigned int j;
3585 bool bit_not_p[3] = { false, false, false };
3586 /* If all orig_stores have certain bit_not_p set, then
3587 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3588 If some orig_stores have certain bit_not_p set, then
3589 we'd use a BIT_XOR_EXPR with a mask and need to account for
3590 it. */
3591 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3592 {
3593 if (info->ops[0].bit_not_p)
3594 bit_not_p[0] = true;
3595 if (info->ops[1].bit_not_p)
3596 bit_not_p[1] = true;
3597 if (info->bit_not_p)
3598 bit_not_p[2] = true;
3599 }
3600 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3601 }
3602
3603 }
3604
3605 return ret;
3606 }
3607
3608 /* Return the operation through which the operand IDX (if < 2) or
3609 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3610    is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
3611    the bits should be xored with MASK.  */
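/* For instance, if only one of two single-byte constituent stores has the
   relevant bit_not_p flag set, the result is BIT_XOR_EXPR with MASK
   covering just the inverted byte (0x00ff or 0xff00 for a 16-bit
   INT_TYPE, depending on its position and on endianness).  */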
3612
3613 static enum tree_code
3614 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3615 {
3616 unsigned int i;
3617 store_immediate_info *info;
3618 unsigned int cnt = 0;
3619 bool any_paddings = false;
3620 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3621 {
3622 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3623 if (bit_not_p)
3624 {
3625 ++cnt;
3626 tree lhs = gimple_assign_lhs (info->stmt);
3627 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3628 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3629 any_paddings = true;
3630 }
3631 }
3632 mask = NULL_TREE;
3633 if (cnt == 0)
3634 return NOP_EXPR;
3635 if (cnt == split_store->orig_stores.length () && !any_paddings)
3636 return BIT_NOT_EXPR;
3637
3638 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3639 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3640 unsigned char *buf
3641 = XALLOCAVEC (unsigned char, buf_size);
3642 memset (buf, ~0U, buf_size);
3643 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3644 {
3645 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3646 if (!bit_not_p)
3647 continue;
3648 /* Clear regions with bit_not_p and invert afterwards, rather than
3649 clear regions with !bit_not_p, so that gaps in between stores aren't
3650 set in the mask. */
3651 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3652 unsigned HOST_WIDE_INT prec = bitsize;
3653 unsigned int pos_in_buffer = 0;
3654 if (any_paddings)
3655 {
3656 tree lhs = gimple_assign_lhs (info->stmt);
3657 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3658 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3659 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3660 }
3661 if (info->bitpos < try_bitpos)
3662 {
3663 gcc_assert (info->bitpos + bitsize > try_bitpos);
3664 if (!BYTES_BIG_ENDIAN)
3665 {
3666 if (prec <= try_bitpos - info->bitpos)
3667 continue;
3668 prec -= try_bitpos - info->bitpos;
3669 }
3670 bitsize -= try_bitpos - info->bitpos;
3671 if (BYTES_BIG_ENDIAN && prec > bitsize)
3672 prec = bitsize;
3673 }
3674 else
3675 pos_in_buffer = info->bitpos - try_bitpos;
3676 if (prec < bitsize)
3677 {
3678 /* If this is a bool inversion, invert just the least significant
3679 prec bits rather than all bits of it. */
3680 if (BYTES_BIG_ENDIAN)
3681 {
3682 pos_in_buffer += bitsize - prec;
3683 if (pos_in_buffer >= split_store->size)
3684 continue;
3685 }
3686 bitsize = prec;
3687 }
3688 if (pos_in_buffer + bitsize > split_store->size)
3689 bitsize = split_store->size - pos_in_buffer;
3690 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3691 if (BYTES_BIG_ENDIAN)
3692 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3693 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3694 else
3695 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3696 }
3697 for (unsigned int i = 0; i < buf_size; ++i)
3698 buf[i] = ~buf[i];
3699 mask = native_interpret_expr (int_type, buf, buf_size);
3700 return BIT_XOR_EXPR;
3701 }
3702
3703 /* Given a merged store group GROUP output the widened version of it.
3704 The store chain is against the base object BASE.
3705 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3706 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3707 Make sure that the number of statements output is less than the number of
3708    original statements.  If a better sequence is possible, emit it and
3709 return true. */
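/* For example (little endian, and assuming the profitability checks below
   pass), the pair
     [p     ] := 0x01;
     [p + 1B] := 0x02;
   is emitted as a single 2-byte store
     [p] := 0x0201;  */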
3710
3711 bool
3712 imm_store_chain_info::output_merged_store (merged_store_group *group)
3713 {
3714 split_store *split_store;
3715 unsigned int i;
3716 unsigned HOST_WIDE_INT start_byte_pos
3717 = group->bitregion_start / BITS_PER_UNIT;
3718
3719 unsigned int orig_num_stmts = group->stores.length ();
3720 if (orig_num_stmts < 2)
3721 return false;
3722
3723 auto_vec<class split_store *, 32> split_stores;
3724 bool allow_unaligned_store
3725 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
3726 bool allow_unaligned_load = allow_unaligned_store;
3727 bool bzero_first = false;
3728 store_immediate_info *store;
3729 unsigned int num_clobber_stmts = 0;
3730 if (group->stores[0]->rhs_code == INTEGER_CST)
3731 {
3732 FOR_EACH_VEC_ELT (group->stores, i, store)
3733 if (gimple_clobber_p (store->stmt))
3734 num_clobber_stmts++;
3735 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3736 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3737 && group->start == store->bitpos
3738 && group->width == store->bitsize
3739 && (group->start % BITS_PER_UNIT) == 0
3740 && (group->width % BITS_PER_UNIT) == 0)
3741 {
3742 bzero_first = true;
3743 break;
3744 }
3745 else
3746 break;
3747 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3748 if (gimple_clobber_p (store->stmt))
3749 num_clobber_stmts++;
3750 if (num_clobber_stmts == orig_num_stmts)
3751 return false;
3752 orig_num_stmts -= num_clobber_stmts;
3753 }
3754 if (allow_unaligned_store || bzero_first)
3755 {
3756 /* If unaligned stores are allowed, see how many stores we'd emit
3757 for unaligned and how many stores we'd emit for aligned stores.
3758 Only use unaligned stores if it allows fewer stores than aligned.
3759 Similarly, if there is a whole region clear first, prefer expanding
3760 it together compared to expanding clear first followed by merged
3761 further stores. */
3762 unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3763 int pass_min = 0;
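      /* Bit 0 of the pass number selects unaligned stores, bit 1 selects
	 the bzero-first strategy; the dry runs below pick the combination
	 that yields the fewest split stores.  */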
3764 for (int pass = 0; pass < 4; ++pass)
3765 {
3766 if (!allow_unaligned_store && (pass & 1) != 0)
3767 continue;
3768 if (!bzero_first && (pass & 2) != 0)
3769 continue;
3770 cnt[pass] = split_group (group, (pass & 1) != 0,
3771 allow_unaligned_load, (pass & 2) != 0,
3772 NULL, NULL, NULL);
3773 if (cnt[pass] < cnt[pass_min])
3774 pass_min = pass;
3775 }
3776 if ((pass_min & 1) == 0)
3777 allow_unaligned_store = false;
3778 if ((pass_min & 2) == 0)
3779 bzero_first = false;
3780 }
3781 unsigned total_orig, total_new;
3782 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
3783 &split_stores, &total_orig, &total_new);
3784
3785 /* Determine if there is a clobber covering the whole group at the start,
3786 followed by proposed split stores that cover the whole group. In that
3787 case, prefer the transformation even if
3788 split_stores.length () == orig_num_stmts. */
3789 bool clobber_first = false;
3790 if (num_clobber_stmts
3791 && gimple_clobber_p (group->stores[0]->stmt)
3792 && group->start == group->stores[0]->bitpos
3793 && group->width == group->stores[0]->bitsize
3794 && (group->start % BITS_PER_UNIT) == 0
3795 && (group->width % BITS_PER_UNIT) == 0)
3796 {
3797 clobber_first = true;
3798 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3799 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3800 if (split_store->bytepos != pos)
3801 {
3802 clobber_first = false;
3803 break;
3804 }
3805 else
3806 pos += split_store->size / BITS_PER_UNIT;
3807 if (pos != (group->start + group->width) / BITS_PER_UNIT)
3808 clobber_first = false;
3809 }
3810
3811 if (split_stores.length () >= orig_num_stmts + clobber_first)
3812 {
3814 /* We didn't manage to reduce the number of statements. Bail out. */
3815 if (dump_file && (dump_flags & TDF_DETAILS))
3816 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3817 " Not profitable to emit new sequence.\n",
3818 orig_num_stmts);
3819 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3820 delete split_store;
3821 return false;
3822 }
3823 if (total_orig <= total_new)
3824 {
3825 /* If number of estimated new statements is above estimated original
3826 statements, bail out too. */
3827 if (dump_file && (dump_flags & TDF_DETAILS))
3828 fprintf (dump_file, "Estimated number of original stmts (%u)"
3829 " not larger than estimated number of new"
3830 " stmts (%u).\n",
3831 total_orig, total_new);
3832 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3833 delete split_store;
3834 return false;
3835 }
3836 if (group->stores[0]->rhs_code == INTEGER_CST)
3837 {
3838 bool all_orig = true;
3839 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3840 if (!split_store->orig)
3841 {
3842 all_orig = false;
3843 break;
3844 }
3845 if (all_orig)
3846 {
3847 unsigned int cnt = split_stores.length ();
3848 store_immediate_info *store;
3849 FOR_EACH_VEC_ELT (group->stores, i, store)
3850 if (gimple_clobber_p (store->stmt))
3851 ++cnt;
3852 /* Punt if we wouldn't make any real changes, i.e. keep all
3853 orig stmts + all clobbers. */
3854 if (cnt == group->stores.length ())
3855 {
3856 if (dump_file && (dump_flags & TDF_DETAILS))
3857 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3858 " Not profitable to emit new sequence.\n",
3859 orig_num_stmts);
3860 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3861 delete split_store;
3862 return false;
3863 }
3864 }
3865 }
3866
3867 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3868 gimple_seq seq = NULL;
3869 tree last_vdef, new_vuse;
3870 last_vdef = gimple_vdef (group->last_stmt);
3871 new_vuse = gimple_vuse (group->last_stmt);
3872 tree bswap_res = NULL_TREE;
3873
3874 /* Clobbers are not removed. */
3875 if (gimple_clobber_p (group->last_stmt))
3876 {
3877 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3878 gimple_set_vdef (group->last_stmt, new_vuse);
3879 }
3880
3881 if (group->stores[0]->rhs_code == LROTATE_EXPR
3882 || group->stores[0]->rhs_code == NOP_EXPR)
3883 {
3884 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3885 gimple *ins_stmt = group->stores[0]->ins_stmt;
3886 struct symbolic_number *n = &group->stores[0]->n;
3887 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3888
3889 switch (n->range)
3890 {
3891 case 16:
3892 load_type = bswap_type = uint16_type_node;
3893 break;
3894 case 32:
3895 load_type = uint32_type_node;
3896 if (bswap)
3897 {
3898 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3899 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3900 }
3901 break;
3902 case 64:
3903 load_type = uint64_type_node;
3904 if (bswap)
3905 {
3906 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3907 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3908 }
3909 break;
3910 default:
3911 gcc_unreachable ();
3912 }
3913
3914       /* If each load has the vuse of the corresponding store,
3915 	 we've checked the aliasing already in try_coalesce_bswap and
3916 	 we want to sink the needed load into seq.  So we need to use
3917 	 new_vuse on the load.  */
3918 if (n->base_addr)
3919 {
3920 if (n->vuse == NULL)
3921 {
3922 n->vuse = new_vuse;
3923 ins_stmt = NULL;
3924 }
3925 else
3926 	    /* Update vuse in case it has been changed by output_merged_stores.  */
3927 n->vuse = gimple_vuse (ins_stmt);
3928 }
3929 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3930 bswap_type, load_type, n, bswap);
3931 gcc_assert (bswap_res);
3932 }
3933
3934 gimple *stmt = NULL;
3935 auto_vec<gimple *, 32> orig_stmts;
3936 gimple_seq this_seq;
3937 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3938 is_gimple_mem_ref_addr, NULL_TREE);
3939 gimple_seq_add_seq_without_update (&seq, this_seq);
3940
3941 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3942 gimple_seq load_seq[2] = { NULL, NULL };
3943 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3944 for (int j = 0; j < 2; ++j)
3945 {
3946 store_operand_info &op = group->stores[0]->ops[j];
3947 if (op.base_addr == NULL_TREE)
3948 continue;
3949
3950 store_immediate_info *infol = group->stores.last ();
3951 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3952 {
3953 /* We can't pick the location randomly; while we've verified
3954 	     all the loads have the same vuse, they can still be in different
3955 basic blocks and we need to pick the one from the last bb:
3956 int x = q[0];
3957 if (x == N) return;
3958 int y = q[1];
3959 p[0] = x;
3960 p[1] = y;
3961 otherwise if we put the wider load at the q[0] load, we might
3962 segfault if q[1] is not mapped. */
3963 basic_block bb = gimple_bb (op.stmt);
3964 gimple *ostmt = op.stmt;
3965 store_immediate_info *info;
3966 FOR_EACH_VEC_ELT (group->stores, i, info)
3967 {
3968 gimple *tstmt = info->ops[j].stmt;
3969 basic_block tbb = gimple_bb (tstmt);
3970 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3971 {
3972 ostmt = tstmt;
3973 bb = tbb;
3974 }
3975 }
3976 load_gsi[j] = gsi_for_stmt (ostmt);
3977 load_addr[j]
3978 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3979 &load_seq[j], is_gimple_mem_ref_addr,
3980 NULL_TREE);
3981 }
3982 else if (operand_equal_p (base_addr, op.base_addr, 0))
3983 load_addr[j] = addr;
3984 else
3985 {
3986 load_addr[j]
3987 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3988 &this_seq, is_gimple_mem_ref_addr,
3989 NULL_TREE);
3990 gimple_seq_add_seq_without_update (&seq, this_seq);
3991 }
3992 }
3993
3994 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3995 {
3996 unsigned HOST_WIDE_INT try_size = split_store->size;
3997 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
3998 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3999 unsigned HOST_WIDE_INT align = split_store->align;
4000 tree dest, src;
4001 location_t loc;
4002 if (split_store->orig)
4003 {
4004 /* If there is just a single non-clobber constituent store
4005 which covers the whole area, just reuse the lhs and rhs. */
4006 gimple *orig_stmt = NULL;
4007 store_immediate_info *store;
4008 unsigned int j;
4009 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4010 if (!gimple_clobber_p (store->stmt))
4011 {
4012 orig_stmt = store->stmt;
4013 break;
4014 }
4015 dest = gimple_assign_lhs (orig_stmt);
4016 src = gimple_assign_rhs1 (orig_stmt);
4017 loc = gimple_location (orig_stmt);
4018 }
4019 else
4020 {
4021 store_immediate_info *info;
4022 unsigned short clique, base;
4023 unsigned int k;
4024 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4025 orig_stmts.safe_push (info->stmt);
4026 tree offset_type
4027 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4028 loc = get_location_for_stmts (orig_stmts);
4029 orig_stmts.truncate (0);
4030
4031 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4032 int_type = build_aligned_type (int_type, align);
4033 dest = fold_build2 (MEM_REF, int_type, addr,
4034 build_int_cst (offset_type, try_pos));
4035 if (TREE_CODE (dest) == MEM_REF)
4036 {
4037 MR_DEPENDENCE_CLIQUE (dest) = clique;
4038 MR_DEPENDENCE_BASE (dest) = base;
4039 }
4040
4041 tree mask;
4042 if (bswap_res)
4043 mask = integer_zero_node;
4044 else
4045 mask = native_interpret_expr (int_type,
4046 group->mask + try_pos
4047 - start_byte_pos,
4048 group->buf_size);
4049
4050 tree ops[2];
4051 for (int j = 0;
4052 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4053 ++j)
4054 {
4055 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4056 if (bswap_res)
4057 ops[j] = bswap_res;
4058 else if (op.base_addr)
4059 {
4060 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4061 orig_stmts.safe_push (info->ops[j].stmt);
4062
4063 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4064 &clique, &base);
4065 location_t load_loc = get_location_for_stmts (orig_stmts);
4066 orig_stmts.truncate (0);
4067
4068 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4069 unsigned HOST_WIDE_INT align_bitpos
4070 = known_alignment (try_bitpos
4071 - split_store->orig_stores[0]->bitpos
4072 + op.bitpos);
4073 if (align_bitpos & (load_align - 1))
4074 load_align = least_bit_hwi (align_bitpos);
4075
4076 tree load_int_type
4077 = build_nonstandard_integer_type (try_size, UNSIGNED);
4078 load_int_type
4079 = build_aligned_type (load_int_type, load_align);
4080
4081 poly_uint64 load_pos
4082 = exact_div (try_bitpos
4083 - split_store->orig_stores[0]->bitpos
4084 + op.bitpos,
4085 BITS_PER_UNIT);
4086 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4087 build_int_cst (offset_type, load_pos));
4088 if (TREE_CODE (ops[j]) == MEM_REF)
4089 {
4090 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4091 MR_DEPENDENCE_BASE (ops[j]) = base;
4092 }
4093 if (!integer_zerop (mask))
4094 /* The load might load some bits (that will be masked off
4095 later on) uninitialized, avoid -W*uninitialized
4096 warnings in that case. */
4097 TREE_NO_WARNING (ops[j]) = 1;
4098
4099 stmt = gimple_build_assign (make_ssa_name (int_type),
4100 ops[j]);
4101 gimple_set_location (stmt, load_loc);
4102 if (gsi_bb (load_gsi[j]))
4103 {
4104 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4105 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4106 }
4107 else
4108 {
4109 gimple_set_vuse (stmt, new_vuse);
4110 gimple_seq_add_stmt_without_update (&seq, stmt);
4111 }
4112 ops[j] = gimple_assign_lhs (stmt);
4113 tree xor_mask;
4114 enum tree_code inv_op
4115 = invert_op (split_store, j, int_type, xor_mask);
4116 if (inv_op != NOP_EXPR)
4117 {
4118 stmt = gimple_build_assign (make_ssa_name (int_type),
4119 inv_op, ops[j], xor_mask);
4120 gimple_set_location (stmt, load_loc);
4121 ops[j] = gimple_assign_lhs (stmt);
4122
4123 if (gsi_bb (load_gsi[j]))
4124 gimple_seq_add_stmt_without_update (&load_seq[j],
4125 stmt);
4126 else
4127 gimple_seq_add_stmt_without_update (&seq, stmt);
4128 }
4129 }
4130 else
4131 ops[j] = native_interpret_expr (int_type,
4132 group->val + try_pos
4133 - start_byte_pos,
4134 group->buf_size);
4135 }
4136
4137 switch (split_store->orig_stores[0]->rhs_code)
4138 {
4139 case BIT_AND_EXPR:
4140 case BIT_IOR_EXPR:
4141 case BIT_XOR_EXPR:
4142 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4143 {
4144 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4145 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4146 }
4147 location_t bit_loc;
4148 bit_loc = get_location_for_stmts (orig_stmts);
4149 orig_stmts.truncate (0);
4150
4151 stmt
4152 = gimple_build_assign (make_ssa_name (int_type),
4153 split_store->orig_stores[0]->rhs_code,
4154 ops[0], ops[1]);
4155 gimple_set_location (stmt, bit_loc);
4156 /* If there is just one load and there is a separate
4157 load_seq[0], emit the bitwise op right after it. */
4158 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4159 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4160 /* Otherwise, if at least one load is in seq, we need to
4161 emit the bitwise op right before the store. If there
4162 		 are two loads and they are emitted somewhere else, it would
4163 be better to emit the bitwise op as early as possible;
4164 we don't track where that would be possible right now
4165 though. */
4166 else
4167 gimple_seq_add_stmt_without_update (&seq, stmt);
4168 src = gimple_assign_lhs (stmt);
4169 tree xor_mask;
4170 enum tree_code inv_op;
4171 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4172 if (inv_op != NOP_EXPR)
4173 {
4174 stmt = gimple_build_assign (make_ssa_name (int_type),
4175 inv_op, src, xor_mask);
4176 gimple_set_location (stmt, bit_loc);
4177 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4178 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4179 else
4180 gimple_seq_add_stmt_without_update (&seq, stmt);
4181 src = gimple_assign_lhs (stmt);
4182 }
4183 break;
4184 case LROTATE_EXPR:
4185 case NOP_EXPR:
4186 src = ops[0];
4187 if (!is_gimple_val (src))
4188 {
4189 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4190 src);
4191 gimple_seq_add_stmt_without_update (&seq, stmt);
4192 src = gimple_assign_lhs (stmt);
4193 }
4194 if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4195 {
4196 stmt = gimple_build_assign (make_ssa_name (int_type),
4197 NOP_EXPR, src);
4198 gimple_seq_add_stmt_without_update (&seq, stmt);
4199 src = gimple_assign_lhs (stmt);
4200 }
4201 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4202 if (inv_op != NOP_EXPR)
4203 {
4204 stmt = gimple_build_assign (make_ssa_name (int_type),
4205 inv_op, src, xor_mask);
4206 gimple_set_location (stmt, loc);
4207 gimple_seq_add_stmt_without_update (&seq, stmt);
4208 src = gimple_assign_lhs (stmt);
4209 }
4210 break;
4211 default:
4212 src = ops[0];
4213 break;
4214 }
4215
4216 /* If bit insertion is required, we use the source as an accumulator
4217 into which the successive bit-field values are manually inserted.
4218 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4219 if (group->bit_insertion)
4220 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4221 if (info->rhs_code == BIT_INSERT_EXPR
4222 && info->bitpos < try_bitpos + try_size
4223 && info->bitpos + info->bitsize > try_bitpos)
4224 {
4225 /* Mask, truncate, convert to final type, shift and ior into
4226 the accumulator. Note that every step can be a no-op. */
4227 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4228 const HOST_WIDE_INT end_gap
4229 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4230 tree tem = info->ops[0].val;
4231 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4232 {
4233 tree bitfield_type
4234 = build_nonstandard_integer_type (info->bitsize,
4235 UNSIGNED);
4236 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4237 }
4238 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4239 {
4240 const unsigned HOST_WIDE_INT imask
4241 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4242 tem = gimple_build (&seq, loc,
4243 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4244 build_int_cst (TREE_TYPE (tem),
4245 imask));
4246 }
4247 const HOST_WIDE_INT shift
4248 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4249 if (shift < 0)
4250 tem = gimple_build (&seq, loc,
4251 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4252 build_int_cst (NULL_TREE, -shift));
4253 tem = gimple_convert (&seq, loc, int_type, tem);
4254 if (shift > 0)
4255 tem = gimple_build (&seq, loc,
4256 LSHIFT_EXPR, int_type, tem,
4257 build_int_cst (NULL_TREE, shift));
4258 src = gimple_build (&seq, loc,
4259 BIT_IOR_EXPR, int_type, tem, src);
4260 }
4261
4262 if (!integer_zerop (mask))
4263 {
4264 tree tem = make_ssa_name (int_type);
4265 tree load_src = unshare_expr (dest);
4266 /* The load might load some or all bits uninitialized,
4267 avoid -W*uninitialized warnings in that case.
4268 	     As an optimization, if all the bits are provably
4269 	     uninitialized (no stores at all yet, or the previous
4270 	     store was a CLOBBER), we could optimize away the load
4271 	     and replace it e.g. with 0.  */
4272 TREE_NO_WARNING (load_src) = 1;
4273 stmt = gimple_build_assign (tem, load_src);
4274 gimple_set_location (stmt, loc);
4275 gimple_set_vuse (stmt, new_vuse);
4276 gimple_seq_add_stmt_without_update (&seq, stmt);
4277
4278 /* FIXME: If there is a single chunk of zero bits in mask,
4279 perhaps use BIT_INSERT_EXPR instead? */
4280 stmt = gimple_build_assign (make_ssa_name (int_type),
4281 BIT_AND_EXPR, tem, mask);
4282 gimple_set_location (stmt, loc);
4283 gimple_seq_add_stmt_without_update (&seq, stmt);
4284 tem = gimple_assign_lhs (stmt);
4285
4286 if (TREE_CODE (src) == INTEGER_CST)
4287 src = wide_int_to_tree (int_type,
4288 wi::bit_and_not (wi::to_wide (src),
4289 wi::to_wide (mask)));
4290 else
4291 {
4292 tree nmask
4293 = wide_int_to_tree (int_type,
4294 wi::bit_not (wi::to_wide (mask)));
4295 stmt = gimple_build_assign (make_ssa_name (int_type),
4296 BIT_AND_EXPR, src, nmask);
4297 gimple_set_location (stmt, loc);
4298 gimple_seq_add_stmt_without_update (&seq, stmt);
4299 src = gimple_assign_lhs (stmt);
4300 }
4301 stmt = gimple_build_assign (make_ssa_name (int_type),
4302 BIT_IOR_EXPR, tem, src);
4303 gimple_set_location (stmt, loc);
4304 gimple_seq_add_stmt_without_update (&seq, stmt);
4305 src = gimple_assign_lhs (stmt);
4306 }
4307 }
4308
4309 stmt = gimple_build_assign (dest, src);
4310 gimple_set_location (stmt, loc);
4311 gimple_set_vuse (stmt, new_vuse);
4312 gimple_seq_add_stmt_without_update (&seq, stmt);
4313
4314 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4315 add_stmt_to_eh_lp (stmt, group->lp_nr);
4316
4317 tree new_vdef;
4318 if (i < split_stores.length () - 1)
4319 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4320 else
4321 new_vdef = last_vdef;
4322
4323 gimple_set_vdef (stmt, new_vdef);
4324 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4325 new_vuse = new_vdef;
4326 }
4327
4328 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4329 delete split_store;
4330
4331 gcc_assert (seq);
4332 if (dump_file)
4333 {
4334 fprintf (dump_file,
4335 "New sequence of %u stores to replace old one of %u stores\n",
4336 split_stores.length (), orig_num_stmts);
4337 if (dump_flags & TDF_DETAILS)
4338 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4339 }
4340
4341 if (gimple_clobber_p (group->last_stmt))
4342 update_stmt (group->last_stmt);
4343
4344 if (group->lp_nr > 0)
4345 {
4346 /* We're going to insert a sequence of (potentially) throwing stores
4347 into an active EH region. This means that we're going to create
4348 new basic blocks with EH edges pointing to the post landing pad
4349 	 and will therefore have to update its PHI nodes, if any.  For the
4350 virtual PHI node, we're going to use the VDEFs created above, but
4351 for the other nodes, we need to record the original reaching defs. */
4352 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4353 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4354 basic_block last_bb = gimple_bb (group->last_stmt);
4355 edge last_edge = find_edge (last_bb, lp_bb);
4356 auto_vec<tree, 16> last_defs;
4357 gphi_iterator gpi;
4358 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4359 {
4360 gphi *phi = gpi.phi ();
4361 tree last_def;
4362 if (virtual_operand_p (gimple_phi_result (phi)))
4363 last_def = NULL_TREE;
4364 else
4365 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4366 last_defs.safe_push (last_def);
4367 }
4368
4369 /* Do the insertion. Then, if new basic blocks have been created in the
4370 	 process, rewind the chain of VDEFs created above to walk the new basic
4371 blocks and update the corresponding arguments of the PHI nodes. */
4372 update_modified_stmts (seq);
4373 if (gimple_find_sub_bbs (seq, &last_gsi))
4374 while (last_vdef != gimple_vuse (group->last_stmt))
4375 {
4376 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4377 if (stmt_could_throw_p (cfun, stmt))
4378 {
4379 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4380 unsigned int i;
4381 for (gpi = gsi_start_phis (lp_bb), i = 0;
4382 !gsi_end_p (gpi);
4383 gsi_next (&gpi), i++)
4384 {
4385 gphi *phi = gpi.phi ();
4386 tree new_def;
4387 if (virtual_operand_p (gimple_phi_result (phi)))
4388 new_def = last_vdef;
4389 else
4390 new_def = last_defs[i];
4391 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4392 }
4393 }
4394 last_vdef = gimple_vuse (stmt);
4395 }
4396 }
4397 else
4398 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4399
4400 for (int j = 0; j < 2; ++j)
4401 if (load_seq[j])
4402 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4403
4404 return true;
4405 }
4406
4407 /* Process the merged_store_group objects created in the coalescing phase.
4408 The stores are all against the base object BASE.
4409 Try to output the widened stores and delete the original statements if
4410 successful. Return true iff any changes were made. */
4411
4412 bool
4413 imm_store_chain_info::output_merged_stores ()
4414 {
4415 unsigned int i;
4416 merged_store_group *merged_store;
4417 bool ret = false;
4418 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4419 {
4420 if (dbg_cnt (store_merging)
4421 && output_merged_store (merged_store))
4422 {
4423 unsigned int j;
4424 store_immediate_info *store;
4425 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4426 {
4427 gimple *stmt = store->stmt;
4428 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4429 /* Don't remove clobbers, they are still useful even if
4430 everything is overwritten afterwards. */
4431 if (gimple_clobber_p (stmt))
4432 continue;
4433 gsi_remove (&gsi, true);
4434 if (store->lp_nr)
4435 remove_stmt_from_eh_lp (stmt);
4436 if (stmt != merged_store->last_stmt)
4437 {
4438 unlink_stmt_vdef (stmt);
4439 release_defs (stmt);
4440 }
4441 }
4442 ret = true;
4443 }
4444 }
4445 if (ret && dump_file)
4446 fprintf (dump_file, "Merging successful!\n");
4447
4448 return ret;
4449 }
4450
4451 /* Coalesce the store_immediate_info objects recorded against the base object
4452 BASE in the first phase and output them.
4453 Delete the allocated structures.
4454 Return true if any changes were made. */
4455
4456 bool
4457 imm_store_chain_info::terminate_and_process_chain ()
4458 {
4459 /* Process store chain. */
4460 bool ret = false;
4461 if (m_store_info.length () > 1)
4462 {
4463 ret = coalesce_immediate_stores ();
4464 if (ret)
4465 ret = output_merged_stores ();
4466 }
4467
4468 /* Delete all the entries we allocated ourselves. */
4469 store_immediate_info *info;
4470 unsigned int i;
4471 FOR_EACH_VEC_ELT (m_store_info, i, info)
4472 delete info;
4473
4474 merged_store_group *merged_info;
4475 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4476 delete merged_info;
4477
4478 return ret;
4479 }
4480
4481 /* Return true iff LHS is a destination potentially interesting for
4482 store merging. In practice these are the codes that get_inner_reference
4483 can process. */
4484
4485 static bool
4486 lhs_valid_for_store_merging_p (tree lhs)
4487 {
4488 if (DECL_P (lhs))
4489 return true;
4490
4491 switch (TREE_CODE (lhs))
4492 {
4493 case ARRAY_REF:
4494 case ARRAY_RANGE_REF:
4495 case BIT_FIELD_REF:
4496 case COMPONENT_REF:
4497 case MEM_REF:
4498 return true;
4499 default:
4500 return false;
4501 }
4502
4503 gcc_unreachable ();
4504 }
4505
4506 /* Return true if the tree RHS is a constant we want to consider
4507 during store merging. In practice accept all codes that
4508 native_encode_expr accepts. */
4509
4510 static bool
4511 rhs_valid_for_store_merging_p (tree rhs)
4512 {
4513 unsigned HOST_WIDE_INT size;
4514 if (TREE_CODE (rhs) == CONSTRUCTOR
4515 && CONSTRUCTOR_NELTS (rhs) == 0
4516 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4517 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4518 return true;
4519 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4520 && native_encode_expr (rhs, NULL, size) != 0);
4521 }
4522
4523 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4524 and return true on success or false on failure. */
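/* For example, with 8-bit units, BYTE_OFF 2 turns a bit position of 5
   into 21 (2 * BITS_PER_UNIT + 5).  */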
4525
4526 static bool
4527 adjust_bit_pos (poly_offset_int byte_off,
4528 poly_int64 *pbitpos,
4529 poly_uint64 *pbitregion_start,
4530 poly_uint64 *pbitregion_end)
4531 {
4532 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4533 bit_off += *pbitpos;
4534
4535 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4536 {
4537 if (maybe_ne (*pbitregion_end, 0U))
4538 {
4539 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4540 bit_off += *pbitregion_start;
4541 if (bit_off.to_uhwi (pbitregion_start))
4542 {
4543 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4544 bit_off += *pbitregion_end;
4545 if (!bit_off.to_uhwi (pbitregion_end))
4546 *pbitregion_end = 0;
4547 }
4548 else
4549 *pbitregion_end = 0;
4550 }
4551 return true;
4552 }
4553 else
4554 return false;
4555 }
4556
4557 /* If MEM is a memory reference usable for store merging (either as
4558 store destination or for loads), return the non-NULL base_addr
4559 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4560    Otherwise return NULL_TREE; *PBITSIZE is still set even in that
4561    case.  */
4562
4563 static tree
4564 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4565 poly_uint64 *pbitpos,
4566 poly_uint64 *pbitregion_start,
4567 poly_uint64 *pbitregion_end)
4568 {
4569 poly_int64 bitsize, bitpos;
4570 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4571 machine_mode mode;
4572 int unsignedp = 0, reversep = 0, volatilep = 0;
4573 tree offset;
4574 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4575 &unsignedp, &reversep, &volatilep);
4576 *pbitsize = bitsize;
4577 if (known_eq (bitsize, 0))
4578 return NULL_TREE;
4579
4580 if (TREE_CODE (mem) == COMPONENT_REF
4581 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4582 {
4583 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4584 if (maybe_ne (bitregion_end, 0U))
4585 bitregion_end += 1;
4586 }
4587
4588 if (reversep)
4589 return NULL_TREE;
4590
4591 /* We do not want to rewrite TARGET_MEM_REFs. */
4592 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4593 return NULL_TREE;
4594 /* In some cases get_inner_reference may return a
4595 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4596 canonicalize the base_addr to MEM_REF [ptr] and take
4597 byteoffset into account in the bitpos. This occurs in
4598 PR 23684 and this way we can catch more chains. */
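  /* For example, MEM_REF [p + 4B] with bit position 3 becomes base p
     with bit position 35, assuming 8-bit units.  */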
4599 else if (TREE_CODE (base_addr) == MEM_REF)
4600 {
4601 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4602 &bitregion_start, &bitregion_end))
4603 return NULL_TREE;
4604 base_addr = TREE_OPERAND (base_addr, 0);
4605 }
4606 /* get_inner_reference returns the base object, get at its
4607 address now. */
4608 else
4609 {
4610 if (maybe_lt (bitpos, 0))
4611 return NULL_TREE;
4612 base_addr = build_fold_addr_expr (base_addr);
4613 }
4614
4615 if (offset)
4616 {
4617       /* If the access has a variable offset then a base decl has to be
4618 address-taken to be able to emit pointer-based stores to it.
4619 ??? We might be able to get away with re-using the original
4620 base up to the first variable part and then wrapping that inside
4621 a BIT_FIELD_REF. */
4622 tree base = get_base_address (base_addr);
4623 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4624 return NULL_TREE;
4625
4626 /* Similarly to above for the base, remove constant from the offset. */
4627 if (TREE_CODE (offset) == PLUS_EXPR
4628 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4629 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4630 &bitpos, &bitregion_start, &bitregion_end))
4631 offset = TREE_OPERAND (offset, 0);
4632
4633 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4634 base_addr, offset);
4635 }
4636
4637 if (known_eq (bitregion_end, 0U))
4638 {
4639 bitregion_start = round_down_to_byte_boundary (bitpos);
4640 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4641 }
4642
4643 *pbitsize = bitsize;
4644 *pbitpos = bitpos;
4645 *pbitregion_start = bitregion_start;
4646 *pbitregion_end = bitregion_end;
4647 return base_addr;
4648 }
4649
4650 /* Return true if STMT is a load that can be used for store merging.
4651 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4652 BITREGION_END are properties of the corresponding store. */
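/* E.g. for _1 = MEM[q]; _2 = ~_1; MEM[p] = _2; (illustrative SSA names)
   the BIT_NOT_EXPR is looked through, OP describes the load from MEM[q]
   and OP->bit_not_p is set.  */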
4653
4654 static bool
4655 handled_load (gimple *stmt, store_operand_info *op,
4656 poly_uint64 bitsize, poly_uint64 bitpos,
4657 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4658 {
4659 if (!is_gimple_assign (stmt))
4660 return false;
4661 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4662 {
4663 tree rhs1 = gimple_assign_rhs1 (stmt);
4664 if (TREE_CODE (rhs1) == SSA_NAME
4665 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4666 bitregion_start, bitregion_end))
4667 {
4668 	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4669 been optimized earlier, but if allowed here, would confuse the
4670 multiple uses counting. */
4671 if (op->bit_not_p)
4672 return false;
4673 op->bit_not_p = !op->bit_not_p;
4674 return true;
4675 }
4676 return false;
4677 }
4678 if (gimple_vuse (stmt)
4679 && gimple_assign_load_p (stmt)
4680 && !stmt_can_throw_internal (cfun, stmt)
4681 && !gimple_has_volatile_ops (stmt))
4682 {
4683 tree mem = gimple_assign_rhs1 (stmt);
4684 op->base_addr
4685 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4686 &op->bitregion_start,
4687 &op->bitregion_end);
4688 if (op->base_addr != NULL_TREE
4689 && known_eq (op->bitsize, bitsize)
4690 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4691 && known_ge (op->bitpos - op->bitregion_start,
4692 bitpos - bitregion_start)
4693 && known_ge (op->bitregion_end - op->bitpos,
4694 bitregion_end - bitpos))
4695 {
4696 op->stmt = stmt;
4697 op->val = mem;
4698 op->bit_not_p = false;
4699 return true;
4700 }
4701 }
4702 return false;
4703 }
4704
4705 /* Return the index number of the landing pad for STMT, if any. */
4706
4707 static int
4708 lp_nr_for_store (gimple *stmt)
4709 {
4710 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4711 return 0;
4712
4713 if (!stmt_could_throw_p (cfun, stmt))
4714 return 0;
4715
4716 return lookup_stmt_eh_lp (stmt);
4717 }
4718
4719 /* Record the store STMT for store merging optimization if it can be
4720 optimized. Return true if any changes were made. */
4721
4722 bool
4723 pass_store_merging::process_store (gimple *stmt)
4724 {
4725 tree lhs = gimple_assign_lhs (stmt);
4726 tree rhs = gimple_assign_rhs1 (stmt);
4727 poly_uint64 bitsize, bitpos;
4728 poly_uint64 bitregion_start, bitregion_end;
4729 tree base_addr
4730 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4731 &bitregion_start, &bitregion_end);
4732 if (known_eq (bitsize, 0U))
4733 return false;
4734
4735 bool invalid = (base_addr == NULL_TREE
4736 || (maybe_gt (bitsize,
4737 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4738 && TREE_CODE (rhs) != INTEGER_CST
4739 && (TREE_CODE (rhs) != CONSTRUCTOR
4740 || CONSTRUCTOR_NELTS (rhs) != 0)));
4741 enum tree_code rhs_code = ERROR_MARK;
4742 bool bit_not_p = false;
4743 struct symbolic_number n;
4744 gimple *ins_stmt = NULL;
4745 store_operand_info ops[2];
4746 if (invalid)
4747 ;
4748 else if (rhs_valid_for_store_merging_p (rhs))
4749 {
4750 rhs_code = INTEGER_CST;
4751 ops[0].val = rhs;
4752 }
4753 else if (TREE_CODE (rhs) != SSA_NAME)
4754 invalid = true;
4755 else
4756 {
4757 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4758 if (!is_gimple_assign (def_stmt))
4759 invalid = true;
4760 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4761 bitregion_start, bitregion_end))
4762 rhs_code = MEM_REF;
4763 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4764 {
4765 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4766 if (TREE_CODE (rhs1) == SSA_NAME
4767 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4768 {
4769 bit_not_p = true;
4770 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4771 }
4772 }
4773
4774 if (rhs_code == ERROR_MARK && !invalid)
4775 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4776 {
4777 case BIT_AND_EXPR:
4778 case BIT_IOR_EXPR:
4779 case BIT_XOR_EXPR:
4780 tree rhs1, rhs2;
4781 rhs1 = gimple_assign_rhs1 (def_stmt);
4782 rhs2 = gimple_assign_rhs2 (def_stmt);
4783 invalid = true;
4784 if (TREE_CODE (rhs1) != SSA_NAME)
4785 break;
4786 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4787 if (!is_gimple_assign (def_stmt1)
4788 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4789 bitregion_start, bitregion_end))
4790 break;
4791 if (rhs_valid_for_store_merging_p (rhs2))
4792 ops[1].val = rhs2;
4793 else if (TREE_CODE (rhs2) != SSA_NAME)
4794 break;
4795 else
4796 {
4797 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4798 if (!is_gimple_assign (def_stmt2))
4799 break;
4800 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4801 bitregion_start, bitregion_end))
4802 break;
4803 }
4804 invalid = false;
4805 break;
4806 default:
4807 invalid = true;
4808 break;
4809 }
4810
4811 unsigned HOST_WIDE_INT const_bitsize;
4812 if (bitsize.is_constant (&const_bitsize)
4813 && (const_bitsize % BITS_PER_UNIT) == 0
4814 && const_bitsize <= 64
4815 && multiple_p (bitpos, BITS_PER_UNIT))
4816 {
4817 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4818 if (ins_stmt)
4819 {
4820 uint64_t nn = n.n;
4821 for (unsigned HOST_WIDE_INT i = 0;
4822 i < const_bitsize;
4823 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4824 if ((nn & MARKER_MASK) == 0
4825 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4826 {
4827 ins_stmt = NULL;
4828 break;
4829 }
4830 if (ins_stmt)
4831 {
4832 if (invalid)
4833 {
4834 rhs_code = LROTATE_EXPR;
4835 ops[0].base_addr = NULL_TREE;
4836 ops[1].base_addr = NULL_TREE;
4837 }
4838 invalid = false;
4839 }
4840 }
4841 }
4842
4843 if (invalid
4844 && bitsize.is_constant (&const_bitsize)
4845 && ((const_bitsize % BITS_PER_UNIT) != 0
4846 || !multiple_p (bitpos, BITS_PER_UNIT))
4847 && const_bitsize <= 64)
4848 {
4849 /* Bypass a conversion to the bit-field type. */
4850 if (!bit_not_p
4851 && is_gimple_assign (def_stmt)
4852 && CONVERT_EXPR_CODE_P (rhs_code))
4853 {
4854 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4855 if (TREE_CODE (rhs1) == SSA_NAME
4856 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4857 rhs = rhs1;
4858 }
4859 rhs_code = BIT_INSERT_EXPR;
4860 bit_not_p = false;
4861 ops[0].val = rhs;
4862 ops[0].base_addr = NULL_TREE;
4863 ops[1].base_addr = NULL_TREE;
4864 invalid = false;
4865 }
4866 }
4867
4868 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4869 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4870 if (invalid
4871 || !bitsize.is_constant (&const_bitsize)
4872 || !bitpos.is_constant (&const_bitpos)
4873 || !bitregion_start.is_constant (&const_bitregion_start)
4874 || !bitregion_end.is_constant (&const_bitregion_end))
4875 return terminate_all_aliasing_chains (NULL, stmt);
4876
4877 if (!ins_stmt)
4878 memset (&n, 0, sizeof (n));
4879
4880 class imm_store_chain_info **chain_info = NULL;
4881 bool ret = false;
4882 if (base_addr)
4883 chain_info = m_stores.get (base_addr);
4884
4885 store_immediate_info *info;
4886 if (chain_info)
4887 {
4888 unsigned int ord = (*chain_info)->m_store_info.length ();
4889 info = new store_immediate_info (const_bitsize, const_bitpos,
4890 const_bitregion_start,
4891 const_bitregion_end,
4892 stmt, ord, rhs_code, n, ins_stmt,
4893 bit_not_p, lp_nr_for_store (stmt),
4894 ops[0], ops[1]);
4895 if (dump_file && (dump_flags & TDF_DETAILS))
4896 {
4897 fprintf (dump_file, "Recording immediate store from stmt:\n");
4898 print_gimple_stmt (dump_file, stmt, 0);
4899 }
4900 (*chain_info)->m_store_info.safe_push (info);
4901 ret |= terminate_all_aliasing_chains (chain_info, stmt);
4902       /* If we reach the limit of stores to merge in a chain, terminate and
4903 process the chain now. */
4904 if ((*chain_info)->m_store_info.length ()
4905 == (unsigned int) param_max_stores_to_merge)
4906 {
4907 if (dump_file && (dump_flags & TDF_DETAILS))
4908 fprintf (dump_file,
4909 "Reached maximum number of statements to merge:\n");
4910 ret |= terminate_and_process_chain (*chain_info);
4911 }
4912 return ret;
4913 }
4914
4915   /* Does the store alias any existing chain?  */
4916 ret |= terminate_all_aliasing_chains (NULL, stmt);
4917 /* Start a new chain. */
4918 class imm_store_chain_info *new_chain
4919 = new imm_store_chain_info (m_stores_head, base_addr);
4920 info = new store_immediate_info (const_bitsize, const_bitpos,
4921 const_bitregion_start,
4922 const_bitregion_end,
4923 stmt, 0, rhs_code, n, ins_stmt,
4924 bit_not_p, lp_nr_for_store (stmt),
4925 ops[0], ops[1]);
4926 new_chain->m_store_info.safe_push (info);
4927 m_stores.put (base_addr, new_chain);
4928 if (dump_file && (dump_flags & TDF_DETAILS))
4929 {
4930 fprintf (dump_file, "Starting new chain with statement:\n");
4931 print_gimple_stmt (dump_file, stmt, 0);
4932 fprintf (dump_file, "The base object is:\n");
4933 print_generic_expr (dump_file, base_addr);
4934 fprintf (dump_file, "\n");
4935 }
4936 return ret;
4937 }
4938
4939 /* Return true if STMT is a store valid for store merging. */
4940
4941 static bool
4942 store_valid_for_store_merging_p (gimple *stmt)
4943 {
4944 return gimple_assign_single_p (stmt)
4945 && gimple_vdef (stmt)
4946 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4947 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
4948 }
4949
4950 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4951
4952 /* Return the status of basic block BB wrt store merging. */
4953
4954 static enum basic_block_status
4955 get_status_for_store_merging (basic_block bb)
4956 {
4957 unsigned int num_statements = 0;
4958 gimple_stmt_iterator gsi;
4959 edge e;
4960
4961 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4962 {
4963 gimple *stmt = gsi_stmt (gsi);
4964
4965 if (is_gimple_debug (stmt))
4966 continue;
4967
4968 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4969 break;
4970 }
4971
4972 if (num_statements == 0)
4973 return BB_INVALID;
4974
4975 if (cfun->can_throw_non_call_exceptions && cfun->eh
4976 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
4977 && (e = find_fallthru_edge (bb->succs))
4978 && e->dest == bb->next_bb)
4979 return BB_EXTENDED_VALID;
4980
4981 return num_statements >= 2 ? BB_VALID : BB_INVALID;
4982 }
4983
4984 /* Entry point for the pass. Go over each basic block recording chains of
4985 immediate stores. Upon encountering a terminating statement (as defined
4986 by stmt_terminates_chain_p) process the recorded stores and emit the widened
4987 variants. */
4988
4989 unsigned int
4990 pass_store_merging::execute (function *fun)
4991 {
4992 basic_block bb;
4993 hash_set<gimple *> orig_stmts;
4994 bool changed = false, open_chains = false;
4995
4996 /* If the function can throw and catch non-call exceptions, we'll be trying
4997 to merge stores across different basic blocks so we need to first unsplit
4998 the EH edges in order to streamline the CFG of the function. */
4999 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5000 unsplit_eh_edges ();
5001
5002 calculate_dominance_info (CDI_DOMINATORS);
5003
5004 FOR_EACH_BB_FN (bb, fun)
5005 {
5006 const basic_block_status bb_status = get_status_for_store_merging (bb);
5007 gimple_stmt_iterator gsi;
5008
5009 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5010 {
5011 changed |= terminate_and_process_all_chains ();
5012 open_chains = false;
5013 }
5014
5015 if (bb_status == BB_INVALID)
5016 continue;
5017
5018 if (dump_file && (dump_flags & TDF_DETAILS))
5019 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5020
5021 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5022 {
5023 gimple *stmt = gsi_stmt (gsi);
5024
5025 if (is_gimple_debug (stmt))
5026 continue;
5027
5028 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5029 {
5030 /* Terminate all chains. */
5031 if (dump_file && (dump_flags & TDF_DETAILS))
5032 fprintf (dump_file, "Volatile access terminates "
5033 "all chains\n");
5034 changed |= terminate_and_process_all_chains ();
5035 open_chains = false;
5036 continue;
5037 }
5038
5039 if (store_valid_for_store_merging_p (stmt))
5040 changed |= process_store (stmt);
5041 else
5042 changed |= terminate_all_aliasing_chains (NULL, stmt);
5043 }
5044
5045 if (bb_status == BB_EXTENDED_VALID)
5046 open_chains = true;
5047 else
5048 {
5049 changed |= terminate_and_process_all_chains ();
5050 open_chains = false;
5051 }
5052 }
5053
5054 if (open_chains)
5055 changed |= terminate_and_process_all_chains ();
5056
5057 /* If the function can throw and catch non-call exceptions and something
5058 changed during the pass, then the CFG has (very likely) changed too. */
5059 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5060 {
5061 free_dominance_info (CDI_DOMINATORS);
5062 return TODO_cleanup_cfg;
5063 }
5064
5065 return 0;
5066 }
5067
5068 } // anon namespace
5069
5070 /* Construct and return a store merging pass object. */
5071
5072 gimple_opt_pass *
5073 make_pass_store_merging (gcc::context *ctxt)
5074 {
5075 return new pass_store_merging (ctxt);
5076 }
5077
5078 #if CHECKING_P
5079
5080 namespace selftest {
5081
5082 /* Selftests for store merging helpers. */
5083
5084 /* Assert that all elements of the byte arrays X and Y, both of length N,
5085 are equal. */
5086
5087 static void
5088 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5089 {
5090 for (unsigned int i = 0; i < n; i++)
5091 {
5092 if (x[i] != y[i])
5093 {
5094 fprintf (stderr, "Arrays do not match. X:\n");
5095 dump_char_array (stderr, x, n);
5096 fprintf (stderr, "Y:\n");
5097 dump_char_array (stderr, y, n);
5098 }
5099 ASSERT_EQ (x[i], y[i]);
5100 }
5101 }
5102
5103 /* Test that shift_bytes_in_array_left carries bits across byte
5104    boundaries correctly.  */
5105
5106 static void
5107 verify_shift_bytes_in_array_left (void)
5108 {
5109 /* byte 1 | byte 0
5110 00011111 | 11100000. */
5111 unsigned char orig[2] = { 0xe0, 0x1f };
5112 unsigned char in[2];
5113 memcpy (in, orig, sizeof orig);
5114
5115 unsigned char expected[2] = { 0x80, 0x7f };
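  /* Byte 0 is the least significant here: 0x1fe0 << 2 == 0x7f80, i.e.
     bytes { 0x80, 0x7f }.  */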
5116 shift_bytes_in_array_left (in, sizeof (in), 2);
5117 verify_array_eq (in, expected, sizeof (in));
5118
5119 memcpy (in, orig, sizeof orig);
5120 memcpy (expected, orig, sizeof orig);
5121 /* Check that shifting by zero doesn't change anything. */
5122 shift_bytes_in_array_left (in, sizeof (in), 0);
5123 verify_array_eq (in, expected, sizeof (in));
5124
5125 }
5126
5127 /* Test that shift_bytes_in_array_right carries bits across byte
5128    boundaries correctly.  */
5129
5130 static void
5131 verify_shift_bytes_in_array_right (void)
5132 {
5133 /* byte 1 | byte 0
5134    11100000 | 00011111.  */
5135 unsigned char orig[2] = { 0x1f, 0xe0};
5136 unsigned char in[2];
5137 memcpy (in, orig, sizeof orig);
5138 unsigned char expected[2] = { 0x07, 0xf8};
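  /* Byte 0 is the most significant here: 0x1fe0 >> 2 == 0x07f8, i.e.
     bytes { 0x07, 0xf8 }.  */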
5139 shift_bytes_in_array_right (in, sizeof (in), 2);
5140 verify_array_eq (in, expected, sizeof (in));
5141
5142 memcpy (in, orig, sizeof orig);
5143 memcpy (expected, orig, sizeof orig);
5144 /* Check that shifting by zero doesn't change anything. */
5145 shift_bytes_in_array_right (in, sizeof (in), 0);
5146 verify_array_eq (in, expected, sizeof (in));
5147 }
5148
5149 /* Test that clear_bit_region clears exactly the bits asked for and
5150    nothing more.  */
5151
5152 static void
5153 verify_clear_bit_region (void)
5154 {
5155 /* Start with all bits set and test clearing various patterns in them. */
5156 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5157 unsigned char in[3];
5158 unsigned char expected[3];
5159 memcpy (in, orig, sizeof in);
5160
5161 /* Check zeroing out all the bits. */
5162 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5163 expected[0] = expected[1] = expected[2] = 0;
5164 verify_array_eq (in, expected, sizeof in);
5165
5166 memcpy (in, orig, sizeof in);
5167 /* Leave the first and last bits intact. */
5168 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
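  /* Bits 1 .. 22 are cleared; bit 0 of byte 0 and bit 7 of byte 2
     survive, hence the 0x1 and 0x80 below.  */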
5169 expected[0] = 0x1;
5170 expected[1] = 0;
5171 expected[2] = 0x80;
5172 verify_array_eq (in, expected, sizeof in);
5173 }
5174
5175 /* Test that clear_bit_region_be clears exactly the bits asked for and
5176    nothing more.  */
5177
5178 static void
5179 verify_clear_bit_region_be (void)
5180 {
5181 /* Start with all bits set and test clearing various patterns in them. */
5182 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5183 unsigned char in[3];
5184 unsigned char expected[3];
5185 memcpy (in, orig, sizeof in);
5186
5187 /* Check zeroing out all the bits. */
5188 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5189 expected[0] = expected[1] = expected[2] = 0;
5190 verify_array_eq (in, expected, sizeof in);
5191
5192 memcpy (in, orig, sizeof in);
5193 /* Leave the first and last bits intact. */
5194 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
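  /* In the big-endian variant bits are numbered from the most significant
     bit down, so the surviving bits are the MSB of byte 0 (0x80) and the
     LSB of byte 2 (0x1).  */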
5195 expected[0] = 0x80;
5196 expected[1] = 0;
5197 expected[2] = 0x1;
5198 verify_array_eq (in, expected, sizeof in);
5199 }
5200
5201
5202 /* Run all of the selftests within this file. */
5203
5204 void
5205 store_merging_c_tests (void)
5206 {
5207 verify_shift_bytes_in_array_left ();
5208 verify_shift_bytes_in_array_right ();
5209 verify_clear_bit_region ();
5210 verify_clear_bit_region_be ();
5211 }
5212
5213 } // namespace selftest
5214 #endif /* CHECKING_P. */
5215