1 /* GIMPLE store merging and byte swapping passes.
2    Copyright (C) 2009-2020 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 /* The purpose of the store merging pass is to combine multiple memory stores
22    of constant values, values loaded from memory, bitwise operations on those,
23    or bit-field values, to consecutive locations, into fewer wider stores.
24 
25    For example, if we have a sequence performing four byte stores to
26    consecutive memory locations:
27    [p     ] := imm1;
28    [p + 1B] := imm2;
29    [p + 2B] := imm3;
30    [p + 3B] := imm4;
31    we can transform this into a single 4-byte store if the target supports it:
32    [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33 
34    Or:
35    [p     ] := [q     ];
36    [p + 1B] := [q + 1B];
37    [p + 2B] := [q + 2B];
38    [p + 3B] := [q + 3B];
39    if there is no overlap, this can be transformed into a single 4-byte
40    load followed by a single 4-byte store.
41 
42    Or:
43    [p     ] := [q     ] ^ imm1;
44    [p + 1B] := [q + 1B] ^ imm2;
45    [p + 2B] := [q + 2B] ^ imm3;
46    [p + 3B] := [q + 3B] ^ imm4;
47    if there is no overlap, this can be transformed into a single 4-byte
48    load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49 
50    Or:
51    [p:1 ] := imm;
52    [p:31] := val & 0x7FFFFFFF;
53    we can transform this into a single 4-byte store if the target supports it:
54    [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
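
   As a hedged illustration (hypothetical struct pkt and init_pkt, not code
   from this file or its testsuite), a source fragment such as:

     struct pkt { unsigned char a, b, c, d; };

     void
     init_pkt (struct pkt *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   performs four adjacent 1-byte constant stores that the pass can merge
   into one 4-byte store of 0x04030201 on little-endian targets (0x01020304
   on big-endian ones), provided the target allows an access of that width
   at the required alignment.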
55 
56    The algorithm is applied to each basic block in three phases:
57 
58    1) Scan through the basic block and record assignments to destinations
59    that can be expressed as a store to memory of a certain size at a certain
60    bit offset from base expressions we can handle.  For bit-fields we also
61    record the surrounding bit region, i.e. bits that could be stored in
62    a read-modify-write operation when storing the bit-field.  Record store
63    chains to different bases in a hash_map (m_stores) and make sure to
64    terminate such chains when appropriate (for example when the stored
65    values get used subsequently).
66    These stores can be a result of structure element initializers, array stores
67    etc.  A store_immediate_info object is recorded for every such store.
68    Record as many such assignments to a single base as possible until a
69    statement that interferes with the store sequence is encountered.
70    Each store has up to 2 operands, each of which can be either a constant,
71    a memory load or an SSA name, from which the value to be stored can be
72    computed.  At most one of the operands can be a constant.  The operands
73    are recorded in the store_operand_info struct.
74 
75    2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76    store_immediate_info objects) and coalesce contiguous stores into
77    merged_store_group objects.  For bit-field stores, we don't need to
78    require the stores to be contiguous, just their surrounding bit regions
79    have to be contiguous.  If the expression being stored is different
80    between adjacent stores, such as one store storing a constant and the
81    following one storing a value loaded from memory, or if the loaded memory
82    objects are not adjacent, a new merged_store_group is created as well.
83 
84    For example, given the stores:
85    [p     ] := 0;
86    [p + 1B] := 1;
87    [p + 3B] := 0;
88    [p + 4B] := 1;
89    [p + 5B] := 0;
90    [p + 6B] := 0;
91    This phase would produce two merged_store_group objects, one recording the
92    two bytes stored in the memory region [p : p + 1] and another
93    recording the four bytes stored in the memory region [p + 3 : p + 6].
94 
95    3) The merged_store_group objects produced in phase 2) are processed
96    to generate the sequence of wider stores that set each contiguous memory
97    region to the sequence of bytes that corresponds to it.  This may emit
98    multiple stores per store group to handle contiguous stores that are not
99    of a size that is a power of 2.  For example it can try to emit a 40-bit
100    store as a 32-bit store followed by an 8-bit store.
101    We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
102    or TARGET_SLOW_UNALIGNED_ACCESS settings.
103 
104    Note on endianness and example:
105    Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106    [p     ] := 0x1234;
107    [p + 2B] := 0x5678;
108    [p + 4B] := 0xab;
109    [p + 5B] := 0xcd;
110 
111    The memory layout for little-endian (LE) and big-endian (BE) must be:
112   p |LE|BE|
113   ---------
114   0 |34|12|
115   1 |12|34|
116   2 |78|56|
117   3 |56|78|
118   4 |ab|ab|
119   5 |cd|cd|
120 
121   To merge these into a single 48-bit merged value 'val' in phase 2)
122   on little-endian we insert stores to higher (consecutive) bitpositions
123   into the most significant bits of the merged value.
124   The final merged value would be: 0xcdab56781234
125 
126   For big-endian we insert stores to higher bitpositions into the least
127   significant bits of the merged value.
128   The final merged value would be: 0x12345678abcd
129 
130   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131   followed by a 16-bit store.  Again, we must consider endianness when
132   breaking down the 48-bit value 'val' computed above.
133   For little endian we emit:
134   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135   [p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;
136 
137   Whereas for big-endian we emit:
138   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
140 
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h"	/* For get_bit_range.  */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
173 
174 /* The maximum size (in bits) of the stores this pass should generate.  */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177 
178 /* Limit to bound the number of aliasing checks for loads with the same
179    vuse as the corresponding store.  */
180 #define MAX_STORE_ALIAS_CHECKS 64
181 
182 namespace {
183 
184 struct bswap_stat
185 {
186   /* Number of hand-written 16-bit nop / bswaps found.  */
187   int found_16bit;
188 
189   /* Number of hand-written 32-bit nop / bswaps found.  */
190   int found_32bit;
191 
192   /* Number of hand-written 64-bit nop / bswaps found.  */
193   int found_64bit;
194 } nop_stats, bswap_stats;
195 
196 /* A symbolic number structure is used to detect byte permutation and selection
197    patterns of a source.  To achieve that, its field N contains an artificial
198    number consisting of BITS_PER_MARKER sized markers tracking where each
199    byte comes from in the source:
200 
201    0	   - target byte has the value 0
202    FF	   - target byte has an unknown value (eg. due to sign extension)
203    1..size - marker value is the byte index in the source (0 for lsb).
204 
205    To detect permutations on memory sources (arrays and structures), a symbolic
206    number also records:
207    - a base address BASE_ADDR and an OFFSET giving the address of the source;
208    - a range which gives the difference between the highest and lowest accessed
209      memory location to make such a symbolic number;
210    - the address SRC of the source element of lowest address as a convenience
211      to easily get BASE_ADDR + offset + lowest bytepos;
212    - the number of expressions N_OPS bitwise ORed together, which gives an
213      approximate cost of the computation.
214 
215    Note 1: the range is different from size as size reflects the size of the
216    type of the current expression.  For instance, for an array char a[],
217    (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218    (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219    time a range of 1.
220 
221    Note 2: for non-memory sources, range holds the same value as size.
222 
223    Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
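
/* As a hedged, illustrative walk-through (not code from this file): on a
   little-endian target, a 32-bit value loaded straight from memory gets the
   symbolic number 0x04030201 -- the marker for the least significant result
   byte is 1, i.e. that byte comes from source byte 1 (the source lsb), and
   so on.  If shifts and ORs have reversed the bytes, the markers instead
   read 0x01020304, which is what CMPXCHG below encodes once it has been
   adjusted for a 4-byte operand (see find_bswap_or_nop_finalize).  */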
224 
225 struct symbolic_number {
226   uint64_t n;
227   tree type;
228   tree base_addr;
229   tree offset;
230   poly_int64_pod bytepos;
231   tree src;
232   tree alias_set;
233   tree vuse;
234   unsigned HOST_WIDE_INT range;
235   int n_ops;
236 };
237 
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242   ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243 
244 /* The number which the find_bswap_or_nop_1 result should match in
245    order to have a nop.  The number is masked according to the size of
246    the symbolic number before using it.  */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248   (uint64_t)0x08070605 << 32 | 0x04030201)
249 
250 /* The number which the find_bswap_or_nop_1 result should match in
251    order to have a byte swap.  The number is masked according to the
252    size of the symbolic number before using it.  */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254   (uint64_t)0x01020304 << 32 | 0x05060708)
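
/* As a hedged illustration (hypothetical swap32 helper, not code from this
   file or its testsuite), a hand-written byte swap whose symbolic number
   ends up matching CMPXCHG:

     unsigned int
     swap32 (unsigned int x)
     {
       return ((x & 0x000000ff) << 24)
	      | ((x & 0x0000ff00) << 8)
	      | ((x & 0x00ff0000) >> 8)
	      | ((x & 0xff000000) >> 24);
     }

   For the returned value the markers come out as 0x01020304: the source msb
   feeds the result lsb and vice versa, so the expression can be replaced by
   a __builtin_bswap32 call when the target provides one.  */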
255 
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257    number N.  Return false if the requested operation is not permitted
258    on a symbolic number.  */
259 
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 		 struct symbolic_number *n,
263 		 int count)
264 {
265   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266   unsigned head_marker;
267 
268   if (count < 0
269       || count >= TYPE_PRECISION (n->type)
270       || count % BITS_PER_UNIT != 0)
271     return false;
272   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273 
274   /* Zero out the extra bits of N in order to avoid them being shifted
275      into the significant bits.  */
276   if (size < 64 / BITS_PER_MARKER)
277     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278 
279   switch (code)
280     {
281     case LSHIFT_EXPR:
282       n->n <<= count;
283       break;
284     case RSHIFT_EXPR:
285       head_marker = HEAD_MARKER (n->n, size);
286       n->n >>= count;
287       /* Arithmetic shift of signed type: result is dependent on the value.  */
288       if (!TYPE_UNSIGNED (n->type) && head_marker)
289 	for (i = 0; i < count / BITS_PER_MARKER; i++)
290 	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 		  << ((size - 1 - i) * BITS_PER_MARKER);
292       break;
293     case LROTATE_EXPR:
294       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295       break;
296     case RROTATE_EXPR:
297       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298       break;
299     default:
300       return false;
301     }
302   /* Zero unused bits for size.  */
303   if (size < 64 / BITS_PER_MARKER)
304     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305   return true;
306 }
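
/* For example (a hedged illustration, not a testcase): given a 4-byte
   unsigned symbolic number holding the identity markers 0x04030201, an
   RSHIFT_EXPR by 8 bits yields 0x00040302 -- the marker bytes move exactly
   like the value bytes would -- while a count that is not a multiple of
   BITS_PER_UNIT makes do_shift_rotate return false.  */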
307 
308 /* Perform sanity checking for the symbolic number N and the gimple
309    statement STMT.  */
310 
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313 {
314   tree lhs_type;
315 
316   lhs_type = gimple_expr_type (stmt);
317 
318   if (TREE_CODE (lhs_type) != INTEGER_TYPE
319       && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320     return false;
321 
322   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323     return false;
324 
325   return true;
326 }
327 
328 /* Initialize the symbolic number N for the bswap pass from the base element
329    SRC manipulated by the bitwise OR expression.  */
330 
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
333 {
334   int size;
335 
336   if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
337     return false;
338 
339   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340   n->src = src;
341 
342   /* Set up the symbolic number N by setting each byte marker to a value
343      between 1 and the byte size of SRC.  The highest order byte is set to
344      that size and the lowest order byte to 1.  */
345   n->type = TREE_TYPE (src);
346   size = TYPE_PRECISION (n->type);
347   if (size % BITS_PER_UNIT != 0)
348     return false;
349   size /= BITS_PER_UNIT;
350   if (size > 64 / BITS_PER_MARKER)
351     return false;
352   n->range = size;
353   n->n = CMPNOP;
354   n->n_ops = 1;
355 
356   if (size < 64 / BITS_PER_MARKER)
357     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358 
359   return true;
360 }
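
/* For instance (a hedged illustration): for a 32-bit SRC the function above
   leaves N->n equal to 0x04030201, the low four markers of CMPNOP, recording
   that each byte of the value currently comes from the source byte in the
   same position.  */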
361 
362 /* Check if STMT might be a byte swap or a nop from a memory source and
363    return the answer.  If so, REF is that memory source and the base of the
364    memory area accessed and the offset from that base are recorded in N.  */
365 
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368 {
369   /* Leaf node is an array or component ref. Memorize its base and
370      offset from base to compare to other such leaf node.  */
371   poly_int64 bitsize, bitpos, bytepos;
372   machine_mode mode;
373   int unsignedp, reversep, volatilep;
374   tree offset, base_addr;
375 
376   /* Not prepared to handle PDP endian.  */
377   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378     return false;
379 
380   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381     return false;
382 
383   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 				   &unsignedp, &reversep, &volatilep);
385 
386   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387     /* Do not rewrite TARGET_MEM_REF.  */
388     return false;
389   else if (TREE_CODE (base_addr) == MEM_REF)
390     {
391       poly_offset_int bit_offset = 0;
392       tree off = TREE_OPERAND (base_addr, 1);
393 
394       if (!integer_zerop (off))
395 	{
396 	  poly_offset_int boff = mem_ref_offset (base_addr);
397 	  boff <<= LOG2_BITS_PER_UNIT;
398 	  bit_offset += boff;
399 	}
400 
401       base_addr = TREE_OPERAND (base_addr, 0);
402 
403       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
404       if (maybe_lt (bit_offset, 0))
405 	{
406 	  tree byte_offset = wide_int_to_tree
407 	    (sizetype, bits_to_bytes_round_down (bit_offset));
408 	  bit_offset = num_trailing_bits (bit_offset);
409 	  if (offset)
410 	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 	  else
412 	    offset = byte_offset;
413 	}
414 
415       bitpos += bit_offset.force_shwi ();
416     }
417   else
418     base_addr = build_fold_addr_expr (base_addr);
419 
420   if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421     return false;
422   if (!multiple_p (bitsize, BITS_PER_UNIT))
423     return false;
424   if (reversep)
425     return false;
426 
427   if (!init_symbolic_number (n, ref))
428     return false;
429   n->base_addr = base_addr;
430   n->offset = offset;
431   n->bytepos = bytepos;
432   n->alias_set = reference_alias_ptr_type (ref);
433   n->vuse = gimple_vuse (stmt);
434   return true;
435 }
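
/* A hedged example (hypothetical load32_be helper, not from this file or its
   testsuite) of a memory-source pattern handled via find_bswap_or_nop_load
   and perform_symbolic_merge:

     unsigned int
     load32_be (const unsigned char *p)
     {
       return ((unsigned int) p[0] << 24)
	      | ((unsigned int) p[1] << 16)
	      | ((unsigned int) p[2] << 8)
	      | (unsigned int) p[3];
     }

   Each p[i] is a 1-byte load with its own base/offset information; the merge
   of the four symbolic numbers covers a 4-byte range, and the result can be
   rewritten as a plain 4-byte load on big-endian targets or as a 4-byte load
   followed by a byte swap on little-endian targets, subject to the usual
   alignment and optab checks.  */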
436 
437 /* Compute the symbolic number N representing the result of a bitwise OR on 2
438    symbolic numbers N1 and N2 whose source statements are respectively
439    SOURCE_STMT1 and SOURCE_STMT2.  */
440 
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 			gimple *source_stmt2, struct symbolic_number *n2,
444 			struct symbolic_number *n)
445 {
446   int i, size;
447   uint64_t mask;
448   gimple *source_stmt;
449   struct symbolic_number *n_start;
450 
451   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452   if (TREE_CODE (rhs1) == BIT_FIELD_REF
453       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454     rhs1 = TREE_OPERAND (rhs1, 0);
455   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456   if (TREE_CODE (rhs2) == BIT_FIELD_REF
457       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458     rhs2 = TREE_OPERAND (rhs2, 0);
459 
460   /* Sources are different, cancel bswap if they are not memory locations with
461      the same base (array, structure, ...).  */
462   if (rhs1 != rhs2)
463     {
464       uint64_t inc;
465       HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466       struct symbolic_number *toinc_n_ptr, *n_end;
467       basic_block bb1, bb2;
468 
469       if (!n1->base_addr || !n2->base_addr
470 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 	return NULL;
472 
473       if (!n1->offset != !n2->offset
474 	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 	return NULL;
476 
477       start1 = 0;
478       if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 	return NULL;
480 
481       if (start1 < start2)
482 	{
483 	  n_start = n1;
484 	  start_sub = start2 - start1;
485 	}
486       else
487 	{
488 	  n_start = n2;
489 	  start_sub = start1 - start2;
490 	}
491 
492       bb1 = gimple_bb (source_stmt1);
493       bb2 = gimple_bb (source_stmt2);
494       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 	source_stmt = source_stmt1;
496       else
497 	source_stmt = source_stmt2;
498 
499       /* Find the highest address at which a load is performed and
500 	 compute related info.  */
501       end1 = start1 + (n1->range - 1);
502       end2 = start2 + (n2->range - 1);
503       if (end1 < end2)
504 	{
505 	  end = end2;
506 	  end_sub = end2 - end1;
507 	}
508       else
509 	{
510 	  end = end1;
511 	  end_sub = end1 - end2;
512 	}
513       n_end = (end2 > end1) ? n2 : n1;
514 
515       /* Find symbolic number whose lsb is the most significant.  */
516       if (BYTES_BIG_ENDIAN)
517 	toinc_n_ptr = (n_end == n1) ? n2 : n1;
518       else
519 	toinc_n_ptr = (n_start == n1) ? n2 : n1;
520 
521       n->range = end - MIN (start1, start2) + 1;
522 
523       /* Check that the range of memory covered can be represented by
524 	 a symbolic number.  */
525       if (n->range > 64 / BITS_PER_MARKER)
526 	return NULL;
527 
528       /* Reinterpret byte marks in symbolic number holding the value of
529 	 bigger weight according to target endianness.  */
530       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 	{
534 	  unsigned marker
535 	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 	  if (marker && marker != MARKER_BYTE_UNKNOWN)
537 	    toinc_n_ptr->n += inc;
538 	}
539     }
540   else
541     {
542       n->range = n1->range;
543       n_start = n1;
544       source_stmt = source_stmt1;
545     }
546 
547   if (!n1->alias_set
548       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549     n->alias_set = n1->alias_set;
550   else
551     n->alias_set = ptr_type_node;
552   n->vuse = n_start->vuse;
553   n->base_addr = n_start->base_addr;
554   n->offset = n_start->offset;
555   n->src = n_start->src;
556   n->bytepos = n_start->bytepos;
557   n->type = n_start->type;
558   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559 
560   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
561     {
562       uint64_t masked1, masked2;
563 
564       masked1 = n1->n & mask;
565       masked2 = n2->n & mask;
566       if (masked1 && masked2 && masked1 != masked2)
567 	return NULL;
568     }
569   n->n = n1->n | n2->n;
570   n->n_ops = n1->n_ops + n2->n_ops;
571 
572   return source_stmt;
573 }
574 
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576    the operation given by the rhs of STMT on the result.  If the operation
577    could successfully be executed the function returns a gimple stmt whose
578    rhs's first tree is the expression of the source operand and NULL
579    otherwise.  */
580 
581 gimple *
582 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
583 {
584   enum tree_code code;
585   tree rhs1, rhs2 = NULL;
586   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587   enum gimple_rhs_class rhs_class;
588 
589   if (!limit || !is_gimple_assign (stmt))
590     return NULL;
591 
592   rhs1 = gimple_assign_rhs1 (stmt);
593 
594   if (find_bswap_or_nop_load (stmt, rhs1, n))
595     return stmt;
596 
597   /* Handle BIT_FIELD_REF.  */
598   if (TREE_CODE (rhs1) == BIT_FIELD_REF
599       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
600     {
601       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
602       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
603       if (bitpos % BITS_PER_UNIT == 0
604 	  && bitsize % BITS_PER_UNIT == 0
605 	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
606 	{
607 	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
608 	  if (BYTES_BIG_ENDIAN)
609 	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
610 
611 	  /* Shift.  */
612 	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
613 	    return NULL;
614 
615 	  /* Mask.  */
616 	  uint64_t mask = 0;
617 	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
618 	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
619 	       i++, tmp <<= BITS_PER_UNIT)
620 	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
621 	  n->n &= mask;
622 
623 	  /* Convert.  */
624 	  n->type = TREE_TYPE (rhs1);
625 	  if (!n->base_addr)
626 	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
627 
628 	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
629 	}
630 
631       return NULL;
632     }
633 
634   if (TREE_CODE (rhs1) != SSA_NAME)
635     return NULL;
636 
637   code = gimple_assign_rhs_code (stmt);
638   rhs_class = gimple_assign_rhs_class (stmt);
639   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
640 
641   if (rhs_class == GIMPLE_BINARY_RHS)
642     rhs2 = gimple_assign_rhs2 (stmt);
643 
644   /* Handle unary rhs and binary rhs with integer constants as second
645      operand.  */
646 
647   if (rhs_class == GIMPLE_UNARY_RHS
648       || (rhs_class == GIMPLE_BINARY_RHS
649 	  && TREE_CODE (rhs2) == INTEGER_CST))
650     {
651       if (code != BIT_AND_EXPR
652 	  && code != LSHIFT_EXPR
653 	  && code != RSHIFT_EXPR
654 	  && code != LROTATE_EXPR
655 	  && code != RROTATE_EXPR
656 	  && !CONVERT_EXPR_CODE_P (code))
657 	return NULL;
658 
659       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
660 
661       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
662 	 we have to initialize the symbolic number.  */
663       if (!source_stmt1)
664 	{
665 	  if (gimple_assign_load_p (stmt)
666 	      || !init_symbolic_number (n, rhs1))
667 	    return NULL;
668 	  source_stmt1 = stmt;
669 	}
670 
671       switch (code)
672 	{
673 	case BIT_AND_EXPR:
674 	  {
675 	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
676 	    uint64_t val = int_cst_value (rhs2), mask = 0;
677 	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
678 
679 	    /* Only constants masking full bytes are allowed.  */
680 	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
681 	      if ((val & tmp) != 0 && (val & tmp) != tmp)
682 		return NULL;
683 	      else if (val & tmp)
684 		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
685 
686 	    n->n &= mask;
687 	  }
688 	  break;
689 	case LSHIFT_EXPR:
690 	case RSHIFT_EXPR:
691 	case LROTATE_EXPR:
692 	case RROTATE_EXPR:
693 	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
694 	    return NULL;
695 	  break;
696 	CASE_CONVERT:
697 	  {
698 	    int i, type_size, old_type_size;
699 	    tree type;
700 
701 	    type = gimple_expr_type (stmt);
702 	    type_size = TYPE_PRECISION (type);
703 	    if (type_size % BITS_PER_UNIT != 0)
704 	      return NULL;
705 	    type_size /= BITS_PER_UNIT;
706 	    if (type_size > 64 / BITS_PER_MARKER)
707 	      return NULL;
708 
709 	    /* Sign extension: result is dependent on the value.  */
710 	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
711 	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
712 		&& HEAD_MARKER (n->n, old_type_size))
713 	      for (i = 0; i < type_size - old_type_size; i++)
714 		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
715 			<< ((type_size - 1 - i) * BITS_PER_MARKER);
716 
717 	    if (type_size < 64 / BITS_PER_MARKER)
718 	      {
719 		/* If STMT casts to a smaller type mask out the bits not
720 		   belonging to the target type.  */
721 		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
722 	      }
723 	    n->type = type;
724 	    if (!n->base_addr)
725 	      n->range = type_size;
726 	  }
727 	  break;
728 	default:
729 	  return NULL;
730 	};
731       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
732     }
733 
734   /* Handle binary rhs.  */
735 
736   if (rhs_class == GIMPLE_BINARY_RHS)
737     {
738       struct symbolic_number n1, n2;
739       gimple *source_stmt, *source_stmt2;
740 
741       if (code != BIT_IOR_EXPR)
742 	return NULL;
743 
744       if (TREE_CODE (rhs2) != SSA_NAME)
745 	return NULL;
746 
747       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
748 
749       switch (code)
750 	{
751 	case BIT_IOR_EXPR:
752 	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
753 
754 	  if (!source_stmt1)
755 	    return NULL;
756 
757 	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
758 
759 	  if (!source_stmt2)
760 	    return NULL;
761 
762 	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
763 	    return NULL;
764 
765 	  if (n1.vuse != n2.vuse)
766 	    return NULL;
767 
768 	  source_stmt
769 	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
770 
771 	  if (!source_stmt)
772 	    return NULL;
773 
774 	  if (!verify_symbolic_number_p (n, stmt))
775 	    return NULL;
776 
777 	  break;
778 	default:
779 	  return NULL;
780 	}
781       return source_stmt;
782     }
783   return NULL;
784 }
785 
786 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
787    *CMPXCHG, *CMPNOP and adjust *N.  */
788 
789 void
790 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
791 			    uint64_t *cmpnop)
792 {
793   unsigned rsize;
794   uint64_t tmpn, mask;
795 
796   /* The number which the find_bswap_or_nop_1 result should match in order
797      to have a full byte swap.  The number is shifted to the right
798      according to the size of the symbolic number before using it.  */
799   *cmpxchg = CMPXCHG;
800   *cmpnop = CMPNOP;
801 
802   /* Find real size of result (highest non-zero byte).  */
803   if (n->base_addr)
804     for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
805   else
806     rsize = n->range;
807 
808   /* Zero out the bits corresponding to untouched bytes in original gimple
809      expression.  */
810   if (n->range < (int) sizeof (int64_t))
811     {
812       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
813       *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
814       *cmpnop &= mask;
815     }
816 
817   /* Zero out the bits corresponding to unused bytes in the result of the
818      gimple expression.  */
819   if (rsize < n->range)
820     {
821       if (BYTES_BIG_ENDIAN)
822 	{
823 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
824 	  *cmpxchg &= mask;
825 	  *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
826 	}
827       else
828 	{
829 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
830 	  *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
831 	  *cmpnop &= mask;
832 	}
833       n->range = rsize;
834     }
835 
836   n->range *= BITS_PER_UNIT;
837 }
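
/* For example (a hedged illustration): for a 2-byte non-memory source,
   N->range is 2 on entry, so *CMPNOP is masked down to 0x0201 and *CMPXCHG
   is shifted down to 0x0102; N->range is then scaled to 16, i.e. the width
   in bits that the callers use for their statistics and mode checks.  */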
838 
839 /* Check if STMT completes a bswap implementation or a read in a given
840    endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
841    accordingly.  It also sets N to represent the kind of operations
842    performed: size of the resulting expression and whether it works on
843    a memory source, and if so alias-set and vuse.  Finally, the
844    function returns a stmt whose rhs's first tree is the source
845    expression.  */
846 
847 gimple *
848 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
849 {
850   /* The last parameter determines the depth search limit.  It usually
851      correlates directly to the number n of bytes to be touched.  We
852      increase that number by 2 * (log2(n) + 1) here in order to also
853      cover signed -> unsigned conversions of the src operand as can be seen
854      in libgcc, and for initial shift/and operation of the src operand.  */
855   int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
856   limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
857   gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
858 
859   if (!ins_stmt)
860     return NULL;
861 
862   uint64_t cmpxchg, cmpnop;
863   find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
864 
865   /* A complete byte swap should make the symbolic number start with the
866      largest digit in the highest order byte.  An unchanged symbolic number
867      indicates a read with the same endianness as the target architecture.  */
868   if (n->n == cmpnop)
869     *bswap = false;
870   else if (n->n == cmpxchg)
871     *bswap = true;
872   else
873     return NULL;
874 
875   /* Useless bit manipulation performed by code.  */
876   if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
877     return NULL;
878 
879   return ins_stmt;
880 }
881 
882 const pass_data pass_data_optimize_bswap =
883 {
884   GIMPLE_PASS, /* type */
885   "bswap", /* name */
886   OPTGROUP_NONE, /* optinfo_flags */
887   TV_NONE, /* tv_id */
888   PROP_ssa, /* properties_required */
889   0, /* properties_provided */
890   0, /* properties_destroyed */
891   0, /* todo_flags_start */
892   0, /* todo_flags_finish */
893 };
894 
895 class pass_optimize_bswap : public gimple_opt_pass
896 {
897 public:
898   pass_optimize_bswap (gcc::context *ctxt)
899     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
900   {}
901 
902   /* opt_pass methods: */
903   virtual bool gate (function *)
904     {
905       return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
906     }
907 
908   virtual unsigned int execute (function *);
909 
910 }; // class pass_optimize_bswap
911 
912 /* Perform the bswap optimization: replace the expression computed in the rhs
913    of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
914    bswap, load or load + bswap expression.
915    Which of these alternatives replaces the rhs is given by N->base_addr
916    (non-null if a load is needed) and BSWAP.  The type, VUSE and alias-set
917    of the load to perform are also given in N, while the builtin bswap
918    invocation is given in FNDECL.  Finally, if a load is involved, INS_STMT
919    refers to one of the load statements involved in constructing the rhs of
920    gsi_stmt (GSI), and N->range gives the size of the rhs expression for
921    maintaining some statistics.
922 
923    Note that if the replacement involves a load and gsi_stmt (GSI) is
924    non-NULL, that stmt is moved just after INS_STMT to do the load with the
925    same VUSE, which can lead to gsi_stmt (GSI) changing basic block.  */
926 
927 tree
928 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
929 	       tree bswap_type, tree load_type, struct symbolic_number *n,
930 	       bool bswap)
931 {
932   tree src, tmp, tgt = NULL_TREE;
933   gimple *bswap_stmt;
934 
935   gimple *cur_stmt = gsi_stmt (gsi);
936   src = n->src;
937   if (cur_stmt)
938     tgt = gimple_assign_lhs (cur_stmt);
939 
940   /* Need to load the value from memory first.  */
941   if (n->base_addr)
942     {
943       gimple_stmt_iterator gsi_ins = gsi;
944       if (ins_stmt)
945 	gsi_ins = gsi_for_stmt (ins_stmt);
946       tree addr_expr, addr_tmp, val_expr, val_tmp;
947       tree load_offset_ptr, aligned_load_type;
948       gimple *load_stmt;
949       unsigned align = get_object_alignment (src);
950       poly_int64 load_offset = 0;
951 
952       if (cur_stmt)
953 	{
954 	  basic_block ins_bb = gimple_bb (ins_stmt);
955 	  basic_block cur_bb = gimple_bb (cur_stmt);
956 	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
957 	    return NULL_TREE;
958 
959 	  /* Move cur_stmt just before one of the loads of the original
960 	     to ensure it has the same VUSE.  See PR61517 for what could
961 	     go wrong.  */
962 	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
963 	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
964 	  gsi_move_before (&gsi, &gsi_ins);
965 	  gsi = gsi_for_stmt (cur_stmt);
966 	}
967       else
968 	gsi = gsi_ins;
969 
970       /* Compute address to load from and cast according to the size
971 	 of the load.  */
972       addr_expr = build_fold_addr_expr (src);
973       if (is_gimple_mem_ref_addr (addr_expr))
974 	addr_tmp = unshare_expr (addr_expr);
975       else
976 	{
977 	  addr_tmp = unshare_expr (n->base_addr);
978 	  if (!is_gimple_mem_ref_addr (addr_tmp))
979 	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
980 						   is_gimple_mem_ref_addr,
981 						   NULL_TREE, true,
982 						   GSI_SAME_STMT);
983 	  load_offset = n->bytepos;
984 	  if (n->offset)
985 	    {
986 	      tree off
987 		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
988 					    true, NULL_TREE, true,
989 					    GSI_SAME_STMT);
990 	      gimple *stmt
991 		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
992 				       POINTER_PLUS_EXPR, addr_tmp, off);
993 	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
994 	      addr_tmp = gimple_assign_lhs (stmt);
995 	    }
996 	}
997 
998       /* Perform the load.  */
999       aligned_load_type = load_type;
1000       if (align < TYPE_ALIGN (load_type))
1001 	aligned_load_type = build_aligned_type (load_type, align);
1002       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1003       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1004 			      load_offset_ptr);
1005 
1006       if (!bswap)
1007 	{
1008 	  if (n->range == 16)
1009 	    nop_stats.found_16bit++;
1010 	  else if (n->range == 32)
1011 	    nop_stats.found_32bit++;
1012 	  else
1013 	    {
1014 	      gcc_assert (n->range == 64);
1015 	      nop_stats.found_64bit++;
1016 	    }
1017 
1018 	  /* Convert the result of load if necessary.  */
1019 	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1020 	    {
1021 	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1022 					    "load_dst");
1023 	      load_stmt = gimple_build_assign (val_tmp, val_expr);
1024 	      gimple_set_vuse (load_stmt, n->vuse);
1025 	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1026 	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1027 	      update_stmt (cur_stmt);
1028 	    }
1029 	  else if (cur_stmt)
1030 	    {
1031 	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1032 	      gimple_set_vuse (cur_stmt, n->vuse);
1033 	      update_stmt (cur_stmt);
1034 	    }
1035 	  else
1036 	    {
1037 	      tgt = make_ssa_name (load_type);
1038 	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1039 	      gimple_set_vuse (cur_stmt, n->vuse);
1040 	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1041 	    }
1042 
1043 	  if (dump_file)
1044 	    {
1045 	      fprintf (dump_file,
1046 		       "%d bit load in target endianness found at: ",
1047 		       (int) n->range);
1048 	      print_gimple_stmt (dump_file, cur_stmt, 0);
1049 	    }
1050 	  return tgt;
1051 	}
1052       else
1053 	{
1054 	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1055 	  load_stmt = gimple_build_assign (val_tmp, val_expr);
1056 	  gimple_set_vuse (load_stmt, n->vuse);
1057 	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1058 	}
1059       src = val_tmp;
1060     }
1061   else if (!bswap)
1062     {
1063       gimple *g = NULL;
1064       if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1065 	{
1066 	  if (!is_gimple_val (src))
1067 	    return NULL_TREE;
1068 	  g = gimple_build_assign (tgt, NOP_EXPR, src);
1069 	}
1070       else if (cur_stmt)
1071 	g = gimple_build_assign (tgt, src);
1072       else
1073 	tgt = src;
1074       if (n->range == 16)
1075 	nop_stats.found_16bit++;
1076       else if (n->range == 32)
1077 	nop_stats.found_32bit++;
1078       else
1079 	{
1080 	  gcc_assert (n->range == 64);
1081 	  nop_stats.found_64bit++;
1082 	}
1083       if (dump_file)
1084 	{
1085 	  fprintf (dump_file,
1086 		   "%d bit reshuffle in target endianness found at: ",
1087 		   (int) n->range);
1088 	  if (cur_stmt)
1089 	    print_gimple_stmt (dump_file, cur_stmt, 0);
1090 	  else
1091 	    {
1092 	      print_generic_expr (dump_file, tgt, TDF_NONE);
1093 	      fprintf (dump_file, "\n");
1094 	    }
1095 	}
1096       if (cur_stmt)
1097 	gsi_replace (&gsi, g, true);
1098       return tgt;
1099     }
1100   else if (TREE_CODE (src) == BIT_FIELD_REF)
1101     src = TREE_OPERAND (src, 0);
1102 
1103   if (n->range == 16)
1104     bswap_stats.found_16bit++;
1105   else if (n->range == 32)
1106     bswap_stats.found_32bit++;
1107   else
1108     {
1109       gcc_assert (n->range == 64);
1110       bswap_stats.found_64bit++;
1111     }
1112 
1113   tmp = src;
1114 
1115   /* Convert the src expression if necessary.  */
1116   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1117     {
1118       gimple *convert_stmt;
1119 
1120       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1121       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1122       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1123     }
1124 
1125   /* The canonical form for a 16-bit bswap is a rotate expression.  Only
1126      16-bit values are handled this way: rotating a 2N-bit value by N bits is
1127      generally not equivalent to a bswap.  For instance 0x01020304 r>> 16
1128      gives 0x03040102, while the bswap of that value is 0x04030201.  */
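  /* E.g. (illustrative values only): 0xAABB r<< 8 is 0xBBAA, the same as
     __builtin_bswap16 (0xAABB), so the rotate form is a safe stand-in.  */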
1129   if (bswap && n->range == 16)
1130     {
1131       tree count = build_int_cst (NULL, BITS_PER_UNIT);
1132       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1133       bswap_stmt = gimple_build_assign (NULL, src);
1134     }
1135   else
1136     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1137 
1138   if (tgt == NULL_TREE)
1139     tgt = make_ssa_name (bswap_type);
1140   tmp = tgt;
1141 
1142   /* Convert the result if necessary.  */
1143   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1144     {
1145       gimple *convert_stmt;
1146 
1147       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1148       convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1149       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1150     }
1151 
1152   gimple_set_lhs (bswap_stmt, tmp);
1153 
1154   if (dump_file)
1155     {
1156       fprintf (dump_file, "%d bit bswap implementation found at: ",
1157 	       (int) n->range);
1158       if (cur_stmt)
1159 	print_gimple_stmt (dump_file, cur_stmt, 0);
1160       else
1161 	{
1162 	  print_generic_expr (dump_file, tgt, TDF_NONE);
1163 	  fprintf (dump_file, "\n");
1164 	}
1165     }
1166 
1167   if (cur_stmt)
1168     {
1169       gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1170       gsi_remove (&gsi, true);
1171     }
1172   else
1173     gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1174   return tgt;
1175 }
1176 
1177 /* Find manual byte swap implementations as well as loads in a given
1178    endianness.  Byte swaps are turned into a bswap builtin invocation,
1179    while endian loads are converted to a bswap builtin invocation or a
1180    simple load according to the target endianness.  */
1181 
1182 unsigned int
1183 pass_optimize_bswap::execute (function *fun)
1184 {
1185   basic_block bb;
1186   bool bswap32_p, bswap64_p;
1187   bool changed = false;
1188   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1189 
1190   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1191 	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1192   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1193 	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1194 		   || (bswap32_p && word_mode == SImode)));
1195 
1196   /* Determine the argument type of the builtins.  The code later on
1197      assumes that the return and argument type are the same.  */
1198   if (bswap32_p)
1199     {
1200       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1201       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1202     }
1203 
1204   if (bswap64_p)
1205     {
1206       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1207       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1208     }
1209 
1210   memset (&nop_stats, 0, sizeof (nop_stats));
1211   memset (&bswap_stats, 0, sizeof (bswap_stats));
1212   calculate_dominance_info (CDI_DOMINATORS);
1213 
1214   FOR_EACH_BB_FN (bb, fun)
1215     {
1216       gimple_stmt_iterator gsi;
1217 
1218       /* We do a reverse scan for bswap patterns to make sure we get the
1219 	 widest match. As bswap pattern matching doesn't handle previously
1220 	 inserted smaller bswap replacements as sub-patterns, the wider
1221 	 variant wouldn't be detected.  */
1222       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1223 	{
1224 	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1225 	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1226 	  enum tree_code code;
1227 	  struct symbolic_number n;
1228 	  bool bswap;
1229 
1230 	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1231 	     might be moved to a different basic block by bswap_replace and gsi
1232 	     must not point to it in that case.  Doing the gsi_prev here makes
1233 	     sure that gsi points to the statement previous to cur_stmt while
1234 	     still making sure that all statements in this basic block are
1235 	     considered.  */
1236 	  gsi_prev (&gsi);
1237 
1238 	  if (!is_gimple_assign (cur_stmt))
1239 	    continue;
1240 
1241 	  code = gimple_assign_rhs_code (cur_stmt);
1242 	  switch (code)
1243 	    {
1244 	    case LROTATE_EXPR:
1245 	    case RROTATE_EXPR:
1246 	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1247 		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1248 		     % BITS_PER_UNIT)
1249 		continue;
1250 	      /* Fall through.  */
1251 	    case BIT_IOR_EXPR:
1252 	      break;
1253 	    default:
1254 	      continue;
1255 	    }
1256 
1257 	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1258 
1259 	  if (!ins_stmt)
1260 	    continue;
1261 
1262 	  switch (n.range)
1263 	    {
1264 	    case 16:
1265 	      /* Already in canonical form, nothing to do.  */
1266 	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1267 		continue;
1268 	      load_type = bswap_type = uint16_type_node;
1269 	      break;
1270 	    case 32:
1271 	      load_type = uint32_type_node;
1272 	      if (bswap32_p)
1273 		{
1274 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1275 		  bswap_type = bswap32_type;
1276 		}
1277 	      break;
1278 	    case 64:
1279 	      load_type = uint64_type_node;
1280 	      if (bswap64_p)
1281 		{
1282 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1283 		  bswap_type = bswap64_type;
1284 		}
1285 	      break;
1286 	    default:
1287 	      continue;
1288 	    }
1289 
1290 	  if (bswap && !fndecl && n.range != 16)
1291 	    continue;
1292 
1293 	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1294 			     bswap_type, load_type, &n, bswap))
1295 	    changed = true;
1296 	}
1297     }
1298 
1299   statistics_counter_event (fun, "16-bit nop implementations found",
1300 			    nop_stats.found_16bit);
1301   statistics_counter_event (fun, "32-bit nop implementations found",
1302 			    nop_stats.found_32bit);
1303   statistics_counter_event (fun, "64-bit nop implementations found",
1304 			    nop_stats.found_64bit);
1305   statistics_counter_event (fun, "16-bit bswap implementations found",
1306 			    bswap_stats.found_16bit);
1307   statistics_counter_event (fun, "32-bit bswap implementations found",
1308 			    bswap_stats.found_32bit);
1309   statistics_counter_event (fun, "64-bit bswap implementations found",
1310 			    bswap_stats.found_64bit);
1311 
1312   return (changed ? TODO_update_ssa : 0);
1313 }
1314 
1315 } // anon namespace
1316 
1317 gimple_opt_pass *
1318 make_pass_optimize_bswap (gcc::context *ctxt)
1319 {
1320   return new pass_optimize_bswap (ctxt);
1321 }
1322 
1323 namespace {
1324 
1325 /* Struct recording one operand for the store, which is either a constant,
1326    then VAL represents the constant and all the other fields are zero, or
1327    a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1328    and the other fields also reflect the memory load, or an SSA name, then
1329    VAL represents the SSA name and all the other fields are zero.  */
1330 
1331 class store_operand_info
1332 {
1333 public:
1334   tree val;
1335   tree base_addr;
1336   poly_uint64 bitsize;
1337   poly_uint64 bitpos;
1338   poly_uint64 bitregion_start;
1339   poly_uint64 bitregion_end;
1340   gimple *stmt;
1341   bool bit_not_p;
1342   store_operand_info ();
1343 };
1344 
1345 store_operand_info::store_operand_info ()
1346   : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1347     bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1348 {
1349 }
1350 
1351 /* Struct recording the information about a single store of an immediate
1352    to memory.  These are created in the first phase and coalesced into
1353    merged_store_group objects in the second phase.  */
1354 
1355 class store_immediate_info
1356 {
1357 public:
1358   unsigned HOST_WIDE_INT bitsize;
1359   unsigned HOST_WIDE_INT bitpos;
1360   unsigned HOST_WIDE_INT bitregion_start;
1361   /* This is one past the last bit of the bit region.  */
1362   unsigned HOST_WIDE_INT bitregion_end;
1363   gimple *stmt;
1364   unsigned int order;
1365   /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1366      BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1367      for bit insertion.
1368      LROTATE_EXPR if it can be only bswap optimized and
1369      ops are not really meaningful.
1370      NOP_EXPR if bswap optimization detected identity, ops
1371      are not meaningful.  */
1372   enum tree_code rhs_code;
1373   /* Two fields for bswap optimization purposes.  */
1374   struct symbolic_number n;
1375   gimple *ins_stmt;
1376   /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
1377   bool bit_not_p;
1378   /* True if ops have been swapped and thus ops[1] represents
1379      rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
1380   bool ops_swapped_p;
1381   /* The index number of the landing pad, or 0 if there is none.  */
1382   int lp_nr;
1383   /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
1384      just the first one.  */
1385   store_operand_info ops[2];
1386   store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1387 			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1388 			gimple *, unsigned int, enum tree_code,
1389 			struct symbolic_number &, gimple *, bool, int,
1390 			const store_operand_info &,
1391 			const store_operand_info &);
1392 };
1393 
1394 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1395 					    unsigned HOST_WIDE_INT bp,
1396 					    unsigned HOST_WIDE_INT brs,
1397 					    unsigned HOST_WIDE_INT bre,
1398 					    gimple *st,
1399 					    unsigned int ord,
1400 					    enum tree_code rhscode,
1401 					    struct symbolic_number &nr,
1402 					    gimple *ins_stmtp,
1403 					    bool bitnotp,
1404 					    int nr2,
1405 					    const store_operand_info &op0r,
1406 					    const store_operand_info &op1r)
1407   : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1408     stmt (st), order (ord), rhs_code (rhscode), n (nr),
1409     ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1410     lp_nr (nr2)
1411 #if __cplusplus >= 201103L
1412     , ops { op0r, op1r }
1413 {
1414 }
1415 #else
1416 {
1417   ops[0] = op0r;
1418   ops[1] = op1r;
1419 }
1420 #endif
1421 
1422 /* Struct representing a group of stores to contiguous memory locations.
1423    These are produced by the second phase (coalescing) and consumed in the
1424    third phase that outputs the widened stores.  */
1425 
1426 class merged_store_group
1427 {
1428 public:
1429   unsigned HOST_WIDE_INT start;
1430   unsigned HOST_WIDE_INT width;
1431   unsigned HOST_WIDE_INT bitregion_start;
1432   unsigned HOST_WIDE_INT bitregion_end;
1433   /* The size of the allocated memory for val and mask.  */
1434   unsigned HOST_WIDE_INT buf_size;
1435   unsigned HOST_WIDE_INT align_base;
1436   poly_uint64 load_align_base[2];
1437 
1438   unsigned int align;
1439   unsigned int load_align[2];
1440   unsigned int first_order;
1441   unsigned int last_order;
1442   bool bit_insertion;
1443   bool only_constants;
1444   unsigned int first_nonmergeable_order;
1445   int lp_nr;
1446 
1447   auto_vec<store_immediate_info *> stores;
1448   /* We record the first and last original statements in the sequence because
1449      we'll need their vuse/vdef and replacement position.  It's easier to keep
1450      track of them separately as 'stores' is reordered by apply_stores.  */
1451   gimple *last_stmt;
1452   gimple *first_stmt;
1453   unsigned char *val;
1454   unsigned char *mask;
1455 
1456   merged_store_group (store_immediate_info *);
1457   ~merged_store_group ();
1458   bool can_be_merged_into (store_immediate_info *);
1459   void merge_into (store_immediate_info *);
1460   void merge_overlapping (store_immediate_info *);
1461   bool apply_stores ();
1462 private:
1463   void do_merge (store_immediate_info *);
1464 };
1465 
1466 /* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */
1467 
1468 static void
1469 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1470 {
1471   if (!fd)
1472     return;
1473 
1474   for (unsigned int i = 0; i < len; i++)
1475     fprintf (fd, "%02x ", ptr[i]);
1476   fprintf (fd, "\n");
1477 }
1478 
1479 /* Clear out LEN bits starting from bit START in the byte array
1480    PTR.  This clears the bits to the *right* from START.
1481    START must be within [0, BITS_PER_UNIT) and counts starting from
1482    the least significant bit.  */
1483 
1484 static void
1485 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1486 		     unsigned int len)
1487 {
1488   if (len == 0)
1489     return;
1490   /* Clear len bits to the right of start.  */
1491   else if (len <= start + 1)
1492     {
1493       unsigned char mask = (~(~0U << len));
1494       mask = mask << (start + 1U - len);
1495       ptr[0] &= ~mask;
1496     }
1497   else if (start != BITS_PER_UNIT - 1)
1498     {
1499       clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1500       clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1501 			   len - (start % BITS_PER_UNIT) - 1);
1502     }
1503   else if (start == BITS_PER_UNIT - 1
1504 	   && len > BITS_PER_UNIT)
1505     {
1506       unsigned int nbytes = len / BITS_PER_UNIT;
1507       memset (ptr, 0, nbytes);
1508       if (len % BITS_PER_UNIT != 0)
1509 	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1510 			     len % BITS_PER_UNIT);
1511     }
1512   else
1513     gcc_unreachable ();
1514 }
1515 
1516 /* In the byte array PTR clear the bit region starting at bit
1517    START that is LEN bits wide.
1518    For regions spanning multiple bytes do this recursively until we reach
1519    zero LEN or a region contained within a single byte.  */
1520 
1521 static void
1522 clear_bit_region (unsigned char *ptr, unsigned int start,
1523 		  unsigned int len)
1524 {
1525   /* Degenerate base case.  */
1526   if (len == 0)
1527     return;
1528   else if (start >= BITS_PER_UNIT)
1529     clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1530   /* Second base case.  */
1531   else if ((start + len) <= BITS_PER_UNIT)
1532     {
1533       unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1534       mask >>= BITS_PER_UNIT - (start + len);
1535 
1536       ptr[0] &= ~mask;
1537 
1538       return;
1539     }
1540   /* Clear most significant bits in a byte and proceed with the next byte.  */
1541   else if (start != 0)
1542     {
1543       clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1544       clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1545     }
1546   /* Whole bytes need to be cleared.  */
1547   else if (start == 0 && len > BITS_PER_UNIT)
1548     {
1549       unsigned int nbytes = len / BITS_PER_UNIT;
1550       /* We could recurse on each byte but we clear whole bytes, so a simple
1551 	 memset will do.  */
1552       memset (ptr, '\0', nbytes);
1553       /* Clear the remaining sub-byte region if there is one.  */
1554       if (len % BITS_PER_UNIT != 0)
1555 	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1556     }
1557   else
1558     gcc_unreachable ();
1559 }
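
/* For instance (a hedged illustration, assuming BITS_PER_UNIT == 8):
   clear_bit_region (ptr, 6, 4) on the two bytes { 0xff, 0xff } clears
   bits 6 and 7 of ptr[0] and bits 0 and 1 of ptr[1], leaving
   { 0x3f, 0xfc }.  */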
1560 
1561 /* Write BITLEN bits of EXPR to the byte array PTR at
1562    bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
1563    Return true if the operation succeeded.  */
1564 
1565 static bool
1566 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1567 		       unsigned int total_bytes)
1568 {
1569   unsigned int first_byte = bitpos / BITS_PER_UNIT;
1570   bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1571 			|| (bitpos % BITS_PER_UNIT)
1572 			|| !int_mode_for_size (bitlen, 0).exists ());
1573   bool empty_ctor_p
1574     = (TREE_CODE (expr) == CONSTRUCTOR
1575        && CONSTRUCTOR_NELTS (expr) == 0
1576        && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1577 		       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1578 
1579   if (!sub_byte_op_p)
1580     {
1581       if (first_byte >= total_bytes)
1582 	return false;
1583       total_bytes -= first_byte;
1584       if (empty_ctor_p)
1585 	{
1586 	  unsigned HOST_WIDE_INT rhs_bytes
1587 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1588 	  if (rhs_bytes > total_bytes)
1589 	    return false;
1590 	  memset (ptr + first_byte, '\0', rhs_bytes);
1591 	  return true;
1592 	}
1593       return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1594     }
1595 
1596   /* LITTLE-ENDIAN
1597      We are writing a non byte-sized quantity or at a position that is not
1598      at a byte boundary.
1599      |--------|--------|--------| ptr + first_byte
1600            ^              ^
1601            xxx xxxxxxxx xxx< bp>
1602            |______EXPR____|
1603 
1604      First native_encode_expr EXPR into a temporary buffer and shift each
1605      byte in the buffer by 'bp' (carrying the bits over as necessary).
1606      |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1607                                               <------bitlen---->< bp>
1608     Then we clear the destination bits:
1609     |---00000|00000000|000-----| ptr + first_byte
1610         <-------bitlen--->< bp>
1611 
1612     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1613     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1614 
1615    BIG-ENDIAN
1616    We are writing a non byte-sized quantity or at a position that is not
1617    at a byte boundary.
1618      ptr + first_byte |--------|--------|--------|
1619                             ^              ^
1620                        <bp >xxx xxxxxxxx xxx
1621                             |_____EXPR_____|
1622 
1623      First native_encode_expr EXPR into a temporary buffer and shift each
1624      byte in the buffer to the right (carrying the bits over as necessary).
1625      We shift by as much as needed to align the most significant bit of EXPR
1626      with bitpos:
1627      |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1628         <---bitlen---->          <bp ><-----bitlen----->
1629     Then we clear the destination bits:
1630     ptr + first_byte |-----000||00000000||00000---|
1631                       <bp ><-------bitlen----->
1632 
1633     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1634     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1635     The awkwardness comes from the fact that bitpos is counted from the
1636     most significant bit of a byte.  */
1637 
1638   /* We must be dealing with fixed-size data at this point, since the
1639      total size is also fixed.  */
1640   unsigned int byte_size;
1641   if (empty_ctor_p)
1642     {
1643       unsigned HOST_WIDE_INT rhs_bytes
1644 	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1645       if (rhs_bytes > total_bytes)
1646 	return false;
1647       byte_size = rhs_bytes;
1648     }
1649   else
1650     {
1651       fixed_size_mode mode
1652 	= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1653       byte_size = GET_MODE_SIZE (mode);
1654     }
1655   /* Allocate an extra byte so that we have space to shift into.  */
1656   byte_size++;
1657   unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1658   memset (tmpbuf, '\0', byte_size);
1659   /* The store detection code should only have allowed constants that are
1660      accepted by native_encode_expr or empty ctors.  */
1661   if (!empty_ctor_p
1662       && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1663     gcc_unreachable ();
1664 
1665   /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1666      bytes to write.  This means it can write more than
1667      ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1668      write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
1669      bitlen and zero out the bits that are not relevant as well (that may
1670      contain a sign bit due to sign-extension).  */
1671   unsigned int padding
1672     = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1673   /* On big-endian the padding is at the 'front' so just skip the initial
1674      bytes.  */
1675   if (BYTES_BIG_ENDIAN)
1676     tmpbuf += padding;
1677 
1678   byte_size -= padding;
1679 
1680   if (bitlen % BITS_PER_UNIT != 0)
1681     {
1682       if (BYTES_BIG_ENDIAN)
1683 	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1684 			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1685       else
1686 	clear_bit_region (tmpbuf, bitlen,
1687 			  byte_size * BITS_PER_UNIT - bitlen);
1688     }
1689   /* Left shifting relies on the last byte being clear if bitlen is
1690      a multiple of BITS_PER_UNIT, which might not be clear if
1691      there are padding bytes.  */
1692   else if (!BYTES_BIG_ENDIAN)
1693     tmpbuf[byte_size - 1] = '\0';
1694 
1695   /* Clear the bit region in PTR where the bits from TMPBUF will be
1696      inserted into.  */
1697   if (BYTES_BIG_ENDIAN)
1698     clear_bit_region_be (ptr + first_byte,
1699 			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1700   else
1701     clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1702 
1703   int shift_amnt;
1704   int bitlen_mod = bitlen % BITS_PER_UNIT;
1705   int bitpos_mod = bitpos % BITS_PER_UNIT;
1706 
1707   bool skip_byte = false;
1708   if (BYTES_BIG_ENDIAN)
1709     {
1710       /* BITPOS and BITLEN are exactly aligned and no shifting
1711 	 is necessary.  */
1712       if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1713 	  || (bitpos_mod == 0 && bitlen_mod == 0))
1714 	shift_amnt = 0;
1715       /* |. . . . . . . .|
1716 	  <bp >   <blen >.
1717 	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1718 	 of the value until it aligns with 'bp' in the next byte over.  */
1719       else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1720 	{
1721 	  shift_amnt = bitlen_mod + bitpos_mod;
1722 	  skip_byte = bitlen_mod != 0;
1723 	}
1724       /* |. . . . . . . .|
1725 	  <----bp--->
1726 	    <---blen---->.
1727 	 Shift the value right within the same byte so it aligns with 'bp'.  */
1728       else
1729 	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1730     }
1731   else
1732     shift_amnt = bitpos % BITS_PER_UNIT;
1733 
1734   /* Create the shifted version of EXPR.  */
1735   if (!BYTES_BIG_ENDIAN)
1736     {
1737       shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
1738       if (shift_amnt == 0)
1739 	byte_size--;
1740     }
1741   else
1742     {
1743       gcc_assert (BYTES_BIG_ENDIAN);
1744       shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1745       /* If shifting right forced us to move into the next byte skip the now
1746 	 empty byte.  */
1747       if (skip_byte)
1748 	{
1749 	  tmpbuf++;
1750 	  byte_size--;
1751 	}
1752     }
1753 
1754   /* Insert the bits from TMPBUF.  */
1755   for (unsigned int i = 0; i < byte_size; i++)
1756     ptr[first_byte + i] |= tmpbuf[i];
1757 
1758   return true;
1759 }
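
/* A worked example for the function above (hypothetical values, not part of
   the original sources): encoding a 5-bit constant with value 0b10101 at
   BITPOS 3 into a zeroed byte yields 0xa8 on little-endian targets (the
   value shifted left by 3, occupying bits 3..7) and 0x15 on big-endian
   targets (BITPOS is counted from the most significant bit, so the value
   ends up in bits 4..0).  */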
1760 
1761 /* Sorting function for store_immediate_info objects.
1762    Sorts them by bitposition.  */
1763 
1764 static int
1765 sort_by_bitpos (const void *x, const void *y)
1766 {
1767   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1768   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1769 
1770   if ((*tmp)->bitpos < (*tmp2)->bitpos)
1771     return -1;
1772   else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1773     return 1;
1774   else
1775     /* If the bit positions are the same, use the order, which is guaranteed to
1776        be different.  */
1777     return (*tmp)->order - (*tmp2)->order;
1778 }
1779 
1780 /* Sorting function for store_immediate_info objects.
1781    Sorts them by the order field.  */
1782 
1783 static int
1784 sort_by_order (const void *x, const void *y)
1785 {
1786   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1787   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1788 
1789   if ((*tmp)->order < (*tmp2)->order)
1790     return -1;
1791   else if ((*tmp)->order > (*tmp2)->order)
1792     return 1;
1793 
1794   gcc_unreachable ();
1795 }
1796 
1797 /* Initialize a merged_store_group object from a store_immediate_info
1798    object.  */
1799 
1800 merged_store_group::merged_store_group (store_immediate_info *info)
1801 {
1802   start = info->bitpos;
1803   width = info->bitsize;
1804   bitregion_start = info->bitregion_start;
1805   bitregion_end = info->bitregion_end;
1806   /* VAL has memory allocated for it in apply_stores once the group
1807      width has been finalized.  */
1808   val = NULL;
1809   mask = NULL;
1810   bit_insertion = false;
1811   only_constants = info->rhs_code == INTEGER_CST;
1812   first_nonmergeable_order = ~0U;
1813   lp_nr = info->lp_nr;
1814   unsigned HOST_WIDE_INT align_bitpos = 0;
1815   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1816 			  &align, &align_bitpos);
1817   align_base = start - align_bitpos;
1818   for (int i = 0; i < 2; ++i)
1819     {
1820       store_operand_info &op = info->ops[i];
1821       if (op.base_addr == NULL_TREE)
1822 	{
1823 	  load_align[i] = 0;
1824 	  load_align_base[i] = 0;
1825 	}
1826       else
1827 	{
1828 	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1829 	  load_align_base[i] = op.bitpos - align_bitpos;
1830 	}
1831     }
1832   stores.create (1);
1833   stores.safe_push (info);
1834   last_stmt = info->stmt;
1835   last_order = info->order;
1836   first_stmt = last_stmt;
1837   first_order = last_order;
1838   buf_size = 0;
1839 }
1840 
1841 merged_store_group::~merged_store_group ()
1842 {
1843   if (val)
1844     XDELETEVEC (val);
1845 }
1846 
1847 /* Return true if the store described by INFO can be merged into the group.  */
1848 
1849 bool
1850 merged_store_group::can_be_merged_into (store_immediate_info *info)
1851 {
1852   /* Do not merge bswap patterns.  */
1853   if (info->rhs_code == LROTATE_EXPR)
1854     return false;
1855 
1856   if (info->lp_nr != lp_nr)
1857     return false;
1858 
1859   /* The canonical case.  */
1860   if (info->rhs_code == stores[0]->rhs_code)
1861     return true;
1862 
1863   /* BIT_INSERT_EXPR is compatible with INTEGER_CST.  */
1864   if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1865     return true;
1866 
1867   if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1868     return true;
1869 
1870   /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
1871   if (info->rhs_code == MEM_REF
1872       && (stores[0]->rhs_code == INTEGER_CST
1873 	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
1874       && info->bitregion_start == stores[0]->bitregion_start
1875       && info->bitregion_end == stores[0]->bitregion_end)
1876     return true;
1877 
1878   if (stores[0]->rhs_code == MEM_REF
1879       && (info->rhs_code == INTEGER_CST
1880 	  || info->rhs_code == BIT_INSERT_EXPR)
1881       && info->bitregion_start == stores[0]->bitregion_start
1882       && info->bitregion_end == stores[0]->bitregion_end)
1883     return true;
1884 
1885   return false;
1886 }
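
/* For illustration (hypothetical snippet, not part of the original sources):
   given
     s.a = 1;     <- constant store, rhs_code INTEGER_CST
     s.b = x_1;   <- bit-field store of an SSA name, rhs_code BIT_INSERT_EXPR
   the second store can join the group started by the first, whereas a store
   recognized as a bswap pattern (rhs_code LROTATE_EXPR) never joins an
   existing group.  */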
1887 
1888 /* Helper method for merge_into and merge_overlapping to do
1889    the common part.  */
1890 
1891 void
1892 merged_store_group::do_merge (store_immediate_info *info)
1893 {
1894   bitregion_start = MIN (bitregion_start, info->bitregion_start);
1895   bitregion_end = MAX (bitregion_end, info->bitregion_end);
1896 
1897   unsigned int this_align;
1898   unsigned HOST_WIDE_INT align_bitpos = 0;
1899   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1900 			  &this_align, &align_bitpos);
1901   if (this_align > align)
1902     {
1903       align = this_align;
1904       align_base = info->bitpos - align_bitpos;
1905     }
1906   for (int i = 0; i < 2; ++i)
1907     {
1908       store_operand_info &op = info->ops[i];
1909       if (!op.base_addr)
1910 	continue;
1911 
1912       get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1913       if (this_align > load_align[i])
1914 	{
1915 	  load_align[i] = this_align;
1916 	  load_align_base[i] = op.bitpos - align_bitpos;
1917 	}
1918     }
1919 
1920   gimple *stmt = info->stmt;
1921   stores.safe_push (info);
1922   if (info->order > last_order)
1923     {
1924       last_order = info->order;
1925       last_stmt = stmt;
1926     }
1927   else if (info->order < first_order)
1928     {
1929       first_order = info->order;
1930       first_stmt = stmt;
1931     }
1932   if (info->rhs_code != INTEGER_CST)
1933     only_constants = false;
1934 }
1935 
1936 /* Merge a store recorded by INFO into this merged store.
1937    The store is not overlapping with the existing recorded
1938    stores.  */
1939 
1940 void
1941 merged_store_group::merge_into (store_immediate_info *info)
1942 {
1943   /* Make sure we're inserting in the position we think we're inserting.  */
1944   gcc_assert (info->bitpos >= start + width
1945 	      && info->bitregion_start <= bitregion_end);
1946 
1947   width = info->bitpos + info->bitsize - start;
1948   do_merge (info);
1949 }
1950 
1951 /* Merge a store described by INFO into this merged store.
1952    INFO overlaps in some way with the current store (i.e. it's not contiguous
1953    which is handled by merged_store_group::merge_into).  */
1954 
1955 void
1956 merged_store_group::merge_overlapping (store_immediate_info *info)
1957 {
1958   /* If the store extends the size of the group, extend the width.  */
1959   if (info->bitpos + info->bitsize > start + width)
1960     width = info->bitpos + info->bitsize - start;
1961 
1962   do_merge (info);
1963 }
1964 
1965 /* Go through all the recorded stores in this group in program order and
1966    apply their values to the VAL byte array to create the final merged
1967    value.  Return true if the operation succeeded.  */
1968 
1969 bool
1970 merged_store_group::apply_stores ()
1971 {
1972   /* Make sure we have more than one store in the group, otherwise we cannot
1973      merge anything.  */
1974   if (bitregion_start % BITS_PER_UNIT != 0
1975       || bitregion_end % BITS_PER_UNIT != 0
1976       || stores.length () == 1)
1977     return false;
1978 
1979   stores.qsort (sort_by_order);
1980   store_immediate_info *info;
1981   unsigned int i;
1982   /* Create a power-of-2-sized buffer for native_encode_expr.  */
1983   buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
1984   val = XNEWVEC (unsigned char, 2 * buf_size);
1985   mask = val + buf_size;
1986   memset (val, 0, buf_size);
1987   memset (mask, ~0U, buf_size);
1988 
1989   FOR_EACH_VEC_ELT (stores, i, info)
1990     {
1991       unsigned int pos_in_buffer = info->bitpos - bitregion_start;
1992       tree cst;
1993       if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
1994 	cst = info->ops[0].val;
1995       else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
1996 	cst = info->ops[1].val;
1997       else
1998 	cst = NULL_TREE;
1999       bool ret = true;
2000       if (cst)
2001 	{
2002 	  if (info->rhs_code == BIT_INSERT_EXPR)
2003 	    bit_insertion = true;
2004 	  else
2005 	    ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2006 					 pos_in_buffer, buf_size);
2007 	}
2008       unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2009       if (BYTES_BIG_ENDIAN)
2010 	clear_bit_region_be (m, (BITS_PER_UNIT - 1
2011 				 - (pos_in_buffer % BITS_PER_UNIT)),
2012 			     info->bitsize);
2013       else
2014 	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2015       if (cst && dump_file && (dump_flags & TDF_DETAILS))
2016 	{
2017 	  if (ret)
2018 	    {
2019 	      fputs ("After writing ", dump_file);
2020 	      print_generic_expr (dump_file, cst, TDF_NONE);
2021 	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2022 		       " at position %d\n", info->bitsize, pos_in_buffer);
2023 	      fputs ("  the merged value contains ", dump_file);
2024 	      dump_char_array (dump_file, val, buf_size);
2025 	      fputs ("  the merged mask contains  ", dump_file);
2026 	      dump_char_array (dump_file, mask, buf_size);
2027 	      if (bit_insertion)
2028 		fputs ("  bit insertion is required\n", dump_file);
2029 	    }
2030 	  else
2031 	    fprintf (dump_file, "Failed to merge stores\n");
2032 	}
2033       if (!ret)
2034 	return false;
2035     }
2036   stores.qsort (sort_by_bitpos);
2037   return true;
2038 }
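
/* A worked example for the function above (hypothetical values, not part of
   the original sources): for a group containing only the two constant byte
   stores
     MEM[(char *)p_1]      = 0x12;
     MEM[(char *)p_1 + 1B] = 0x34;
   with a byte-aligned two-byte bitregion, apply_stores produces
   val = { 0x12, 0x34 } and mask = { 0x00, 0x00 }; any mask bits still set
   afterwards mark positions that no store in the group has written.  */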
2039 
2040 /* Structure describing the store chain.  */
2041 
2042 class imm_store_chain_info
2043 {
2044 public:
2045   /* Doubly-linked list that imposes an order on chain processing.
2046      PNXP (prev's next pointer) points to the head of a list, or to
2047      the next field in the previous chain in the list.
2048      See pass_store_merging::m_stores_head for more rationale.  */
2049   imm_store_chain_info *next, **pnxp;
2050   tree base_addr;
2051   auto_vec<store_immediate_info *> m_store_info;
2052   auto_vec<merged_store_group *> m_merged_store_groups;
2053 
2054   imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2055   : next (inspt), pnxp (&inspt), base_addr (b_a)
2056   {
2057     inspt = this;
2058     if (next)
2059       {
2060 	gcc_checking_assert (pnxp == next->pnxp);
2061 	next->pnxp = &next;
2062       }
2063   }
2064   ~imm_store_chain_info ()
2065   {
2066     *pnxp = next;
2067     if (next)
2068       {
2069 	gcc_checking_assert (&next == next->pnxp);
2070 	next->pnxp = pnxp;
2071       }
2072   }
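
  /* Editorial note (not part of the original sources): the constructor and
     destructor above maintain the invariant that *pnxp == this for every
     element, i.e. pnxp always points at the pointer through which this
     element is reached (either the list head or the 'next' field of the
     previous element).  */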
2073   bool terminate_and_process_chain ();
2074   bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2075 			   unsigned int);
2076   bool coalesce_immediate_stores ();
2077   bool output_merged_store (merged_store_group *);
2078   bool output_merged_stores ();
2079 };
2080 
2081 const pass_data pass_data_tree_store_merging = {
2082   GIMPLE_PASS,     /* type */
2083   "store-merging", /* name */
2084   OPTGROUP_NONE,   /* optinfo_flags */
2085   TV_GIMPLE_STORE_MERGING,	 /* tv_id */
2086   PROP_ssa,	/* properties_required */
2087   0,		   /* properties_provided */
2088   0,		   /* properties_destroyed */
2089   0,		   /* todo_flags_start */
2090   TODO_update_ssa, /* todo_flags_finish */
2091 };
2092 
2093 class pass_store_merging : public gimple_opt_pass
2094 {
2095 public:
2096   pass_store_merging (gcc::context *ctxt)
2097     : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2098   {
2099   }
2100 
2101   /* Pass not supported for PDP-endian, nor for insane hosts or
2102      target character sizes where native_{encode,interpret}_expr
2103      doesn't work properly.  */
2104   virtual bool
2105   gate (function *)
2106   {
2107     return flag_store_merging
2108 	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2109 	   && CHAR_BIT == 8
2110 	   && BITS_PER_UNIT == 8;
2111   }
2112 
2113   virtual unsigned int execute (function *);
2114 
2115 private:
2116   hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2117 
2118   /* Form a doubly-linked stack of the elements of m_stores, so that
2119      we can iterate over them in a predictable way.  Using this order
2120      avoids extraneous differences in the compiler output just because
2121      of tree pointer variations (e.g. different chains end up in
2122      different positions of m_stores, so they are handled in different
2123      orders, so they allocate or release SSA names in different
2124      orders, and when they get reused, subsequent passes end up
2125      getting different SSA names, which may ultimately change
2126      decisions when going out of SSA).  */
2127   imm_store_chain_info *m_stores_head;
2128 
2129   bool process_store (gimple *);
2130   bool terminate_and_process_chain (imm_store_chain_info *);
2131   bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2132   bool terminate_and_process_all_chains ();
2133 }; // class pass_store_merging
2134 
2135 /* Terminate and process all recorded chains.  Return true if any changes
2136    were made.  */
2137 
2138 bool
2139 pass_store_merging::terminate_and_process_all_chains ()
2140 {
2141   bool ret = false;
2142   while (m_stores_head)
2143     ret |= terminate_and_process_chain (m_stores_head);
2144   gcc_assert (m_stores.is_empty ());
2145   return ret;
2146 }
2147 
2148 /* Terminate all chains that are affected by the statement STMT.
2149    CHAIN_INFO, if non-NULL, is the chain that should be skipped in these
2150    checks.  Return true if any changes were made.  */
2151 
2152 bool
2153 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2154 						     **chain_info,
2155 						   gimple *stmt)
2156 {
2157   bool ret = false;
2158 
2159   /* If the statement doesn't touch memory it can't alias.  */
2160   if (!gimple_vuse (stmt))
2161     return false;
2162 
2163   tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2164   ao_ref store_lhs_ref;
2165   ao_ref_init (&store_lhs_ref, store_lhs);
2166   for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2167     {
2168       next = cur->next;
2169 
2170       /* We already checked all the stores in chain_info and terminated the
2171 	 chain if necessary.  Skip it here.  */
2172       if (chain_info && *chain_info == cur)
2173 	continue;
2174 
2175       store_immediate_info *info;
2176       unsigned int i;
2177       FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2178 	{
2179 	  tree lhs = gimple_assign_lhs (info->stmt);
2180 	  ao_ref lhs_ref;
2181 	  ao_ref_init (&lhs_ref, lhs);
2182 	  if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2183 	      || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2184 	      || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2185 						   &lhs_ref, false)))
2186 	    {
2187 	      if (dump_file && (dump_flags & TDF_DETAILS))
2188 		{
2189 		  fprintf (dump_file, "stmt causes chain termination:\n");
2190 		  print_gimple_stmt (dump_file, stmt, 0);
2191 		}
2192 	      ret |= terminate_and_process_chain (cur);
2193 	      break;
2194 	    }
2195 	}
2196     }
2197 
2198   return ret;
2199 }
2200 
2201 /* Helper function.  Terminate the recorded chain storing to base object
2202    BASE.  Return true if the merging and output were successful.  The m_stores
2203    entry is removed after the processing in any case.  */
2204 
2205 bool
2206 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2207 {
2208   bool ret = chain_info->terminate_and_process_chain ();
2209   m_stores.remove (chain_info->base_addr);
2210   delete chain_info;
2211   return ret;
2212 }
2213 
2214 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2215    may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
2216    be able to sink load of REF across stores between FIRST and LAST, up
2217    be able to sink a load of REF across stores between FIRST and LAST, up
2218 
2219 bool
2220 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2221 {
2222   ao_ref r;
2223   ao_ref_init (&r, ref);
2224   unsigned int count = 0;
2225   tree vop = gimple_vdef (last);
2226   gimple *stmt;
2227 
2228   /* Return true conservatively if the basic blocks are different.  */
2229   if (gimple_bb (first) != gimple_bb (last))
2230     return true;
2231 
2232   do
2233     {
2234       stmt = SSA_NAME_DEF_STMT (vop);
2235       if (stmt_may_clobber_ref_p_1 (stmt, &r))
2236 	return true;
2237       if (gimple_store_p (stmt)
2238 	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2239 	return true;
2240       /* Avoid quadratic compile time by bounding the number of checks
2241 	 we perform.  */
2242       if (++count > MAX_STORE_ALIAS_CHECKS)
2243 	return true;
2244       vop = gimple_vuse (stmt);
2245     }
2246   while (stmt != first);
2247 
2248   return false;
2249 }
2250 
2251 /* Return true if INFO->ops[IDX] is mergeable with the
2252    corresponding loads already in MERGED_STORE group.
2253    BASE_ADDR is the base address of the whole store group.  */
2254 
2255 bool
2256 compatible_load_p (merged_store_group *merged_store,
2257 		   store_immediate_info *info,
2258 		   tree base_addr, int idx)
2259 {
2260   store_immediate_info *infof = merged_store->stores[0];
2261   if (!info->ops[idx].base_addr
2262       || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2263 		   info->bitpos - infof->bitpos)
2264       || !operand_equal_p (info->ops[idx].base_addr,
2265 			   infof->ops[idx].base_addr, 0))
2266     return false;
2267 
2268   store_immediate_info *infol = merged_store->stores.last ();
2269   tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2270   /* In this case all vuses should be the same, e.g.
2271      _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2272      or
2273      _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2274      and we can emit the coalesced load next to any of those loads.  */
2275   if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2276       && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2277     return true;
2278 
2279   /* Otherwise, at least for now require that the load has the same
2280      vuse as the store.  See following examples.  */
2281   if (gimple_vuse (info->stmt) != load_vuse)
2282     return false;
2283 
2284   if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2285       || (infof != infol
2286 	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2287     return false;
2288 
2289   /* If the load is from the same location as the store, already
2290      the construction of the immediate chain info guarantees no intervening
2291      stores, so no further checks are needed.  Example:
2292      _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
2293   if (known_eq (info->ops[idx].bitpos, info->bitpos)
2294       && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2295     return true;
2296 
2297   /* Otherwise, we need to punt if any of the loads can be clobbered by any
2298      of the stores in the group, or any other stores in between those.
2299      Previous calls to compatible_load_p ensured that for all the
2300      merged_store->stores IDX loads, no stmts starting with
2301      merged_store->first_stmt and ending right before merged_store->last_stmt
2302      clobbers those loads.  */
2303   gimple *first = merged_store->first_stmt;
2304   gimple *last = merged_store->last_stmt;
2305   unsigned int i;
2306   store_immediate_info *infoc;
2307   /* The stores are sorted by increasing store bitpos, so if info->stmt store
2308      comes before the so far first load, we'll be changing
2309      merged_store->first_stmt.  In that case we need to give up if
2310      any of the earlier processed loads is clobbered by the stmts in the new
2311      range.  */
2312   if (info->order < merged_store->first_order)
2313     {
2314       FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2315 	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2316 	  return false;
2317       first = info->stmt;
2318     }
2319   /* Similarly, we could change merged_store->last_stmt, so ensure
2320      in that case no stmts in the new range clobber any of the earlier
2321      processed loads.  */
2322   else if (info->order > merged_store->last_order)
2323     {
2324       FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2325 	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2326 	  return false;
2327       last = info->stmt;
2328     }
2329   /* And finally, we'd be adding a new load to the set, ensure it isn't
2330      clobbered in the new range.  */
2331   if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2332     return false;
2333 
2334   /* Otherwise, we are looking for:
2335      _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2336      or
2337      _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
2338   return true;
2339 }
2340 
2341 /* Add all refs loaded to compute VAL to REFS vector.  */
2342 
2343 void
2344 gather_bswap_load_refs (vec<tree> *refs, tree val)
2345 {
2346   if (TREE_CODE (val) != SSA_NAME)
2347     return;
2348 
2349   gimple *stmt = SSA_NAME_DEF_STMT (val);
2350   if (!is_gimple_assign (stmt))
2351     return;
2352 
2353   if (gimple_assign_load_p (stmt))
2354     {
2355       refs->safe_push (gimple_assign_rhs1 (stmt));
2356       return;
2357     }
2358 
2359   switch (gimple_assign_rhs_class (stmt))
2360     {
2361     case GIMPLE_BINARY_RHS:
2362       gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2363       /* FALLTHRU */
2364     case GIMPLE_UNARY_RHS:
2365       gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2366       break;
2367     default:
2368       gcc_unreachable ();
2369     }
2370 }
2371 
2372 /* Check if there are any stores in M_STORE_INFO after index I
2373    (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2374    a potential group ending at END and that have their order
2375    smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
2376    all the stores already merged and the one under consideration
2377    have rhs_code of INTEGER_CST.  Return true if there are no such stores.
2378    Consider:
2379      MEM[(long long int *)p_28] = 0;
2380      MEM[(long long int *)p_28 + 8B] = 0;
2381      MEM[(long long int *)p_28 + 16B] = 0;
2382      MEM[(long long int *)p_28 + 24B] = 0;
2383      _129 = (int) _130;
2384      MEM[(int *)p_28 + 8B] = _129;
2385      MEM[(int *)p_28].a = -1;
2386    We already have
2387      MEM[(long long int *)p_28] = 0;
2388      MEM[(int *)p_28].a = -1;
2389    stmts in the current group and need to consider if it is safe to
2390    add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2391    There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2392    store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2393    into the group and merging of those 3 stores is successful, merged
2394    stmts will be emitted at the latest store from that group, i.e.
2395    LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2396    The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2397    the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2398    so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2399    into the group.  That way it will be its own store group and will
2400    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
2401    INTEGER_CST stores, those are mergeable using merge_overlapping,
2402    so don't return false for those.
2403 
2404    Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2405    (exclusive) and verify that none of them both overlaps the bit range
2406    START to END and has order in between FIRST_ORDER and LAST_ORDER.  This is to
2407    prevent merging in cases like:
2408      MEM <char[12]> [&b + 8B] = {};
2409      MEM[(short *) &b] = 5;
2410      _5 = *x_4(D);
2411      MEM <long long unsigned int> [&b + 2B] = _5;
2412      MEM[(char *)&b + 16B] = 88;
2413      MEM[(int *)&b + 20B] = 1;
2414    The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2415    be merged with it, because the = _5 store overlaps these and is in between
2416    them in sort_by_order ordering.  If it was merged, the merged store would
2417    go after the = _5 store and thus change behavior.  */
2418 
2419 static bool
2420 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2421 		  bool all_integer_cst_p, unsigned int first_order,
2422 		  unsigned int last_order, unsigned HOST_WIDE_INT start,
2423 		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2424 		  unsigned end_earlier)
2425 {
2426   unsigned int len = m_store_info.length ();
2427   for (unsigned int j = first_earlier; j < end_earlier; j++)
2428     {
2429       store_immediate_info *info = m_store_info[j];
2430       if (info->order > first_order
2431 	  && info->order < last_order
2432 	  && info->bitpos + info->bitsize > start)
2433 	return false;
2434     }
2435   for (++i; i < len; ++i)
2436     {
2437       store_immediate_info *info = m_store_info[i];
2438       if (info->bitpos >= end)
2439 	break;
2440       if (info->order < last_order
2441 	  && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2442 	return false;
2443     }
2444   return true;
2445 }
2446 
2447 /* Return true if m_store_info[first] and at least one following store
2448    form a group that stores a TRY_SIZE-bit value which is either a
2449    byte-swapped or an identity copy of a memory load or of some value.
2450    This uses the bswap pass APIs.  */
2451 
2452 bool
2453 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2454 					  unsigned int first,
2455 					  unsigned int try_size,
2456 					  unsigned int first_earlier)
2457 {
2458   unsigned int len = m_store_info.length (), last = first;
2459   unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2460   if (width >= try_size)
2461     return false;
2462   for (unsigned int i = first + 1; i < len; ++i)
2463     {
2464       if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2465 	  || m_store_info[i]->lp_nr != merged_store->lp_nr
2466 	  || m_store_info[i]->ins_stmt == NULL)
2467 	return false;
2468       width += m_store_info[i]->bitsize;
2469       if (width >= try_size)
2470 	{
2471 	  last = i;
2472 	  break;
2473 	}
2474     }
2475   if (width != try_size)
2476     return false;
2477 
2478   bool allow_unaligned
2479     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2480   /* Punt if the combined store would not be aligned and we need alignment.  */
2481   if (!allow_unaligned)
2482     {
2483       unsigned int align = merged_store->align;
2484       unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2485       for (unsigned int i = first + 1; i <= last; ++i)
2486 	{
2487 	  unsigned int this_align;
2488 	  unsigned HOST_WIDE_INT align_bitpos = 0;
2489 	  get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2490 				  &this_align, &align_bitpos);
2491 	  if (this_align > align)
2492 	    {
2493 	      align = this_align;
2494 	      align_base = m_store_info[i]->bitpos - align_bitpos;
2495 	    }
2496 	}
2497       unsigned HOST_WIDE_INT align_bitpos
2498 	= (m_store_info[first]->bitpos - align_base) & (align - 1);
2499       if (align_bitpos)
2500 	align = least_bit_hwi (align_bitpos);
2501       if (align < try_size)
2502 	return false;
2503     }
2504 
2505   tree type;
2506   switch (try_size)
2507     {
2508     case 16: type = uint16_type_node; break;
2509     case 32: type = uint32_type_node; break;
2510     case 64: type = uint64_type_node; break;
2511     default: gcc_unreachable ();
2512     }
2513   struct symbolic_number n;
2514   gimple *ins_stmt = NULL;
2515   int vuse_store = -1;
2516   unsigned int first_order = merged_store->first_order;
2517   unsigned int last_order = merged_store->last_order;
2518   gimple *first_stmt = merged_store->first_stmt;
2519   gimple *last_stmt = merged_store->last_stmt;
2520   unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2521   store_immediate_info *infof = m_store_info[first];
2522 
2523   for (unsigned int i = first; i <= last; ++i)
2524     {
2525       store_immediate_info *info = m_store_info[i];
2526       struct symbolic_number this_n = info->n;
2527       this_n.type = type;
2528       if (!this_n.base_addr)
2529 	this_n.range = try_size / BITS_PER_UNIT;
2530       else
2531 	/* Update vuse in case it has changed by output_merged_stores.  */
2532 	this_n.vuse = gimple_vuse (info->ins_stmt);
2533       unsigned int bitpos = info->bitpos - infof->bitpos;
2534       if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2535 			    BYTES_BIG_ENDIAN
2536 			    ? try_size - info->bitsize - bitpos
2537 			    : bitpos))
2538 	return false;
2539       if (this_n.base_addr && vuse_store)
2540 	{
2541 	  unsigned int j;
2542 	  for (j = first; j <= last; ++j)
2543 	    if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2544 	      break;
2545 	  if (j > last)
2546 	    {
2547 	      if (vuse_store == 1)
2548 		return false;
2549 	      vuse_store = 0;
2550 	    }
2551 	}
2552       if (i == first)
2553 	{
2554 	  n = this_n;
2555 	  ins_stmt = info->ins_stmt;
2556 	}
2557       else
2558 	{
2559 	  if (n.base_addr && n.vuse != this_n.vuse)
2560 	    {
2561 	      if (vuse_store == 0)
2562 		return false;
2563 	      vuse_store = 1;
2564 	    }
2565 	  if (info->order > last_order)
2566 	    {
2567 	      last_order = info->order;
2568 	      last_stmt = info->stmt;
2569 	    }
2570 	  else if (info->order < first_order)
2571 	    {
2572 	      first_order = info->order;
2573 	      first_stmt = info->stmt;
2574 	    }
2575 	  end = MAX (end, info->bitpos + info->bitsize);
2576 
2577 	  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2578 					     &this_n, &n);
2579 	  if (ins_stmt == NULL)
2580 	    return false;
2581 	}
2582     }
2583 
2584   uint64_t cmpxchg, cmpnop;
2585   find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2586 
2587   /* A complete byte swap should make the symbolic number start with
2588      the largest digit in the highest-order byte.  An unchanged symbolic
2589      number indicates a read with the same endianness as the target.  */
2590   if (n.n != cmpnop && n.n != cmpxchg)
2591     return false;
2592 
2593   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2594     return false;
2595 
2596   if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2597 			 merged_store->start, end, first_earlier, first))
2598     return false;
2599 
2600   /* Don't handle memory copy this way if normal non-bswap processing
2601      would handle it too.  */
2602   if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2603     {
2604       unsigned int i;
2605       for (i = first; i <= last; ++i)
2606 	if (m_store_info[i]->rhs_code != MEM_REF)
2607 	  break;
2608       if (i == last + 1)
2609 	return false;
2610     }
2611 
2612   if (n.n == cmpxchg)
2613     switch (try_size)
2614       {
2615       case 16:
2616 	/* Will emit LROTATE_EXPR.  */
2617 	break;
2618       case 32:
2619 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2620 	    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2621 	  break;
2622 	return false;
2623       case 64:
2624 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2625 	    && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2626 	  break;
2627 	return false;
2628       default:
2629 	gcc_unreachable ();
2630       }
2631 
2632   if (!allow_unaligned && n.base_addr)
2633     {
2634       unsigned int align = get_object_alignment (n.src);
2635       if (align < try_size)
2636 	return false;
2637     }
2638 
2639   /* If each load has the vuse of the corresponding store, we need to verify
2640      the loads can be sunk right before the last store.  */
2641   if (vuse_store == 1)
2642     {
2643       auto_vec<tree, 64> refs;
2644       for (unsigned int i = first; i <= last; ++i)
2645 	gather_bswap_load_refs (&refs,
2646 				gimple_assign_rhs1 (m_store_info[i]->stmt));
2647 
2648       unsigned int i;
2649       tree ref;
2650       FOR_EACH_VEC_ELT (refs, i, ref)
2651 	if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2652 	  return false;
2653       n.vuse = NULL_TREE;
2654     }
2655 
2656   infof->n = n;
2657   infof->ins_stmt = ins_stmt;
2658   for (unsigned int i = first; i <= last; ++i)
2659     {
2660       m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2661       m_store_info[i]->ops[0].base_addr = NULL_TREE;
2662       m_store_info[i]->ops[1].base_addr = NULL_TREE;
2663       if (i != first)
2664 	merged_store->merge_into (m_store_info[i]);
2665     }
2666 
2667   return true;
2668 }
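
/* For illustration (hypothetical snippet, not part of the original sources):
   on a little-endian target the sequence
     p[0] = (char) x;
     p[1] = (char) (x >> 8);
     p[2] = (char) (x >> 16);
     p[3] = (char) (x >> 24);
   is recognized here as a single 32-bit identity store of x, while the
   reversed shift order is recognized as a byte swap and can be emitted via
   __builtin_bswap32 when the target provides it.  */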
2669 
2670 /* Go through the candidate stores recorded in m_store_info and merge them
2671    into merged_store_group objects recorded into m_merged_store_groups
2672    representing the widened stores.  Return true if coalescing was successful
2673    and the number of widened stores is fewer than the original number
2674    of stores.  */
2675 
2676 bool
2677 imm_store_chain_info::coalesce_immediate_stores ()
2678 {
2679   /* Anything less can't be processed.  */
2680   if (m_store_info.length () < 2)
2681     return false;
2682 
2683   if (dump_file && (dump_flags & TDF_DETAILS))
2684     fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2685 	     m_store_info.length ());
2686 
2687   store_immediate_info *info;
2688   unsigned int i, ignore = 0;
2689   unsigned int first_earlier = 0;
2690   unsigned int end_earlier = 0;
2691 
2692   /* Order the stores by the bitposition they write to.  */
2693   m_store_info.qsort (sort_by_bitpos);
2694 
2695   info = m_store_info[0];
2696   merged_store_group *merged_store = new merged_store_group (info);
2697   if (dump_file && (dump_flags & TDF_DETAILS))
2698     fputs ("New store group\n", dump_file);
2699 
2700   FOR_EACH_VEC_ELT (m_store_info, i, info)
2701     {
2702       unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2703 
2704       if (i <= ignore)
2705 	goto done;
2706 
2707       while (first_earlier < end_earlier
2708 	     && (m_store_info[first_earlier]->bitpos
2709 		 + m_store_info[first_earlier]->bitsize
2710 		 <= merged_store->start))
2711 	first_earlier++;
2712 
2713       /* First try to handle group of stores like:
2714 	 p[0] = data >> 24;
2715 	 p[1] = data >> 16;
2716 	 p[2] = data >> 8;
2717 	 p[3] = data;
2718 	 using the bswap framework.  */
2719       if (info->bitpos == merged_store->start + merged_store->width
2720 	  && merged_store->stores.length () == 1
2721 	  && merged_store->stores[0]->ins_stmt != NULL
2722 	  && info->lp_nr == merged_store->lp_nr
2723 	  && info->ins_stmt != NULL)
2724 	{
2725 	  unsigned int try_size;
2726 	  for (try_size = 64; try_size >= 16; try_size >>= 1)
2727 	    if (try_coalesce_bswap (merged_store, i - 1, try_size,
2728 				    first_earlier))
2729 	      break;
2730 
2731 	  if (try_size >= 16)
2732 	    {
2733 	      ignore = i + merged_store->stores.length () - 1;
2734 	      m_merged_store_groups.safe_push (merged_store);
2735 	      if (ignore < m_store_info.length ())
2736 		{
2737 		  merged_store = new merged_store_group (m_store_info[ignore]);
2738 		  end_earlier = ignore;
2739 		}
2740 	      else
2741 		merged_store = NULL;
2742 	      goto done;
2743 	    }
2744 	}
2745 
2746       new_bitregion_start
2747 	= MIN (merged_store->bitregion_start, info->bitregion_start);
2748       new_bitregion_end
2749 	= MAX (merged_store->bitregion_end, info->bitregion_end);
2750 
2751       if (info->order >= merged_store->first_nonmergeable_order
2752 	  || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2753 	      > (unsigned) param_store_merging_max_size))
2754 	;
2755 
2756       /* |---store 1---|
2757 	       |---store 2---|
2758 	 Overlapping stores.  */
2759       else if (IN_RANGE (info->bitpos, merged_store->start,
2760 			 merged_store->start + merged_store->width - 1)
2761 	       /* |---store 1---||---store 2---|
2762 		  Handle also the consecutive INTEGER_CST stores case here,
2763 		  as we have here the code to deal with overlaps.  */
2764 	       || (info->bitregion_start <= merged_store->bitregion_end
2765 		   && info->rhs_code == INTEGER_CST
2766 		   && merged_store->only_constants
2767 		   && merged_store->can_be_merged_into (info)))
2768 	{
2769 	  /* Only allow overlapping stores of constants.  */
2770 	  if (info->rhs_code == INTEGER_CST
2771 	      && merged_store->only_constants
2772 	      && info->lp_nr == merged_store->lp_nr)
2773 	    {
2774 	      unsigned int first_order
2775 		= MIN (merged_store->first_order, info->order);
2776 	      unsigned int last_order
2777 		= MAX (merged_store->last_order, info->order);
2778 	      unsigned HOST_WIDE_INT end
2779 		= MAX (merged_store->start + merged_store->width,
2780 		       info->bitpos + info->bitsize);
2781 	      if (check_no_overlap (m_store_info, i, true, first_order,
2782 				    last_order, merged_store->start, end,
2783 				    first_earlier, end_earlier))
2784 		{
2785 		  /* check_no_overlap call above made sure there are no
2786 		     overlapping stores with non-INTEGER_CST rhs_code
2787 		     in between the first and last of the stores we've
2788 		     just merged.  If there are any INTEGER_CST rhs_code
2789 		     stores in between, we need to merge_overlapping them
2790 		     even if in the sort_by_bitpos order there are other
2791 		     overlapping stores in between.  Keep those stores as is.
2792 		     Example:
2793 			MEM[(int *)p_28] = 0;
2794 			MEM[(char *)p_28 + 3B] = 1;
2795 			MEM[(char *)p_28 + 1B] = 2;
2796 			MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2797 		     We can't merge the zero store with the store of two and
2798 		     not merge anything else, because the store of one is
2799 		     in the original order in between those two, but in
2800 		     store_by_bitpos order it comes after the last store that
2801 		     we can't merge with them.  We can merge the first 3 stores
2802 		     and keep the last store as is though.  */
2803 		  unsigned int len = m_store_info.length ();
2804 		  unsigned int try_order = last_order;
2805 		  unsigned int first_nonmergeable_order;
2806 		  unsigned int k;
2807 		  bool last_iter = false;
2808 		  int attempts = 0;
2809 		  do
2810 		    {
2811 		      unsigned int max_order = 0;
2812 		      unsigned int min_order = first_order;
2813 		      unsigned first_nonmergeable_int_order = ~0U;
2814 		      unsigned HOST_WIDE_INT this_end = end;
2815 		      k = i;
2816 		      first_nonmergeable_order = ~0U;
2817 		      for (unsigned int j = i + 1; j < len; ++j)
2818 			{
2819 			  store_immediate_info *info2 = m_store_info[j];
2820 			  if (info2->bitpos >= this_end)
2821 			    break;
2822 			  if (info2->order < try_order)
2823 			    {
2824 			      if (info2->rhs_code != INTEGER_CST
2825 				  || info2->lp_nr != merged_store->lp_nr)
2826 				{
2827 				  /* Normally check_no_overlap makes sure this
2828 				     doesn't happen, but if end grows below,
2829 				     then we need to process more stores than
2830 				     check_no_overlap verified.  Example:
2831 				      MEM[(int *)p_5] = 0;
2832 				      MEM[(short *)p_5 + 3B] = 1;
2833 				      MEM[(char *)p_5 + 4B] = _9;
2834 				      MEM[(char *)p_5 + 2B] = 2;  */
2835 				  k = 0;
2836 				  break;
2837 				}
2838 			      k = j;
2839 			      min_order = MIN (min_order, info2->order);
2840 			      this_end = MAX (this_end,
2841 					      info2->bitpos + info2->bitsize);
2842 			    }
2843 			  else if (info2->rhs_code == INTEGER_CST
2844 				   && info2->lp_nr == merged_store->lp_nr
2845 				   && !last_iter)
2846 			    {
2847 			      max_order = MAX (max_order, info2->order + 1);
2848 			      first_nonmergeable_int_order
2849 				= MIN (first_nonmergeable_int_order,
2850 				       info2->order);
2851 			    }
2852 			  else
2853 			    first_nonmergeable_order
2854 			      = MIN (first_nonmergeable_order, info2->order);
2855 			}
2856 		      if (k > i
2857 			  && !check_no_overlap (m_store_info, len - 1, true,
2858 						min_order, try_order,
2859 						merged_store->start, this_end,
2860 						first_earlier, end_earlier))
2861 			k = 0;
2862 		      if (k == 0)
2863 			{
2864 			  if (last_order == try_order)
2865 			    break;
2866 			  /* If this failed, but only because we grew
2867 			     try_order, retry with the last working one,
2868 			     so that we merge at least something.  */
2869 			  try_order = last_order;
2870 			  last_iter = true;
2871 			  continue;
2872 			}
2873 		      last_order = try_order;
2874 		      /* Retry with a larger try_order to see if we could
2875 			 merge some further INTEGER_CST stores.  */
2876 		      if (max_order
2877 			  && (first_nonmergeable_int_order
2878 			      < first_nonmergeable_order))
2879 			{
2880 			  try_order = MIN (max_order,
2881 					   first_nonmergeable_order);
2882 			  try_order
2883 			    = MIN (try_order,
2884 				   merged_store->first_nonmergeable_order);
2885 			  if (try_order > last_order && ++attempts < 16)
2886 			    continue;
2887 			}
2888 		      first_nonmergeable_order
2889 			= MIN (first_nonmergeable_order,
2890 			       first_nonmergeable_int_order);
2891 		      end = this_end;
2892 		      break;
2893 		    }
2894 		  while (1);
2895 
2896 		  if (k != 0)
2897 		    {
2898 		      merged_store->merge_overlapping (info);
2899 
2900 		      merged_store->first_nonmergeable_order
2901 			= MIN (merged_store->first_nonmergeable_order,
2902 			       first_nonmergeable_order);
2903 
2904 		      for (unsigned int j = i + 1; j <= k; j++)
2905 			{
2906 			  store_immediate_info *info2 = m_store_info[j];
2907 			  gcc_assert (info2->bitpos < end);
2908 			  if (info2->order < last_order)
2909 			    {
2910 			      gcc_assert (info2->rhs_code == INTEGER_CST);
2911 			      if (info != info2)
2912 				merged_store->merge_overlapping (info2);
2913 			    }
2914 			  /* Other stores are kept and not merged in any
2915 			     way.  */
2916 			}
2917 		      ignore = k;
2918 		      goto done;
2919 		    }
2920 		}
2921 	    }
2922 	}
2923       /* |---store 1---||---store 2---|
2924 	 This store is consecutive to the previous one.
2925 	 Merge it into the current store group.  There can be gaps in between
2926 	 the stores, but there can't be gaps in between bitregions.  */
2927       else if (info->bitregion_start <= merged_store->bitregion_end
2928 	       && merged_store->can_be_merged_into (info))
2929 	{
2930 	  store_immediate_info *infof = merged_store->stores[0];
2931 
2932 	  /* All the rhs_code ops that take 2 operands are commutative,
2933 	     so swap the operands if that could make them compatible.  */
2934 	  if (infof->ops[0].base_addr
2935 	      && infof->ops[1].base_addr
2936 	      && info->ops[0].base_addr
2937 	      && info->ops[1].base_addr
2938 	      && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2939 			   info->bitpos - infof->bitpos)
2940 	      && operand_equal_p (info->ops[1].base_addr,
2941 				  infof->ops[0].base_addr, 0))
2942 	    {
2943 	      std::swap (info->ops[0], info->ops[1]);
2944 	      info->ops_swapped_p = true;
2945 	    }
2946 	  if (check_no_overlap (m_store_info, i, false,
2947 				MIN (merged_store->first_order, info->order),
2948 				MAX (merged_store->last_order, info->order),
2949 				merged_store->start,
2950 				MAX (merged_store->start + merged_store->width,
2951 				     info->bitpos + info->bitsize),
2952 				first_earlier, end_earlier))
2953 	    {
2954 	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
2955 	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2956 		{
2957 		  info->rhs_code = BIT_INSERT_EXPR;
2958 		  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2959 		  info->ops[0].base_addr = NULL_TREE;
2960 		}
2961 	      else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2962 		{
2963 		  store_immediate_info *infoj;
2964 		  unsigned int j;
2965 		  FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2966 		    {
2967 		      infoj->rhs_code = BIT_INSERT_EXPR;
2968 		      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2969 		      infoj->ops[0].base_addr = NULL_TREE;
2970 		    }
2971 		}
2972 	      if ((infof->ops[0].base_addr
2973 		   ? compatible_load_p (merged_store, info, base_addr, 0)
2974 		   : !info->ops[0].base_addr)
2975 		  && (infof->ops[1].base_addr
2976 		      ? compatible_load_p (merged_store, info, base_addr, 1)
2977 		      : !info->ops[1].base_addr))
2978 		{
2979 		  merged_store->merge_into (info);
2980 		  goto done;
2981 		}
2982 	    }
2983 	}
2984 
2985       /* |---store 1---| <gap> |---store 2---|.
2986 	 Gap between stores or the rhs not compatible.  Start a new group.  */
2987 
2988       /* Try to apply all the stores recorded for the group to determine
2989 	 the bitpattern they write and discard it if that fails.
2990 	 This will also reject single-store groups.  */
2991       if (merged_store->apply_stores ())
2992 	m_merged_store_groups.safe_push (merged_store);
2993       else
2994 	delete merged_store;
2995 
2996       merged_store = new merged_store_group (info);
2997       end_earlier = i;
2998       if (dump_file && (dump_flags & TDF_DETAILS))
2999 	fputs ("New store group\n", dump_file);
3000 
3001     done:
3002       if (dump_file && (dump_flags & TDF_DETAILS))
3003 	{
3004 	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3005 			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3006 		   i, info->bitsize, info->bitpos);
3007 	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3008 	  fputc ('\n', dump_file);
3009 	}
3010     }
3011 
3012   /* Record or discard the last store group.  */
3013   if (merged_store)
3014     {
3015       if (merged_store->apply_stores ())
3016 	m_merged_store_groups.safe_push (merged_store);
3017       else
3018 	delete merged_store;
3019     }
3020 
3021   gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3022 
3023   bool success
3024     = !m_merged_store_groups.is_empty ()
3025       && m_merged_store_groups.length () < m_store_info.length ();
3026 
3027   if (success && dump_file)
3028     fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3029 	     m_merged_store_groups.length ());
3030 
3031   return success;
3032 }
3033 
3034 /* Return the type to use for the merged stores or loads described by STMTS.
3035    This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
3036    otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3037    of the MEM_REFs if any.  */
3038 
3039 static tree
3040 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3041 			  unsigned short *cliquep, unsigned short *basep)
3042 {
3043   gimple *stmt;
3044   unsigned int i;
3045   tree type = NULL_TREE;
3046   tree ret = NULL_TREE;
3047   *cliquep = 0;
3048   *basep = 0;
3049 
3050   FOR_EACH_VEC_ELT (stmts, i, stmt)
3051     {
3052       tree ref = is_load ? gimple_assign_rhs1 (stmt)
3053 			 : gimple_assign_lhs (stmt);
3054       tree type1 = reference_alias_ptr_type (ref);
3055       tree base = get_base_address (ref);
3056 
3057       if (i == 0)
3058 	{
3059 	  if (TREE_CODE (base) == MEM_REF)
3060 	    {
3061 	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
3062 	      *basep = MR_DEPENDENCE_BASE (base);
3063 	    }
3064 	  ret = type = type1;
3065 	  continue;
3066 	}
3067       if (!alias_ptr_types_compatible_p (type, type1))
3068 	ret = ptr_type_node;
3069       if (TREE_CODE (base) != MEM_REF
3070 	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3071 	  || *basep != MR_DEPENDENCE_BASE (base))
3072 	{
3073 	  *cliquep = 0;
3074 	  *basep = 0;
3075 	}
3076     }
3077   return ret;
3078 }
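
/* For illustration (hypothetical, not part of the original sources): if the
   constituent stores write through incompatible alias pointer types, say an
   'int *' and a 'float *', the function falls back to ptr_type_node, which
   makes the merged access use alias set 0 and thus conflict with any other
   memory access.  */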
3079 
3080 /* Return the location_t information we can find among the statements
3081    in STMTS.  */
3082 
3083 static location_t
3084 get_location_for_stmts (vec<gimple *> &stmts)
3085 {
3086   gimple *stmt;
3087   unsigned int i;
3088 
3089   FOR_EACH_VEC_ELT (stmts, i, stmt)
3090     if (gimple_has_location (stmt))
3091       return gimple_location (stmt);
3092 
3093   return UNKNOWN_LOCATION;
3094 }
3095 
3096 /* Used to describe a store resulting from splitting a wide store into smaller
3097    regularly-sized stores in split_group.  */
3098 
3099 class split_store
3100 {
3101 public:
3102   unsigned HOST_WIDE_INT bytepos;
3103   unsigned HOST_WIDE_INT size;
3104   unsigned HOST_WIDE_INT align;
3105   auto_vec<store_immediate_info *> orig_stores;
3106   /* True if there is a single orig stmt covering the whole split store.  */
3107   bool orig;
3108   split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3109 	       unsigned HOST_WIDE_INT);
3110 };
3111 
3112 /* Simple constructor.  */
3113 
3114 split_store::split_store (unsigned HOST_WIDE_INT bp,
3115 			  unsigned HOST_WIDE_INT sz,
3116 			  unsigned HOST_WIDE_INT al)
3117 			  : bytepos (bp), size (sz), align (al), orig (false)
3118 {
3119   orig_stores.create (0);
3120 }
3121 
3122 /* Record all stores in GROUP that write to the region starting at BITPOS and
3123    of size BITSIZE.  Record infos for such statements in STORES if
3124    non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
3125    if there is exactly one original store in the range (in that case ignore
3126    clobber stmts, unless there are only clobber stmts).  */
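/* A small hypothetical example of the lookup below: if GROUP contains
   stores covering bits [0, 16), [16, 24) and [24, 32), a query for
   BITPOS 16, BITSIZE 8 returns the single [16, 24) store, whereas a
   query for BITPOS 0, BITSIZE 32 overlaps more than one non-clobber
   store and therefore returns NULL (still pushing all three infos into
   STORES when STORES is non-NULL).  */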
3127 
3128 static store_immediate_info *
3129 find_constituent_stores (class merged_store_group *group,
3130 			 vec<store_immediate_info *> *stores,
3131 			 unsigned int *first,
3132 			 unsigned HOST_WIDE_INT bitpos,
3133 			 unsigned HOST_WIDE_INT bitsize)
3134 {
3135   store_immediate_info *info, *ret = NULL;
3136   unsigned int i;
3137   bool second = false;
3138   bool update_first = true;
3139   unsigned HOST_WIDE_INT end = bitpos + bitsize;
3140   for (i = *first; group->stores.iterate (i, &info); ++i)
3141     {
3142       unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3143       unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3144       if (stmt_end <= bitpos)
3145 	{
3146 	  /* BITPOS passed to this function never decreases from within the
3147 	     same split_group call, so optimize and don't scan info records
3148 	     which are known to end before or at BITPOS next time.
3149 	     Only do it if all stores before this one also pass this.  */
3150 	  if (update_first)
3151 	    *first = i + 1;
3152 	  continue;
3153 	}
3154       else
3155 	update_first = false;
3156 
3157       /* The stores in GROUP are ordered by bitposition so if we're past
3158 	 the region for this group return early.  */
3159       if (stmt_start >= end)
3160 	return ret;
3161 
3162       if (gimple_clobber_p (info->stmt))
3163 	{
3164 	  if (stores)
3165 	    stores->safe_push (info);
3166 	  if (ret == NULL)
3167 	    ret = info;
3168 	  continue;
3169 	}
3170       if (stores)
3171 	{
3172 	  stores->safe_push (info);
3173 	  if (ret && !gimple_clobber_p (ret->stmt))
3174 	    {
3175 	      ret = NULL;
3176 	      second = true;
3177 	    }
3178 	}
3179       else if (ret && !gimple_clobber_p (ret->stmt))
3180 	return NULL;
3181       if (!second)
3182 	ret = info;
3183     }
3184   return ret;
3185 }
3186 
3187 /* Return how many SSA_NAMEs used to compute the value to store in the INFO
3188    store have multiple uses.  If any SSA_NAME has multiple uses, also
3189    count statements needed to compute it.  */
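/* A hypothetical example for the counting below:
     _1 = MEM[q];
     _2 = _1 ^ 12;
     MEM[p] = _2;
     use (_1);
   Here _1 has a second use, so the load computing it must survive even
   after the stores are merged; count_multiple_uses returns 1 for this
   store and split_group adds that to its estimate of new statements when
   weighing the transformation.  */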
3190 
3191 static unsigned
3192 count_multiple_uses (store_immediate_info *info)
3193 {
3194   gimple *stmt = info->stmt;
3195   unsigned ret = 0;
3196   switch (info->rhs_code)
3197     {
3198     case INTEGER_CST:
3199       return 0;
3200     case BIT_AND_EXPR:
3201     case BIT_IOR_EXPR:
3202     case BIT_XOR_EXPR:
3203       if (info->bit_not_p)
3204 	{
3205 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3206 	    ret = 1; /* Fall through below to return
3207 			the BIT_NOT_EXPR stmt and then
3208 			BIT_{AND,IOR,XOR}_EXPR and anything it
3209 			uses.  */
3210 	  else
3211 	    /* After this, stmt is the BIT_NOT_EXPR.  */
3212 	    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3213 	}
3214       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3215 	{
3216 	  ret += 1 + info->ops[0].bit_not_p;
3217 	  if (info->ops[1].base_addr)
3218 	    ret += 1 + info->ops[1].bit_not_p;
3219 	  return ret + 1;
3220 	}
3221       stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3222       /* stmt is now the BIT_*_EXPR.  */
3223       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3224 	ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3225       else if (info->ops[info->ops_swapped_p].bit_not_p)
3226 	{
3227 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3228 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3229 	    ++ret;
3230 	}
3231       if (info->ops[1].base_addr == NULL_TREE)
3232 	{
3233 	  gcc_checking_assert (!info->ops_swapped_p);
3234 	  return ret;
3235 	}
3236       if (!has_single_use (gimple_assign_rhs2 (stmt)))
3237 	ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3238       else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3239 	{
3240 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3241 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3242 	    ++ret;
3243 	}
3244       return ret;
3245     case MEM_REF:
3246       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3247 	return 1 + info->ops[0].bit_not_p;
3248       else if (info->ops[0].bit_not_p)
3249 	{
3250 	  stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3251 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3252 	    return 1;
3253 	}
3254       return 0;
3255     case BIT_INSERT_EXPR:
3256       return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3257     default:
3258       gcc_unreachable ();
3259     }
3260 }
3261 
3262 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3263    vector (if non-NULL) with split_store structs describing the byte offset
3264    (from the base), the bit size and alignment of each store as well as the
3265    original statements involved in each such split group.
3266    This is to separate the splitting strategy from the statement
3267    building/emission/linking done in output_merged_store.
3268    Return number of new stores.
3269    If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3270    If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3271    BZERO_FIRST may be true only when the first store covers the whole group
3272    and clears it; if BZERO_FIRST is true, keep that first store in the set
3273    unmodified and emit further stores for the overrides only.
3274    If SPLIT_STORES is NULL, it is just a dry run to count number of
3275    new stores.  */
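/* As a hypothetical illustration of the splitting strategy below: for a
   group of constant stores covering 8 bytes whose base is only known to
   be 4-byte aligned, the aligned dry run proposes two 4-byte stores,
   while a dry run with ALLOW_UNALIGNED_STORE proposes a single
   (unaligned) 8-byte store when MAX_STORE_BITSIZE permits it;
   output_merged_store compares the counts and picks the cheaper
   variant.  */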
3276 
3277 static unsigned int
3278 split_group (merged_store_group *group, bool allow_unaligned_store,
3279 	     bool allow_unaligned_load, bool bzero_first,
3280 	     vec<split_store *> *split_stores,
3281 	     unsigned *total_orig,
3282 	     unsigned *total_new)
3283 {
3284   unsigned HOST_WIDE_INT pos = group->bitregion_start;
3285   unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3286   unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3287   unsigned HOST_WIDE_INT group_align = group->align;
3288   unsigned HOST_WIDE_INT align_base = group->align_base;
3289   unsigned HOST_WIDE_INT group_load_align = group_align;
3290   bool any_orig = false;
3291 
3292   gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3293 
3294   if (group->stores[0]->rhs_code == LROTATE_EXPR
3295       || group->stores[0]->rhs_code == NOP_EXPR)
3296     {
3297       gcc_assert (!bzero_first);
3298       /* For the bswap framework using sets of stores, all the checking
3299 	 has been done earlier in try_coalesce_bswap and the result needs
3300 	 to be emitted as a single store.  */
3301       if (total_orig)
3302 	{
3303 	  /* Avoid the old/new stmt count heuristics.  It should always
3304 	     be beneficial.  */
3305 	  total_new[0] = 1;
3306 	  total_orig[0] = 2;
3307 	}
3308 
3309       if (split_stores)
3310 	{
3311 	  unsigned HOST_WIDE_INT align_bitpos
3312 	    = (group->start - align_base) & (group_align - 1);
3313 	  unsigned HOST_WIDE_INT align = group_align;
3314 	  if (align_bitpos)
3315 	    align = least_bit_hwi (align_bitpos);
3316 	  bytepos = group->start / BITS_PER_UNIT;
3317 	  split_store *store
3318 	    = new split_store (bytepos, group->width, align);
3319 	  unsigned int first = 0;
3320 	  find_constituent_stores (group, &store->orig_stores,
3321 				   &first, group->start, group->width);
3322 	  split_stores->safe_push (store);
3323 	}
3324 
3325       return 1;
3326     }
3327 
3328   unsigned int ret = 0, first = 0;
3329   unsigned HOST_WIDE_INT try_pos = bytepos;
3330 
3331   if (total_orig)
3332     {
3333       unsigned int i;
3334       store_immediate_info *info = group->stores[0];
3335 
3336       total_new[0] = 0;
3337       total_orig[0] = 1; /* The orig store.  */
3338       info = group->stores[0];
3339       if (info->ops[0].base_addr)
3340 	total_orig[0]++;
3341       if (info->ops[1].base_addr)
3342 	total_orig[0]++;
3343       switch (info->rhs_code)
3344 	{
3345 	case BIT_AND_EXPR:
3346 	case BIT_IOR_EXPR:
3347 	case BIT_XOR_EXPR:
3348 	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
3349 	  break;
3350 	default:
3351 	  break;
3352 	}
3353       total_orig[0] *= group->stores.length ();
3354 
3355       FOR_EACH_VEC_ELT (group->stores, i, info)
3356 	{
3357 	  total_new[0] += count_multiple_uses (info);
3358 	  total_orig[0] += (info->bit_not_p
3359 			    + info->ops[0].bit_not_p
3360 			    + info->ops[1].bit_not_p);
3361 	}
3362     }
3363 
3364   if (!allow_unaligned_load)
3365     for (int i = 0; i < 2; ++i)
3366       if (group->load_align[i])
3367 	group_load_align = MIN (group_load_align, group->load_align[i]);
3368 
3369   if (bzero_first)
3370     {
3371       store_immediate_info *gstore;
3372       FOR_EACH_VEC_ELT (group->stores, first, gstore)
3373 	if (!gimple_clobber_p (gstore->stmt))
3374 	  break;
3375       ++first;
3376       ret = 1;
3377       if (split_stores)
3378 	{
3379 	  split_store *store
3380 	    = new split_store (bytepos, gstore->bitsize, align_base);
3381 	  store->orig_stores.safe_push (gstore);
3382 	  store->orig = true;
3383 	  any_orig = true;
3384 	  split_stores->safe_push (store);
3385 	}
3386     }
3387 
3388   while (size > 0)
3389     {
3390       if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3391 	  && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3392 	      || (bzero_first && group->val[try_pos - bytepos] == 0)))
3393 	{
3394 	  /* Skip padding bytes.  */
3395 	  ++try_pos;
3396 	  size -= BITS_PER_UNIT;
3397 	  continue;
3398 	}
3399 
3400       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3401       unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3402       unsigned HOST_WIDE_INT align_bitpos
3403 	= (try_bitpos - align_base) & (group_align - 1);
3404       unsigned HOST_WIDE_INT align = group_align;
3405       bool found_orig = false;
3406       if (align_bitpos)
3407 	align = least_bit_hwi (align_bitpos);
3408       if (!allow_unaligned_store)
3409 	try_size = MIN (try_size, align);
3410       if (!allow_unaligned_load)
3411 	{
3412 	  /* If we can't do or don't want to do unaligned stores
3413 	     as well as loads, we need to take the loads into account
3414 	     as well.  */
3415 	  unsigned HOST_WIDE_INT load_align = group_load_align;
3416 	  align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3417 	  if (align_bitpos)
3418 	    load_align = least_bit_hwi (align_bitpos);
3419 	  for (int i = 0; i < 2; ++i)
3420 	    if (group->load_align[i])
3421 	      {
3422 		align_bitpos
3423 		  = known_alignment (try_bitpos
3424 				     - group->stores[0]->bitpos
3425 				     + group->stores[0]->ops[i].bitpos
3426 				     - group->load_align_base[i]);
3427 		if (align_bitpos & (group_load_align - 1))
3428 		  {
3429 		    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3430 		    load_align = MIN (load_align, a);
3431 		  }
3432 	      }
3433 	  try_size = MIN (try_size, load_align);
3434 	}
3435       store_immediate_info *info
3436 	= find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3437       if (info && !gimple_clobber_p (info->stmt))
3438 	{
3439 	  /* If there is just one original statement for the range, see if
3440 	     we can just reuse the original store which could be even larger
3441 	     than try_size.  */
3442 	  unsigned HOST_WIDE_INT stmt_end
3443 	    = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3444 	  info = find_constituent_stores (group, NULL, &first, try_bitpos,
3445 					  stmt_end - try_bitpos);
3446 	  if (info && info->bitpos >= try_bitpos)
3447 	    {
3448 	      store_immediate_info *info2 = NULL;
3449 	      unsigned int first_copy = first;
3450 	      if (info->bitpos > try_bitpos
3451 		  && stmt_end - try_bitpos <= try_size)
3452 		{
3453 		  info2 = find_constituent_stores (group, NULL, &first_copy,
3454 						   try_bitpos,
3455 						   info->bitpos - try_bitpos);
3456 		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3457 		}
3458 	      if (info2 == NULL && stmt_end - try_bitpos < try_size)
3459 		{
3460 		  info2 = find_constituent_stores (group, NULL, &first_copy,
3461 						   stmt_end,
3462 						   (try_bitpos + try_size)
3463 						   - stmt_end);
3464 		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3465 		}
3466 	      if (info2 == NULL)
3467 		{
3468 		  try_size = stmt_end - try_bitpos;
3469 		  found_orig = true;
3470 		  goto found;
3471 		}
3472 	    }
3473 	}
3474 
3475       /* Approximate store bitsize for the case when there are no padding
3476 	 bits.  */
3477       while (try_size > size)
3478 	try_size /= 2;
3479       /* Now look for whole padding bytes at the end of that bitsize.  */
3480       for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3481 	if (group->mask[try_pos - bytepos + nonmasked - 1]
3482 	    != (unsigned char) ~0U
3483 	    && (!bzero_first
3484 		|| group->val[try_pos - bytepos + nonmasked - 1] != 0))
3485 	  break;
3486       if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3487 	{
3488 	  /* If entire try_size range is padding, skip it.  */
3489 	  try_pos += try_size / BITS_PER_UNIT;
3490 	  size -= try_size;
3491 	  continue;
3492 	}
3493       /* Otherwise try to decrease try_size if the second half, the last
3494 	 3 quarters etc. are padding.  */
3495       nonmasked *= BITS_PER_UNIT;
3496       while (nonmasked <= try_size / 2)
3497 	try_size /= 2;
3498       if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3499 	{
3500 	  /* Now look for whole padding bytes at the start of that bitsize.  */
3501 	  unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3502 	  for (masked = 0; masked < try_bytesize; ++masked)
3503 	    if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3504 		&& (!bzero_first
3505 		    || group->val[try_pos - bytepos + masked] != 0))
3506 	      break;
3507 	  masked *= BITS_PER_UNIT;
3508 	  gcc_assert (masked < try_size);
3509 	  if (masked >= try_size / 2)
3510 	    {
3511 	      while (masked >= try_size / 2)
3512 		{
3513 		  try_size /= 2;
3514 		  try_pos += try_size / BITS_PER_UNIT;
3515 		  size -= try_size;
3516 		  masked -= try_size;
3517 		}
3518 	      /* Need to recompute the alignment, so just retry at the new
3519 		 position.  */
3520 	      continue;
3521 	    }
3522 	}
3523 
3524     found:
3525       ++ret;
3526 
3527       if (split_stores)
3528 	{
3529 	  split_store *store
3530 	    = new split_store (try_pos, try_size, align);
3531 	  info = find_constituent_stores (group, &store->orig_stores,
3532 					  &first, try_bitpos, try_size);
3533 	  if (info
3534 	      && !gimple_clobber_p (info->stmt)
3535 	      && info->bitpos >= try_bitpos
3536 	      && info->bitpos + info->bitsize <= try_bitpos + try_size
3537 	      && (store->orig_stores.length () == 1
3538 		  || found_orig
3539 		  || (info->bitpos == try_bitpos
3540 		      && (info->bitpos + info->bitsize
3541 			  == try_bitpos + try_size))))
3542 	    {
3543 	      store->orig = true;
3544 	      any_orig = true;
3545 	    }
3546 	  split_stores->safe_push (store);
3547 	}
3548 
3549       try_pos += try_size / BITS_PER_UNIT;
3550       size -= try_size;
3551     }
3552 
3553   if (total_orig)
3554     {
3555       unsigned int i;
3556       split_store *store;
3557       /* If we are reusing some original stores and any of the
3558 	 original SSA_NAMEs had multiple uses, we need to subtract
3559 	 those now before we add the new ones.  */
3560       if (total_new[0] && any_orig)
3561 	{
3562 	  FOR_EACH_VEC_ELT (*split_stores, i, store)
3563 	    if (store->orig)
3564 	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3565 	}
3566       total_new[0] += ret; /* The new store.  */
3567       store_immediate_info *info = group->stores[0];
3568       if (info->ops[0].base_addr)
3569 	total_new[0] += ret;
3570       if (info->ops[1].base_addr)
3571 	total_new[0] += ret;
3572       switch (info->rhs_code)
3573 	{
3574 	case BIT_AND_EXPR:
3575 	case BIT_IOR_EXPR:
3576 	case BIT_XOR_EXPR:
3577 	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
3578 	  break;
3579 	default:
3580 	  break;
3581 	}
3582       FOR_EACH_VEC_ELT (*split_stores, i, store)
3583 	{
3584 	  unsigned int j;
3585 	  bool bit_not_p[3] = { false, false, false };
3586 	  /* If all orig_stores have certain bit_not_p set, then
3587 	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
3588 	     If some orig_stores have certain bit_not_p set, then
3589 	     we'd use a BIT_XOR_EXPR with a mask and need to account for
3590 	     it.  */
3591 	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3592 	    {
3593 	      if (info->ops[0].bit_not_p)
3594 		bit_not_p[0] = true;
3595 	      if (info->ops[1].bit_not_p)
3596 		bit_not_p[1] = true;
3597 	      if (info->bit_not_p)
3598 		bit_not_p[2] = true;
3599 	    }
3600 	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3601 	}
3602 
3603     }
3604 
3605   return ret;
3606 }
3607 
3608 /* Return the operation through which the operand IDX (if < 2) or
3609    result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
3610    is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3611    the bits should be xored with mask.  */
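/* A hypothetical example for the helper below: if every constituent
   store of SPLIT_STORE has bit_not_p set for operand IDX (and none of
   the stored values has padding bits, e.g. a bool stored into a full
   byte), BIT_NOT_EXPR is returned and the caller emits a single ~x.
   If instead only the store covering the low 16 bits of a 32-bit
   little-endian split store is inverted, BIT_XOR_EXPR is returned with
   MASK == 0xffff, so x ^ 0xffff flips just those bits.  */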
3612 
3613 static enum tree_code
3614 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3615 {
3616   unsigned int i;
3617   store_immediate_info *info;
3618   unsigned int cnt = 0;
3619   bool any_paddings = false;
3620   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3621     {
3622       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3623       if (bit_not_p)
3624 	{
3625 	  ++cnt;
3626 	  tree lhs = gimple_assign_lhs (info->stmt);
3627 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3628 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3629 	    any_paddings = true;
3630 	}
3631     }
3632   mask = NULL_TREE;
3633   if (cnt == 0)
3634     return NOP_EXPR;
3635   if (cnt == split_store->orig_stores.length () && !any_paddings)
3636     return BIT_NOT_EXPR;
3637 
3638   unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3639   unsigned buf_size = split_store->size / BITS_PER_UNIT;
3640   unsigned char *buf
3641     = XALLOCAVEC (unsigned char, buf_size);
3642   memset (buf, ~0U, buf_size);
3643   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3644     {
3645       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3646       if (!bit_not_p)
3647 	continue;
3648       /* Clear regions with bit_not_p and invert afterwards, rather than
3649 	 clear regions with !bit_not_p, so that gaps in between stores aren't
3650 	 set in the mask.  */
3651       unsigned HOST_WIDE_INT bitsize = info->bitsize;
3652       unsigned HOST_WIDE_INT prec = bitsize;
3653       unsigned int pos_in_buffer = 0;
3654       if (any_paddings)
3655 	{
3656 	  tree lhs = gimple_assign_lhs (info->stmt);
3657 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3658 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3659 	    prec = TYPE_PRECISION (TREE_TYPE (lhs));
3660 	}
3661       if (info->bitpos < try_bitpos)
3662 	{
3663 	  gcc_assert (info->bitpos + bitsize > try_bitpos);
3664 	  if (!BYTES_BIG_ENDIAN)
3665 	    {
3666 	      if (prec <= try_bitpos - info->bitpos)
3667 		continue;
3668 	      prec -= try_bitpos - info->bitpos;
3669 	    }
3670 	  bitsize -= try_bitpos - info->bitpos;
3671 	  if (BYTES_BIG_ENDIAN && prec > bitsize)
3672 	    prec = bitsize;
3673 	}
3674       else
3675 	pos_in_buffer = info->bitpos - try_bitpos;
3676       if (prec < bitsize)
3677 	{
3678 	  /* If this is a bool inversion, invert just the least significant
3679 	     prec bits rather than all bits of it.  */
3680 	  if (BYTES_BIG_ENDIAN)
3681 	    {
3682 	      pos_in_buffer += bitsize - prec;
3683 	      if (pos_in_buffer >= split_store->size)
3684 		continue;
3685 	    }
3686 	  bitsize = prec;
3687 	}
3688       if (pos_in_buffer + bitsize > split_store->size)
3689 	bitsize = split_store->size - pos_in_buffer;
3690       unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3691       if (BYTES_BIG_ENDIAN)
3692 	clear_bit_region_be (p, (BITS_PER_UNIT - 1
3693 				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3694       else
3695 	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3696     }
3697   for (unsigned int i = 0; i < buf_size; ++i)
3698     buf[i] = ~buf[i];
3699   mask = native_interpret_expr (int_type, buf, buf_size);
3700   return BIT_XOR_EXPR;
3701 }
3702 
3703 /* Given a merged store group GROUP output the widened version of it.
3704    The store chain is against the base object BASE.
3705    Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3706    unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3707    Make sure that the number of statements output is less than the number of
3708    original statements.  If a better sequence is possible emit it and
3709    return true.  */
3710 
3711 bool
3712 imm_store_chain_info::output_merged_store (merged_store_group *group)
3713 {
3714   split_store *split_store;
3715   unsigned int i;
3716   unsigned HOST_WIDE_INT start_byte_pos
3717     = group->bitregion_start / BITS_PER_UNIT;
3718 
3719   unsigned int orig_num_stmts = group->stores.length ();
3720   if (orig_num_stmts < 2)
3721     return false;
3722 
3723   auto_vec<class split_store *, 32> split_stores;
3724   bool allow_unaligned_store
3725     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
3726   bool allow_unaligned_load = allow_unaligned_store;
3727   bool bzero_first = false;
3728   store_immediate_info *store;
3729   unsigned int num_clobber_stmts = 0;
3730   if (group->stores[0]->rhs_code == INTEGER_CST)
3731     {
3732       FOR_EACH_VEC_ELT (group->stores, i, store)
3733 	if (gimple_clobber_p (store->stmt))
3734 	  num_clobber_stmts++;
3735 	else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3736 		 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3737 		 && group->start == store->bitpos
3738 		 && group->width == store->bitsize
3739 		 && (group->start % BITS_PER_UNIT) == 0
3740 		 && (group->width % BITS_PER_UNIT) == 0)
3741 	  {
3742 	    bzero_first = true;
3743 	    break;
3744 	  }
3745 	else
3746 	  break;
3747       FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3748 	if (gimple_clobber_p (store->stmt))
3749 	  num_clobber_stmts++;
3750       if (num_clobber_stmts == orig_num_stmts)
3751 	return false;
3752       orig_num_stmts -= num_clobber_stmts;
3753     }
3754   if (allow_unaligned_store || bzero_first)
3755     {
3756       /* If unaligned stores are allowed, see how many stores we'd emit
3757 	 for unaligned and how many stores we'd emit for aligned stores.
3758 	 Only use unaligned stores if it allows fewer stores than aligned.
3759 	 Similarly, if there is a whole region clear first, prefer expanding
3760 	 it together compared to expanding clear first followed by merged
3761 	 further stores.  */
3762       unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3763       int pass_min = 0;
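      /* The four dry-run passes below enumerate the two independent
	 choices: bit 0 of PASS selects allow_unaligned_store and bit 1
	 selects bzero_first, so e.g. pass 3 counts the stores emitted
	 when both are used.  The cheapest feasible combination wins.  */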
3764       for (int pass = 0; pass < 4; ++pass)
3765 	{
3766 	  if (!allow_unaligned_store && (pass & 1) != 0)
3767 	    continue;
3768 	  if (!bzero_first && (pass & 2) != 0)
3769 	    continue;
3770 	  cnt[pass] = split_group (group, (pass & 1) != 0,
3771 				   allow_unaligned_load, (pass & 2) != 0,
3772 				   NULL, NULL, NULL);
3773 	  if (cnt[pass] < cnt[pass_min])
3774 	    pass_min = pass;
3775 	}
3776       if ((pass_min & 1) == 0)
3777 	allow_unaligned_store = false;
3778       if ((pass_min & 2) == 0)
3779 	bzero_first = false;
3780     }
3781   unsigned total_orig, total_new;
3782   split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
3783 	       &split_stores, &total_orig, &total_new);
3784 
3785   /* Determine if there is a clobber covering the whole group at the start,
3786      followed by proposed split stores that cover the whole group.  In that
3787      case, prefer the transformation even if
3788      split_stores.length () == orig_num_stmts.  */
3789   bool clobber_first = false;
3790   if (num_clobber_stmts
3791       && gimple_clobber_p (group->stores[0]->stmt)
3792       && group->start == group->stores[0]->bitpos
3793       && group->width == group->stores[0]->bitsize
3794       && (group->start % BITS_PER_UNIT) == 0
3795       && (group->width % BITS_PER_UNIT) == 0)
3796     {
3797       clobber_first = true;
3798       unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3799       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3800 	if (split_store->bytepos != pos)
3801 	  {
3802 	    clobber_first = false;
3803 	    break;
3804 	  }
3805 	else
3806 	  pos += split_store->size / BITS_PER_UNIT;
3807       if (pos != (group->start + group->width) / BITS_PER_UNIT)
3808 	clobber_first = false;
3809     }
3810 
3811   if (split_stores.length () >= orig_num_stmts + clobber_first)
3812     {
3813 
3814       /* We didn't manage to reduce the number of statements.  Bail out.  */
3815       if (dump_file && (dump_flags & TDF_DETAILS))
3816 	fprintf (dump_file, "Exceeded original number of stmts (%u)."
3817 			    "  Not profitable to emit new sequence.\n",
3818 		 orig_num_stmts);
3819       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3820 	delete split_store;
3821       return false;
3822     }
3823   if (total_orig <= total_new)
3824     {
3825       /* If number of estimated new statements is above estimated original
3826 	 statements, bail out too.  */
3827       if (dump_file && (dump_flags & TDF_DETAILS))
3828 	fprintf (dump_file, "Estimated number of original stmts (%u)"
3829 			    " not larger than estimated number of new"
3830 			    " stmts (%u).\n",
3831 		 total_orig, total_new);
3832       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3833 	delete split_store;
3834       return false;
3835     }
3836   if (group->stores[0]->rhs_code == INTEGER_CST)
3837     {
3838       bool all_orig = true;
3839       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3840 	if (!split_store->orig)
3841 	  {
3842 	    all_orig = false;
3843 	    break;
3844 	  }
3845       if (all_orig)
3846 	{
3847 	  unsigned int cnt = split_stores.length ();
3848 	  store_immediate_info *store;
3849 	  FOR_EACH_VEC_ELT (group->stores, i, store)
3850 	    if (gimple_clobber_p (store->stmt))
3851 	      ++cnt;
3852 	  /* Punt if we wouldn't make any real changes, i.e. keep all
3853 	     orig stmts + all clobbers.  */
3854 	  if (cnt == group->stores.length ())
3855 	    {
3856 	      if (dump_file && (dump_flags & TDF_DETAILS))
3857 		fprintf (dump_file, "Exceeded original number of stmts (%u)."
3858 				    "  Not profitable to emit new sequence.\n",
3859 			 orig_num_stmts);
3860 	      FOR_EACH_VEC_ELT (split_stores, i, split_store)
3861 		delete split_store;
3862 	      return false;
3863 	    }
3864 	}
3865     }
3866 
3867   gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3868   gimple_seq seq = NULL;
3869   tree last_vdef, new_vuse;
3870   last_vdef = gimple_vdef (group->last_stmt);
3871   new_vuse = gimple_vuse (group->last_stmt);
3872   tree bswap_res = NULL_TREE;
3873 
3874   /* Clobbers are not removed.  */
3875   if (gimple_clobber_p (group->last_stmt))
3876     {
3877       new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3878       gimple_set_vdef (group->last_stmt, new_vuse);
3879     }
3880 
3881   if (group->stores[0]->rhs_code == LROTATE_EXPR
3882       || group->stores[0]->rhs_code == NOP_EXPR)
3883     {
3884       tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3885       gimple *ins_stmt = group->stores[0]->ins_stmt;
3886       struct symbolic_number *n = &group->stores[0]->n;
3887       bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3888 
3889       switch (n->range)
3890 	{
3891 	case 16:
3892 	  load_type = bswap_type = uint16_type_node;
3893 	  break;
3894 	case 32:
3895 	  load_type = uint32_type_node;
3896 	  if (bswap)
3897 	    {
3898 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3899 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3900 	    }
3901 	  break;
3902 	case 64:
3903 	  load_type = uint64_type_node;
3904 	  if (bswap)
3905 	    {
3906 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3907 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3908 	    }
3909 	  break;
3910 	default:
3911 	  gcc_unreachable ();
3912 	}
3913 
3914       /* If each load has the vuse of the corresponding store,
3915 	 we've already checked the aliasing in try_coalesce_bswap and
3916 	 we want to sink the new load into seq.  So we need to use new_vuse
3917 	 on the load.  */
3918       if (n->base_addr)
3919 	{
3920 	  if (n->vuse == NULL)
3921 	    {
3922 	      n->vuse = new_vuse;
3923 	      ins_stmt = NULL;
3924 	    }
3925 	  else
3926 	    /* Update vuse in case it has been changed by output_merged_stores.  */
3927 	    n->vuse = gimple_vuse (ins_stmt);
3928 	}
3929       bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3930 				 bswap_type, load_type, n, bswap);
3931       gcc_assert (bswap_res);
3932     }
3933 
3934   gimple *stmt = NULL;
3935   auto_vec<gimple *, 32> orig_stmts;
3936   gimple_seq this_seq;
3937   tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3938 				      is_gimple_mem_ref_addr, NULL_TREE);
3939   gimple_seq_add_seq_without_update (&seq, this_seq);
3940 
3941   tree load_addr[2] = { NULL_TREE, NULL_TREE };
3942   gimple_seq load_seq[2] = { NULL, NULL };
3943   gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3944   for (int j = 0; j < 2; ++j)
3945     {
3946       store_operand_info &op = group->stores[0]->ops[j];
3947       if (op.base_addr == NULL_TREE)
3948 	continue;
3949 
3950       store_immediate_info *infol = group->stores.last ();
3951       if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3952 	{
3953 	  /* We can't pick the location randomly; while we've verified
3954 	     all the loads have the same vuse, they can still be in different
3955 	     basic blocks and we need to pick the one from the last bb:
3956 	     int x = q[0];
3957 	     if (x == N) return;
3958 	     int y = q[1];
3959 	     p[0] = x;
3960 	     p[1] = y;
3961 	     otherwise if we put the wider load at the q[0] load, we might
3962 	     segfault if q[1] is not mapped.  */
3963 	  basic_block bb = gimple_bb (op.stmt);
3964 	  gimple *ostmt = op.stmt;
3965 	  store_immediate_info *info;
3966 	  FOR_EACH_VEC_ELT (group->stores, i, info)
3967 	    {
3968 	      gimple *tstmt = info->ops[j].stmt;
3969 	      basic_block tbb = gimple_bb (tstmt);
3970 	      if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3971 		{
3972 		  ostmt = tstmt;
3973 		  bb = tbb;
3974 		}
3975 	    }
3976 	  load_gsi[j] = gsi_for_stmt (ostmt);
3977 	  load_addr[j]
3978 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3979 				      &load_seq[j], is_gimple_mem_ref_addr,
3980 				      NULL_TREE);
3981 	}
3982       else if (operand_equal_p (base_addr, op.base_addr, 0))
3983 	load_addr[j] = addr;
3984       else
3985 	{
3986 	  load_addr[j]
3987 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3988 				      &this_seq, is_gimple_mem_ref_addr,
3989 				      NULL_TREE);
3990 	  gimple_seq_add_seq_without_update (&seq, this_seq);
3991 	}
3992     }
3993 
3994   FOR_EACH_VEC_ELT (split_stores, i, split_store)
3995     {
3996       unsigned HOST_WIDE_INT try_size = split_store->size;
3997       unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
3998       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3999       unsigned HOST_WIDE_INT align = split_store->align;
4000       tree dest, src;
4001       location_t loc;
4002       if (split_store->orig)
4003 	{
4004 	  /* If there is just a single non-clobber constituent store
4005 	     which covers the whole area, just reuse the lhs and rhs.  */
4006 	  gimple *orig_stmt = NULL;
4007 	  store_immediate_info *store;
4008 	  unsigned int j;
4009 	  FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4010 	    if (!gimple_clobber_p (store->stmt))
4011 	      {
4012 		orig_stmt = store->stmt;
4013 		break;
4014 	      }
4015 	  dest = gimple_assign_lhs (orig_stmt);
4016 	  src = gimple_assign_rhs1 (orig_stmt);
4017 	  loc = gimple_location (orig_stmt);
4018 	}
4019       else
4020 	{
4021 	  store_immediate_info *info;
4022 	  unsigned short clique, base;
4023 	  unsigned int k;
4024 	  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4025 	    orig_stmts.safe_push (info->stmt);
4026 	  tree offset_type
4027 	    = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4028 	  loc = get_location_for_stmts (orig_stmts);
4029 	  orig_stmts.truncate (0);
4030 
4031 	  tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4032 	  int_type = build_aligned_type (int_type, align);
4033 	  dest = fold_build2 (MEM_REF, int_type, addr,
4034 			      build_int_cst (offset_type, try_pos));
4035 	  if (TREE_CODE (dest) == MEM_REF)
4036 	    {
4037 	      MR_DEPENDENCE_CLIQUE (dest) = clique;
4038 	      MR_DEPENDENCE_BASE (dest) = base;
4039 	    }
4040 
4041 	  tree mask;
4042 	  if (bswap_res)
4043 	    mask = integer_zero_node;
4044 	  else
4045 	    mask = native_interpret_expr (int_type,
4046 					  group->mask + try_pos
4047 					  - start_byte_pos,
4048 					  group->buf_size);
4049 
4050 	  tree ops[2];
4051 	  for (int j = 0;
4052 	       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4053 	       ++j)
4054 	    {
4055 	      store_operand_info &op = split_store->orig_stores[0]->ops[j];
4056 	      if (bswap_res)
4057 		ops[j] = bswap_res;
4058 	      else if (op.base_addr)
4059 		{
4060 		  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4061 		    orig_stmts.safe_push (info->ops[j].stmt);
4062 
4063 		  offset_type = get_alias_type_for_stmts (orig_stmts, true,
4064 							  &clique, &base);
4065 		  location_t load_loc = get_location_for_stmts (orig_stmts);
4066 		  orig_stmts.truncate (0);
4067 
4068 		  unsigned HOST_WIDE_INT load_align = group->load_align[j];
4069 		  unsigned HOST_WIDE_INT align_bitpos
4070 		    = known_alignment (try_bitpos
4071 				       - split_store->orig_stores[0]->bitpos
4072 				       + op.bitpos);
4073 		  if (align_bitpos & (load_align - 1))
4074 		    load_align = least_bit_hwi (align_bitpos);
4075 
4076 		  tree load_int_type
4077 		    = build_nonstandard_integer_type (try_size, UNSIGNED);
4078 		  load_int_type
4079 		    = build_aligned_type (load_int_type, load_align);
4080 
4081 		  poly_uint64 load_pos
4082 		    = exact_div (try_bitpos
4083 				 - split_store->orig_stores[0]->bitpos
4084 				 + op.bitpos,
4085 				 BITS_PER_UNIT);
4086 		  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4087 					build_int_cst (offset_type, load_pos));
4088 		  if (TREE_CODE (ops[j]) == MEM_REF)
4089 		    {
4090 		      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4091 		      MR_DEPENDENCE_BASE (ops[j]) = base;
4092 		    }
4093 		  if (!integer_zerop (mask))
4094 		    /* The load might load some bits (that will be masked off
4095 		       later on) uninitialized, avoid -W*uninitialized
4096 		       warnings in that case.  */
4097 		    TREE_NO_WARNING (ops[j]) = 1;
4098 
4099 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4100 					      ops[j]);
4101 		  gimple_set_location (stmt, load_loc);
4102 		  if (gsi_bb (load_gsi[j]))
4103 		    {
4104 		      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4105 		      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4106 		    }
4107 		  else
4108 		    {
4109 		      gimple_set_vuse (stmt, new_vuse);
4110 		      gimple_seq_add_stmt_without_update (&seq, stmt);
4111 		    }
4112 		  ops[j] = gimple_assign_lhs (stmt);
4113 		  tree xor_mask;
4114 		  enum tree_code inv_op
4115 		    = invert_op (split_store, j, int_type, xor_mask);
4116 		  if (inv_op != NOP_EXPR)
4117 		    {
4118 		      stmt = gimple_build_assign (make_ssa_name (int_type),
4119 						  inv_op, ops[j], xor_mask);
4120 		      gimple_set_location (stmt, load_loc);
4121 		      ops[j] = gimple_assign_lhs (stmt);
4122 
4123 		      if (gsi_bb (load_gsi[j]))
4124 			gimple_seq_add_stmt_without_update (&load_seq[j],
4125 							    stmt);
4126 		      else
4127 			gimple_seq_add_stmt_without_update (&seq, stmt);
4128 		    }
4129 		}
4130 	      else
4131 		ops[j] = native_interpret_expr (int_type,
4132 						group->val + try_pos
4133 						- start_byte_pos,
4134 						group->buf_size);
4135 	    }
4136 
4137 	  switch (split_store->orig_stores[0]->rhs_code)
4138 	    {
4139 	    case BIT_AND_EXPR:
4140 	    case BIT_IOR_EXPR:
4141 	    case BIT_XOR_EXPR:
4142 	      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4143 		{
4144 		  tree rhs1 = gimple_assign_rhs1 (info->stmt);
4145 		  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4146 		}
4147 	      location_t bit_loc;
4148 	      bit_loc = get_location_for_stmts (orig_stmts);
4149 	      orig_stmts.truncate (0);
4150 
4151 	      stmt
4152 		= gimple_build_assign (make_ssa_name (int_type),
4153 				       split_store->orig_stores[0]->rhs_code,
4154 				       ops[0], ops[1]);
4155 	      gimple_set_location (stmt, bit_loc);
4156 	      /* If there is just one load and there is a separate
4157 		 load_seq[0], emit the bitwise op right after it.  */
4158 	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4159 		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4160 	      /* Otherwise, if at least one load is in seq, we need to
4161 		 emit the bitwise op right before the store.  If there
4162 		 are two loads and are emitted somewhere else, it would
4163 		 be better to emit the bitwise op as early as possible;
4164 		 we don't track where that would be possible right now
4165 		 though.  */
4166 	      else
4167 		gimple_seq_add_stmt_without_update (&seq, stmt);
4168 	      src = gimple_assign_lhs (stmt);
4169 	      tree xor_mask;
4170 	      enum tree_code inv_op;
4171 	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
4172 	      if (inv_op != NOP_EXPR)
4173 		{
4174 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4175 					      inv_op, src, xor_mask);
4176 		  gimple_set_location (stmt, bit_loc);
4177 		  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4178 		    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4179 		  else
4180 		    gimple_seq_add_stmt_without_update (&seq, stmt);
4181 		  src = gimple_assign_lhs (stmt);
4182 		}
4183 	      break;
4184 	    case LROTATE_EXPR:
4185 	    case NOP_EXPR:
4186 	      src = ops[0];
4187 	      if (!is_gimple_val (src))
4188 		{
4189 		  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4190 					      src);
4191 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4192 		  src = gimple_assign_lhs (stmt);
4193 		}
4194 	      if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4195 		{
4196 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4197 					      NOP_EXPR, src);
4198 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4199 		  src = gimple_assign_lhs (stmt);
4200 		}
4201 	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
4202 	      if (inv_op != NOP_EXPR)
4203 		{
4204 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4205 					      inv_op, src, xor_mask);
4206 		  gimple_set_location (stmt, loc);
4207 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4208 		  src = gimple_assign_lhs (stmt);
4209 		}
4210 	      break;
4211 	    default:
4212 	      src = ops[0];
4213 	      break;
4214 	    }
4215 
4216 	  /* If bit insertion is required, we use the source as an accumulator
4217 	     into which the successive bit-field values are manually inserted.
4218 	     FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
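	  /* A hypothetical example of the accumulation below: inserting a
	     3-bit value V at bit offset 4 of a little-endian 16-bit split
	     store amounts to
	       tem = V & 7;  tem = (uint16_t) tem << 4;  src = src | tem;
	     where the masking, conversion and shift steps are each elided
	     when they would be no-ops.  */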
4219 	  if (group->bit_insertion)
4220 	    FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4221 	      if (info->rhs_code == BIT_INSERT_EXPR
4222 		  && info->bitpos < try_bitpos + try_size
4223 		  && info->bitpos + info->bitsize > try_bitpos)
4224 		{
4225 		  /* Mask, truncate, convert to final type, shift and ior into
4226 		     the accumulator.  Note that every step can be a no-op.  */
4227 		  const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4228 		  const HOST_WIDE_INT end_gap
4229 		    = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4230 		  tree tem = info->ops[0].val;
4231 		  if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4232 		    {
4233 		      tree bitfield_type
4234 			= build_nonstandard_integer_type (info->bitsize,
4235 							  UNSIGNED);
4236 		      tem = gimple_convert (&seq, loc, bitfield_type, tem);
4237 		    }
4238 		  else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4239 		    {
4240 		      const unsigned HOST_WIDE_INT imask
4241 			= (HOST_WIDE_INT_1U << info->bitsize) - 1;
4242 		      tem = gimple_build (&seq, loc,
4243 					  BIT_AND_EXPR, TREE_TYPE (tem), tem,
4244 					  build_int_cst (TREE_TYPE (tem),
4245 							 imask));
4246 		    }
4247 		  const HOST_WIDE_INT shift
4248 		    = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4249 		  if (shift < 0)
4250 		    tem = gimple_build (&seq, loc,
4251 					RSHIFT_EXPR, TREE_TYPE (tem), tem,
4252 					build_int_cst (NULL_TREE, -shift));
4253 		  tem = gimple_convert (&seq, loc, int_type, tem);
4254 		  if (shift > 0)
4255 		    tem = gimple_build (&seq, loc,
4256 					LSHIFT_EXPR, int_type, tem,
4257 					build_int_cst (NULL_TREE, shift));
4258 		  src = gimple_build (&seq, loc,
4259 				      BIT_IOR_EXPR, int_type, tem, src);
4260 		}
4261 
4262 	  if (!integer_zerop (mask))
4263 	    {
4264 	      tree tem = make_ssa_name (int_type);
4265 	      tree load_src = unshare_expr (dest);
4266 	      /* The load might load some or all bits uninitialized;
4267 		 avoid -W*uninitialized warnings in that case.
4268 		 As an optimization, if all the bits are provably
4269 		 uninitialized (no stores at all yet or the previous
4270 		 store is a CLOBBER), it would be nice to optimize away
4271 		 the load and replace it e.g. with 0.  */
4272 	      TREE_NO_WARNING (load_src) = 1;
4273 	      stmt = gimple_build_assign (tem, load_src);
4274 	      gimple_set_location (stmt, loc);
4275 	      gimple_set_vuse (stmt, new_vuse);
4276 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4277 
4278 	      /* FIXME: If there is a single chunk of zero bits in mask,
4279 		 perhaps use BIT_INSERT_EXPR instead?  */
4280 	      stmt = gimple_build_assign (make_ssa_name (int_type),
4281 					  BIT_AND_EXPR, tem, mask);
4282 	      gimple_set_location (stmt, loc);
4283 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4284 	      tem = gimple_assign_lhs (stmt);
4285 
4286 	      if (TREE_CODE (src) == INTEGER_CST)
4287 		src = wide_int_to_tree (int_type,
4288 					wi::bit_and_not (wi::to_wide (src),
4289 							 wi::to_wide (mask)));
4290 	      else
4291 		{
4292 		  tree nmask
4293 		    = wide_int_to_tree (int_type,
4294 					wi::bit_not (wi::to_wide (mask)));
4295 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4296 					      BIT_AND_EXPR, src, nmask);
4297 		  gimple_set_location (stmt, loc);
4298 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4299 		  src = gimple_assign_lhs (stmt);
4300 		}
4301 	      stmt = gimple_build_assign (make_ssa_name (int_type),
4302 					  BIT_IOR_EXPR, tem, src);
4303 	      gimple_set_location (stmt, loc);
4304 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4305 	      src = gimple_assign_lhs (stmt);
4306 	    }
4307 	}
4308 
4309       stmt = gimple_build_assign (dest, src);
4310       gimple_set_location (stmt, loc);
4311       gimple_set_vuse (stmt, new_vuse);
4312       gimple_seq_add_stmt_without_update (&seq, stmt);
4313 
4314       if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4315 	add_stmt_to_eh_lp (stmt, group->lp_nr);
4316 
4317       tree new_vdef;
4318       if (i < split_stores.length () - 1)
4319 	new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4320       else
4321 	new_vdef = last_vdef;
4322 
4323       gimple_set_vdef (stmt, new_vdef);
4324       SSA_NAME_DEF_STMT (new_vdef) = stmt;
4325       new_vuse = new_vdef;
4326     }
4327 
4328   FOR_EACH_VEC_ELT (split_stores, i, split_store)
4329     delete split_store;
4330 
4331   gcc_assert (seq);
4332   if (dump_file)
4333     {
4334       fprintf (dump_file,
4335 	       "New sequence of %u stores to replace old one of %u stores\n",
4336 	       split_stores.length (), orig_num_stmts);
4337       if (dump_flags & TDF_DETAILS)
4338 	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4339     }
4340 
4341   if (gimple_clobber_p (group->last_stmt))
4342     update_stmt (group->last_stmt);
4343 
4344   if (group->lp_nr > 0)
4345     {
4346       /* We're going to insert a sequence of (potentially) throwing stores
4347 	 into an active EH region.  This means that we're going to create
4348 	 new basic blocks with EH edges pointing to the post landing pad
4349 	 and, therefore, to have to update its PHI nodes, if any.  For the
4350 	 virtual PHI node, we're going to use the VDEFs created above, but
4351 	 for the other nodes, we need to record the original reaching defs.  */
4352       eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4353       basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4354       basic_block last_bb = gimple_bb (group->last_stmt);
4355       edge last_edge = find_edge (last_bb, lp_bb);
4356       auto_vec<tree, 16> last_defs;
4357       gphi_iterator gpi;
4358       for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4359 	{
4360 	  gphi *phi = gpi.phi ();
4361 	  tree last_def;
4362 	  if (virtual_operand_p (gimple_phi_result (phi)))
4363 	    last_def = NULL_TREE;
4364 	  else
4365 	    last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4366 	  last_defs.safe_push (last_def);
4367 	}
4368 
4369       /* Do the insertion.  Then, if new basic blocks have been created in the
4370 	 process, rewind the chain of VDEFs created above to walk the new basic
4371 	 blocks and update the corresponding arguments of the PHI nodes.  */
4372       update_modified_stmts (seq);
4373       if (gimple_find_sub_bbs (seq, &last_gsi))
4374 	while (last_vdef != gimple_vuse (group->last_stmt))
4375 	  {
4376 	    gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4377 	    if (stmt_could_throw_p (cfun, stmt))
4378 	      {
4379 		edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4380 		unsigned int i;
4381 		for (gpi = gsi_start_phis (lp_bb), i = 0;
4382 		     !gsi_end_p (gpi);
4383 		     gsi_next (&gpi), i++)
4384 		  {
4385 		    gphi *phi = gpi.phi ();
4386 		    tree new_def;
4387 		    if (virtual_operand_p (gimple_phi_result (phi)))
4388 		      new_def = last_vdef;
4389 		    else
4390 		      new_def = last_defs[i];
4391 		    add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4392 		  }
4393 	      }
4394 	    last_vdef = gimple_vuse (stmt);
4395 	  }
4396     }
4397   else
4398     gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4399 
4400   for (int j = 0; j < 2; ++j)
4401     if (load_seq[j])
4402       gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4403 
4404   return true;
4405 }
4406 
4407 /* Process the merged_store_group objects created in the coalescing phase.
4408    The stores are all against the base object BASE.
4409    Try to output the widened stores and delete the original statements if
4410    successful.  Return true iff any changes were made.  */
4411 
4412 bool
4413 imm_store_chain_info::output_merged_stores ()
4414 {
4415   unsigned int i;
4416   merged_store_group *merged_store;
4417   bool ret = false;
4418   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4419     {
4420       if (dbg_cnt (store_merging)
4421 	  && output_merged_store (merged_store))
4422 	{
4423 	  unsigned int j;
4424 	  store_immediate_info *store;
4425 	  FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4426 	    {
4427 	      gimple *stmt = store->stmt;
4428 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4429 	      /* Don't remove clobbers, they are still useful even if
4430 		 everything is overwritten afterwards.  */
4431 	      if (gimple_clobber_p (stmt))
4432 		continue;
4433 	      gsi_remove (&gsi, true);
4434 	      if (store->lp_nr)
4435 		remove_stmt_from_eh_lp (stmt);
4436 	      if (stmt != merged_store->last_stmt)
4437 		{
4438 		  unlink_stmt_vdef (stmt);
4439 		  release_defs (stmt);
4440 		}
4441 	    }
4442 	  ret = true;
4443 	}
4444     }
4445   if (ret && dump_file)
4446     fprintf (dump_file, "Merging successful!\n");
4447 
4448   return ret;
4449 }
4450 
4451 /* Coalesce the store_immediate_info objects recorded against the base object
4452    BASE in the first phase and output them.
4453    Delete the allocated structures.
4454    Return true if any changes were made.  */
4455 
4456 bool
4457 imm_store_chain_info::terminate_and_process_chain ()
4458 {
4459   /* Process store chain.  */
4460   bool ret = false;
4461   if (m_store_info.length () > 1)
4462     {
4463       ret = coalesce_immediate_stores ();
4464       if (ret)
4465 	ret = output_merged_stores ();
4466     }
4467 
4468   /* Delete all the entries we allocated ourselves.  */
4469   store_immediate_info *info;
4470   unsigned int i;
4471   FOR_EACH_VEC_ELT (m_store_info, i, info)
4472     delete info;
4473 
4474   merged_store_group *merged_info;
4475   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4476     delete merged_info;
4477 
4478   return ret;
4479 }
4480 
4481 /* Return true iff LHS is a destination potentially interesting for
4482    store merging.  In practice these are the codes that get_inner_reference
4483    can process.  */
4484 
4485 static bool
4486 lhs_valid_for_store_merging_p (tree lhs)
4487 {
4488   if (DECL_P (lhs))
4489     return true;
4490 
4491   switch (TREE_CODE (lhs))
4492     {
4493     case ARRAY_REF:
4494     case ARRAY_RANGE_REF:
4495     case BIT_FIELD_REF:
4496     case COMPONENT_REF:
4497     case MEM_REF:
4498       return true;
4499     default:
4500       return false;
4501     }
4502 
4503   gcc_unreachable ();
4504 }
4505 
4506 /* Return true if the tree RHS is a constant we want to consider
4507    during store merging.  In practice accept all codes that
4508    native_encode_expr accepts.  */
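/* E.g. INTEGER_CST and REAL_CST values of fixed-size modes qualify, as
   does an empty CONSTRUCTOR such as the "= {}" in "struct S s = {};",
   which is what allows a whole-object clearing store to head a group
   (the bzero_first case in split_group).  */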
4509 
4510 static bool
4511 rhs_valid_for_store_merging_p (tree rhs)
4512 {
4513   unsigned HOST_WIDE_INT size;
4514   if (TREE_CODE (rhs) == CONSTRUCTOR
4515       && CONSTRUCTOR_NELTS (rhs) == 0
4516       && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4517       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4518     return true;
4519   return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4520 	  && native_encode_expr (rhs, NULL, size) != 0);
4521 }
4522 
4523 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4524    and return true on success or false on failure.  */
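/* For example (purely illustrative numbers): with BYTE_OFF == 2,
   *PBITPOS == 5 and a bit region of [0, 32), the adjustment below yields
   *PBITPOS == 21 and a bit region of [16, 48).  Failure is reported when
   the adjusted bit position would be negative or would not fit the
   signed HOST_WIDE_INT representation.  */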
4525 
4526 static bool
4527 adjust_bit_pos (poly_offset_int byte_off,
4528 		poly_int64  *pbitpos,
4529 		poly_uint64 *pbitregion_start,
4530 		poly_uint64 *pbitregion_end)
4531 {
4532   poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4533   bit_off += *pbitpos;
4534 
4535   if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4536     {
4537       if (maybe_ne (*pbitregion_end, 0U))
4538 	{
4539 	  bit_off = byte_off << LOG2_BITS_PER_UNIT;
4540 	  bit_off += *pbitregion_start;
4541 	  if (bit_off.to_uhwi (pbitregion_start))
4542 	    {
4543 	      bit_off = byte_off << LOG2_BITS_PER_UNIT;
4544 	      bit_off += *pbitregion_end;
4545 	      if (!bit_off.to_uhwi (pbitregion_end))
4546 		*pbitregion_end = 0;
4547 	    }
4548 	  else
4549 	    *pbitregion_end = 0;
4550 	}
4551       return true;
4552     }
4553   else
4554     return false;
4555 }
4556 
4557 /* If MEM is a memory reference usable for store merging (either as
4558    store destination or for loads), return the non-NULL base_addr
4559    and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4560    Otherwise return NULL; *PBITSIZE should still be valid even in that
4561    case.  */
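/* For instance (a hypothetical case): for MEM[p_1 + 4] accessed as a
   4-byte int, the function below returns p_1 as the base address and
   sets *PBITSIZE to 32, *PBITPOS to 32 and the bit region to [32, 64),
   folding the constant byte offset into the bit position so that stores
   at different offsets off the same pointer end up in the same chain.  */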
4562 
4563 static tree
4564 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4565 			     poly_uint64 *pbitpos,
4566 			     poly_uint64 *pbitregion_start,
4567 			     poly_uint64 *pbitregion_end)
4568 {
4569   poly_int64 bitsize, bitpos;
4570   poly_uint64 bitregion_start = 0, bitregion_end = 0;
4571   machine_mode mode;
4572   int unsignedp = 0, reversep = 0, volatilep = 0;
4573   tree offset;
4574   tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4575 					&unsignedp, &reversep, &volatilep);
4576   *pbitsize = bitsize;
4577   if (known_eq (bitsize, 0))
4578     return NULL_TREE;
4579 
4580   if (TREE_CODE (mem) == COMPONENT_REF
4581       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4582     {
4583       get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4584       if (maybe_ne (bitregion_end, 0U))
4585 	bitregion_end += 1;
4586     }
4587 
4588   if (reversep)
4589     return NULL_TREE;
4590 
4591   /* We do not want to rewrite TARGET_MEM_REFs.  */
4592   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4593     return NULL_TREE;
4594   /* In some cases get_inner_reference may return a
4595      MEM_REF [ptr + byteoffset].  For the purposes of this pass
4596      canonicalize the base_addr to MEM_REF [ptr] and take
4597      byteoffset into account in the bitpos.  This occurs in
4598      PR 23684 and this way we can catch more chains.  */
4599   else if (TREE_CODE (base_addr) == MEM_REF)
4600     {
4601       if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4602 			   &bitregion_start, &bitregion_end))
4603 	return NULL_TREE;
4604       base_addr = TREE_OPERAND (base_addr, 0);
4605     }
4606   /* get_inner_reference returns the base object, get at its
4607      address now.  */
4608   else
4609     {
4610       if (maybe_lt (bitpos, 0))
4611 	return NULL_TREE;
4612       base_addr = build_fold_addr_expr (base_addr);
4613     }
4614 
4615   if (offset)
4616     {
4617       /* If the access is variable offset then a base decl has to be
4618 	 address-taken to be able to emit pointer-based stores to it.
4619 	 ???  We might be able to get away with re-using the original
4620 	 base up to the first variable part and then wrapping that inside
4621 	 a BIT_FIELD_REF.  */
4622       tree base = get_base_address (base_addr);
4623       if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4624 	return NULL_TREE;
4625 
4626       /* Similarly to above for the base, remove constant from the offset.  */
4627       if (TREE_CODE (offset) == PLUS_EXPR
4628 	  && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4629 	  && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4630 			     &bitpos, &bitregion_start, &bitregion_end))
4631 	offset = TREE_OPERAND (offset, 0);
4632 
4633       base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4634 			  base_addr, offset);
4635     }
4636 
4637   if (known_eq (bitregion_end, 0U))
4638     {
4639       bitregion_start = round_down_to_byte_boundary (bitpos);
4640       bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4641     }
4642 
4643   *pbitsize = bitsize;
4644   *pbitpos = bitpos;
4645   *pbitregion_start = bitregion_start;
4646   *pbitregion_end = bitregion_end;
4647   return base_addr;
4648 }
4649 
4650 /* Return true if STMT is a load that can be used for store merging.
4651    In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
4652    BITREGION_END are properties of the corresponding store.  */
4653 
4654 static bool
4655 handled_load (gimple *stmt, store_operand_info *op,
4656 	      poly_uint64 bitsize, poly_uint64 bitpos,
4657 	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4658 {
4659   if (!is_gimple_assign (stmt))
4660     return false;
4661   if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4662     {
4663       tree rhs1 = gimple_assign_rhs1 (stmt);
4664       if (TREE_CODE (rhs1) == SSA_NAME
4665 	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4666 			   bitregion_start, bitregion_end))
4667 	{
4668 	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4669 	     been optimized earlier, but if allowed here, would confuse the
4670 	     multiple uses counting.  */
4671 	  if (op->bit_not_p)
4672 	    return false;
4673 	  op->bit_not_p = !op->bit_not_p;
4674 	  return true;
4675 	}
4676       return false;
4677     }
4678   if (gimple_vuse (stmt)
4679       && gimple_assign_load_p (stmt)
4680       && !stmt_can_throw_internal (cfun, stmt)
4681       && !gimple_has_volatile_ops (stmt))
4682     {
4683       tree mem = gimple_assign_rhs1 (stmt);
4684       op->base_addr
4685 	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4686 				       &op->bitregion_start,
4687 				       &op->bitregion_end);
4688       if (op->base_addr != NULL_TREE
4689 	  && known_eq (op->bitsize, bitsize)
4690 	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4691 	  && known_ge (op->bitpos - op->bitregion_start,
4692 		       bitpos - bitregion_start)
4693 	  && known_ge (op->bitregion_end - op->bitpos,
4694 		       bitregion_end - bitpos))
4695 	{
4696 	  op->stmt = stmt;
4697 	  op->val = mem;
4698 	  op->bit_not_p = false;
4699 	  return true;
4700 	}
4701     }
4702   return false;
4703 }
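/* For illustration, a hypothetical GIMPLE sequence that handled_load
   accepts:

     _1 = *q_5;
     _2 = ~_1;
     *p_7 = _2;

   Here the load statement defining _1 is recorded in *OP with bit_not_p
   set, so the merged store can later be emitted as a wider load, a bitwise
   NOT and a wider store.  */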
4704 
4705 /* Return the index number of the landing pad for STMT, if any.  */
4706 
4707 static int
4708 lp_nr_for_store (gimple *stmt)
4709 {
4710   if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4711     return 0;
4712 
4713   if (!stmt_could_throw_p (cfun, stmt))
4714     return 0;
4715 
4716   return lookup_stmt_eh_lp (stmt);
4717 }
4718 
4719 /* Record the store STMT for store merging optimization if it can be
4720    optimized.  Return true if any changes were made.  */
4721 
4722 bool
4723 pass_store_merging::process_store (gimple *stmt)
4724 {
4725   tree lhs = gimple_assign_lhs (stmt);
4726   tree rhs = gimple_assign_rhs1 (stmt);
4727   poly_uint64 bitsize, bitpos;
4728   poly_uint64 bitregion_start, bitregion_end;
4729   tree base_addr
4730     = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4731 				   &bitregion_start, &bitregion_end);
4732   if (known_eq (bitsize, 0U))
4733     return false;
4734 
4735   bool invalid = (base_addr == NULL_TREE
4736 		  || (maybe_gt (bitsize,
4737 				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4738 		      && TREE_CODE (rhs) != INTEGER_CST
4739 		      && (TREE_CODE (rhs) != CONSTRUCTOR
4740 			  || CONSTRUCTOR_NELTS (rhs) != 0)));
4741   enum tree_code rhs_code = ERROR_MARK;
4742   bool bit_not_p = false;
4743   struct symbolic_number n;
4744   gimple *ins_stmt = NULL;
4745   store_operand_info ops[2];
4746   if (invalid)
4747     ;
4748   else if (rhs_valid_for_store_merging_p (rhs))
4749     {
4750       rhs_code = INTEGER_CST;
4751       ops[0].val = rhs;
4752     }
4753   else if (TREE_CODE (rhs) != SSA_NAME)
4754     invalid = true;
4755   else
4756     {
4757       gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4758       if (!is_gimple_assign (def_stmt))
4759 	invalid = true;
4760       else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4761 			     bitregion_start, bitregion_end))
4762 	rhs_code = MEM_REF;
4763       else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4764 	{
4765 	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
4766 	  if (TREE_CODE (rhs1) == SSA_NAME
4767 	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4768 	    {
4769 	      bit_not_p = true;
4770 	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
4771 	    }
4772 	}
4773 
4774       if (rhs_code == ERROR_MARK && !invalid)
4775 	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4776 	  {
4777 	  case BIT_AND_EXPR:
4778 	  case BIT_IOR_EXPR:
4779 	  case BIT_XOR_EXPR:
4780 	    tree rhs1, rhs2;
4781 	    rhs1 = gimple_assign_rhs1 (def_stmt);
4782 	    rhs2 = gimple_assign_rhs2 (def_stmt);
4783 	    invalid = true;
4784 	    if (TREE_CODE (rhs1) != SSA_NAME)
4785 	      break;
4786 	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4787 	    if (!is_gimple_assign (def_stmt1)
4788 		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4789 				  bitregion_start, bitregion_end))
4790 	      break;
4791 	    if (rhs_valid_for_store_merging_p (rhs2))
4792 	      ops[1].val = rhs2;
4793 	    else if (TREE_CODE (rhs2) != SSA_NAME)
4794 	      break;
4795 	    else
4796 	      {
4797 		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4798 		if (!is_gimple_assign (def_stmt2))
4799 		  break;
4800 		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4801 					bitregion_start, bitregion_end))
4802 		  break;
4803 	      }
4804 	    invalid = false;
4805 	    break;
4806 	  default:
4807 	    invalid = true;
4808 	    break;
4809 	  }
4810 
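      /* For illustration (a hypothetical source-level example), in

	   p[0] = x >> 24;
	   p[1] = x >> 16;
	   p[2] = x >> 8;
	   p[3] = x;

	 each stored value is one byte extracted from X, so the check below
	 lets find_bswap_or_nop_1 describe it with a symbolic number whose
	 byte markers are all known.  When the store is not otherwise
	 handled, it is recorded with rhs_code LROTATE_EXPR so the later
	 phases can emit the whole group as a single, possibly byte-swapped,
	 wider store.  */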
4811       unsigned HOST_WIDE_INT const_bitsize;
4812       if (bitsize.is_constant (&const_bitsize)
4813 	  && (const_bitsize % BITS_PER_UNIT) == 0
4814 	  && const_bitsize <= 64
4815 	  && multiple_p (bitpos, BITS_PER_UNIT))
4816 	{
4817 	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4818 	  if (ins_stmt)
4819 	    {
4820 	      uint64_t nn = n.n;
4821 	      for (unsigned HOST_WIDE_INT i = 0;
4822 		   i < const_bitsize;
4823 		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4824 		if ((nn & MARKER_MASK) == 0
4825 		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4826 		  {
4827 		    ins_stmt = NULL;
4828 		    break;
4829 		  }
4830 	      if (ins_stmt)
4831 		{
4832 		  if (invalid)
4833 		    {
4834 		      rhs_code = LROTATE_EXPR;
4835 		      ops[0].base_addr = NULL_TREE;
4836 		      ops[1].base_addr = NULL_TREE;
4837 		    }
4838 		  invalid = false;
4839 		}
4840 	    }
4841 	}
4842 
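      /* For illustration (a hypothetical example), a bit-field store such
	 as

	   struct S { unsigned int a : 3, b : 7; };
	   s_1->b = x_2;

	 has a 7-bit, non-byte-aligned destination; the fallback below
	 records it as a BIT_INSERT_EXPR so it can still participate in a
	 merged group.  */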
4843       if (invalid
4844 	  && bitsize.is_constant (&const_bitsize)
4845 	  && ((const_bitsize % BITS_PER_UNIT) != 0
4846 	      || !multiple_p (bitpos, BITS_PER_UNIT))
4847 	  && const_bitsize <= 64)
4848 	{
4849 	  /* Bypass a conversion to the bit-field type.  */
4850 	  if (!bit_not_p
4851 	      && is_gimple_assign (def_stmt)
4852 	      && CONVERT_EXPR_CODE_P (rhs_code))
4853 	    {
4854 	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
4855 	      if (TREE_CODE (rhs1) == SSA_NAME
4856 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4857 		rhs = rhs1;
4858 	    }
4859 	  rhs_code = BIT_INSERT_EXPR;
4860 	  bit_not_p = false;
4861 	  ops[0].val = rhs;
4862 	  ops[0].base_addr = NULL_TREE;
4863 	  ops[1].base_addr = NULL_TREE;
4864 	  invalid = false;
4865 	}
4866     }
4867 
4868   unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4869   unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4870   if (invalid
4871       || !bitsize.is_constant (&const_bitsize)
4872       || !bitpos.is_constant (&const_bitpos)
4873       || !bitregion_start.is_constant (&const_bitregion_start)
4874       || !bitregion_end.is_constant (&const_bitregion_end))
4875     return terminate_all_aliasing_chains (NULL, stmt);
4876 
4877   if (!ins_stmt)
4878     memset (&n, 0, sizeof (n));
4879 
4880   class imm_store_chain_info **chain_info = NULL;
4881   bool ret = false;
4882   if (base_addr)
4883     chain_info = m_stores.get (base_addr);
4884 
4885   store_immediate_info *info;
4886   if (chain_info)
4887     {
4888       unsigned int ord = (*chain_info)->m_store_info.length ();
4889       info = new store_immediate_info (const_bitsize, const_bitpos,
4890 				       const_bitregion_start,
4891 				       const_bitregion_end,
4892 				       stmt, ord, rhs_code, n, ins_stmt,
4893 				       bit_not_p, lp_nr_for_store (stmt),
4894 				       ops[0], ops[1]);
4895       if (dump_file && (dump_flags & TDF_DETAILS))
4896 	{
4897 	  fprintf (dump_file, "Recording immediate store from stmt:\n");
4898 	  print_gimple_stmt (dump_file, stmt, 0);
4899 	}
4900       (*chain_info)->m_store_info.safe_push (info);
4901       ret |= terminate_all_aliasing_chains (chain_info, stmt);
4902       /* If we reach the limit of stores to merge in a chain, terminate and
4903 	 process the chain now.  */
4904       if ((*chain_info)->m_store_info.length ()
4905 	  == (unsigned int) param_max_stores_to_merge)
4906 	{
4907 	  if (dump_file && (dump_flags & TDF_DETAILS))
4908 	    fprintf (dump_file,
4909 		     "Reached maximum number of statements to merge:\n");
4910 	  ret |= terminate_and_process_chain (*chain_info);
4911 	}
4912       return ret;
4913     }
4914 
4915   /* Does this store alias any existing chain?  */
4916   ret |= terminate_all_aliasing_chains (NULL, stmt);
4917   /* Start a new chain.  */
4918   class imm_store_chain_info *new_chain
4919     = new imm_store_chain_info (m_stores_head, base_addr);
4920   info = new store_immediate_info (const_bitsize, const_bitpos,
4921 				   const_bitregion_start,
4922 				   const_bitregion_end,
4923 				   stmt, 0, rhs_code, n, ins_stmt,
4924 				   bit_not_p, lp_nr_for_store (stmt),
4925 				   ops[0], ops[1]);
4926   new_chain->m_store_info.safe_push (info);
4927   m_stores.put (base_addr, new_chain);
4928   if (dump_file && (dump_flags & TDF_DETAILS))
4929     {
4930       fprintf (dump_file, "Starting new chain with statement:\n");
4931       print_gimple_stmt (dump_file, stmt, 0);
4932       fprintf (dump_file, "The base object is:\n");
4933       print_generic_expr (dump_file, base_addr);
4934       fprintf (dump_file, "\n");
4935     }
4936   return ret;
4937 }
4938 
4939 /* Return true if STMT is a store valid for store merging.  */
4940 
4941 static bool
4942 store_valid_for_store_merging_p (gimple *stmt)
4943 {
4944   return gimple_assign_single_p (stmt)
4945 	 && gimple_vdef (stmt)
4946 	 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4947 	 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
4948 }
4949 
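/* Rough meaning of the classification below: BB_INVALID - nothing useful
   for this pass, BB_VALID - contains stores worth processing,
   BB_EXTENDED_VALID - additionally, open store chains may be carried over
   into the single fall-through successor (only relevant when the function
   can throw non-call exceptions).  */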
4950 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4951 
4952 /* Return the status of basic block BB with respect to store merging.  */
4953 
4954 static enum basic_block_status
4955 get_status_for_store_merging (basic_block bb)
4956 {
4957   unsigned int num_statements = 0;
4958   gimple_stmt_iterator gsi;
4959   edge e;
4960 
4961   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4962     {
4963       gimple *stmt = gsi_stmt (gsi);
4964 
4965       if (is_gimple_debug (stmt))
4966 	continue;
4967 
4968       if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4969 	break;
4970     }
4971 
4972   if (num_statements == 0)
4973     return BB_INVALID;
4974 
4975   if (cfun->can_throw_non_call_exceptions && cfun->eh
4976       && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
4977       && (e = find_fallthru_edge (bb->succs))
4978       && e->dest == bb->next_bb)
4979     return BB_EXTENDED_VALID;
4980 
4981   return num_statements >= 2 ? BB_VALID : BB_INVALID;
4982 }
4983 
4984 /* Entry point for the pass.  Go over each basic block recording chains of
4985    immediate stores.  Upon encountering a terminating statement (as defined
4986    by stmt_terminates_chain_p), process the recorded stores and emit the widened
4987    variants.  */
4988 
4989 unsigned int
4990 pass_store_merging::execute (function *fun)
4991 {
4992   basic_block bb;
4993   hash_set<gimple *> orig_stmts;
4994   bool changed = false, open_chains = false;
4995 
4996   /* If the function can throw and catch non-call exceptions, we'll be trying
4997      to merge stores across different basic blocks so we need to first unsplit
4998      the EH edges in order to streamline the CFG of the function.  */
4999   if (cfun->can_throw_non_call_exceptions && cfun->eh)
5000     unsplit_eh_edges ();
5001 
5002   calculate_dominance_info (CDI_DOMINATORS);
5003 
5004   FOR_EACH_BB_FN (bb, fun)
5005     {
5006       const basic_block_status bb_status = get_status_for_store_merging (bb);
5007       gimple_stmt_iterator gsi;
5008 
5009       if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5010 	{
5011 	  changed |= terminate_and_process_all_chains ();
5012 	  open_chains = false;
5013 	}
5014 
5015       if (bb_status == BB_INVALID)
5016 	continue;
5017 
5018       if (dump_file && (dump_flags & TDF_DETAILS))
5019 	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5020 
5021       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5022 	{
5023 	  gimple *stmt = gsi_stmt (gsi);
5024 
5025 	  if (is_gimple_debug (stmt))
5026 	    continue;
5027 
5028 	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5029 	    {
5030 	      /* Terminate all chains.  */
5031 	      if (dump_file && (dump_flags & TDF_DETAILS))
5032 		fprintf (dump_file, "Volatile access terminates "
5033 				    "all chains\n");
5034 	      changed |= terminate_and_process_all_chains ();
5035 	      open_chains = false;
5036 	      continue;
5037 	    }
5038 
5039 	  if (store_valid_for_store_merging_p (stmt))
5040 	    changed |= process_store (stmt);
5041 	  else
5042 	    changed |= terminate_all_aliasing_chains (NULL, stmt);
5043 	}
5044 
5045       if (bb_status == BB_EXTENDED_VALID)
5046 	open_chains = true;
5047       else
5048 	{
5049 	  changed |= terminate_and_process_all_chains ();
5050 	  open_chains = false;
5051 	}
5052     }
5053 
5054   if (open_chains)
5055     changed |= terminate_and_process_all_chains ();
5056 
5057   /* If the function can throw and catch non-call exceptions and something
5058      changed during the pass, then the CFG has (very likely) changed too.  */
5059   if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5060     {
5061       free_dominance_info (CDI_DOMINATORS);
5062       return TODO_cleanup_cfg;
5063     }
5064 
5065   return 0;
5066 }
5067 
5068 } // anon namespace
5069 
5070 /* Construct and return a store merging pass object.  */
5071 
5072 gimple_opt_pass *
5073 make_pass_store_merging (gcc::context *ctxt)
5074 {
5075   return new pass_store_merging (ctxt);
5076 }
5077 
5078 #if CHECKING_P
5079 
5080 namespace selftest {
5081 
5082 /* Selftests for store merging helpers.  */
5083 
5084 /* Assert that all elements of the byte arrays X and Y, both of length N,
5085    are equal.  */
5086 
5087 static void
5088 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5089 {
5090   for (unsigned int i = 0; i < n; i++)
5091     {
5092       if (x[i] != y[i])
5093 	{
5094 	  fprintf (stderr, "Arrays do not match.  X:\n");
5095 	  dump_char_array (stderr, x, n);
5096 	  fprintf (stderr, "Y:\n");
5097 	  dump_char_array (stderr, y, n);
5098 	}
5099       ASSERT_EQ (x[i], y[i]);
5100     }
5101 }
5102 
5103 /* Test shift_bytes_in_array_left, checking that it carries bits across
5104    byte boundaries correctly.  */
5105 
5106 static void
5107 verify_shift_bytes_in_array_left (void)
5108 {
5109    /* byte 1   | byte 0
5110       00011111 | 11100000.  */
5111   unsigned char orig[2] = { 0xe0, 0x1f };
5112   unsigned char in[2];
5113   memcpy (in, orig, sizeof orig);
5114 
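  /* Viewed with element 0 as the least significant byte, the array holds
     0x1fe0; shifting left by 2 bits gives 0x7f80, i.e. bytes
     { 0x80, 0x7f }.  */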
5115   unsigned char expected[2] = { 0x80, 0x7f };
5116   shift_bytes_in_array_left (in, sizeof (in), 2);
5117   verify_array_eq (in, expected, sizeof (in));
5118 
5119   memcpy (in, orig, sizeof orig);
5120   memcpy (expected, orig, sizeof orig);
5121   /* Check that shifting by zero doesn't change anything.  */
5122   shift_bytes_in_array_left (in, sizeof (in), 0);
5123   verify_array_eq (in, expected, sizeof (in));
5124 
5125 }
5126 
5127 /* Test shift_bytes_in_array_right, checking that it carries bits across
5128    byte boundaries correctly.  */
5129 
5130 static void
5131 verify_shift_bytes_in_array_right (void)
5132 {
5133    /* byte 0   | byte 1   (element 0 is the most significant byte here)
5134       00011111 | 11100000.  */
5135   unsigned char orig[2] = { 0x1f, 0xe0};
5136   unsigned char in[2];
5137   memcpy (in, orig, sizeof orig);
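  /* Viewed with element 0 as the most significant byte, the array holds
     0x1fe0; shifting right by 2 bits gives 0x07f8, i.e. bytes
     { 0x07, 0xf8 }.  */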
5138   unsigned char expected[2] = { 0x07, 0xf8};
5139   shift_bytes_in_array_right (in, sizeof (in), 2);
5140   verify_array_eq (in, expected, sizeof (in));
5141 
5142   memcpy (in, orig, sizeof orig);
5143   memcpy (expected, orig, sizeof orig);
5144   /* Check that shifting by zero doesn't change anything.  */
5145   shift_bytes_in_array_right (in, sizeof (in), 0);
5146   verify_array_eq (in, expected, sizeof (in));
5147 }
5148 
5149 /* Test that clear_bit_region clears exactly the bits asked for and
5150    nothing more.  */
5151 
5152 static void
5153 verify_clear_bit_region (void)
5154 {
5155   /* Start with all bits set and test clearing various patterns in them.  */
5156   unsigned char orig[3] = { 0xff, 0xff, 0xff};
5157   unsigned char in[3];
5158   unsigned char expected[3];
5159   memcpy (in, orig, sizeof in);
5160 
5161   /* Check zeroing out all the bits.  */
5162   clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5163   expected[0] = expected[1] = expected[2] = 0;
5164   verify_array_eq (in, expected, sizeof in);
5165 
5166   memcpy (in, orig, sizeof in);
5167   /* Leave the first and last bits intact.  */
5168   clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5169   expected[0] = 0x1;
5170   expected[1] = 0;
5171   expected[2] = 0x80;
5172   verify_array_eq (in, expected, sizeof in);
5173 }
5174 
5175 /* Test that clear_bit_region_be clears exactly the bits asked for and
5176    nothing more.  */
5177 
5178 static void
5179 verify_clear_bit_region_be (void)
5180 {
5181   /* Start with all bits set and test clearing various patterns in them.  */
5182   unsigned char orig[3] = { 0xff, 0xff, 0xff};
5183   unsigned char in[3];
5184   unsigned char expected[3];
5185   memcpy (in, orig, sizeof in);
5186 
5187   /* Check zeroing out all the bits.  */
5188   clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5189   expected[0] = expected[1] = expected[2] = 0;
5190   verify_array_eq (in, expected, sizeof in);
5191 
5192   memcpy (in, orig, sizeof in);
5193   /* Leave the first and last bits intact.  */
5194   clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5195   expected[0] = 0x80;
5196   expected[1] = 0;
5197   expected[2] = 0x1;
5198   verify_array_eq (in, expected, sizeof in);
5199 }
5200 
5201 
5202 /* Run all of the selftests within this file.  */
5203 
5204 void
5205 store_merging_c_tests (void)
5206 {
5207   verify_shift_bytes_in_array_left ();
5208   verify_shift_bytes_in_array_right ();
5209   verify_clear_bit_region ();
5210   verify_clear_bit_region_be ();
5211 }
5212 
5213 } // namespace selftest
5214 #endif /* CHECKING_P.  */
5215