1 /* GIMPLE store merging and byte swapping passes.
2    Copyright (C) 2009-2018 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 /* The purpose of the store merging pass is to combine multiple memory
22    stores of constant values, values loaded from memory or bitwise operations
23    on those to consecutive memory locations into fewer wider stores.
   For example, if we have a sequence performing four single-byte stores to
25    consecutive memory locations:
26    [p     ] := imm1;
27    [p + 1B] := imm2;
28    [p + 2B] := imm3;
29    [p + 3B] := imm4;
30    we can transform this into a single 4-byte store if the target supports it:
31   [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.
32 
33    Or:
34    [p     ] := [q     ];
35    [p + 1B] := [q + 1B];
36    [p + 2B] := [q + 2B];
37    [p + 3B] := [q + 3B];
   which, if there is no overlap, can be transformed into a single 4-byte
   load followed by a single 4-byte store.
40 
41    Or:
42    [p     ] := [q     ] ^ imm1;
43    [p + 1B] := [q + 1B] ^ imm2;
44    [p + 2B] := [q + 2B] ^ imm3;
45    [p + 3B] := [q + 3B] ^ imm4;
   which, if there is no overlap, can be transformed into a single 4-byte
   load, XORed with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
48 
49    The algorithm is applied to each basic block in three phases:
50 
51    1) Scan through the basic block recording assignments to
52    destinations that can be expressed as a store to memory of a certain size
53    at a certain bit offset from expressions we can handle.  For bit-fields
54    we also note the surrounding bit region, bits that could be stored in
55    a read-modify-write operation when storing the bit-field.  Record store
56    chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
58    values get used subsequently).
59    These stores can be a result of structure element initializers, array stores
60    etc.  A store_immediate_info object is recorded for every such store.
61    Record as many such assignments to a single base as possible until a
62    statement that interferes with the store sequence is encountered.
63    Each store has up to 2 operands, which can be an immediate constant
64    or a memory load, from which the value to be stored can be computed.
65    At most one of the operands can be a constant.  The operands are recorded
66    in store_operand_info struct.
67 
68    2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
69    store_immediate_info objects) and coalesce contiguous stores into
70    merged_store_group objects.  For bit-fields stores, we don't need to
71    require the stores to be contiguous, just their surrounding bit regions
72    have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and the
   following one storing a value loaded from memory, or if the loaded memory
75    objects are not adjacent, a new merged_store_group is created as well.
76 
77    For example, given the stores:
78    [p     ] := 0;
79    [p + 1B] := 1;
80    [p + 3B] := 0;
81    [p + 4B] := 1;
82    [p + 5B] := 0;
83    [p + 6B] := 0;
84    This phase would produce two merged_store_group objects, one recording the
85    two bytes stored in the memory region [p : p + 1] and another
86    recording the four bytes stored in the memory region [p + 3 : p + 6].
87 
88    3) The merged_store_group objects produced in phase 2) are processed
89    to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to them.  This may emit
91    multiple stores per store group to handle contiguous stores that are not
92    of a size that is a power of 2.  For example it can try to emit a 40-bit
93    store as a 32-bit store followed by an 8-bit store.
   We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT or
95    TARGET_SLOW_UNALIGNED_ACCESS rules.
96 
97    Note on endianness and example:
98    Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
99    [p     ] := 0x1234;
100    [p + 2B] := 0x5678;
101    [p + 4B] := 0xab;
102    [p + 5B] := 0xcd;
103 
104    The memory layout for little-endian (LE) and big-endian (BE) must be:
105   p |LE|BE|
106   ---------
107   0 |34|12|
108   1 |12|34|
109   2 |78|56|
110   3 |56|78|
111   4 |ab|ab|
112   5 |cd|cd|
113 
114   To merge these into a single 48-bit merged value 'val' in phase 2)
115   on little-endian we insert stores to higher (consecutive) bitpositions
116   into the most significant bits of the merged value.
117   The final merged value would be: 0xcdab56781234
118 
119   For big-endian we insert stores to higher bitpositions into the least
120   significant bits of the merged value.
121   The final merged value would be: 0x12345678abcd
122 
123   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
124   followed by a 16-bit store.  Again, we must consider endianness when
125   breaking down the 48-bit value 'val' computed above.
126   For little endian we emit:
127   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
128   [p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;
129 
130   Whereas for big-endian we emit:
131   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
132   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
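
/* As a source-level illustration (the struct and function names below are
   made up for this example, not part of the pass), a sequence of adjacent
   narrow constant stores such as

     struct S { unsigned char a, b, c, d; };

     void
     f (struct S *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   is the kind of code this pass targets: on a target that allows it, the
   four single-byte stores can be replaced by one 32-bit store of the
   constants concatenated according to endianness.  */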
133 
134 #include "config.h"
135 #include "system.h"
136 #include "coretypes.h"
137 #include "backend.h"
138 #include "tree.h"
139 #include "gimple.h"
140 #include "builtins.h"
141 #include "fold-const.h"
142 #include "tree-pass.h"
143 #include "ssa.h"
144 #include "gimple-pretty-print.h"
145 #include "alias.h"
146 #include "fold-const.h"
147 #include "params.h"
148 #include "print-tree.h"
149 #include "tree-hash-traits.h"
150 #include "gimple-iterator.h"
151 #include "gimplify.h"
152 #include "stor-layout.h"
153 #include "timevar.h"
154 #include "tree-cfg.h"
155 #include "tree-eh.h"
156 #include "target.h"
157 #include "gimplify-me.h"
158 #include "rtl.h"
159 #include "expr.h"	/* For get_bit_range.  */
160 #include "optabs-tree.h"
161 #include "selftest.h"
162 
163 /* The maximum size (in bits) of the stores this pass should generate.  */
164 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
165 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
166 
167 /* Limit to bound the number of aliasing checks for loads with the same
168    vuse as the corresponding store.  */
169 #define MAX_STORE_ALIAS_CHECKS 64
170 
171 namespace {
172 
173 struct bswap_stat
174 {
175   /* Number of hand-written 16-bit nop / bswaps found.  */
176   int found_16bit;
177 
178   /* Number of hand-written 32-bit nop / bswaps found.  */
179   int found_32bit;
180 
181   /* Number of hand-written 64-bit nop / bswaps found.  */
182   int found_64bit;
183 } nop_stats, bswap_stats;
184 
185 /* A symbolic number structure is used to detect byte permutation and selection
186    patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:
189 
190    0	   - target byte has the value 0
191    FF	   - target byte has an unknown value (eg. due to sign extension)
   1..size - marker value is the byte index in the source plus one (1 = lsb).
193 
   To detect permutations on memory sources (arrays and structures), a symbolic
   number also records:
196    - a base address BASE_ADDR and an OFFSET giving the address of the source;
197    - a range which gives the difference between the highest and lowest accessed
198      memory location to make such a symbolic number;
199    - the address SRC of the source element of lowest address as a convenience
200      to easily get BASE_ADDR + offset + lowest bytepos;
   - the number of expressions N_OPS bitwise ORed together, as an
     approximate cost of the computation.
203 
204    Note 1: the range is different from size as size reflects the size of the
205    type of the current expression.  For instance, for an array char a[],
206    (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
207    (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
208    time a range of 1.
209 
210    Note 2: for non-memory sources, range holds the same value as size.
211 
212    Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
213 
214 struct symbolic_number {
215   uint64_t n;
216   tree type;
217   tree base_addr;
218   tree offset;
219   poly_int64_pod bytepos;
220   tree src;
221   tree alias_set;
222   tree vuse;
223   unsigned HOST_WIDE_INT range;
224   int n_ops;
225 };
226 
227 #define BITS_PER_MARKER 8
228 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
229 #define MARKER_BYTE_UNKNOWN MARKER_MASK
230 #define HEAD_MARKER(n, size) \
231   ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
232 
233 /* The number which the find_bswap_or_nop_1 result should match in
234    order to have a nop.  The number is masked according to the size of
235    the symbolic number before using it.  */
236 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
237   (uint64_t)0x08070605 << 32 | 0x04030201)
238 
239 /* The number which the find_bswap_or_nop_1 result should match in
240    order to have a byte swap.  The number is masked according to the
241    size of the symbolic number before using it.  */
242 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
243   (uint64_t)0x01020304 << 32 | 0x05060708)
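
/* As an illustration of how the markers work (assuming BITS_PER_UNIT == 8
   and an unsigned 32-bit source X), a hand-written byte swap like

     (X >> 24) | ((X >> 8) & 0xff00) | ((X << 8) & 0xff0000) | (X << 24)

   starts from the symbolic number 0x04030201 for X.  After the shifts and
   masks the four operands carry the markers 0x00000004, 0x00000300,
   0x00020000 and 0x01000000, and ORing them together gives 0x01020304,
   which is CMPXCHG narrowed to four bytes.  The identity expression would
   instead reproduce 0x04030201, i.e. CMPNOP narrowed to four bytes.  */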
244 
245 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
246    number N.  Return false if the requested operation is not permitted
247    on a symbolic number.  */
248 
249 inline bool
250 do_shift_rotate (enum tree_code code,
251 		 struct symbolic_number *n,
252 		 int count)
253 {
254   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
255   unsigned head_marker;
256 
257   if (count % BITS_PER_UNIT != 0)
258     return false;
259   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
260 
261   /* Zero out the extra bits of N in order to avoid them being shifted
262      into the significant bits.  */
263   if (size < 64 / BITS_PER_MARKER)
264     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
265 
266   switch (code)
267     {
268     case LSHIFT_EXPR:
269       n->n <<= count;
270       break;
271     case RSHIFT_EXPR:
272       head_marker = HEAD_MARKER (n->n, size);
273       n->n >>= count;
274       /* Arithmetic shift of signed type: result is dependent on the value.  */
275       if (!TYPE_UNSIGNED (n->type) && head_marker)
276 	for (i = 0; i < count / BITS_PER_MARKER; i++)
277 	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
278 		  << ((size - 1 - i) * BITS_PER_MARKER);
279       break;
280     case LROTATE_EXPR:
281       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
282       break;
283     case RROTATE_EXPR:
284       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
285       break;
286     default:
287       return false;
288     }
289   /* Zero unused bits for size.  */
290   if (size < 64 / BITS_PER_MARKER)
291     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
292   return true;
293 }
294 
295 /* Perform sanity checking for the symbolic number N and the gimple
296    statement STMT.  */
297 
298 inline bool
299 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
300 {
301   tree lhs_type;
302 
303   lhs_type = gimple_expr_type (stmt);
304 
305   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
306     return false;
307 
308   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
309     return false;
310 
311   return true;
312 }
313 
314 /* Initialize the symbolic number N for the bswap pass from the base element
315    SRC manipulated by the bitwise OR expression.  */
316 
317 bool
318 init_symbolic_number (struct symbolic_number *n, tree src)
319 {
320   int size;
321 
322   if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
323     return false;
324 
325   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
326   n->src = src;
327 
  /* Set up the symbolic number N by setting each byte to a value between 1
     and the byte size of SRC.  The highest order byte is set to that byte
     size and the lowest order byte to 1.  */
331   n->type = TREE_TYPE (src);
332   size = TYPE_PRECISION (n->type);
333   if (size % BITS_PER_UNIT != 0)
334     return false;
335   size /= BITS_PER_UNIT;
336   if (size > 64 / BITS_PER_MARKER)
337     return false;
338   n->range = size;
339   n->n = CMPNOP;
340   n->n_ops = 1;
341 
342   if (size < 64 / BITS_PER_MARKER)
343     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
344 
345   return true;
346 }
347 
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */
351 
352 bool
353 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
354 {
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf nodes.  */
357   poly_int64 bitsize, bitpos, bytepos;
358   machine_mode mode;
359   int unsignedp, reversep, volatilep;
360   tree offset, base_addr;
361 
362   /* Not prepared to handle PDP endian.  */
363   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
364     return false;
365 
366   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
367     return false;
368 
369   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
370 				   &unsignedp, &reversep, &volatilep);
371 
372   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
373     /* Do not rewrite TARGET_MEM_REF.  */
374     return false;
375   else if (TREE_CODE (base_addr) == MEM_REF)
376     {
377       poly_offset_int bit_offset = 0;
378       tree off = TREE_OPERAND (base_addr, 1);
379 
380       if (!integer_zerop (off))
381 	{
382 	  poly_offset_int boff = mem_ref_offset (base_addr);
383 	  boff <<= LOG2_BITS_PER_UNIT;
384 	  bit_offset += boff;
385 	}
386 
387       base_addr = TREE_OPERAND (base_addr, 0);
388 
389       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
390       if (maybe_lt (bit_offset, 0))
391 	{
392 	  tree byte_offset = wide_int_to_tree
393 	    (sizetype, bits_to_bytes_round_down (bit_offset));
394 	  bit_offset = num_trailing_bits (bit_offset);
395 	  if (offset)
396 	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
397 	  else
398 	    offset = byte_offset;
399 	}
400 
401       bitpos += bit_offset.force_shwi ();
402     }
403   else
404     base_addr = build_fold_addr_expr (base_addr);
405 
406   if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
407     return false;
408   if (!multiple_p (bitsize, BITS_PER_UNIT))
409     return false;
410   if (reversep)
411     return false;
412 
413   if (!init_symbolic_number (n, ref))
414     return false;
415   n->base_addr = base_addr;
416   n->offset = offset;
417   n->bytepos = bytepos;
418   n->alias_set = reference_alias_ptr_type (ref);
419   n->vuse = gimple_vuse (stmt);
420   return true;
421 }
422 
/* Compute the symbolic number N representing the result of a bitwise OR on
   the two symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */
426 
427 gimple *
428 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
429 			gimple *source_stmt2, struct symbolic_number *n2,
430 			struct symbolic_number *n)
431 {
432   int i, size;
433   uint64_t mask;
434   gimple *source_stmt;
435   struct symbolic_number *n_start;
436 
437   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
438   if (TREE_CODE (rhs1) == BIT_FIELD_REF
439       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
440     rhs1 = TREE_OPERAND (rhs1, 0);
441   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
442   if (TREE_CODE (rhs2) == BIT_FIELD_REF
443       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
444     rhs2 = TREE_OPERAND (rhs2, 0);
445 
  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
448   if (rhs1 != rhs2)
449     {
450       uint64_t inc;
451       HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
452       struct symbolic_number *toinc_n_ptr, *n_end;
453       basic_block bb1, bb2;
454 
455       if (!n1->base_addr || !n2->base_addr
456 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
457 	return NULL;
458 
459       if (!n1->offset != !n2->offset
460 	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
461 	return NULL;
462 
463       start1 = 0;
464       if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
465 	return NULL;
466 
467       if (start1 < start2)
468 	{
469 	  n_start = n1;
470 	  start_sub = start2 - start1;
471 	}
472       else
473 	{
474 	  n_start = n2;
475 	  start_sub = start1 - start2;
476 	}
477 
478       bb1 = gimple_bb (source_stmt1);
479       bb2 = gimple_bb (source_stmt2);
480       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
481 	source_stmt = source_stmt1;
482       else
483 	source_stmt = source_stmt2;
484 
485       /* Find the highest address at which a load is performed and
486 	 compute related info.  */
487       end1 = start1 + (n1->range - 1);
488       end2 = start2 + (n2->range - 1);
489       if (end1 < end2)
490 	{
491 	  end = end2;
492 	  end_sub = end2 - end1;
493 	}
494       else
495 	{
496 	  end = end1;
497 	  end_sub = end1 - end2;
498 	}
499       n_end = (end2 > end1) ? n2 : n1;
500 
501       /* Find symbolic number whose lsb is the most significant.  */
502       if (BYTES_BIG_ENDIAN)
503 	toinc_n_ptr = (n_end == n1) ? n2 : n1;
504       else
505 	toinc_n_ptr = (n_start == n1) ? n2 : n1;
506 
507       n->range = end - MIN (start1, start2) + 1;
508 
509       /* Check that the range of memory covered can be represented by
510 	 a symbolic number.  */
511       if (n->range > 64 / BITS_PER_MARKER)
512 	return NULL;
513 
514       /* Reinterpret byte marks in symbolic number holding the value of
515 	 bigger weight according to target endianness.  */
516       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
517       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
518       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
519 	{
520 	  unsigned marker
521 	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
522 	  if (marker && marker != MARKER_BYTE_UNKNOWN)
523 	    toinc_n_ptr->n += inc;
524 	}
525     }
526   else
527     {
528       n->range = n1->range;
529       n_start = n1;
530       source_stmt = source_stmt1;
531     }
532 
533   if (!n1->alias_set
534       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
535     n->alias_set = n1->alias_set;
536   else
537     n->alias_set = ptr_type_node;
538   n->vuse = n_start->vuse;
539   n->base_addr = n_start->base_addr;
540   n->offset = n_start->offset;
541   n->src = n_start->src;
542   n->bytepos = n_start->bytepos;
543   n->type = n_start->type;
544   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
545 
546   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
547     {
548       uint64_t masked1, masked2;
549 
550       masked1 = n1->n & mask;
551       masked2 = n2->n & mask;
552       if (masked1 && masked2 && masked1 != masked2)
553 	return NULL;
554     }
555   n->n = n1->n | n2->n;
556   n->n_ops = n1->n_ops + n2->n_ops;
557 
558   return source_stmt;
559 }
560 
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed, the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand, and NULL
   otherwise.  */
566 
567 gimple *
568 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
569 {
570   enum tree_code code;
571   tree rhs1, rhs2 = NULL;
572   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
573   enum gimple_rhs_class rhs_class;
574 
575   if (!limit || !is_gimple_assign (stmt))
576     return NULL;
577 
578   rhs1 = gimple_assign_rhs1 (stmt);
579 
580   if (find_bswap_or_nop_load (stmt, rhs1, n))
581     return stmt;
582 
583   /* Handle BIT_FIELD_REF.  */
584   if (TREE_CODE (rhs1) == BIT_FIELD_REF
585       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
586     {
587       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
588       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
589       if (bitpos % BITS_PER_UNIT == 0
590 	  && bitsize % BITS_PER_UNIT == 0
591 	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
592 	{
593 	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
594 	  if (BYTES_BIG_ENDIAN)
595 	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
596 
597 	  /* Shift.  */
598 	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
599 	    return NULL;
600 
601 	  /* Mask.  */
602 	  uint64_t mask = 0;
603 	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
604 	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
605 	       i++, tmp <<= BITS_PER_UNIT)
606 	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
607 	  n->n &= mask;
608 
609 	  /* Convert.  */
610 	  n->type = TREE_TYPE (rhs1);
611 	  if (!n->base_addr)
612 	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
613 
614 	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
615 	}
616 
617       return NULL;
618     }
619 
620   if (TREE_CODE (rhs1) != SSA_NAME)
621     return NULL;
622 
623   code = gimple_assign_rhs_code (stmt);
624   rhs_class = gimple_assign_rhs_class (stmt);
625   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
626 
627   if (rhs_class == GIMPLE_BINARY_RHS)
628     rhs2 = gimple_assign_rhs2 (stmt);
629 
630   /* Handle unary rhs and binary rhs with integer constants as second
631      operand.  */
632 
633   if (rhs_class == GIMPLE_UNARY_RHS
634       || (rhs_class == GIMPLE_BINARY_RHS
635 	  && TREE_CODE (rhs2) == INTEGER_CST))
636     {
637       if (code != BIT_AND_EXPR
638 	  && code != LSHIFT_EXPR
639 	  && code != RSHIFT_EXPR
640 	  && code != LROTATE_EXPR
641 	  && code != RROTATE_EXPR
642 	  && !CONVERT_EXPR_CODE_P (code))
643 	return NULL;
644 
645       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
646 
647       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
648 	 we have to initialize the symbolic number.  */
649       if (!source_stmt1)
650 	{
651 	  if (gimple_assign_load_p (stmt)
652 	      || !init_symbolic_number (n, rhs1))
653 	    return NULL;
654 	  source_stmt1 = stmt;
655 	}
656 
657       switch (code)
658 	{
659 	case BIT_AND_EXPR:
660 	  {
661 	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
662 	    uint64_t val = int_cst_value (rhs2), mask = 0;
663 	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
664 
665 	    /* Only constants masking full bytes are allowed.  */
666 	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
667 	      if ((val & tmp) != 0 && (val & tmp) != tmp)
668 		return NULL;
669 	      else if (val & tmp)
670 		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
671 
672 	    n->n &= mask;
673 	  }
674 	  break;
675 	case LSHIFT_EXPR:
676 	case RSHIFT_EXPR:
677 	case LROTATE_EXPR:
678 	case RROTATE_EXPR:
679 	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
680 	    return NULL;
681 	  break;
682 	CASE_CONVERT:
683 	  {
684 	    int i, type_size, old_type_size;
685 	    tree type;
686 
687 	    type = gimple_expr_type (stmt);
688 	    type_size = TYPE_PRECISION (type);
689 	    if (type_size % BITS_PER_UNIT != 0)
690 	      return NULL;
691 	    type_size /= BITS_PER_UNIT;
692 	    if (type_size > 64 / BITS_PER_MARKER)
693 	      return NULL;
694 
695 	    /* Sign extension: result is dependent on the value.  */
696 	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
697 	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
698 		&& HEAD_MARKER (n->n, old_type_size))
699 	      for (i = 0; i < type_size - old_type_size; i++)
700 		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
701 			<< ((type_size - 1 - i) * BITS_PER_MARKER);
702 
703 	    if (type_size < 64 / BITS_PER_MARKER)
704 	      {
705 		/* If STMT casts to a smaller type mask out the bits not
706 		   belonging to the target type.  */
707 		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
708 	      }
709 	    n->type = type;
710 	    if (!n->base_addr)
711 	      n->range = type_size;
712 	  }
713 	  break;
714 	default:
715 	  return NULL;
716 	};
717       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
718     }
719 
720   /* Handle binary rhs.  */
721 
722   if (rhs_class == GIMPLE_BINARY_RHS)
723     {
724       struct symbolic_number n1, n2;
725       gimple *source_stmt, *source_stmt2;
726 
727       if (code != BIT_IOR_EXPR)
728 	return NULL;
729 
730       if (TREE_CODE (rhs2) != SSA_NAME)
731 	return NULL;
732 
733       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
734 
735       switch (code)
736 	{
737 	case BIT_IOR_EXPR:
738 	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
739 
740 	  if (!source_stmt1)
741 	    return NULL;
742 
743 	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
744 
745 	  if (!source_stmt2)
746 	    return NULL;
747 
748 	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
749 	    return NULL;
750 
751 	  if (n1.vuse != n2.vuse)
752 	    return NULL;
753 
754 	  source_stmt
755 	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
756 
757 	  if (!source_stmt)
758 	    return NULL;
759 
760 	  if (!verify_symbolic_number_p (n, stmt))
761 	    return NULL;
762 
763 	  break;
764 	default:
765 	  return NULL;
766 	}
767       return source_stmt;
768     }
769   return NULL;
770 }
771 
772 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
773    *CMPXCHG, *CMPNOP and adjust *N.  */
774 
775 void
776 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
777 			    uint64_t *cmpnop)
778 {
779   unsigned rsize;
780   uint64_t tmpn, mask;
781 
782   /* The number which the find_bswap_or_nop_1 result should match in order
783      to have a full byte swap.  The number is shifted to the right
784      according to the size of the symbolic number before using it.  */
785   *cmpxchg = CMPXCHG;
786   *cmpnop = CMPNOP;
787 
788   /* Find real size of result (highest non-zero byte).  */
789   if (n->base_addr)
790     for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
791   else
792     rsize = n->range;
793 
794   /* Zero out the bits corresponding to untouched bytes in original gimple
795      expression.  */
796   if (n->range < (int) sizeof (int64_t))
797     {
798       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
799       *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
800       *cmpnop &= mask;
801     }
802 
803   /* Zero out the bits corresponding to unused bytes in the result of the
804      gimple expression.  */
805   if (rsize < n->range)
806     {
807       if (BYTES_BIG_ENDIAN)
808 	{
809 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
810 	  *cmpxchg &= mask;
811 	  *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
812 	}
813       else
814 	{
815 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
816 	  *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
817 	  *cmpnop &= mask;
818 	}
819       n->range = rsize;
820     }
821 
822   n->range *= BITS_PER_UNIT;
823 }
824 
825 /* Check if STMT completes a bswap implementation or a read in a given
826    endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
827    accordingly.  It also sets N to represent the kind of operations
828    performed: size of the resulting expression and whether it works on
   a memory source, and if so its alias-set and vuse.  Finally, the
830    function returns a stmt whose rhs's first tree is the source
831    expression.  */
832 
833 gimple *
834 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
835 {
836   /* The last parameter determines the depth search limit.  It usually
837      correlates directly to the number n of bytes to be touched.  We
838      increase that number by log2(n) + 1 here in order to also
839      cover signed -> unsigned conversions of the src operand as can be seen
840      in libgcc, and for initial shift/and operation of the src operand.  */
841   int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
842   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
843   gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
844 
845   if (!ins_stmt)
846     return NULL;
847 
848   uint64_t cmpxchg, cmpnop;
849   find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
850 
  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
854   if (n->n == cmpnop)
855     *bswap = false;
856   else if (n->n == cmpxchg)
857     *bswap = true;
858   else
859     return NULL;
860 
861   /* Useless bit manipulation performed by code.  */
862   if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
863     return NULL;
864 
865   return ins_stmt;
866 }
867 
868 const pass_data pass_data_optimize_bswap =
869 {
870   GIMPLE_PASS, /* type */
871   "bswap", /* name */
872   OPTGROUP_NONE, /* optinfo_flags */
873   TV_NONE, /* tv_id */
874   PROP_ssa, /* properties_required */
875   0, /* properties_provided */
876   0, /* properties_destroyed */
877   0, /* todo_flags_start */
878   0, /* todo_flags_finish */
879 };
880 
881 class pass_optimize_bswap : public gimple_opt_pass
882 {
883 public:
884   pass_optimize_bswap (gcc::context *ctxt)
885     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
886   {}
887 
888   /* opt_pass methods: */
889   virtual bool gate (function *)
890     {
891       return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
892     }
893 
894   virtual unsigned int execute (function *);
895 
896 }; // class pass_optimize_bswap
897 
898 /* Perform the bswap optimization: replace the expression computed in the rhs
899    of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
900    bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr
   (non-null if a load is needed) and BSWAP.  The type, VUSE and alias-set
   of the load to perform are also given in N while the builtin bswap to
   invoke is given in FNDECL.  Finally, if a load is involved, INS_STMT
   refers to one of the
905    load statements involved to construct the rhs in gsi_stmt (GSI) and
906    N->range gives the size of the rhs expression for maintaining some
907    statistics.
908 
   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just before INS_STMT to do the load with the
   same VUSE, which can lead to gsi_stmt (GSI) changing basic block.  */
912 
913 tree
914 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
915 	       tree bswap_type, tree load_type, struct symbolic_number *n,
916 	       bool bswap)
917 {
918   tree src, tmp, tgt = NULL_TREE;
919   gimple *bswap_stmt;
920 
921   gimple *cur_stmt = gsi_stmt (gsi);
922   src = n->src;
923   if (cur_stmt)
924     tgt = gimple_assign_lhs (cur_stmt);
925 
926   /* Need to load the value from memory first.  */
927   if (n->base_addr)
928     {
929       gimple_stmt_iterator gsi_ins = gsi;
930       if (ins_stmt)
931 	gsi_ins = gsi_for_stmt (ins_stmt);
932       tree addr_expr, addr_tmp, val_expr, val_tmp;
933       tree load_offset_ptr, aligned_load_type;
934       gimple *load_stmt;
935       unsigned align = get_object_alignment (src);
936       poly_int64 load_offset = 0;
937 
938       if (cur_stmt)
939 	{
940 	  basic_block ins_bb = gimple_bb (ins_stmt);
941 	  basic_block cur_bb = gimple_bb (cur_stmt);
942 	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
943 	    return NULL_TREE;
944 
945 	  /* Move cur_stmt just before one of the load of the original
946 	     to ensure it has the same VUSE.  See PR61517 for what could
947 	     go wrong.  */
948 	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
949 	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
950 	  gsi_move_before (&gsi, &gsi_ins);
951 	  gsi = gsi_for_stmt (cur_stmt);
952 	}
953       else
954 	gsi = gsi_ins;
955 
956       /* Compute address to load from and cast according to the size
957 	 of the load.  */
958       addr_expr = build_fold_addr_expr (src);
959       if (is_gimple_mem_ref_addr (addr_expr))
960 	addr_tmp = unshare_expr (addr_expr);
961       else
962 	{
963 	  addr_tmp = unshare_expr (n->base_addr);
964 	  if (!is_gimple_mem_ref_addr (addr_tmp))
965 	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
966 						   is_gimple_mem_ref_addr,
967 						   NULL_TREE, true,
968 						   GSI_SAME_STMT);
969 	  load_offset = n->bytepos;
970 	  if (n->offset)
971 	    {
972 	      tree off
973 		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
974 					    true, NULL_TREE, true,
975 					    GSI_SAME_STMT);
976 	      gimple *stmt
977 		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
978 				       POINTER_PLUS_EXPR, addr_tmp, off);
979 	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
980 	      addr_tmp = gimple_assign_lhs (stmt);
981 	    }
982 	}
983 
984       /* Perform the load.  */
985       aligned_load_type = load_type;
986       if (align < TYPE_ALIGN (load_type))
987 	aligned_load_type = build_aligned_type (load_type, align);
988       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
989       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
990 			      load_offset_ptr);
991 
992       if (!bswap)
993 	{
994 	  if (n->range == 16)
995 	    nop_stats.found_16bit++;
996 	  else if (n->range == 32)
997 	    nop_stats.found_32bit++;
998 	  else
999 	    {
1000 	      gcc_assert (n->range == 64);
1001 	      nop_stats.found_64bit++;
1002 	    }
1003 
1004 	  /* Convert the result of load if necessary.  */
1005 	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1006 	    {
1007 	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1008 					    "load_dst");
1009 	      load_stmt = gimple_build_assign (val_tmp, val_expr);
1010 	      gimple_set_vuse (load_stmt, n->vuse);
1011 	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1012 	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1013 	      update_stmt (cur_stmt);
1014 	    }
1015 	  else if (cur_stmt)
1016 	    {
1017 	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1018 	      gimple_set_vuse (cur_stmt, n->vuse);
1019 	      update_stmt (cur_stmt);
1020 	    }
1021 	  else
1022 	    {
1023 	      tgt = make_ssa_name (load_type);
1024 	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1025 	      gimple_set_vuse (cur_stmt, n->vuse);
1026 	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1027 	    }
1028 
1029 	  if (dump_file)
1030 	    {
1031 	      fprintf (dump_file,
1032 		       "%d bit load in target endianness found at: ",
1033 		       (int) n->range);
1034 	      print_gimple_stmt (dump_file, cur_stmt, 0);
1035 	    }
1036 	  return tgt;
1037 	}
1038       else
1039 	{
1040 	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1041 	  load_stmt = gimple_build_assign (val_tmp, val_expr);
1042 	  gimple_set_vuse (load_stmt, n->vuse);
1043 	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1044 	}
1045       src = val_tmp;
1046     }
1047   else if (!bswap)
1048     {
1049       gimple *g = NULL;
1050       if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1051 	{
1052 	  if (!is_gimple_val (src))
1053 	    return NULL_TREE;
1054 	  g = gimple_build_assign (tgt, NOP_EXPR, src);
1055 	}
1056       else if (cur_stmt)
1057 	g = gimple_build_assign (tgt, src);
1058       else
1059 	tgt = src;
1060       if (n->range == 16)
1061 	nop_stats.found_16bit++;
1062       else if (n->range == 32)
1063 	nop_stats.found_32bit++;
1064       else
1065 	{
1066 	  gcc_assert (n->range == 64);
1067 	  nop_stats.found_64bit++;
1068 	}
1069       if (dump_file)
1070 	{
1071 	  fprintf (dump_file,
1072 		   "%d bit reshuffle in target endianness found at: ",
1073 		   (int) n->range);
1074 	  if (cur_stmt)
1075 	    print_gimple_stmt (dump_file, cur_stmt, 0);
1076 	  else
1077 	    {
1078 	      print_generic_expr (dump_file, tgt, 0);
1079 	      fprintf (dump_file, "\n");
1080 	    }
1081 	}
1082       if (cur_stmt)
1083 	gsi_replace (&gsi, g, true);
1084       return tgt;
1085     }
1086   else if (TREE_CODE (src) == BIT_FIELD_REF)
1087     src = TREE_OPERAND (src, 0);
1088 
1089   if (n->range == 16)
1090     bswap_stats.found_16bit++;
1091   else if (n->range == 32)
1092     bswap_stats.found_32bit++;
1093   else
1094     {
1095       gcc_assert (n->range == 64);
1096       bswap_stats.found_64bit++;
1097     }
1098 
1099   tmp = src;
1100 
1101   /* Convert the src expression if necessary.  */
1102   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1103     {
1104       gimple *convert_stmt;
1105 
1106       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1107       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1108       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1109     }
1110 
  /* The canonical form for a 16-bit bswap is a rotate expression.  Only
     16-bit values are handled this way because rotating a 2N-bit value by
     N bits is generally not equivalent to a bswap.  Consider for instance
     0x01020304 r>> 16, which gives 0x03040102 while a bswap for that value
     is 0x04030201.  */
1115   if (bswap && n->range == 16)
1116     {
1117       tree count = build_int_cst (NULL, BITS_PER_UNIT);
1118       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1119       bswap_stmt = gimple_build_assign (NULL, src);
1120     }
1121   else
1122     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1123 
1124   if (tgt == NULL_TREE)
1125     tgt = make_ssa_name (bswap_type);
1126   tmp = tgt;
1127 
1128   /* Convert the result if necessary.  */
1129   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1130     {
1131       gimple *convert_stmt;
1132 
1133       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1134       convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1135       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1136     }
1137 
1138   gimple_set_lhs (bswap_stmt, tmp);
1139 
1140   if (dump_file)
1141     {
1142       fprintf (dump_file, "%d bit bswap implementation found at: ",
1143 	       (int) n->range);
1144       if (cur_stmt)
1145 	print_gimple_stmt (dump_file, cur_stmt, 0);
1146       else
1147 	{
1148 	  print_generic_expr (dump_file, tgt, 0);
1149 	  fprintf (dump_file, "\n");
1150 	}
1151     }
1152 
1153   if (cur_stmt)
1154     {
1155       gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1156       gsi_remove (&gsi, true);
1157     }
1158   else
1159     gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1160   return tgt;
1161 }
1162 
/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or a
   simple load according to the target endianness.  */
1167 
1168 unsigned int
1169 pass_optimize_bswap::execute (function *fun)
1170 {
1171   basic_block bb;
1172   bool bswap32_p, bswap64_p;
1173   bool changed = false;
1174   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1175 
1176   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1177 	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1178   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1179 	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1180 		   || (bswap32_p && word_mode == SImode)));
1181 
1182   /* Determine the argument type of the builtins.  The code later on
1183      assumes that the return and argument type are the same.  */
1184   if (bswap32_p)
1185     {
1186       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1187       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1188     }
1189 
1190   if (bswap64_p)
1191     {
1192       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1193       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1194     }
1195 
1196   memset (&nop_stats, 0, sizeof (nop_stats));
1197   memset (&bswap_stats, 0, sizeof (bswap_stats));
1198   calculate_dominance_info (CDI_DOMINATORS);
1199 
1200   FOR_EACH_BB_FN (bb, fun)
1201     {
1202       gimple_stmt_iterator gsi;
1203 
1204       /* We do a reverse scan for bswap patterns to make sure we get the
1205 	 widest match. As bswap pattern matching doesn't handle previously
1206 	 inserted smaller bswap replacements as sub-patterns, the wider
1207 	 variant wouldn't be detected.  */
1208       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1209 	{
1210 	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1211 	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1212 	  enum tree_code code;
1213 	  struct symbolic_number n;
1214 	  bool bswap;
1215 
1216 	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1217 	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Doing the gsi_prev here
	     makes sure that gsi points to the statement previous to cur_stmt
	     while still making sure that all statements in this basic block
	     are considered.  */
1222 	  gsi_prev (&gsi);
1223 
1224 	  if (!is_gimple_assign (cur_stmt))
1225 	    continue;
1226 
1227 	  code = gimple_assign_rhs_code (cur_stmt);
1228 	  switch (code)
1229 	    {
1230 	    case LROTATE_EXPR:
1231 	    case RROTATE_EXPR:
1232 	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1233 		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1234 		     % BITS_PER_UNIT)
1235 		continue;
1236 	      /* Fall through.  */
1237 	    case BIT_IOR_EXPR:
1238 	      break;
1239 	    default:
1240 	      continue;
1241 	    }
1242 
1243 	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1244 
1245 	  if (!ins_stmt)
1246 	    continue;
1247 
1248 	  switch (n.range)
1249 	    {
1250 	    case 16:
1251 	      /* Already in canonical form, nothing to do.  */
1252 	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1253 		continue;
1254 	      load_type = bswap_type = uint16_type_node;
1255 	      break;
1256 	    case 32:
1257 	      load_type = uint32_type_node;
1258 	      if (bswap32_p)
1259 		{
1260 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1261 		  bswap_type = bswap32_type;
1262 		}
1263 	      break;
1264 	    case 64:
1265 	      load_type = uint64_type_node;
1266 	      if (bswap64_p)
1267 		{
1268 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1269 		  bswap_type = bswap64_type;
1270 		}
1271 	      break;
1272 	    default:
1273 	      continue;
1274 	    }
1275 
1276 	  if (bswap && !fndecl && n.range != 16)
1277 	    continue;
1278 
1279 	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1280 			     bswap_type, load_type, &n, bswap))
1281 	    changed = true;
1282 	}
1283     }
1284 
1285   statistics_counter_event (fun, "16-bit nop implementations found",
1286 			    nop_stats.found_16bit);
1287   statistics_counter_event (fun, "32-bit nop implementations found",
1288 			    nop_stats.found_32bit);
1289   statistics_counter_event (fun, "64-bit nop implementations found",
1290 			    nop_stats.found_64bit);
1291   statistics_counter_event (fun, "16-bit bswap implementations found",
1292 			    bswap_stats.found_16bit);
1293   statistics_counter_event (fun, "32-bit bswap implementations found",
1294 			    bswap_stats.found_32bit);
1295   statistics_counter_event (fun, "64-bit bswap implementations found",
1296 			    bswap_stats.found_64bit);
1297 
1298   return (changed ? TODO_update_ssa : 0);
1299 }
1300 
1301 } // anon namespace
1302 
1303 gimple_opt_pass *
1304 make_pass_optimize_bswap (gcc::context *ctxt)
1305 {
1306   return new pass_optimize_bswap (ctxt);
1307 }
1308 
1309 namespace {
1310 
/* Struct recording one operand for the store, which is either a constant,
   in which case VAL represents the constant and all the other fields are
   zero, or a memory load, in which case VAL represents the reference,
   BASE_ADDR is non-NULL and the other fields also reflect the memory
   load.  */
1315 
1316 struct store_operand_info
1317 {
1318   tree val;
1319   tree base_addr;
1320   poly_uint64 bitsize;
1321   poly_uint64 bitpos;
1322   poly_uint64 bitregion_start;
1323   poly_uint64 bitregion_end;
1324   gimple *stmt;
1325   bool bit_not_p;
1326   store_operand_info ();
1327 };
1328 
1329 store_operand_info::store_operand_info ()
1330   : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1331     bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1332 {
1333 }
1334 
1335 /* Struct recording the information about a single store of an immediate
1336    to memory.  These are created in the first phase and coalesced into
1337    merged_store_group objects in the second phase.  */
1338 
1339 struct store_immediate_info
1340 {
1341   unsigned HOST_WIDE_INT bitsize;
1342   unsigned HOST_WIDE_INT bitpos;
1343   unsigned HOST_WIDE_INT bitregion_start;
1344   /* This is one past the last bit of the bit region.  */
1345   unsigned HOST_WIDE_INT bitregion_end;
1346   gimple *stmt;
1347   unsigned int order;
1348   /* INTEGER_CST for constant stores, MEM_REF for memory copy or
1349      BIT_*_EXPR for logical bitwise operation.
1350      LROTATE_EXPR if it can be only bswap optimized and
1351      ops are not really meaningful.
1352      NOP_EXPR if bswap optimization detected identity, ops
1353      are not meaningful.  */
1354   enum tree_code rhs_code;
1355   /* Two fields for bswap optimization purposes.  */
1356   struct symbolic_number n;
1357   gimple *ins_stmt;
1358   /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
1359   bool bit_not_p;
1360   /* True if ops have been swapped and thus ops[1] represents
1361      rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
1362   bool ops_swapped_p;
1363   /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
1364      just the first one.  */
1365   store_operand_info ops[2];
1366   store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1367 			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1368 			gimple *, unsigned int, enum tree_code,
1369 			struct symbolic_number &, gimple *, bool,
1370 			const store_operand_info &,
1371 			const store_operand_info &);
1372 };
1373 
1374 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1375 					    unsigned HOST_WIDE_INT bp,
1376 					    unsigned HOST_WIDE_INT brs,
1377 					    unsigned HOST_WIDE_INT bre,
1378 					    gimple *st,
1379 					    unsigned int ord,
1380 					    enum tree_code rhscode,
1381 					    struct symbolic_number &nr,
1382 					    gimple *ins_stmtp,
1383 					    bool bitnotp,
1384 					    const store_operand_info &op0r,
1385 					    const store_operand_info &op1r)
1386   : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1387     stmt (st), order (ord), rhs_code (rhscode), n (nr),
1388     ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false)
1389 #if __cplusplus >= 201103L
1390     , ops { op0r, op1r }
1391 {
1392 }
1393 #else
1394 {
1395   ops[0] = op0r;
1396   ops[1] = op1r;
1397 }
1398 #endif
1399 
1400 /* Struct representing a group of stores to contiguous memory locations.
1401    These are produced by the second phase (coalescing) and consumed in the
1402    third phase that outputs the widened stores.  */
1403 
1404 struct merged_store_group
1405 {
1406   unsigned HOST_WIDE_INT start;
1407   unsigned HOST_WIDE_INT width;
1408   unsigned HOST_WIDE_INT bitregion_start;
1409   unsigned HOST_WIDE_INT bitregion_end;
1410   /* The size of the allocated memory for val and mask.  */
1411   unsigned HOST_WIDE_INT buf_size;
1412   unsigned HOST_WIDE_INT align_base;
1413   poly_uint64 load_align_base[2];
1414 
1415   unsigned int align;
1416   unsigned int load_align[2];
1417   unsigned int first_order;
1418   unsigned int last_order;
1419 
1420   auto_vec<store_immediate_info *> stores;
1421   /* We record the first and last original statements in the sequence because
1422      we'll need their vuse/vdef and replacement position.  It's easier to keep
1423      track of them separately as 'stores' is reordered by apply_stores.  */
1424   gimple *last_stmt;
1425   gimple *first_stmt;
1426   unsigned char *val;
1427   unsigned char *mask;
1428 
1429   merged_store_group (store_immediate_info *);
1430   ~merged_store_group ();
1431   void merge_into (store_immediate_info *);
1432   void merge_overlapping (store_immediate_info *);
1433   bool apply_stores ();
1434 private:
1435   void do_merge (store_immediate_info *);
1436 };
1437 
1438 /* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */
1439 
1440 static void
1441 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1442 {
1443   if (!fd)
1444     return;
1445 
1446   for (unsigned int i = 0; i < len; i++)
1447     fprintf (fd, "%x ", ptr[i]);
1448   fprintf (fd, "\n");
1449 }
1450 
1451 /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
1452    bits between adjacent elements.  AMNT should be within
1453    [0, BITS_PER_UNIT).
1454    Example, AMNT = 2:
1455    00011111|11100000 << 2 = 01111111|10000000
1456    PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */
1457 
1458 static void
1459 shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
1460 {
1461   if (amnt == 0)
1462     return;
1463 
1464   unsigned char carry_over = 0U;
1465   unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
1466   unsigned char clear_mask = (~0U) << amnt;
1467 
1468   for (unsigned int i = 0; i < sz; i++)
1469     {
1470       unsigned prev_carry_over = carry_over;
1471       carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
1472 
1473       ptr[i] <<= amnt;
1474       if (i != 0)
1475 	{
1476 	  ptr[i] &= clear_mask;
1477 	  ptr[i] |= prev_carry_over;
1478 	}
1479     }
1480 }
1481 
1482 /* Like shift_bytes_in_array but for big-endian.
1483    Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
1484    bits between adjacent elements.  AMNT should be within
1485    [0, BITS_PER_UNIT).
1486    Example, AMNT = 2:
1487    00011111|11100000 >> 2 = 00000111|11111000
1488    PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */
1489 
1490 static void
1491 shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
1492 			    unsigned int amnt)
1493 {
1494   if (amnt == 0)
1495     return;
1496 
1497   unsigned char carry_over = 0U;
1498   unsigned char carry_mask = ~(~0U << amnt);
1499 
1500   for (unsigned int i = 0; i < sz; i++)
1501     {
1502       unsigned prev_carry_over = carry_over;
1503       carry_over = ptr[i] & carry_mask;
1504 
1505       carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
1506       ptr[i] >>= amnt;
1507       ptr[i] |= prev_carry_over;
1508     }
1509 }
1510 
1511 /* Clear out LEN bits starting from bit START in the byte array
1512    PTR.  This clears the bits to the *right* from START.
1513    START must be within [0, BITS_PER_UNIT) and counts starting from
1514    the least significant bit.  */
1515 
1516 static void
1517 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1518 		     unsigned int len)
1519 {
1520   if (len == 0)
1521     return;
1522   /* Clear len bits to the right of start.  */
1523   else if (len <= start + 1)
1524     {
1525       unsigned char mask = (~(~0U << len));
1526       mask = mask << (start + 1U - len);
1527       ptr[0] &= ~mask;
1528     }
1529   else if (start != BITS_PER_UNIT - 1)
1530     {
1531       clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1532       clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1533 			   len - (start % BITS_PER_UNIT) - 1);
1534     }
1535   else if (start == BITS_PER_UNIT - 1
1536 	   && len > BITS_PER_UNIT)
1537     {
1538       unsigned int nbytes = len / BITS_PER_UNIT;
1539       memset (ptr, 0, nbytes);
1540       if (len % BITS_PER_UNIT != 0)
1541 	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1542 			     len % BITS_PER_UNIT);
1543     }
1544   else
1545     gcc_unreachable ();
1546 }
1547 
1548 /* In the byte array PTR clear the bit region starting at bit
   START that is LEN bits wide.
1550    For regions spanning multiple bytes do this recursively until we reach
1551    zero LEN or a region contained within a single byte.  */
1552 
1553 static void
1554 clear_bit_region (unsigned char *ptr, unsigned int start,
1555 		  unsigned int len)
1556 {
1557   /* Degenerate base case.  */
1558   if (len == 0)
1559     return;
1560   else if (start >= BITS_PER_UNIT)
1561     clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1562   /* Second base case.  */
1563   else if ((start + len) <= BITS_PER_UNIT)
1564     {
1565       unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1566       mask >>= BITS_PER_UNIT - (start + len);
1567 
1568       ptr[0] &= ~mask;
1569 
1570       return;
1571     }
1572   /* Clear most significant bits in a byte and proceed with the next byte.  */
1573   else if (start != 0)
1574     {
1575       clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1576       clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1577     }
1578   /* Whole bytes need to be cleared.  */
1579   else if (start == 0 && len > BITS_PER_UNIT)
1580     {
1581       unsigned int nbytes = len / BITS_PER_UNIT;
1582       /* We could recurse on each byte but we clear whole bytes, so a simple
1583 	 memset will do.  */
1584       memset (ptr, '\0', nbytes);
1585       /* Clear the remaining sub-byte region if there is one.  */
1586       if (len % BITS_PER_UNIT != 0)
1587 	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1588     }
1589   else
1590     gcc_unreachable ();
1591 }
1592 
1593 /* Write BITLEN bits of EXPR to the byte array PTR at
1594    bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
1595    Return true if the operation succeeded.  */
1596 
1597 static bool
1598 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1599 		       unsigned int total_bytes)
1600 {
1601   unsigned int first_byte = bitpos / BITS_PER_UNIT;
1602   tree tmp_int = expr;
1603   bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1604 			|| (bitpos % BITS_PER_UNIT)
1605 			|| !int_mode_for_size (bitlen, 0).exists ());
1606 
1607   if (!sub_byte_op_p)
1608     return native_encode_expr (tmp_int, ptr + first_byte, total_bytes) != 0;
1609 
1610   /* LITTLE-ENDIAN
1611      We are writing a non byte-sized quantity or at a position that is not
1612      at a byte boundary.
1613      |--------|--------|--------| ptr + first_byte
1614            ^              ^
1615            xxx xxxxxxxx xxx< bp>
1616            |______EXPR____|
1617 
1618      First native_encode_expr EXPR into a temporary buffer and shift each
1619      byte in the buffer by 'bp' (carrying the bits over as necessary).
1620      |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1621                                               <------bitlen---->< bp>
1622     Then we clear the destination bits:
1623     |---00000|00000000|000-----| ptr + first_byte
1624         <-------bitlen--->< bp>
1625 
1626     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1627     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1628 
1629    BIG-ENDIAN
1630    We are writing a non byte-sized quantity or at a position that is not
1631    at a byte boundary.
1632      ptr + first_byte |--------|--------|--------|
1633                             ^              ^
1634                        <bp >xxx xxxxxxxx xxx
1635                             |_____EXPR_____|
1636 
1637      First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer to the right (carrying the bits over as necessary).
1639      We shift by as much as needed to align the most significant bit of EXPR
1640      with bitpos:
1641      |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1642         <---bitlen---->          <bp ><-----bitlen----->
1643     Then we clear the destination bits:
1644     ptr + first_byte |-----000||00000000||00000---|
1645                       <bp ><-------bitlen----->
1646 
1647     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1648     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1649     The awkwardness comes from the fact that bitpos is counted from the
1650     most significant bit of a byte.  */
1651 
1652   /* We must be dealing with fixed-size data at this point, since the
1653      total size is also fixed.  */
1654   fixed_size_mode mode = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1655   /* Allocate an extra byte so that we have space to shift into.  */
1656   unsigned int byte_size = GET_MODE_SIZE (mode) + 1;
1657   unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1658   memset (tmpbuf, '\0', byte_size);
1659   /* The store detection code should only have allowed constants that are
1660      accepted by native_encode_expr.  */
1661   if (native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1662     gcc_unreachable ();
1663 
1664   /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1665      bytes to write.  This means it can write more than
1666      ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1667      write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
1668      bitlen and zero out the bits that are not relevant as well (that may
1669      contain a sign bit due to sign-extension).  */
1670   unsigned int padding
1671     = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1672   /* On big-endian the padding is at the 'front' so just skip the initial
1673      bytes.  */
1674   if (BYTES_BIG_ENDIAN)
1675     tmpbuf += padding;
1676 
1677   byte_size -= padding;
1678 
1679   if (bitlen % BITS_PER_UNIT != 0)
1680     {
1681       if (BYTES_BIG_ENDIAN)
1682 	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1683 			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1684       else
1685 	clear_bit_region (tmpbuf, bitlen,
1686 			  byte_size * BITS_PER_UNIT - bitlen);
1687     }
  /* Left shifting relies on the last byte being zero if bitlen is
     a multiple of BITS_PER_UNIT, which might not be the case if
     there are padding bytes.  */
1691   else if (!BYTES_BIG_ENDIAN)
1692     tmpbuf[byte_size - 1] = '\0';
1693 
1694   /* Clear the bit region in PTR where the bits from TMPBUF will be
1695      inserted into.  */
1696   if (BYTES_BIG_ENDIAN)
1697     clear_bit_region_be (ptr + first_byte,
1698 			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1699   else
1700     clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1701 
1702   int shift_amnt;
1703   int bitlen_mod = bitlen % BITS_PER_UNIT;
1704   int bitpos_mod = bitpos % BITS_PER_UNIT;
1705 
1706   bool skip_byte = false;
1707   if (BYTES_BIG_ENDIAN)
1708     {
1709       /* BITPOS and BITLEN are exactly aligned and no shifting
1710 	 is necessary.  */
1711       if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1712 	  || (bitpos_mod == 0 && bitlen_mod == 0))
1713 	shift_amnt = 0;
1714       /* |. . . . . . . .|
1715 	  <bp >   <blen >.
1716 	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1717 	 of the value until it aligns with 'bp' in the next byte over.  */
1718       else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1719 	{
1720 	  shift_amnt = bitlen_mod + bitpos_mod;
1721 	  skip_byte = bitlen_mod != 0;
1722 	}
1723       /* |. . . . . . . .|
1724 	  <----bp--->
1725 	    <---blen---->.
1726 	 Shift the value right within the same byte so it aligns with 'bp'.  */
1727       else
1728 	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1729     }
1730   else
1731     shift_amnt = bitpos % BITS_PER_UNIT;
1732 
1733   /* Create the shifted version of EXPR.  */
1734   if (!BYTES_BIG_ENDIAN)
1735     {
1736       shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
1737       if (shift_amnt == 0)
1738 	byte_size--;
1739     }
1740   else
1741     {
1742       gcc_assert (BYTES_BIG_ENDIAN);
1743       shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1744       /* If shifting right forced us to move into the next byte skip the now
1745 	 empty byte.  */
1746       if (skip_byte)
1747 	{
1748 	  tmpbuf++;
1749 	  byte_size--;
1750 	}
1751     }
1752 
1753   /* Insert the bits from TMPBUF.  */
1754   for (unsigned int i = 0; i < byte_size; i++)
1755     ptr[first_byte + i] |= tmpbuf[i];
1756 
1757   return true;
1758 }
1759 
1760 /* Sorting function for store_immediate_info objects.
1761    Sorts them by bitposition.  */
1762 
1763 static int
1764 sort_by_bitpos (const void *x, const void *y)
1765 {
1766   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1767   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1768 
1769   if ((*tmp)->bitpos < (*tmp2)->bitpos)
1770     return -1;
1771   else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1772     return 1;
1773   else
    /* If they are the same, use the order, which is guaranteed to
       be different.  */
1776     return (*tmp)->order - (*tmp2)->order;
1777 }
1778 
1779 /* Sorting function for store_immediate_info objects.
1780    Sorts them by the order field.  */
1781 
1782 static int
1783 sort_by_order (const void *x, const void *y)
1784 {
1785   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1786   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1787 
1788   if ((*tmp)->order < (*tmp2)->order)
1789     return -1;
1790   else if ((*tmp)->order > (*tmp2)->order)
1791     return 1;
1792 
1793   gcc_unreachable ();
1794 }
1795 
1796 /* Initialize a merged_store_group object from a store_immediate_info
1797    object.  */
1798 
1799 merged_store_group::merged_store_group (store_immediate_info *info)
1800 {
1801   start = info->bitpos;
1802   width = info->bitsize;
1803   bitregion_start = info->bitregion_start;
1804   bitregion_end = info->bitregion_end;
1805   /* VAL has memory allocated for it in apply_stores once the group
1806      width has been finalized.  */
1807   val = NULL;
1808   mask = NULL;
1809   unsigned HOST_WIDE_INT align_bitpos = 0;
1810   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1811 			  &align, &align_bitpos);
1812   align_base = start - align_bitpos;
1813   for (int i = 0; i < 2; ++i)
1814     {
1815       store_operand_info &op = info->ops[i];
1816       if (op.base_addr == NULL_TREE)
1817 	{
1818 	  load_align[i] = 0;
1819 	  load_align_base[i] = 0;
1820 	}
1821       else
1822 	{
1823 	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1824 	  load_align_base[i] = op.bitpos - align_bitpos;
1825 	}
1826     }
1827   stores.create (1);
1828   stores.safe_push (info);
1829   last_stmt = info->stmt;
1830   last_order = info->order;
1831   first_stmt = last_stmt;
1832   first_order = last_order;
1833   buf_size = 0;
1834 }
1835 
1836 merged_store_group::~merged_store_group ()
1837 {
1838   if (val)
1839     XDELETEVEC (val);
1840 }
1841 
1842 /* Helper method for merge_into and merge_overlapping to do
1843    the common part.  */
1844 void
1845 merged_store_group::do_merge (store_immediate_info *info)
1846 {
1847   bitregion_start = MIN (bitregion_start, info->bitregion_start);
1848   bitregion_end = MAX (bitregion_end, info->bitregion_end);
1849 
1850   unsigned int this_align;
1851   unsigned HOST_WIDE_INT align_bitpos = 0;
1852   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1853 			  &this_align, &align_bitpos);
1854   if (this_align > align)
1855     {
1856       align = this_align;
1857       align_base = info->bitpos - align_bitpos;
1858     }
1859   for (int i = 0; i < 2; ++i)
1860     {
1861       store_operand_info &op = info->ops[i];
1862       if (!op.base_addr)
1863 	continue;
1864 
1865       get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1866       if (this_align > load_align[i])
1867 	{
1868 	  load_align[i] = this_align;
1869 	  load_align_base[i] = op.bitpos - align_bitpos;
1870 	}
1871     }
1872 
1873   gimple *stmt = info->stmt;
1874   stores.safe_push (info);
1875   if (info->order > last_order)
1876     {
1877       last_order = info->order;
1878       last_stmt = stmt;
1879     }
1880   else if (info->order < first_order)
1881     {
1882       first_order = info->order;
1883       first_stmt = stmt;
1884     }
1885 }
1886 
1887 /* Merge a store recorded by INFO into this merged store.
1888    The store is not overlapping with the existing recorded
1889    stores.  */
1890 
1891 void
1892 merged_store_group::merge_into (store_immediate_info *info)
1893 {
1894   /* Make sure we're inserting in the position we think we're inserting.  */
1895   gcc_assert (info->bitpos >= start + width
1896 	      && info->bitregion_start <= bitregion_end);
1897 
1898   width = info->bitpos + info->bitsize - start;
1899   do_merge (info);
1900 }
1901 
/* Merge a store described by INFO into this merged store.
   INFO overlaps in some way with the current store (i.e. it's not the
   contiguous case, which is handled by merged_store_group::merge_into).  */
1905 
1906 void
1907 merged_store_group::merge_overlapping (store_immediate_info *info)
1908 {
1909   /* If the store extends the size of the group, extend the width.  */
1910   if (info->bitpos + info->bitsize > start + width)
1911     width = info->bitpos + info->bitsize - start;
1912 
1913   do_merge (info);
1914 }
1915 
1916 /* Go through all the recorded stores in this group in program order and
1917    apply their values to the VAL byte array to create the final merged
1918    value.  Return true if the operation succeeded.  */
1919 
1920 bool
1921 merged_store_group::apply_stores ()
1922 {
1923   /* Make sure we have more than one store in the group, otherwise we cannot
1924      merge anything.  */
1925   if (bitregion_start % BITS_PER_UNIT != 0
1926       || bitregion_end % BITS_PER_UNIT != 0
1927       || stores.length () == 1)
1928     return false;
1929 
1930   stores.qsort (sort_by_order);
1931   store_immediate_info *info;
1932   unsigned int i;
1933   /* Create a buffer of a size that is 2 times the number of bytes we're
1934      storing.  That way native_encode_expr can write power-of-2-sized
1935      chunks without overrunning.  */
1936   buf_size = 2 * ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
1937   val = XNEWVEC (unsigned char, 2 * buf_size);
1938   mask = val + buf_size;
1939   memset (val, 0, buf_size);
1940   memset (mask, ~0U, buf_size);
1941 
1942   FOR_EACH_VEC_ELT (stores, i, info)
1943     {
1944       unsigned int pos_in_buffer = info->bitpos - bitregion_start;
1945       tree cst = NULL_TREE;
1946       if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
1947 	cst = info->ops[0].val;
1948       else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
1949 	cst = info->ops[1].val;
1950       bool ret = true;
1951       if (cst)
1952 	ret = encode_tree_to_bitpos (cst, val, info->bitsize,
1953 				     pos_in_buffer, buf_size);
1954       if (cst && dump_file && (dump_flags & TDF_DETAILS))
1955 	{
1956 	  if (ret)
1957 	    {
1958 	      fprintf (dump_file, "After writing ");
1959 	      print_generic_expr (dump_file, cst, 0);
1960 	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
1961 		       " at position %d the merged region contains:\n",
1962 		       info->bitsize, pos_in_buffer);
1963 	      dump_char_array (dump_file, val, buf_size);
1964 	    }
1965 	  else
1966 	    fprintf (dump_file, "Failed to merge stores\n");
1967 	}
1968       if (!ret)
1969 	return false;
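      /* Also record in MASK which bits this store writes: MASK starts out
	 as all ones and the bits covered by each store are cleared, so any
	 bits still set at the end correspond to padding.  */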
1970       unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
1971       if (BYTES_BIG_ENDIAN)
1972 	clear_bit_region_be (m, (BITS_PER_UNIT - 1
1973 				 - (pos_in_buffer % BITS_PER_UNIT)),
1974 			     info->bitsize);
1975       else
1976 	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
1977     }
1978   stores.qsort (sort_by_bitpos);
1979   return true;
1980 }
1981 
1982 /* Structure describing the store chain.  */
1983 
1984 struct imm_store_chain_info
1985 {
1986   /* Doubly-linked list that imposes an order on chain processing.
1987      PNXP (prev's next pointer) points to the head of a list, or to
1988      the next field in the previous chain in the list.
1989      See pass_store_merging::m_stores_head for more rationale.  */
1990   imm_store_chain_info *next, **pnxp;
1991   tree base_addr;
1992   auto_vec<store_immediate_info *> m_store_info;
1993   auto_vec<merged_store_group *> m_merged_store_groups;
1994 
1995   imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
1996   : next (inspt), pnxp (&inspt), base_addr (b_a)
1997   {
1998     inspt = this;
1999     if (next)
2000       {
2001 	gcc_checking_assert (pnxp == next->pnxp);
2002 	next->pnxp = &next;
2003       }
2004   }
2005   ~imm_store_chain_info ()
2006   {
2007     *pnxp = next;
2008     if (next)
2009       {
2010 	gcc_checking_assert (&next == next->pnxp);
2011 	next->pnxp = pnxp;
2012       }
2013   }
2014   bool terminate_and_process_chain ();
2015   bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
2016   bool coalesce_immediate_stores ();
2017   bool output_merged_store (merged_store_group *);
2018   bool output_merged_stores ();
2019 };
2020 
2021 const pass_data pass_data_tree_store_merging = {
2022   GIMPLE_PASS,     /* type */
2023   "store-merging", /* name */
2024   OPTGROUP_NONE,   /* optinfo_flags */
2025   TV_GIMPLE_STORE_MERGING,	 /* tv_id */
2026   PROP_ssa,	/* properties_required */
2027   0,		   /* properties_provided */
2028   0,		   /* properties_destroyed */
2029   0,		   /* todo_flags_start */
2030   TODO_update_ssa, /* todo_flags_finish */
2031 };
2032 
2033 class pass_store_merging : public gimple_opt_pass
2034 {
2035 public:
2036   pass_store_merging (gcc::context *ctxt)
2037     : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2038   {
2039   }
2040 
2041   /* Pass not supported for PDP-endianness, nor for insane hosts
2042      or target character sizes where native_{encode,interpret}_expr
2043      doesn't work properly.  */
2044   virtual bool
2045   gate (function *)
2046   {
2047     return flag_store_merging
2048 	   && WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN
2049 	   && CHAR_BIT == 8
2050 	   && BITS_PER_UNIT == 8;
2051   }
2052 
2053   virtual unsigned int execute (function *);
2054 
2055 private:
2056   hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
2057 
2058   /* Form a doubly-linked stack of the elements of m_stores, so that
2059      we can iterate over them in a predictable way.  Using this order
2060      avoids extraneous differences in the compiler output just because
2061      of tree pointer variations (e.g. different chains end up in
2062      different positions of m_stores, so they are handled in different
2063      orders, so they allocate or release SSA names in different
2064      orders, and when they get reused, subsequent passes end up
2065      getting different SSA names, which may ultimately change
2066      decisions when going out of SSA).  */
2067   imm_store_chain_info *m_stores_head;
2068 
2069   void process_store (gimple *);
2070   bool terminate_and_process_all_chains ();
2071   bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2072   bool terminate_and_release_chain (imm_store_chain_info *);
2073 }; // class pass_store_merging
2074 
2075 /* Terminate and process all recorded chains.  Return true if any changes
2076    were made.  */
2077 
2078 bool
2079 pass_store_merging::terminate_and_process_all_chains ()
2080 {
2081   bool ret = false;
2082   while (m_stores_head)
2083     ret |= terminate_and_release_chain (m_stores_head);
2084   gcc_assert (m_stores.elements () == 0);
2085   gcc_assert (m_stores_head == NULL);
2086 
2087   return ret;
2088 }
2089 
/* Terminate all chains that are affected by the statement STMT.
   CHAIN_INFO, if non-NULL, is a chain that has already been checked and
   should be skipped here.  */
2093 
2094 bool
2095 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2096 						     **chain_info,
2097 						   gimple *stmt)
2098 {
2099   bool ret = false;
2100 
2101   /* If the statement doesn't touch memory it can't alias.  */
2102   if (!gimple_vuse (stmt))
2103     return false;
2104 
2105   tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2106   for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2107     {
2108       next = cur->next;
2109 
2110       /* We already checked all the stores in chain_info and terminated the
2111 	 chain if necessary.  Skip it here.  */
2112       if (chain_info && *chain_info == cur)
2113 	continue;
2114 
2115       store_immediate_info *info;
2116       unsigned int i;
2117       FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2118 	{
2119 	  tree lhs = gimple_assign_lhs (info->stmt);
2120 	  if (ref_maybe_used_by_stmt_p (stmt, lhs)
2121 	      || stmt_may_clobber_ref_p (stmt, lhs)
2122 	      || (store_lhs && refs_output_dependent_p (store_lhs, lhs)))
2123 	    {
2124 	      if (dump_file && (dump_flags & TDF_DETAILS))
2125 		{
2126 		  fprintf (dump_file, "stmt causes chain termination:\n");
2127 		  print_gimple_stmt (dump_file, stmt, 0);
2128 		}
2129 	      terminate_and_release_chain (cur);
2130 	      ret = true;
2131 	      break;
2132 	    }
2133 	}
2134     }
2135 
2136   return ret;
2137 }
2138 
/* Helper function.  Terminate the recorded chain CHAIN_INFO storing to its
   base object.  Return true if the merging and output was successful.  The
   m_stores entry is removed after the processing in any case.  */
2142 
2143 bool
2144 pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
2145 {
2146   bool ret = chain_info->terminate_and_process_chain ();
2147   m_stores.remove (chain_info->base_addr);
2148   delete chain_info;
2149   return ret;
2150 }
2151 
/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
   may clobber REF.  FIRST and LAST must be in the same basic block and
   have non-NULL vdef.  We want to be able to sink a load of REF across
   stores between FIRST and LAST, up to right before LAST.  */
2156 
2157 bool
2158 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2159 {
2160   ao_ref r;
2161   ao_ref_init (&r, ref);
2162   unsigned int count = 0;
2163   tree vop = gimple_vdef (last);
2164   gimple *stmt;
2165 
2166   gcc_checking_assert (gimple_bb (first) == gimple_bb (last));
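  /* Walk the virtual use-def chain backwards from LAST towards FIRST,
     checking each intervening statement against REF.  */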
2167   do
2168     {
2169       stmt = SSA_NAME_DEF_STMT (vop);
2170       if (stmt_may_clobber_ref_p_1 (stmt, &r))
2171 	return true;
2172       if (gimple_store_p (stmt)
2173 	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2174 	return true;
2175       /* Avoid quadratic compile time by bounding the number of checks
2176 	 we perform.  */
2177       if (++count > MAX_STORE_ALIAS_CHECKS)
2178 	return true;
2179       vop = gimple_vuse (stmt);
2180     }
2181   while (stmt != first);
2182   return false;
2183 }
2184 
2185 /* Return true if INFO->ops[IDX] is mergeable with the
2186    corresponding loads already in MERGED_STORE group.
2187    BASE_ADDR is the base address of the whole store group.  */
2188 
2189 bool
2190 compatible_load_p (merged_store_group *merged_store,
2191 		   store_immediate_info *info,
2192 		   tree base_addr, int idx)
2193 {
2194   store_immediate_info *infof = merged_store->stores[0];
2195   if (!info->ops[idx].base_addr
2196       || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2197 		   info->bitpos - infof->bitpos)
2198       || !operand_equal_p (info->ops[idx].base_addr,
2199 			   infof->ops[idx].base_addr, 0))
2200     return false;
2201 
2202   store_immediate_info *infol = merged_store->stores.last ();
2203   tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2204   /* In this case all vuses should be the same, e.g.
2205      _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2206      or
2207      _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2208      and we can emit the coalesced load next to any of those loads.  */
2209   if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2210       && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2211     return true;
2212 
2213   /* Otherwise, at least for now require that the load has the same
2214      vuse as the store.  See following examples.  */
2215   if (gimple_vuse (info->stmt) != load_vuse)
2216     return false;
2217 
2218   if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2219       || (infof != infol
2220 	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2221     return false;
2222 
2223   /* If the load is from the same location as the store, already
2224      the construction of the immediate chain info guarantees no intervening
2225      stores, so no further checks are needed.  Example:
2226      _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
2227   if (known_eq (info->ops[idx].bitpos, info->bitpos)
2228       && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2229     return true;
2230 
2231   /* Otherwise, we need to punt if any of the loads can be clobbered by any
2232      of the stores in the group, or any other stores in between those.
2233      Previous calls to compatible_load_p ensured that for all the
2234      merged_store->stores IDX loads, no stmts starting with
2235      merged_store->first_stmt and ending right before merged_store->last_stmt
2236      clobbers those loads.  */
2237   gimple *first = merged_store->first_stmt;
2238   gimple *last = merged_store->last_stmt;
2239   unsigned int i;
2240   store_immediate_info *infoc;
  /* The stores are sorted by increasing store bitpos, so if the store in
     info->stmt comes before the so far first statement in the group, we'll
     be changing merged_store->first_stmt.  In that case we need to give up
     if any of the earlier processed loads are clobbered by the stmts in the
     new range.  */
2246   if (info->order < merged_store->first_order)
2247     {
2248       FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2249 	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2250 	  return false;
2251       first = info->stmt;
2252     }
2253   /* Similarly, we could change merged_store->last_stmt, so ensure
2254      in that case no stmts in the new range clobber any of the earlier
2255      processed loads.  */
2256   else if (info->order > merged_store->last_order)
2257     {
2258       FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2259 	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2260 	  return false;
2261       last = info->stmt;
2262     }
2263   /* And finally, we'd be adding a new load to the set, ensure it isn't
2264      clobbered in the new range.  */
2265   if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2266     return false;
2267 
2268   /* Otherwise, we are looking for:
2269      _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2270      or
2271      _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
2272   return true;
2273 }
2274 
2275 /* Add all refs loaded to compute VAL to REFS vector.  */
2276 
2277 void
2278 gather_bswap_load_refs (vec<tree> *refs, tree val)
2279 {
2280   if (TREE_CODE (val) != SSA_NAME)
2281     return;
2282 
2283   gimple *stmt = SSA_NAME_DEF_STMT (val);
2284   if (!is_gimple_assign (stmt))
2285     return;
2286 
2287   if (gimple_assign_load_p (stmt))
2288     {
2289       refs->safe_push (gimple_assign_rhs1 (stmt));
2290       return;
2291     }
2292 
2293   switch (gimple_assign_rhs_class (stmt))
2294     {
2295     case GIMPLE_BINARY_RHS:
2296       gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2297       /* FALLTHRU */
2298     case GIMPLE_UNARY_RHS:
2299       gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2300       break;
2301     default:
2302       gcc_unreachable ();
2303     }
2304 }
2305 
/* Check if there are any stores in M_STORE_INFO after index I
   (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
   a potential group ending at bit END and have their order
   smaller than LAST_ORDER.  RHS_CODE is the kind of store in the
   group.  Return true if there are no such stores.
2311    Consider:
2312      MEM[(long long int *)p_28] = 0;
2313      MEM[(long long int *)p_28 + 8B] = 0;
2314      MEM[(long long int *)p_28 + 16B] = 0;
2315      MEM[(long long int *)p_28 + 24B] = 0;
2316      _129 = (int) _130;
2317      MEM[(int *)p_28 + 8B] = _129;
2318      MEM[(int *)p_28].a = -1;
2319    We already have
2320      MEM[(long long int *)p_28] = 0;
2321      MEM[(int *)p_28].a = -1;
2322    stmts in the current group and need to consider if it is safe to
2323    add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2324    There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2325    store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2326    into the group and merging of those 3 stores is successful, merged
2327    stmts will be emitted at the latest store from that group, i.e.
2328    LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2329    The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2330    the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2331    so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2332    into the group.  That way it will be its own store group and will
2333    not be touched.  If RHS_CODE is INTEGER_CST and there are overlapping
2334    INTEGER_CST stores, those are mergeable using merge_overlapping,
2335    so don't return false for those.  */
2336 
2337 static bool
2338 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2339 		  enum tree_code rhs_code, unsigned int last_order,
2340 		  unsigned HOST_WIDE_INT end)
2341 {
2342   unsigned int len = m_store_info.length ();
2343   for (++i; i < len; ++i)
2344     {
2345       store_immediate_info *info = m_store_info[i];
2346       if (info->bitpos >= end)
2347 	break;
2348       if (info->order < last_order
2349 	  && (rhs_code != INTEGER_CST || info->rhs_code != INTEGER_CST))
2350 	return false;
2351     }
2352   return true;
2353 }
2354 
/* Return true if m_store_info[first] and at least one following store
   form a group which stores a try_size-bit value that is byte swapped
   from a memory load or some value, or is the identity of some value.
   This uses the bswap pass APIs.  */
2359 
2360 bool
2361 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2362 					  unsigned int first,
2363 					  unsigned int try_size)
2364 {
2365   unsigned int len = m_store_info.length (), last = first;
2366   unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2367   if (width >= try_size)
2368     return false;
2369   for (unsigned int i = first + 1; i < len; ++i)
2370     {
2371       if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2372 	  || m_store_info[i]->ins_stmt == NULL)
2373 	return false;
2374       width += m_store_info[i]->bitsize;
2375       if (width >= try_size)
2376 	{
2377 	  last = i;
2378 	  break;
2379 	}
2380     }
2381   if (width != try_size)
2382     return false;
2383 
2384   bool allow_unaligned
2385     = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
2386   /* Punt if the combined store would not be aligned and we need alignment.  */
2387   if (!allow_unaligned)
2388     {
2389       unsigned int align = merged_store->align;
2390       unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2391       for (unsigned int i = first + 1; i <= last; ++i)
2392 	{
2393 	  unsigned int this_align;
2394 	  unsigned HOST_WIDE_INT align_bitpos = 0;
2395 	  get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2396 				  &this_align, &align_bitpos);
2397 	  if (this_align > align)
2398 	    {
2399 	      align = this_align;
2400 	      align_base = m_store_info[i]->bitpos - align_bitpos;
2401 	    }
2402 	}
2403       unsigned HOST_WIDE_INT align_bitpos
2404 	= (m_store_info[first]->bitpos - align_base) & (align - 1);
2405       if (align_bitpos)
2406 	align = least_bit_hwi (align_bitpos);
2407       if (align < try_size)
2408 	return false;
2409     }
2410 
2411   tree type;
2412   switch (try_size)
2413     {
2414     case 16: type = uint16_type_node; break;
2415     case 32: type = uint32_type_node; break;
2416     case 64: type = uint64_type_node; break;
2417     default: gcc_unreachable ();
2418     }
2419   struct symbolic_number n;
2420   gimple *ins_stmt = NULL;
2421   int vuse_store = -1;
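  /* VUSE_STORE is -1 until we know whether the loads use the virtual
     operands of the stores within the group (1) or a single vuse from
     before the group (0).  */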
2422   unsigned int first_order = merged_store->first_order;
2423   unsigned int last_order = merged_store->last_order;
2424   gimple *first_stmt = merged_store->first_stmt;
2425   gimple *last_stmt = merged_store->last_stmt;
2426   unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2427   store_immediate_info *infof = m_store_info[first];
2428 
2429   for (unsigned int i = first; i <= last; ++i)
2430     {
2431       store_immediate_info *info = m_store_info[i];
2432       struct symbolic_number this_n = info->n;
2433       this_n.type = type;
2434       if (!this_n.base_addr)
2435 	this_n.range = try_size / BITS_PER_UNIT;
2436       else
	/* Update vuse in case it has been changed by output_merged_stores.  */
2438 	this_n.vuse = gimple_vuse (info->ins_stmt);
2439       unsigned int bitpos = info->bitpos - infof->bitpos;
2440       if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2441 			    BYTES_BIG_ENDIAN
2442 			    ? try_size - info->bitsize - bitpos
2443 			    : bitpos))
2444 	return false;
2445       if (this_n.base_addr && vuse_store)
2446 	{
2447 	  unsigned int j;
2448 	  for (j = first; j <= last; ++j)
2449 	    if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2450 	      break;
2451 	  if (j > last)
2452 	    {
2453 	      if (vuse_store == 1)
2454 		return false;
2455 	      vuse_store = 0;
2456 	    }
2457 	}
2458       if (i == first)
2459 	{
2460 	  n = this_n;
2461 	  ins_stmt = info->ins_stmt;
2462 	}
2463       else
2464 	{
2465 	  if (n.base_addr && n.vuse != this_n.vuse)
2466 	    {
2467 	      if (vuse_store == 0)
2468 		return false;
2469 	      vuse_store = 1;
2470 	    }
2471 	  if (info->order > last_order)
2472 	    {
2473 	      last_order = info->order;
2474 	      last_stmt = info->stmt;
2475 	    }
2476 	  else if (info->order < first_order)
2477 	    {
2478 	      first_order = info->order;
2479 	      first_stmt = info->stmt;
2480 	    }
2481 	  end = MAX (end, info->bitpos + info->bitsize);
2482 
2483 	  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2484 					     &this_n, &n);
2485 	  if (ins_stmt == NULL)
2486 	    return false;
2487 	}
2488     }
2489 
2490   uint64_t cmpxchg, cmpnop;
2491   find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2492 
  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
2496   if (n.n != cmpnop && n.n != cmpxchg)
2497     return false;
2498 
2499   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2500     return false;
2501 
2502   if (!check_no_overlap (m_store_info, last, LROTATE_EXPR, last_order, end))
2503     return false;
2504 
2505   /* Don't handle memory copy this way if normal non-bswap processing
2506      would handle it too.  */
2507   if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2508     {
2509       unsigned int i;
2510       for (i = first; i <= last; ++i)
2511 	if (m_store_info[i]->rhs_code != MEM_REF)
2512 	  break;
2513       if (i == last + 1)
2514 	return false;
2515     }
2516 
2517   if (n.n == cmpxchg)
2518     switch (try_size)
2519       {
2520       case 16:
2521 	/* Will emit LROTATE_EXPR.  */
2522 	break;
2523       case 32:
2524 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2525 	    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2526 	  break;
2527 	return false;
2528       case 64:
2529 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2530 	    && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2531 	  break;
2532 	return false;
2533       default:
2534 	gcc_unreachable ();
2535       }
2536 
2537   if (!allow_unaligned && n.base_addr)
2538     {
2539       unsigned int align = get_object_alignment (n.src);
2540       if (align < try_size)
2541 	return false;
2542     }
2543 
2544   /* If each load has vuse of the corresponding store, need to verify
2545      the loads can be sunk right before the last store.  */
2546   if (vuse_store == 1)
2547     {
2548       auto_vec<tree, 64> refs;
2549       for (unsigned int i = first; i <= last; ++i)
2550 	gather_bswap_load_refs (&refs,
2551 				gimple_assign_rhs1 (m_store_info[i]->stmt));
2552 
2553       unsigned int i;
2554       tree ref;
2555       FOR_EACH_VEC_ELT (refs, i, ref)
2556 	if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2557 	  return false;
2558       n.vuse = NULL_TREE;
2559     }
2560 
2561   infof->n = n;
2562   infof->ins_stmt = ins_stmt;
2563   for (unsigned int i = first; i <= last; ++i)
2564     {
2565       m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2566       m_store_info[i]->ops[0].base_addr = NULL_TREE;
2567       m_store_info[i]->ops[1].base_addr = NULL_TREE;
2568       if (i != first)
2569 	merged_store->merge_into (m_store_info[i]);
2570     }
2571 
2572   return true;
2573 }
2574 
2575 /* Go through the candidate stores recorded in m_store_info and merge them
2576    into merged_store_group objects recorded into m_merged_store_groups
2577    representing the widened stores.  Return true if coalescing was successful
2578    and the number of widened stores is fewer than the original number
2579    of stores.  */
2580 
2581 bool
2582 imm_store_chain_info::coalesce_immediate_stores ()
2583 {
2584   /* Anything less can't be processed.  */
2585   if (m_store_info.length () < 2)
2586     return false;
2587 
2588   if (dump_file && (dump_flags & TDF_DETAILS))
2589     fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
2590 	     m_store_info.length ());
2591 
2592   store_immediate_info *info;
2593   unsigned int i, ignore = 0;
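  /* IGNORE is the index of the last store already absorbed into the
     current group (initially the group seed, later possibly the last
     member of a bswap group); stores at or below it are skipped.  */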
2594 
2595   /* Order the stores by the bitposition they write to.  */
2596   m_store_info.qsort (sort_by_bitpos);
2597 
2598   info = m_store_info[0];
2599   merged_store_group *merged_store = new merged_store_group (info);
2600 
2601   FOR_EACH_VEC_ELT (m_store_info, i, info)
2602     {
2603       if (dump_file && (dump_flags & TDF_DETAILS))
2604 	{
2605 	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2606 			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
2607 		   i, info->bitsize, info->bitpos);
2608 	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
2609 	  fprintf (dump_file, "\n------------\n");
2610 	}
2611 
2612       if (i <= ignore)
2613 	continue;
2614 
      /* First try to handle a group of stores like:
2616 	 p[0] = data >> 24;
2617 	 p[1] = data >> 16;
2618 	 p[2] = data >> 8;
2619 	 p[3] = data;
2620 	 using the bswap framework.  */
2621       if (info->bitpos == merged_store->start + merged_store->width
2622 	  && merged_store->stores.length () == 1
2623 	  && merged_store->stores[0]->ins_stmt != NULL
2624 	  && info->ins_stmt != NULL)
2625 	{
2626 	  unsigned int try_size;
2627 	  for (try_size = 64; try_size >= 16; try_size >>= 1)
2628 	    if (try_coalesce_bswap (merged_store, i - 1, try_size))
2629 	      break;
2630 
2631 	  if (try_size >= 16)
2632 	    {
2633 	      ignore = i + merged_store->stores.length () - 1;
2634 	      m_merged_store_groups.safe_push (merged_store);
2635 	      if (ignore < m_store_info.length ())
2636 		merged_store = new merged_store_group (m_store_info[ignore]);
2637 	      else
2638 		merged_store = NULL;
2639 	      continue;
2640 	    }
2641 	}
2642 
2643       /* |---store 1---|
2644 	       |---store 2---|
2645 	 Overlapping stores.  */
2646       if (IN_RANGE (info->bitpos, merged_store->start,
2647 		    merged_store->start + merged_store->width - 1))
2648 	{
2649 	  /* Only allow overlapping stores of constants.  */
2650 	  if (info->rhs_code == INTEGER_CST
2651 	      && merged_store->stores[0]->rhs_code == INTEGER_CST)
2652 	    {
2653 	      merged_store->merge_overlapping (info);
2654 	      continue;
2655 	    }
2656 	}
2657       /* |---store 1---||---store 2---|
2658 	 This store is consecutive to the previous one.
2659 	 Merge it into the current store group.  There can be gaps in between
2660 	 the stores, but there can't be gaps in between bitregions.  */
2661       else if (info->rhs_code != LROTATE_EXPR
2662 	       && info->bitregion_start <= merged_store->bitregion_end
2663 	       && info->rhs_code == merged_store->stores[0]->rhs_code)
2664 	{
2665 	  store_immediate_info *infof = merged_store->stores[0];
2666 
2667 	  /* All the rhs_code ops that take 2 operands are commutative,
2668 	     swap the operands if it could make the operands compatible.  */
2669 	  if (infof->ops[0].base_addr
2670 	      && infof->ops[1].base_addr
2671 	      && info->ops[0].base_addr
2672 	      && info->ops[1].base_addr
2673 	      && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2674 			   info->bitpos - infof->bitpos)
2675 	      && operand_equal_p (info->ops[1].base_addr,
2676 				  infof->ops[0].base_addr, 0))
2677 	    {
2678 	      std::swap (info->ops[0], info->ops[1]);
2679 	      info->ops_swapped_p = true;
2680 	    }
2681 	  if ((infof->ops[0].base_addr
2682 	       ? compatible_load_p (merged_store, info, base_addr, 0)
2683 	       : !info->ops[0].base_addr)
2684 	      && (infof->ops[1].base_addr
2685 		  ? compatible_load_p (merged_store, info, base_addr, 1)
2686 		  : !info->ops[1].base_addr)
2687 	      && check_no_overlap (m_store_info, i, info->rhs_code,
2688 				   MAX (merged_store->last_order,
2689 					info->order),
2690 				   MAX (merged_store->start
2691 					+ merged_store->width,
2692 					info->bitpos + info->bitsize)))
2693 	    {
2694 	      merged_store->merge_into (info);
2695 	      continue;
2696 	    }
2697 	}
2698 
2699       /* |---store 1---| <gap> |---store 2---|.
2700 	 Gap between stores or the rhs not compatible.  Start a new group.  */
2701 
2702       /* Try to apply all the stores recorded for the group to determine
2703 	 the bitpattern they write and discard it if that fails.
2704 	 This will also reject single-store groups.  */
2705       if (!merged_store->apply_stores ())
2706 	delete merged_store;
2707       else
2708 	m_merged_store_groups.safe_push (merged_store);
2709 
2710       merged_store = new merged_store_group (info);
2711     }
2712 
2713   /* Record or discard the last store group.  */
2714   if (merged_store)
2715     {
2716       if (!merged_store->apply_stores ())
2717 	delete merged_store;
2718       else
2719 	m_merged_store_groups.safe_push (merged_store);
2720     }
2721 
2722   gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
2723   bool success
2724     = !m_merged_store_groups.is_empty ()
2725       && m_merged_store_groups.length () < m_store_info.length ();
2726 
2727   if (success && dump_file)
2728     fprintf (dump_file, "Coalescing successful!\n"
2729 			"Merged into %u stores\n",
2730 	     m_merged_store_groups.length ());
2731 
2732   return success;
2733 }
2734 
2735 /* Return the type to use for the merged stores or loads described by STMTS.
2736    This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
2737    otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
2738    of the MEM_REFs if any.  */
2739 
2740 static tree
2741 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
2742 			  unsigned short *cliquep, unsigned short *basep)
2743 {
2744   gimple *stmt;
2745   unsigned int i;
2746   tree type = NULL_TREE;
2747   tree ret = NULL_TREE;
2748   *cliquep = 0;
2749   *basep = 0;
2750 
2751   FOR_EACH_VEC_ELT (stmts, i, stmt)
2752     {
2753       tree ref = is_load ? gimple_assign_rhs1 (stmt)
2754 			 : gimple_assign_lhs (stmt);
2755       tree type1 = reference_alias_ptr_type (ref);
2756       tree base = get_base_address (ref);
2757 
2758       if (i == 0)
2759 	{
2760 	  if (TREE_CODE (base) == MEM_REF)
2761 	    {
2762 	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
2763 	      *basep = MR_DEPENDENCE_BASE (base);
2764 	    }
2765 	  ret = type = type1;
2766 	  continue;
2767 	}
2768       if (!alias_ptr_types_compatible_p (type, type1))
2769 	ret = ptr_type_node;
2770       if (TREE_CODE (base) != MEM_REF
2771 	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
2772 	  || *basep != MR_DEPENDENCE_BASE (base))
2773 	{
2774 	  *cliquep = 0;
2775 	  *basep = 0;
2776 	}
2777     }
2778   return ret;
2779 }
2780 
/* Return the first location_t information found among the statements
   in STMTS.  */
2783 
2784 static location_t
2785 get_location_for_stmts (vec<gimple *> &stmts)
2786 {
2787   gimple *stmt;
2788   unsigned int i;
2789 
2790   FOR_EACH_VEC_ELT (stmts, i, stmt)
2791     if (gimple_has_location (stmt))
2792       return gimple_location (stmt);
2793 
2794   return UNKNOWN_LOCATION;
2795 }
2796 
/* Used to describe a store resulting from splitting a wide store into
   smaller regularly-sized stores in split_group.  */
2799 
2800 struct split_store
2801 {
2802   unsigned HOST_WIDE_INT bytepos;
2803   unsigned HOST_WIDE_INT size;
2804   unsigned HOST_WIDE_INT align;
2805   auto_vec<store_immediate_info *> orig_stores;
2806   /* True if there is a single orig stmt covering the whole split store.  */
2807   bool orig;
2808   split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
2809 	       unsigned HOST_WIDE_INT);
2810 };
2811 
2812 /* Simple constructor.  */
2813 
2814 split_store::split_store (unsigned HOST_WIDE_INT bp,
2815 			  unsigned HOST_WIDE_INT sz,
2816 			  unsigned HOST_WIDE_INT al)
2817 			  : bytepos (bp), size (sz), align (al), orig (false)
2818 {
2819   orig_stores.create (0);
2820 }
2821 
/* Record all stores in GROUP that write to the region starting at BITPOS
   that is BITSIZE bits wide.  Record infos for such statements in STORES if
   non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
   if there is exactly one original store in the range.  */
2826 
2827 static store_immediate_info *
2828 find_constituent_stores (struct merged_store_group *group,
2829 			 vec<store_immediate_info *> *stores,
2830 			 unsigned int *first,
2831 			 unsigned HOST_WIDE_INT bitpos,
2832 			 unsigned HOST_WIDE_INT bitsize)
2833 {
2834   store_immediate_info *info, *ret = NULL;
2835   unsigned int i;
2836   bool second = false;
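  /* SECOND is set once more than one store has been seen in the range;
     it keeps RET at NULL so that an info is only returned when exactly
     one original store covers the range.  */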
2837   bool update_first = true;
2838   unsigned HOST_WIDE_INT end = bitpos + bitsize;
2839   for (i = *first; group->stores.iterate (i, &info); ++i)
2840     {
2841       unsigned HOST_WIDE_INT stmt_start = info->bitpos;
2842       unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
2843       if (stmt_end <= bitpos)
2844 	{
2845 	  /* BITPOS passed to this function never decreases from within the
2846 	     same split_group call, so optimize and don't scan info records
2847 	     which are known to end before or at BITPOS next time.
2848 	     Only do it if all stores before this one also pass this.  */
2849 	  if (update_first)
2850 	    *first = i + 1;
2851 	  continue;
2852 	}
2853       else
2854 	update_first = false;
2855 
2856       /* The stores in GROUP are ordered by bitposition so if we're past
2857 	 the region for this group return early.  */
2858       if (stmt_start >= end)
2859 	return ret;
2860 
2861       if (stores)
2862 	{
2863 	  stores->safe_push (info);
2864 	  if (ret)
2865 	    {
2866 	      ret = NULL;
2867 	      second = true;
2868 	    }
2869 	}
2870       else if (ret)
2871 	return NULL;
2872       if (!second)
2873 	ret = info;
2874     }
2875   return ret;
2876 }
2877 
/* Return how many SSA_NAMEs used to compute the value to store in the INFO
   store have multiple uses.  If any SSA_NAME has multiple uses, also
   count statements needed to compute it.  */
2881 
2882 static unsigned
2883 count_multiple_uses (store_immediate_info *info)
2884 {
2885   gimple *stmt = info->stmt;
2886   unsigned ret = 0;
2887   switch (info->rhs_code)
2888     {
2889     case INTEGER_CST:
2890       return 0;
2891     case BIT_AND_EXPR:
2892     case BIT_IOR_EXPR:
2893     case BIT_XOR_EXPR:
2894       if (info->bit_not_p)
2895 	{
2896 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
2897 	    ret = 1; /* Fall through below to return
2898 			the BIT_NOT_EXPR stmt and then
2899 			BIT_{AND,IOR,XOR}_EXPR and anything it
2900 			uses.  */
2901 	  else
2902 	    /* stmt is after this the BIT_NOT_EXPR.  */
2903 	    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2904 	}
2905       if (!has_single_use (gimple_assign_rhs1 (stmt)))
2906 	{
2907 	  ret += 1 + info->ops[0].bit_not_p;
2908 	  if (info->ops[1].base_addr)
2909 	    ret += 1 + info->ops[1].bit_not_p;
2910 	  return ret + 1;
2911 	}
2912       stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2913       /* stmt is now the BIT_*_EXPR.  */
2914       if (!has_single_use (gimple_assign_rhs1 (stmt)))
2915 	ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
2916       else if (info->ops[info->ops_swapped_p].bit_not_p)
2917 	{
2918 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2919 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
2920 	    ++ret;
2921 	}
2922       if (info->ops[1].base_addr == NULL_TREE)
2923 	{
2924 	  gcc_checking_assert (!info->ops_swapped_p);
2925 	  return ret;
2926 	}
2927       if (!has_single_use (gimple_assign_rhs2 (stmt)))
2928 	ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
2929       else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
2930 	{
2931 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2932 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
2933 	    ++ret;
2934 	}
2935       return ret;
2936     case MEM_REF:
2937       if (!has_single_use (gimple_assign_rhs1 (stmt)))
2938 	return 1 + info->ops[0].bit_not_p;
2939       else if (info->ops[0].bit_not_p)
2940 	{
2941 	  stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2942 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
2943 	    return 1;
2944 	}
2945       return 0;
2946     default:
2947       gcc_unreachable ();
2948     }
2949 }
2950 
2951 /* Split a merged store described by GROUP by populating the SPLIT_STORES
2952    vector (if non-NULL) with split_store structs describing the byte offset
2953    (from the base), the bit size and alignment of each store as well as the
2954    original statements involved in each such split group.
2955    This is to separate the splitting strategy from the statement
2956    building/emission/linking done in output_merged_store.
2957    Return number of new stores.
2958    If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
2959    If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
2960    If SPLIT_STORES is NULL, it is just a dry run to count number of
2961    new stores.  */
2962 
2963 static unsigned int
2964 split_group (merged_store_group *group, bool allow_unaligned_store,
2965 	     bool allow_unaligned_load,
2966 	     vec<struct split_store *> *split_stores,
2967 	     unsigned *total_orig,
2968 	     unsigned *total_new)
2969 {
2970   unsigned HOST_WIDE_INT pos = group->bitregion_start;
2971   unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
2972   unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
2973   unsigned HOST_WIDE_INT group_align = group->align;
2974   unsigned HOST_WIDE_INT align_base = group->align_base;
2975   unsigned HOST_WIDE_INT group_load_align = group_align;
2976   bool any_orig = false;
2977 
2978   gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
2979 
2980   if (group->stores[0]->rhs_code == LROTATE_EXPR
2981       || group->stores[0]->rhs_code == NOP_EXPR)
2982     {
      /* For the bswap framework using sets of stores, all the checking
	 has been done earlier in try_coalesce_bswap and the group needs
	 to be emitted as a single store.  */
2986       if (total_orig)
2987 	{
	  /* Avoid the old/new stmt count heuristics.  It should
	     always be beneficial.  */
2990 	  total_new[0] = 1;
2991 	  total_orig[0] = 2;
2992 	}
2993 
2994       if (split_stores)
2995 	{
2996 	  unsigned HOST_WIDE_INT align_bitpos
2997 	    = (group->start - align_base) & (group_align - 1);
2998 	  unsigned HOST_WIDE_INT align = group_align;
2999 	  if (align_bitpos)
3000 	    align = least_bit_hwi (align_bitpos);
3001 	  bytepos = group->start / BITS_PER_UNIT;
3002 	  struct split_store *store
3003 	    = new split_store (bytepos, group->width, align);
3004 	  unsigned int first = 0;
3005 	  find_constituent_stores (group, &store->orig_stores,
3006 				   &first, group->start, group->width);
3007 	  split_stores->safe_push (store);
3008 	}
3009 
3010       return 1;
3011     }
3012 
3013   unsigned int ret = 0, first = 0;
3014   unsigned HOST_WIDE_INT try_pos = bytepos;
3015 
3016   if (total_orig)
3017     {
3018       unsigned int i;
3019       store_immediate_info *info = group->stores[0];
3020 
3021       total_new[0] = 0;
3022       total_orig[0] = 1; /* The orig store.  */
3023       info = group->stores[0];
3024       if (info->ops[0].base_addr)
3025 	total_orig[0]++;
3026       if (info->ops[1].base_addr)
3027 	total_orig[0]++;
3028       switch (info->rhs_code)
3029 	{
3030 	case BIT_AND_EXPR:
3031 	case BIT_IOR_EXPR:
3032 	case BIT_XOR_EXPR:
3033 	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
3034 	  break;
3035 	default:
3036 	  break;
3037 	}
3038       total_orig[0] *= group->stores.length ();
3039 
3040       FOR_EACH_VEC_ELT (group->stores, i, info)
3041 	{
3042 	  total_new[0] += count_multiple_uses (info);
3043 	  total_orig[0] += (info->bit_not_p
3044 			    + info->ops[0].bit_not_p
3045 			    + info->ops[1].bit_not_p);
3046 	}
3047     }
3048 
3049   if (!allow_unaligned_load)
3050     for (int i = 0; i < 2; ++i)
3051       if (group->load_align[i])
3052 	group_load_align = MIN (group_load_align, group->load_align[i]);
3053 
3054   while (size > 0)
3055     {
3056       if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3057 	  && group->mask[try_pos - bytepos] == (unsigned char) ~0U)
3058 	{
3059 	  /* Skip padding bytes.  */
3060 	  ++try_pos;
3061 	  size -= BITS_PER_UNIT;
3062 	  continue;
3063 	}
3064 
3065       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3066       unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3067       unsigned HOST_WIDE_INT align_bitpos
3068 	= (try_bitpos - align_base) & (group_align - 1);
3069       unsigned HOST_WIDE_INT align = group_align;
3070       if (align_bitpos)
3071 	align = least_bit_hwi (align_bitpos);
3072       if (!allow_unaligned_store)
3073 	try_size = MIN (try_size, align);
3074       if (!allow_unaligned_load)
3075 	{
3076 	  /* If we can't do or don't want to do unaligned stores
3077 	     as well as loads, we need to take the loads into account
3078 	     as well.  */
3079 	  unsigned HOST_WIDE_INT load_align = group_load_align;
3080 	  align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3081 	  if (align_bitpos)
3082 	    load_align = least_bit_hwi (align_bitpos);
3083 	  for (int i = 0; i < 2; ++i)
3084 	    if (group->load_align[i])
3085 	      {
3086 		align_bitpos
3087 		  = known_alignment (try_bitpos
3088 				     - group->stores[0]->bitpos
3089 				     + group->stores[0]->ops[i].bitpos
3090 				     - group->load_align_base[i]);
3091 		if (align_bitpos & (group_load_align - 1))
3092 		  {
3093 		    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3094 		    load_align = MIN (load_align, a);
3095 		  }
3096 	      }
3097 	  try_size = MIN (try_size, load_align);
3098 	}
3099       store_immediate_info *info
3100 	= find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3101       if (info)
3102 	{
3103 	  /* If there is just one original statement for the range, see if
3104 	     we can just reuse the original store which could be even larger
3105 	     than try_size.  */
3106 	  unsigned HOST_WIDE_INT stmt_end
3107 	    = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3108 	  info = find_constituent_stores (group, NULL, &first, try_bitpos,
3109 					  stmt_end - try_bitpos);
3110 	  if (info && info->bitpos >= try_bitpos)
3111 	    {
3112 	      try_size = stmt_end - try_bitpos;
3113 	      goto found;
3114 	    }
3115 	}
3116 
3117       /* Approximate store bitsize for the case when there are no padding
3118 	 bits.  */
3119       while (try_size > size)
3120 	try_size /= 2;
3121       /* Now look for whole padding bytes at the end of that bitsize.  */
3122       for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3123 	if (group->mask[try_pos - bytepos + nonmasked - 1]
3124 	    != (unsigned char) ~0U)
3125 	  break;
3126       if (nonmasked == 0)
3127 	{
3128 	  /* If entire try_size range is padding, skip it.  */
3129 	  try_pos += try_size / BITS_PER_UNIT;
3130 	  size -= try_size;
3131 	  continue;
3132 	}
      /* Otherwise try to decrease try_size if the second half, the last 3
	 quarters etc. are padding.  */
3135       nonmasked *= BITS_PER_UNIT;
3136       while (nonmasked <= try_size / 2)
3137 	try_size /= 2;
3138       if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3139 	{
3140 	  /* Now look for whole padding bytes at the start of that bitsize.  */
3141 	  unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3142 	  for (masked = 0; masked < try_bytesize; ++masked)
3143 	    if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U)
3144 	      break;
3145 	  masked *= BITS_PER_UNIT;
3146 	  gcc_assert (masked < try_size);
3147 	  if (masked >= try_size / 2)
3148 	    {
3149 	      while (masked >= try_size / 2)
3150 		{
3151 		  try_size /= 2;
3152 		  try_pos += try_size / BITS_PER_UNIT;
3153 		  size -= try_size;
3154 		  masked -= try_size;
3155 		}
3156 	      /* Need to recompute the alignment, so just retry at the new
3157 		 position.  */
3158 	      continue;
3159 	    }
3160 	}
3161 
3162     found:
3163       ++ret;
3164 
3165       if (split_stores)
3166 	{
3167 	  struct split_store *store
3168 	    = new split_store (try_pos, try_size, align);
3169 	  info = find_constituent_stores (group, &store->orig_stores,
3170 					  &first, try_bitpos, try_size);
3171 	  if (info
3172 	      && info->bitpos >= try_bitpos
3173 	      && info->bitpos + info->bitsize <= try_bitpos + try_size)
3174 	    {
3175 	      store->orig = true;
3176 	      any_orig = true;
3177 	    }
3178 	  split_stores->safe_push (store);
3179 	}
3180 
3181       try_pos += try_size / BITS_PER_UNIT;
3182       size -= try_size;
3183     }
3184 
3185   if (total_orig)
3186     {
3187       unsigned int i;
3188       struct split_store *store;
3189       /* If we are reusing some original stores and any of the
3190 	 original SSA_NAMEs had multiple uses, we need to subtract
3191 	 those now before we add the new ones.  */
3192       if (total_new[0] && any_orig)
3193 	{
3194 	  FOR_EACH_VEC_ELT (*split_stores, i, store)
3195 	    if (store->orig)
3196 	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3197 	}
3198       total_new[0] += ret; /* The new store.  */
3199       store_immediate_info *info = group->stores[0];
3200       if (info->ops[0].base_addr)
3201 	total_new[0] += ret;
3202       if (info->ops[1].base_addr)
3203 	total_new[0] += ret;
3204       switch (info->rhs_code)
3205 	{
3206 	case BIT_AND_EXPR:
3207 	case BIT_IOR_EXPR:
3208 	case BIT_XOR_EXPR:
3209 	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
3210 	  break;
3211 	default:
3212 	  break;
3213 	}
3214       FOR_EACH_VEC_ELT (*split_stores, i, store)
3215 	{
3216 	  unsigned int j;
3217 	  bool bit_not_p[3] = { false, false, false };
	  /* If all orig_stores have a particular bit_not_p flag set, then
	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
	     If only some orig_stores have that flag set, then we'd use
	     a BIT_XOR_EXPR with a mask and need to account for it.  */
3223 	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3224 	    {
3225 	      if (info->ops[0].bit_not_p)
3226 		bit_not_p[0] = true;
3227 	      if (info->ops[1].bit_not_p)
3228 		bit_not_p[1] = true;
3229 	      if (info->bit_not_p)
3230 		bit_not_p[2] = true;
3231 	    }
3232 	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3233 	}
3234 
3235     }
3236 
3237   return ret;
3238 }
3239 
/* Return the operation through which the operand IDX (if < 2) or
   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
   is done, if BIT_NOT_EXPR, all bits are inverted, and if BIT_XOR_EXPR,
   the bits should be xored with the mask returned in MASK.  */
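
/* For illustration (hypothetical situation): if a split store covers two
   original byte stores and only the first one had its stored value
   inverted with ~, we cannot return BIT_NOT_EXPR for the whole group.
   Instead a mask with all bits of that first byte set is built, so that
   on a little-endian target the caller emits VAL ^ 0xff, inverting
   exactly the bits the original BIT_NOT_EXPR covered.  */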
3244 
3245 static enum tree_code
3246 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3247 {
3248   unsigned int i;
3249   store_immediate_info *info;
3250   unsigned int cnt = 0;
3251   bool any_paddings = false;
3252   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3253     {
3254       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3255       if (bit_not_p)
3256 	{
3257 	  ++cnt;
3258 	  tree lhs = gimple_assign_lhs (info->stmt);
3259 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3260 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3261 	    any_paddings = true;
3262 	}
3263     }
3264   mask = NULL_TREE;
3265   if (cnt == 0)
3266     return NOP_EXPR;
3267   if (cnt == split_store->orig_stores.length () && !any_paddings)
3268     return BIT_NOT_EXPR;
3269 
3270   unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3271   unsigned buf_size = split_store->size / BITS_PER_UNIT;
3272   unsigned char *buf
3273     = XALLOCAVEC (unsigned char, buf_size);
3274   memset (buf, ~0U, buf_size);
3275   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3276     {
3277       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3278       if (!bit_not_p)
3279 	continue;
3280       /* Clear regions with bit_not_p and invert afterwards, rather than
3281 	 clear regions with !bit_not_p, so that gaps in between stores aren't
3282 	 set in the mask.  */
3283       unsigned HOST_WIDE_INT bitsize = info->bitsize;
3284       unsigned HOST_WIDE_INT prec = bitsize;
3285       unsigned int pos_in_buffer = 0;
3286       if (any_paddings)
3287 	{
3288 	  tree lhs = gimple_assign_lhs (info->stmt);
3289 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3290 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3291 	    prec = TYPE_PRECISION (TREE_TYPE (lhs));
3292 	}
3293       if (info->bitpos < try_bitpos)
3294 	{
3295 	  gcc_assert (info->bitpos + bitsize > try_bitpos);
3296 	  if (!BYTES_BIG_ENDIAN)
3297 	    {
3298 	      if (prec <= try_bitpos - info->bitpos)
3299 		continue;
3300 	      prec -= try_bitpos - info->bitpos;
3301 	    }
3302 	  bitsize -= try_bitpos - info->bitpos;
3303 	  if (BYTES_BIG_ENDIAN && prec > bitsize)
3304 	    prec = bitsize;
3305 	}
3306       else
3307 	pos_in_buffer = info->bitpos - try_bitpos;
3308       if (prec < bitsize)
3309 	{
	  /* If this is a bool inversion, invert just the least significant
	     prec bits rather than all bits of the stored value.  */
3312 	  if (BYTES_BIG_ENDIAN)
3313 	    {
3314 	      pos_in_buffer += bitsize - prec;
3315 	      if (pos_in_buffer >= split_store->size)
3316 		continue;
3317 	    }
3318 	  bitsize = prec;
3319 	}
3320       if (pos_in_buffer + bitsize > split_store->size)
3321 	bitsize = split_store->size - pos_in_buffer;
3322       unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3323       if (BYTES_BIG_ENDIAN)
3324 	clear_bit_region_be (p, (BITS_PER_UNIT - 1
3325 				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3326       else
3327 	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3328     }
3329   for (unsigned int i = 0; i < buf_size; ++i)
3330     buf[i] = ~buf[i];
3331   mask = native_interpret_expr (int_type, buf, buf_size);
3332   return BIT_XOR_EXPR;
3333 }
3334 
/* Given a merged store group GROUP, output the widened version of it.
   The store chain is against the base object of this chain (base_addr).
   Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
   unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
   Make sure that the number of statements output is less than the number of
   original statements.  If a better sequence is possible emit it and
   return true.  */
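
/* A sketch of the kind of sequence emitted for a split store that only
   partially covers its bit region (illustrative names, not from a
   testcase):
     _1 = MEM <unsigned int> [(char *)&dst];   // read the region
     _2 = _1 & MASK;                           // keep bits not written
     _3 = VAL & ~MASK;                         // new bits, gaps cleared
     _4 = _2 | _3;
     MEM <unsigned int> [(char *)&dst] = _4;   // one widened store
   Fully covered regions skip the load/and/ior and store VAL directly.  */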
3342 
3343 bool
3344 imm_store_chain_info::output_merged_store (merged_store_group *group)
3345 {
3346   unsigned HOST_WIDE_INT start_byte_pos
3347     = group->bitregion_start / BITS_PER_UNIT;
3348 
3349   unsigned int orig_num_stmts = group->stores.length ();
3350   if (orig_num_stmts < 2)
3351     return false;
3352 
3353   auto_vec<struct split_store *, 32> split_stores;
3354   split_stores.create (0);
3355   bool allow_unaligned_store
3356     = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
3357   bool allow_unaligned_load = allow_unaligned_store;
3358   if (allow_unaligned_store)
3359     {
      /* If unaligned stores are allowed, see how many stores we'd emit
	 with unaligned stores allowed and how many with aligned stores
	 only.  Use unaligned stores only if that results in fewer stores.  */
3363       unsigned aligned_cnt
3364 	= split_group (group, false, allow_unaligned_load, NULL, NULL, NULL);
3365       unsigned unaligned_cnt
3366 	= split_group (group, true, allow_unaligned_load, NULL, NULL, NULL);
3367       if (aligned_cnt <= unaligned_cnt)
3368 	allow_unaligned_store = false;
3369     }
3370   unsigned total_orig, total_new;
3371   split_group (group, allow_unaligned_store, allow_unaligned_load,
3372 	       &split_stores, &total_orig, &total_new);
3373 
3374   if (split_stores.length () >= orig_num_stmts)
3375     {
3376       /* We didn't manage to reduce the number of statements.  Bail out.  */
3377       if (dump_file && (dump_flags & TDF_DETAILS))
3378 	fprintf (dump_file, "Exceeded original number of stmts (%u)."
3379 			    "  Not profitable to emit new sequence.\n",
3380 		 orig_num_stmts);
3381       return false;
3382     }
3383   if (total_orig <= total_new)
3384     {
3385       /* If number of estimated new statements is above estimated original
3386 	 statements, bail out too.  */
3387       if (dump_file && (dump_flags & TDF_DETAILS))
3388 	fprintf (dump_file, "Estimated number of original stmts (%u)"
3389 			    " not larger than estimated number of new"
3390 			    " stmts (%u).\n",
3391 		 total_orig, total_new);
3392       return false;
3393     }
3394 
3395   gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3396   gimple_seq seq = NULL;
3397   tree last_vdef, new_vuse;
3398   last_vdef = gimple_vdef (group->last_stmt);
3399   new_vuse = gimple_vuse (group->last_stmt);
3400   tree bswap_res = NULL_TREE;
3401 
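  /* Illustrative (hypothetical) example of what the block below arranges:
     four byte stores writing the bytes of a 32-bit value loaded from q in
     byte-reversed order get replaced by something like
       _1 = MEM <unsigned int> [q];
       _2 = __builtin_bswap32 (_1);
       MEM <unsigned int> [p] = _2;
     whereas a NOP_EXPR group (bytes stored in identity order) forwards the
     wide loaded value without the bswap call.  */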
3402   if (group->stores[0]->rhs_code == LROTATE_EXPR
3403       || group->stores[0]->rhs_code == NOP_EXPR)
3404     {
3405       tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3406       gimple *ins_stmt = group->stores[0]->ins_stmt;
3407       struct symbolic_number *n = &group->stores[0]->n;
3408       bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3409 
3410       switch (n->range)
3411 	{
3412 	case 16:
3413 	  load_type = bswap_type = uint16_type_node;
3414 	  break;
3415 	case 32:
3416 	  load_type = uint32_type_node;
3417 	  if (bswap)
3418 	    {
3419 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3420 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3421 	    }
3422 	  break;
3423 	case 64:
3424 	  load_type = uint64_type_node;
3425 	  if (bswap)
3426 	    {
3427 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3428 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3429 	    }
3430 	  break;
3431 	default:
3432 	  gcc_unreachable ();
3433 	}
3434 
      /* If each load has the vuse of the corresponding store,
	 we've checked the aliasing already in try_coalesce_bswap and
	 we want to sink the load into seq.  So we need to use new_vuse
	 on the load.  */
3439       if (n->base_addr)
3440 	{
3441 	  if (n->vuse == NULL)
3442 	    {
3443 	      n->vuse = new_vuse;
3444 	      ins_stmt = NULL;
3445 	    }
3446 	  else
	    /* Update the vuse in case it has been changed by
	       output_merged_stores.  */
3448 	    n->vuse = gimple_vuse (ins_stmt);
3449 	}
3450       bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3451 				 bswap_type, load_type, n, bswap);
3452       gcc_assert (bswap_res);
3453     }
3454 
3455   gimple *stmt = NULL;
3456   split_store *split_store;
3457   unsigned int i;
3458   auto_vec<gimple *, 32> orig_stmts;
3459   gimple_seq this_seq;
3460   tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3461 				      is_gimple_mem_ref_addr, NULL_TREE);
3462   gimple_seq_add_seq_without_update (&seq, this_seq);
3463 
3464   tree load_addr[2] = { NULL_TREE, NULL_TREE };
3465   gimple_seq load_seq[2] = { NULL, NULL };
3466   gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3467   for (int j = 0; j < 2; ++j)
3468     {
3469       store_operand_info &op = group->stores[0]->ops[j];
3470       if (op.base_addr == NULL_TREE)
3471 	continue;
3472 
3473       store_immediate_info *infol = group->stores.last ();
3474       if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3475 	{
3476 	  /* We can't pick the location randomly; while we've verified
	     all the loads have the same vuse, they can still be in different
3478 	     basic blocks and we need to pick the one from the last bb:
3479 	     int x = q[0];
3480 	     if (x == N) return;
3481 	     int y = q[1];
3482 	     p[0] = x;
3483 	     p[1] = y;
3484 	     otherwise if we put the wider load at the q[0] load, we might
3485 	     segfault if q[1] is not mapped.  */
3486 	  basic_block bb = gimple_bb (op.stmt);
3487 	  gimple *ostmt = op.stmt;
3488 	  store_immediate_info *info;
3489 	  FOR_EACH_VEC_ELT (group->stores, i, info)
3490 	    {
3491 	      gimple *tstmt = info->ops[j].stmt;
3492 	      basic_block tbb = gimple_bb (tstmt);
3493 	      if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3494 		{
3495 		  ostmt = tstmt;
3496 		  bb = tbb;
3497 		}
3498 	    }
3499 	  load_gsi[j] = gsi_for_stmt (ostmt);
3500 	  load_addr[j]
3501 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3502 				      &load_seq[j], is_gimple_mem_ref_addr,
3503 				      NULL_TREE);
3504 	}
3505       else if (operand_equal_p (base_addr, op.base_addr, 0))
3506 	load_addr[j] = addr;
3507       else
3508 	{
3509 	  load_addr[j]
3510 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3511 				      &this_seq, is_gimple_mem_ref_addr,
3512 				      NULL_TREE);
3513 	  gimple_seq_add_seq_without_update (&seq, this_seq);
3514 	}
3515     }
3516 
3517   FOR_EACH_VEC_ELT (split_stores, i, split_store)
3518     {
3519       unsigned HOST_WIDE_INT try_size = split_store->size;
3520       unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
3521       unsigned HOST_WIDE_INT align = split_store->align;
3522       tree dest, src;
3523       location_t loc;
3524       if (split_store->orig)
3525 	{
3526 	  /* If there is just a single constituent store which covers
3527 	     the whole area, just reuse the lhs and rhs.  */
3528 	  gimple *orig_stmt = split_store->orig_stores[0]->stmt;
3529 	  dest = gimple_assign_lhs (orig_stmt);
3530 	  src = gimple_assign_rhs1 (orig_stmt);
3531 	  loc = gimple_location (orig_stmt);
3532 	}
3533       else
3534 	{
3535 	  store_immediate_info *info;
3536 	  unsigned short clique, base;
3537 	  unsigned int k;
3538 	  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3539 	    orig_stmts.safe_push (info->stmt);
3540 	  tree offset_type
3541 	    = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
3542 	  loc = get_location_for_stmts (orig_stmts);
3543 	  orig_stmts.truncate (0);
3544 
3545 	  tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
3546 	  int_type = build_aligned_type (int_type, align);
3547 	  dest = fold_build2 (MEM_REF, int_type, addr,
3548 			      build_int_cst (offset_type, try_pos));
3549 	  if (TREE_CODE (dest) == MEM_REF)
3550 	    {
3551 	      MR_DEPENDENCE_CLIQUE (dest) = clique;
3552 	      MR_DEPENDENCE_BASE (dest) = base;
3553 	    }
3554 
3555 	  tree mask = integer_zero_node;
3556 	  if (!bswap_res)
3557 	    mask = native_interpret_expr (int_type,
3558 					  group->mask + try_pos
3559 					  - start_byte_pos,
3560 					  group->buf_size);
3561 
3562 	  tree ops[2];
3563 	  for (int j = 0;
3564 	       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
3565 	       ++j)
3566 	    {
3567 	      store_operand_info &op = split_store->orig_stores[0]->ops[j];
3568 	      if (bswap_res)
3569 		ops[j] = bswap_res;
3570 	      else if (op.base_addr)
3571 		{
3572 		  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3573 		    orig_stmts.safe_push (info->ops[j].stmt);
3574 
3575 		  offset_type = get_alias_type_for_stmts (orig_stmts, true,
3576 							  &clique, &base);
3577 		  location_t load_loc = get_location_for_stmts (orig_stmts);
3578 		  orig_stmts.truncate (0);
3579 
3580 		  unsigned HOST_WIDE_INT load_align = group->load_align[j];
3581 		  unsigned HOST_WIDE_INT align_bitpos
3582 		    = known_alignment (try_pos * BITS_PER_UNIT
3583 				       - split_store->orig_stores[0]->bitpos
3584 				       + op.bitpos);
3585 		  if (align_bitpos & (load_align - 1))
3586 		    load_align = least_bit_hwi (align_bitpos);
3587 
3588 		  tree load_int_type
3589 		    = build_nonstandard_integer_type (try_size, UNSIGNED);
3590 		  load_int_type
3591 		    = build_aligned_type (load_int_type, load_align);
3592 
3593 		  poly_uint64 load_pos
3594 		    = exact_div (try_pos * BITS_PER_UNIT
3595 				 - split_store->orig_stores[0]->bitpos
3596 				 + op.bitpos,
3597 				 BITS_PER_UNIT);
3598 		  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
3599 					build_int_cst (offset_type, load_pos));
3600 		  if (TREE_CODE (ops[j]) == MEM_REF)
3601 		    {
3602 		      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
3603 		      MR_DEPENDENCE_BASE (ops[j]) = base;
3604 		    }
3605 		  if (!integer_zerop (mask))
		    /* The load might load some bits (that will be masked off
		       later on) uninitialized, so avoid -W*uninitialized
		       warnings in that case.  */
3609 		    TREE_NO_WARNING (ops[j]) = 1;
3610 
3611 		  stmt = gimple_build_assign (make_ssa_name (int_type),
3612 					      ops[j]);
3613 		  gimple_set_location (stmt, load_loc);
3614 		  if (gsi_bb (load_gsi[j]))
3615 		    {
3616 		      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
3617 		      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
3618 		    }
3619 		  else
3620 		    {
3621 		      gimple_set_vuse (stmt, new_vuse);
3622 		      gimple_seq_add_stmt_without_update (&seq, stmt);
3623 		    }
3624 		  ops[j] = gimple_assign_lhs (stmt);
3625 		  tree xor_mask;
3626 		  enum tree_code inv_op
3627 		    = invert_op (split_store, j, int_type, xor_mask);
3628 		  if (inv_op != NOP_EXPR)
3629 		    {
3630 		      stmt = gimple_build_assign (make_ssa_name (int_type),
3631 						  inv_op, ops[j], xor_mask);
3632 		      gimple_set_location (stmt, load_loc);
3633 		      ops[j] = gimple_assign_lhs (stmt);
3634 
3635 		      if (gsi_bb (load_gsi[j]))
3636 			gimple_seq_add_stmt_without_update (&load_seq[j],
3637 							    stmt);
3638 		      else
3639 			gimple_seq_add_stmt_without_update (&seq, stmt);
3640 		    }
3641 		}
3642 	      else
3643 		ops[j] = native_interpret_expr (int_type,
3644 						group->val + try_pos
3645 						- start_byte_pos,
3646 						group->buf_size);
3647 	    }
3648 
3649 	  switch (split_store->orig_stores[0]->rhs_code)
3650 	    {
3651 	    case BIT_AND_EXPR:
3652 	    case BIT_IOR_EXPR:
3653 	    case BIT_XOR_EXPR:
3654 	      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3655 		{
3656 		  tree rhs1 = gimple_assign_rhs1 (info->stmt);
3657 		  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
3658 		}
3659 	      location_t bit_loc;
3660 	      bit_loc = get_location_for_stmts (orig_stmts);
3661 	      orig_stmts.truncate (0);
3662 
3663 	      stmt
3664 		= gimple_build_assign (make_ssa_name (int_type),
3665 				       split_store->orig_stores[0]->rhs_code,
3666 				       ops[0], ops[1]);
3667 	      gimple_set_location (stmt, bit_loc);
3668 	      /* If there is just one load and there is a separate
3669 		 load_seq[0], emit the bitwise op right after it.  */
3670 	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
3671 		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
3672 	      /* Otherwise, if at least one load is in seq, we need to
3673 		 emit the bitwise op right before the store.  If there
		 are two loads and they are emitted somewhere else, it would
3675 		 be better to emit the bitwise op as early as possible;
3676 		 we don't track where that would be possible right now
3677 		 though.  */
3678 	      else
3679 		gimple_seq_add_stmt_without_update (&seq, stmt);
3680 	      src = gimple_assign_lhs (stmt);
3681 	      tree xor_mask;
3682 	      enum tree_code inv_op;
3683 	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
3684 	      if (inv_op != NOP_EXPR)
3685 		{
3686 		  stmt = gimple_build_assign (make_ssa_name (int_type),
3687 					      inv_op, src, xor_mask);
3688 		  gimple_set_location (stmt, bit_loc);
3689 		  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
3690 		    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
3691 		  else
3692 		    gimple_seq_add_stmt_without_update (&seq, stmt);
3693 		  src = gimple_assign_lhs (stmt);
3694 		}
3695 	      break;
3696 	    case LROTATE_EXPR:
3697 	    case NOP_EXPR:
3698 	      src = ops[0];
3699 	      if (!is_gimple_val (src))
3700 		{
3701 		  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
3702 					      src);
3703 		  gimple_seq_add_stmt_without_update (&seq, stmt);
3704 		  src = gimple_assign_lhs (stmt);
3705 		}
3706 	      if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
3707 		{
3708 		  stmt = gimple_build_assign (make_ssa_name (int_type),
3709 					      NOP_EXPR, src);
3710 		  gimple_seq_add_stmt_without_update (&seq, stmt);
3711 		  src = gimple_assign_lhs (stmt);
3712 		}
3713 	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
3714 	      if (inv_op != NOP_EXPR)
3715 		{
3716 		  stmt = gimple_build_assign (make_ssa_name (int_type),
3717 					      inv_op, src, xor_mask);
3718 		  gimple_set_location (stmt, loc);
3719 		  gimple_seq_add_stmt_without_update (&seq, stmt);
3720 		  src = gimple_assign_lhs (stmt);
3721 		}
3722 	      break;
3723 	    default:
3724 	      src = ops[0];
3725 	      break;
3726 	    }
3727 
3728 	  if (!integer_zerop (mask))
3729 	    {
3730 	      tree tem = make_ssa_name (int_type);
3731 	      tree load_src = unshare_expr (dest);
	      /* The load might load some or all bits uninitialized,
		 so avoid -W*uninitialized warnings in that case.
		 As an optimization, if all the bits were provably
		 uninitialized (no stores at all yet, or the previous
		 store was a CLOBBER), we could optimize away the load
		 and replace it e.g. with 0.  */
3738 	      TREE_NO_WARNING (load_src) = 1;
3739 	      stmt = gimple_build_assign (tem, load_src);
3740 	      gimple_set_location (stmt, loc);
3741 	      gimple_set_vuse (stmt, new_vuse);
3742 	      gimple_seq_add_stmt_without_update (&seq, stmt);
3743 
3744 	      /* FIXME: If there is a single chunk of zero bits in mask,
3745 		 perhaps use BIT_INSERT_EXPR instead?  */
3746 	      stmt = gimple_build_assign (make_ssa_name (int_type),
3747 					  BIT_AND_EXPR, tem, mask);
3748 	      gimple_set_location (stmt, loc);
3749 	      gimple_seq_add_stmt_without_update (&seq, stmt);
3750 	      tem = gimple_assign_lhs (stmt);
3751 
3752 	      if (TREE_CODE (src) == INTEGER_CST)
3753 		src = wide_int_to_tree (int_type,
3754 					wi::bit_and_not (wi::to_wide (src),
3755 							 wi::to_wide (mask)));
3756 	      else
3757 		{
3758 		  tree nmask
3759 		    = wide_int_to_tree (int_type,
3760 					wi::bit_not (wi::to_wide (mask)));
3761 		  stmt = gimple_build_assign (make_ssa_name (int_type),
3762 					      BIT_AND_EXPR, src, nmask);
3763 		  gimple_set_location (stmt, loc);
3764 		  gimple_seq_add_stmt_without_update (&seq, stmt);
3765 		  src = gimple_assign_lhs (stmt);
3766 		}
3767 	      stmt = gimple_build_assign (make_ssa_name (int_type),
3768 					  BIT_IOR_EXPR, tem, src);
3769 	      gimple_set_location (stmt, loc);
3770 	      gimple_seq_add_stmt_without_update (&seq, stmt);
3771 	      src = gimple_assign_lhs (stmt);
3772 	    }
3773 	}
3774 
3775       stmt = gimple_build_assign (dest, src);
3776       gimple_set_location (stmt, loc);
3777       gimple_set_vuse (stmt, new_vuse);
3778       gimple_seq_add_stmt_without_update (&seq, stmt);
3779 
3780       tree new_vdef;
3781       if (i < split_stores.length () - 1)
3782 	new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
3783       else
3784 	new_vdef = last_vdef;
3785 
3786       gimple_set_vdef (stmt, new_vdef);
3787       SSA_NAME_DEF_STMT (new_vdef) = stmt;
3788       new_vuse = new_vdef;
3789     }
3790 
3791   FOR_EACH_VEC_ELT (split_stores, i, split_store)
3792     delete split_store;
3793 
3794   gcc_assert (seq);
3795   if (dump_file)
3796     {
3797       fprintf (dump_file,
3798 	       "New sequence of %u stmts to replace old one of %u stmts\n",
3799 	       split_stores.length (), orig_num_stmts);
3800       if (dump_flags & TDF_DETAILS)
3801 	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
3802     }
3803   gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
3804   for (int j = 0; j < 2; ++j)
3805     if (load_seq[j])
3806       gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
3807 
3808   return true;
3809 }
3810 
/* Process the merged_store_group objects created in the coalescing phase.
   The stores are all against the base object of this chain.
   Try to output the widened stores and delete the original statements if
   successful.  Return true iff any changes were made.  */
3815 
3816 bool
3817 imm_store_chain_info::output_merged_stores ()
3818 {
3819   unsigned int i;
3820   merged_store_group *merged_store;
3821   bool ret = false;
3822   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
3823     {
3824       if (output_merged_store (merged_store))
3825 	{
3826 	  unsigned int j;
3827 	  store_immediate_info *store;
3828 	  FOR_EACH_VEC_ELT (merged_store->stores, j, store)
3829 	    {
3830 	      gimple *stmt = store->stmt;
3831 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3832 	      gsi_remove (&gsi, true);
3833 	      if (stmt != merged_store->last_stmt)
3834 		{
3835 		  unlink_stmt_vdef (stmt);
3836 		  release_defs (stmt);
3837 		}
3838 	    }
3839 	  ret = true;
3840 	}
3841     }
3842   if (ret && dump_file)
3843     fprintf (dump_file, "Merging successful!\n");
3844 
3845   return ret;
3846 }
3847 
3848 /* Coalesce the store_immediate_info objects recorded against the base object
3849    BASE in the first phase and output them.
3850    Delete the allocated structures.
3851    Return true if any changes were made.  */
3852 
3853 bool
3854 imm_store_chain_info::terminate_and_process_chain ()
3855 {
3856   /* Process store chain.  */
3857   bool ret = false;
3858   if (m_store_info.length () > 1)
3859     {
3860       ret = coalesce_immediate_stores ();
3861       if (ret)
3862 	ret = output_merged_stores ();
3863     }
3864 
3865   /* Delete all the entries we allocated ourselves.  */
3866   store_immediate_info *info;
3867   unsigned int i;
3868   FOR_EACH_VEC_ELT (m_store_info, i, info)
3869     delete info;
3870 
3871   merged_store_group *merged_info;
3872   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
3873     delete merged_info;
3874 
3875   return ret;
3876 }
3877 
3878 /* Return true iff LHS is a destination potentially interesting for
3879    store merging.  In practice these are the codes that get_inner_reference
3880    can process.  */
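
/* For example (plain C for illustration): s.f = 1 (COMPONENT_REF),
   a[i] = 1 (ARRAY_REF), *p = 1 (MEM_REF) and BIT_FIELD_REF destinations
   all qualify, while an assignment to a bare VAR_DECL (no component,
   array or memory reference involved) does not.  */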
3881 
3882 static bool
3883 lhs_valid_for_store_merging_p (tree lhs)
3884 {
3885   tree_code code = TREE_CODE (lhs);
3886 
3887   if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
3888       || code == COMPONENT_REF || code == BIT_FIELD_REF)
3889     return true;
3890 
3891   return false;
3892 }
3893 
3894 /* Return true if the tree RHS is a constant we want to consider
   during store merging.  In practice this accepts all codes that
   native_encode_expr accepts.  */
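
/* For illustration: an INTEGER_CST such as 0x1234 or a REAL_CST of a
   fixed-size mode can be byte-encoded and therefore qualifies, whereas
   an SSA_NAME or anything native_encode_expr cannot encode does not.  */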
3897 
3898 static bool
3899 rhs_valid_for_store_merging_p (tree rhs)
3900 {
3901   unsigned HOST_WIDE_INT size;
3902   return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
3903 	  && native_encode_expr (rhs, NULL, size) != 0);
3904 }
3905 
/* If MEM is a memory reference usable for store merging (either as
   a store destination or for loads), return the non-NULL base_addr
   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL_TREE; *PBITSIZE is still valid even in that
   case.  */
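
/* As an illustration (hypothetical GIMPLE): for
     MEM[(int *)p_1 + 4B] = _2;
   the returned base address is p_1 and the 4-byte offset is folded into
   *PBITPOS, so stores through p_1, p_1 + 4B, p_1 + 8B, ... can all be
   recorded against the same chain.  */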
3911 
3912 static tree
3913 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
3914 			     poly_uint64 *pbitpos,
3915 			     poly_uint64 *pbitregion_start,
3916 			     poly_uint64 *pbitregion_end)
3917 {
3918   poly_int64 bitsize, bitpos;
3919   poly_uint64 bitregion_start = 0, bitregion_end = 0;
3920   machine_mode mode;
3921   int unsignedp = 0, reversep = 0, volatilep = 0;
3922   tree offset;
3923   tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
3924 					&unsignedp, &reversep, &volatilep);
3925   *pbitsize = bitsize;
3926   if (known_eq (bitsize, 0))
3927     return NULL_TREE;
3928 
3929   if (TREE_CODE (mem) == COMPONENT_REF
3930       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
3931     {
3932       get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
3933       if (maybe_ne (bitregion_end, 0U))
3934 	bitregion_end += 1;
3935     }
3936 
3937   if (reversep)
3938     return NULL_TREE;
3939 
3940   /* We do not want to rewrite TARGET_MEM_REFs.  */
3941   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
3942     return NULL_TREE;
3943   /* In some cases get_inner_reference may return a
3944      MEM_REF [ptr + byteoffset].  For the purposes of this pass
3945      canonicalize the base_addr to MEM_REF [ptr] and take
3946      byteoffset into account in the bitpos.  This occurs in
3947      PR 23684 and this way we can catch more chains.  */
3948   else if (TREE_CODE (base_addr) == MEM_REF)
3949     {
3950       poly_offset_int byte_off = mem_ref_offset (base_addr);
3951       poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
3952       bit_off += bitpos;
3953       if (known_ge (bit_off, 0) && bit_off.to_shwi (&bitpos))
3954 	{
3955 	  if (maybe_ne (bitregion_end, 0U))
3956 	    {
3957 	      bit_off = byte_off << LOG2_BITS_PER_UNIT;
3958 	      bit_off += bitregion_start;
3959 	      if (bit_off.to_uhwi (&bitregion_start))
3960 		{
3961 		  bit_off = byte_off << LOG2_BITS_PER_UNIT;
3962 		  bit_off += bitregion_end;
3963 		  if (!bit_off.to_uhwi (&bitregion_end))
3964 		    bitregion_end = 0;
3965 		}
3966 	      else
3967 		bitregion_end = 0;
3968 	    }
3969 	}
3970       else
3971 	return NULL_TREE;
3972       base_addr = TREE_OPERAND (base_addr, 0);
3973     }
  /* get_inner_reference returned the base object; take its
     address now.  */
3976   else
3977     {
3978       if (maybe_lt (bitpos, 0))
3979 	return NULL_TREE;
3980       base_addr = build_fold_addr_expr (base_addr);
3981     }
3982 
3983   if (known_eq (bitregion_end, 0U))
3984     {
3985       bitregion_start = round_down_to_byte_boundary (bitpos);
3986       bitregion_end = bitpos;
3987       bitregion_end = round_up_to_byte_boundary (bitregion_end + bitsize);
3988     }
3989 
3990   if (offset != NULL_TREE)
3991     {
      /* If the access has a variable offset then a base decl has to be
	 address-taken to be able to emit pointer-based stores to it.
3994 	 ???  We might be able to get away with re-using the original
3995 	 base up to the first variable part and then wrapping that inside
3996 	 a BIT_FIELD_REF.  */
3997       tree base = get_base_address (base_addr);
3998       if (! base
3999 	  || (DECL_P (base) && ! TREE_ADDRESSABLE (base)))
4000 	return NULL_TREE;
4001 
4002       base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4003 			  base_addr, offset);
4004     }
4005 
4006   *pbitsize = bitsize;
4007   *pbitpos = bitpos;
4008   *pbitregion_start = bitregion_start;
4009   *pbitregion_end = bitregion_end;
4010   return base_addr;
4011 }
4012 
4013 /* Return true if STMT is a load that can be used for store merging.
4014    In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
4015    BITREGION_END are properties of the corresponding store.  */
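
/* For example (hypothetical GIMPLE), for a store p[1] = _3 where
     _3 = MEM[(char *)q_2 + 1B];
   this fills *OP with the load's base address q_2 and bit position, so
   that the later phases can combine it with the other q_2-based loads
   into a single wider load.  */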
4016 
4017 static bool
4018 handled_load (gimple *stmt, store_operand_info *op,
4019 	      poly_uint64 bitsize, poly_uint64 bitpos,
4020 	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4021 {
4022   if (!is_gimple_assign (stmt))
4023     return false;
4024   if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4025     {
4026       tree rhs1 = gimple_assign_rhs1 (stmt);
4027       if (TREE_CODE (rhs1) == SSA_NAME
4028 	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4029 			   bitregion_start, bitregion_end))
4030 	{
	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4032 	     been optimized earlier, but if allowed here, would confuse the
4033 	     multiple uses counting.  */
4034 	  if (op->bit_not_p)
4035 	    return false;
4036 	  op->bit_not_p = !op->bit_not_p;
4037 	  return true;
4038 	}
4039       return false;
4040     }
4041   if (gimple_vuse (stmt)
4042       && gimple_assign_load_p (stmt)
4043       && !stmt_can_throw_internal (stmt)
4044       && !gimple_has_volatile_ops (stmt))
4045     {
4046       tree mem = gimple_assign_rhs1 (stmt);
4047       op->base_addr
4048 	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4049 				       &op->bitregion_start,
4050 				       &op->bitregion_end);
4051       if (op->base_addr != NULL_TREE
4052 	  && known_eq (op->bitsize, bitsize)
4053 	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4054 	  && known_ge (op->bitpos - op->bitregion_start,
4055 		       bitpos - bitregion_start)
4056 	  && known_ge (op->bitregion_end - op->bitpos,
4057 		       bitregion_end - bitpos))
4058 	{
4059 	  op->stmt = stmt;
4060 	  op->val = mem;
4061 	  op->bit_not_p = false;
4062 	  return true;
4063 	}
4064     }
4065   return false;
4066 }
4067 
4068 /* Record the store STMT for store merging optimization if it can be
4069    optimized.  */
4070 
4071 void
4072 pass_store_merging::process_store (gimple *stmt)
4073 {
4074   tree lhs = gimple_assign_lhs (stmt);
4075   tree rhs = gimple_assign_rhs1 (stmt);
4076   poly_uint64 bitsize, bitpos;
4077   poly_uint64 bitregion_start, bitregion_end;
4078   tree base_addr
4079     = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4080 				   &bitregion_start, &bitregion_end);
4081   if (known_eq (bitsize, 0U))
4082     return;
4083 
4084   bool invalid = (base_addr == NULL_TREE
4085 		  || (maybe_gt (bitsize,
4086 				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4087 		      && (TREE_CODE (rhs) != INTEGER_CST)));
4088   enum tree_code rhs_code = ERROR_MARK;
4089   bool bit_not_p = false;
4090   struct symbolic_number n;
4091   gimple *ins_stmt = NULL;
4092   store_operand_info ops[2];
4093   if (invalid)
4094     ;
4095   else if (rhs_valid_for_store_merging_p (rhs))
4096     {
4097       rhs_code = INTEGER_CST;
4098       ops[0].val = rhs;
4099     }
4100   else if (TREE_CODE (rhs) != SSA_NAME)
4101     invalid = true;
4102   else
4103     {
4104       gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4105       if (!is_gimple_assign (def_stmt))
4106 	invalid = true;
4107       else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4108 			     bitregion_start, bitregion_end))
4109 	rhs_code = MEM_REF;
4110       else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4111 	{
4112 	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
4113 	  if (TREE_CODE (rhs1) == SSA_NAME
4114 	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4115 	    {
4116 	      bit_not_p = true;
4117 	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
4118 	    }
4119 	}
4120       if (rhs_code == ERROR_MARK && !invalid)
4121 	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4122 	  {
4123 	  case BIT_AND_EXPR:
4124 	  case BIT_IOR_EXPR:
4125 	  case BIT_XOR_EXPR:
4126 	    tree rhs1, rhs2;
4127 	    rhs1 = gimple_assign_rhs1 (def_stmt);
4128 	    rhs2 = gimple_assign_rhs2 (def_stmt);
4129 	    invalid = true;
4130 	    if (TREE_CODE (rhs1) != SSA_NAME)
4131 	      break;
4132 	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4133 	    if (!is_gimple_assign (def_stmt1)
4134 		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4135 				  bitregion_start, bitregion_end))
4136 	      break;
4137 	    if (rhs_valid_for_store_merging_p (rhs2))
4138 	      ops[1].val = rhs2;
4139 	    else if (TREE_CODE (rhs2) != SSA_NAME)
4140 	      break;
4141 	    else
4142 	      {
4143 		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4144 		if (!is_gimple_assign (def_stmt2))
4145 		  break;
4146 		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4147 					bitregion_start, bitregion_end))
4148 		  break;
4149 	      }
4150 	    invalid = false;
4151 	    break;
4152 	  default:
4153 	    invalid = true;
4154 	    break;
4155 	  }
4156       unsigned HOST_WIDE_INT const_bitsize;
4157       if (bitsize.is_constant (&const_bitsize)
4158 	  && multiple_p (const_bitsize, BITS_PER_UNIT)
4159 	  && multiple_p (bitpos, BITS_PER_UNIT)
4160 	  && const_bitsize <= 64
4161 	  && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
4162 	{
4163 	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4164 	  if (ins_stmt)
4165 	    {
4166 	      uint64_t nn = n.n;
4167 	      for (unsigned HOST_WIDE_INT i = 0;
4168 		   i < const_bitsize;
4169 		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4170 		if ((nn & MARKER_MASK) == 0
4171 		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4172 		  {
4173 		    ins_stmt = NULL;
4174 		    break;
4175 		  }
4176 	      if (ins_stmt)
4177 		{
4178 		  if (invalid)
4179 		    {
4180 		      rhs_code = LROTATE_EXPR;
4181 		      ops[0].base_addr = NULL_TREE;
4182 		      ops[1].base_addr = NULL_TREE;
4183 		    }
4184 		  invalid = false;
4185 		}
4186 	    }
4187 	}
4188     }
4189 
4190   unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4191   unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4192   if (invalid
4193       || !bitsize.is_constant (&const_bitsize)
4194       || !bitpos.is_constant (&const_bitpos)
4195       || !bitregion_start.is_constant (&const_bitregion_start)
4196       || !bitregion_end.is_constant (&const_bitregion_end))
4197     {
4198       terminate_all_aliasing_chains (NULL, stmt);
4199       return;
4200     }
4201 
4202   if (!ins_stmt)
4203     memset (&n, 0, sizeof (n));
4204 
4205   struct imm_store_chain_info **chain_info = NULL;
4206   if (base_addr)
4207     chain_info = m_stores.get (base_addr);
4208 
4209   store_immediate_info *info;
4210   if (chain_info)
4211     {
4212       unsigned int ord = (*chain_info)->m_store_info.length ();
4213       info = new store_immediate_info (const_bitsize, const_bitpos,
4214 				       const_bitregion_start,
4215 				       const_bitregion_end,
4216 				       stmt, ord, rhs_code, n, ins_stmt,
4217 				       bit_not_p, ops[0], ops[1]);
4218       if (dump_file && (dump_flags & TDF_DETAILS))
4219 	{
4220 	  fprintf (dump_file, "Recording immediate store from stmt:\n");
4221 	  print_gimple_stmt (dump_file, stmt, 0);
4222 	}
4223       (*chain_info)->m_store_info.safe_push (info);
4224       terminate_all_aliasing_chains (chain_info, stmt);
      /* If we reach the limit of stores to merge in a chain, terminate
	 and process the chain now.  */
4227       if ((*chain_info)->m_store_info.length ()
4228 	  == (unsigned int) PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
4229 	{
4230 	  if (dump_file && (dump_flags & TDF_DETAILS))
4231 	    fprintf (dump_file,
4232 		     "Reached maximum number of statements to merge:\n");
4233 	  terminate_and_release_chain (*chain_info);
4234 	}
4235       return;
4236     }
4237 
  /* Does this store alias any existing chain?  */
4239   terminate_all_aliasing_chains (NULL, stmt);
4240   /* Start a new chain.  */
4241   struct imm_store_chain_info *new_chain
4242     = new imm_store_chain_info (m_stores_head, base_addr);
4243   info = new store_immediate_info (const_bitsize, const_bitpos,
4244 				   const_bitregion_start,
4245 				   const_bitregion_end,
4246 				   stmt, 0, rhs_code, n, ins_stmt,
4247 				   bit_not_p, ops[0], ops[1]);
4248   new_chain->m_store_info.safe_push (info);
4249   m_stores.put (base_addr, new_chain);
4250   if (dump_file && (dump_flags & TDF_DETAILS))
4251     {
4252       fprintf (dump_file, "Starting new chain with statement:\n");
4253       print_gimple_stmt (dump_file, stmt, 0);
4254       fprintf (dump_file, "The base object is:\n");
4255       print_generic_expr (dump_file, base_addr);
4256       fprintf (dump_file, "\n");
4257     }
4258 }
4259 
4260 /* Entry point for the pass.  Go over each basic block recording chains of
4261    immediate stores.  Upon encountering a terminating statement (as defined
4262    by stmt_terminates_chain_p) process the recorded stores and emit the widened
4263    variants.  */
4264 
4265 unsigned int
4266 pass_store_merging::execute (function *fun)
4267 {
4268   basic_block bb;
4269   hash_set<gimple *> orig_stmts;
4270 
4271   calculate_dominance_info (CDI_DOMINATORS);
4272 
4273   FOR_EACH_BB_FN (bb, fun)
4274     {
4275       gimple_stmt_iterator gsi;
4276       unsigned HOST_WIDE_INT num_statements = 0;
      /* Count the non-debug statements; basic blocks with fewer than
	 two of them cannot contain a mergeable store sequence.  */
4280       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4281 	{
4282 	  if (is_gimple_debug (gsi_stmt (gsi)))
4283 	    continue;
4284 
4285 	  if (++num_statements >= 2)
4286 	    break;
4287 	}
4288 
4289       if (num_statements < 2)
4290 	continue;
4291 
4292       if (dump_file && (dump_flags & TDF_DETAILS))
4293 	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
4294 
4295       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4296 	{
4297 	  gimple *stmt = gsi_stmt (gsi);
4298 
4299 	  if (is_gimple_debug (stmt))
4300 	    continue;
4301 
4302 	  if (gimple_has_volatile_ops (stmt))
4303 	    {
4304 	      /* Terminate all chains.  */
4305 	      if (dump_file && (dump_flags & TDF_DETAILS))
4306 		fprintf (dump_file, "Volatile access terminates "
4307 				    "all chains\n");
4308 	      terminate_and_process_all_chains ();
4309 	      continue;
4310 	    }
4311 
4312 	  if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
4313 	      && !stmt_can_throw_internal (stmt)
4314 	      && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
4315 	    process_store (stmt);
4316 	  else
4317 	    terminate_all_aliasing_chains (NULL, stmt);
4318 	}
4319       terminate_and_process_all_chains ();
4320     }
4321   return 0;
4322 }
4323 
4324 } // anon namespace
4325 
4326 /* Construct and return a store merging pass object.  */
4327 
4328 gimple_opt_pass *
4329 make_pass_store_merging (gcc::context *ctxt)
4330 {
4331   return new pass_store_merging (ctxt);
4332 }
4333 
4334 #if CHECKING_P
4335 
4336 namespace selftest {
4337 
4338 /* Selftests for store merging helpers.  */
4339 
4340 /* Assert that all elements of the byte arrays X and Y, both of length N
4341    are equal.  */
4342 
4343 static void
4344 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
4345 {
4346   for (unsigned int i = 0; i < n; i++)
4347     {
4348       if (x[i] != y[i])
4349 	{
4350 	  fprintf (stderr, "Arrays do not match.  X:\n");
4351 	  dump_char_array (stderr, x, n);
4352 	  fprintf (stderr, "Y:\n");
4353 	  dump_char_array (stderr, y, n);
4354 	}
4355       ASSERT_EQ (x[i], y[i]);
4356     }
4357 }
4358 
/* Test that shift_bytes_in_array carries bits across between
   bytes correctly.  */
4361 
4362 static void
4363 verify_shift_bytes_in_array (void)
4364 {
4365    /* byte 1   | byte 0
4366       00011111 | 11100000.  */
4367   unsigned char orig[2] = { 0xe0, 0x1f };
4368   unsigned char in[2];
4369   memcpy (in, orig, sizeof orig);
4370 
4371   unsigned char expected[2] = { 0x80, 0x7f };
4372   shift_bytes_in_array (in, sizeof (in), 2);
4373   verify_array_eq (in, expected, sizeof (in));
4374 
4375   memcpy (in, orig, sizeof orig);
4376   memcpy (expected, orig, sizeof orig);
4377   /* Check that shifting by zero doesn't change anything.  */
4378   shift_bytes_in_array (in, sizeof (in), 0);
4379   verify_array_eq (in, expected, sizeof (in));
4380 
4381 }
4382 
/* Test that shift_bytes_in_array_right carries bits across between
   bytes correctly.  */
4385 
4386 static void
4387 verify_shift_bytes_in_array_right (void)
4388 {
   /* byte 1   | byte 0
      11100000 | 00011111.  */
4391   unsigned char orig[2] = { 0x1f, 0xe0};
4392   unsigned char in[2];
4393   memcpy (in, orig, sizeof orig);
4394   unsigned char expected[2] = { 0x07, 0xf8};
4395   shift_bytes_in_array_right (in, sizeof (in), 2);
4396   verify_array_eq (in, expected, sizeof (in));
4397 
4398   memcpy (in, orig, sizeof orig);
4399   memcpy (expected, orig, sizeof orig);
4400   /* Check that shifting by zero doesn't change anything.  */
4401   shift_bytes_in_array_right (in, sizeof (in), 0);
4402   verify_array_eq (in, expected, sizeof (in));
4403 }
4404 
/* Test that clear_bit_region clears exactly the bits asked for and
   nothing more.  */
4407 
4408 static void
4409 verify_clear_bit_region (void)
4410 {
4411   /* Start with all bits set and test clearing various patterns in them.  */
4412   unsigned char orig[3] = { 0xff, 0xff, 0xff};
4413   unsigned char in[3];
4414   unsigned char expected[3];
4415   memcpy (in, orig, sizeof in);
4416 
4417   /* Check zeroing out all the bits.  */
4418   clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
4419   expected[0] = expected[1] = expected[2] = 0;
4420   verify_array_eq (in, expected, sizeof in);
4421 
4422   memcpy (in, orig, sizeof in);
4423   /* Leave the first and last bits intact.  */
4424   clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
4425   expected[0] = 0x1;
4426   expected[1] = 0;
4427   expected[2] = 0x80;
4428   verify_array_eq (in, expected, sizeof in);
4429 }
4430 
/* Test that clear_bit_region_be clears exactly the bits asked for and
   nothing more.  */
4433 
4434 static void
4435 verify_clear_bit_region_be (void)
4436 {
4437   /* Start with all bits set and test clearing various patterns in them.  */
4438   unsigned char orig[3] = { 0xff, 0xff, 0xff};
4439   unsigned char in[3];
4440   unsigned char expected[3];
4441   memcpy (in, orig, sizeof in);
4442 
4443   /* Check zeroing out all the bits.  */
4444   clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
4445   expected[0] = expected[1] = expected[2] = 0;
4446   verify_array_eq (in, expected, sizeof in);
4447 
4448   memcpy (in, orig, sizeof in);
4449   /* Leave the first and last bits intact.  */
4450   clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
4451   expected[0] = 0x80;
4452   expected[1] = 0;
4453   expected[2] = 0x1;
4454   verify_array_eq (in, expected, sizeof in);
4455 }
4456 
4457 
4458 /* Run all of the selftests within this file.  */
4459 
4460 void
4461 store_merging_c_tests (void)
4462 {
4463   verify_shift_bytes_in_array ();
4464   verify_shift_bytes_in_array_right ();
4465   verify_clear_bit_region ();
4466   verify_clear_bit_region_be ();
4467 }
4468 
4469 } // namespace selftest
4470 #endif /* CHECKING_P.  */
4471